## Regendaten Sommer 2018

In [89]:
import pandas as pd

In [90]:
%matplotlib inline

1. Daten laden in df_original

In [103]:
# Load the raw measurements: the CSV is semicolon-separated and "-" entries
# are parsed as missing values (NaN).
df_original = pd.read_csv("RegendatenSommer2018.csv", sep=";", na_values="-")

In [104]:
# Display the rows that contain no missing values.
# NOTE(review): the result is not assigned, so df_original itself still holds
# any rows with NaN — confirm that this is intended.
df_original.dropna()

Unnamed: 0,abbr,time,rre150h0
0,BER,201804010000,1.5
1,BER,201804010100,0.5
2,BER,201804010200,0.5
3,BER,201804010300,0.1
4,BER,201804010400,0.3
5,BER,201804010500,0.0
6,BER,201804010600,0.0
7,BER,201804010700,0.0
8,BER,201804010800,0.1
9,BER,201804010900,0.1


In [105]:
# Preview the first ten rows of the loaded frame.
df_original.head(10)

Unnamed: 0,abbr,time,rre150h0
0,BER,201804010000,1.5
1,BER,201804010100,0.5
2,BER,201804010200,0.5
3,BER,201804010300,0.1
4,BER,201804010400,0.3
5,BER,201804010500,0.0
6,BER,201804010600,0.0
7,BER,201804010700,0.0
8,BER,201804010800,0.1
9,BER,201804010900,0.1


In [106]:
# Count how often each distinct value occurs in the rre150h0 column.
df_original["rre150h0"].value_counts()

0.0     39238
0.1       558
0.2       302
0.3       207
0.4       172
0.5       134
0.7       104
0.6        93
0.8        87
0.9        76
1.2        63
1.0        58
1.1        50
1.3        44
1.6        41
1.5        39
1.4        36
1.7        34
1.8        31
2.8        26
2.2        26
2.4        25
2.1        23
2.3        22
1.9        22
2.0        20
2.5        19
3.0        17
3.7        16
2.9        16
        ...  
18.8        1
11.8        1
16.7        1
10.1        1
17.7        1
13.6        1
15.5        1
19.7        1
14.6        1
8.5         1
16.3        1
14.4        1
16.8        1
21.8        1
21.1        1
18.6        1
20.6        1
11.1        1
21.9        1
33.6        1
9.3         1
6.7         1
9.5         1
15.1        1
27.8        1
21.4        1
12.2        1
8.8         1
14.2        1
31.8        1
Name: rre150h0, Length: 138, dtype: int64

2. Time aufsplitten in separate Spalten

In [107]:
from datetime import datetime  

In [108]:
# Parse the compact YYYYMMDDHHMM values in 'time' into datetimes.
df_original['date'] = pd.to_datetime(df_original['time'], format='%Y%m%d%H%M')

In [109]:
# Split the parsed timestamp into one column per calendar component.
# The tuple order fixes the order in which the new columns are appended.
for component in ('year', 'month', 'day', 'hour', 'minute'):
    df_original[component] = getattr(df_original['date'].dt, component)

3. Plotten

In [110]:
# Summary statistics of the rre150h0 column.
df_original['rre150h0'].describe()

count    41976.000000
mean         0.099350
std          0.827955
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max         46.900000
Name: rre150h0, dtype: float64

## Funktionierender Code, um die Grafiken zu erstellen

In [111]:
from matplotlib import pyplot as plt
stationen = ["BAS", "BER", "CHU", "GVE", "LUZ", "SIO", "SMA", "STG", "LUG"]

# Write one line chart per station and month (April through October).
for station in stationen:
    # Restrict the data to the current station once, outside the month loop.
    df_station = df_original[df_original['abbr'] == station]
    for month in range(4, 11):
        in_month = df_station['month'] == month
        # Fixed y-range (-3 to 47) so that every chart has the same height.
        axes = df_station.loc[in_month, 'rre150h0'].plot(figsize=(20, 7))
        axes.set_ylim(-3, 47)

        # Grab the figure that was just drawn on, save it as EPS and close it
        # so the next month starts on a fresh figure.
        regenfig = plt.gcf()
        regenfig.savefig(f'lines/regenfig_{station}_{month}.eps', format='eps')
        plt.close(regenfig)




## Kalender generieren

CSVs für jeden Standort generieren

In [112]:
# Write one CSV per station holding the per-day sums of rre150h0, rounded to
# one decimal. Each row is: month, day, sum.
# header=False is passed explicitly because the default for Series.to_csv
# differs between pandas versions; the cell that reads these files back
# supplies its own column names and would otherwise parse a header row as data.
# The target directory 'stationen_csv/' is expected to exist already.
for station in stationen:
    daily_sum = (
        df_original[df_original['abbr'] == station]
        .groupby(["month", "day"])["rre150h0"]
        .sum()
        .round(1)
    )
    daily_sum.to_csv('stationen_csv/'+station+'_sum.csv', header=False)

Funktionen

In [113]:
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np

# Inspiriert von https://stackoverflow.com/users/325565/joe-kington
def main(dates, data, station):
    """Render the calendar heatmap for one station and save it as a JPEG.

    Parameters
    ----------
    dates, data
        Passed through unchanged to ``calendar_heatmap``.
    station
        String used to build the output file name
        ``calendar/cal_<station>.jpg``.
    """
    fig, ax = plt.subplots(figsize=(5, 15))
    calendar_heatmap(ax, dates, data)

    out_path = 'calendar/cal_' + station + '.jpg'
    fig.savefig(out_path, format='jpg')
    # Release the figure so repeated calls do not pile up open figures.
    plt.close(fig)

def calendar_array(dates, data):
    """Arrange per-day values on a (weeks x 7) calendar grid.

    Parameters
    ----------
    dates : sequence of date-like objects
        Each element must provide ``toordinal()`` and ``weekday()``
        (``datetime.date`` / ``datetime.datetime`` do).
    data : sequence
        One value per entry in ``dates``.

    Returns
    -------
    i : numpy.ndarray
        Row (week) index of every date, counted from the earliest week.
    j : numpy.ndarray
        Column (weekday) index of every date, Monday = 0 ... Sunday = 6.
    calendar : numpy.ndarray
        Float array of shape ``(number_of_weeks, 7)`` holding ``data`` at
        ``[i, j]`` and NaN everywhere else.

    Raises
    ------
    ValueError
        If ``dates`` is empty.
    """
    j = np.array([d.weekday() for d in dates])
    # Identify each week by the ordinal of its Monday instead of the ISO week
    # number: ISO week numbers restart at year boundaries (2018-12-31 already
    # belongs to week 1 of 2019), which would put late-December days into the
    # same rows as early-January days.
    week_starts = np.array([d.toordinal() - d.weekday() for d in dates])
    i = (week_starts - week_starts.min()) // 7
    ni = i.max() + 1

    calendar = np.full((ni, 7), np.nan)
    calendar[i, j] = data
    return i, j, calendar


def calendar_heatmap(ax, dates, data):
    """Draw ``data`` as a calendar heatmap on ``ax``.

    The values are laid out on a week-by-weekday grid via ``calendar_array``,
    shown with the 'PuBu' colormap on a fixed colour range of 0 to 50, then
    annotated with day numbers and month labels. A colorbar is attached to
    the figure that owns ``ax``.
    """
    rows, cols, grid = calendar_array(dates, data)
    image = ax.imshow(grid, cmap='PuBu', interpolation='none', vmin=0, vmax=50)
    # Both labelling helpers share the same signature; call them in order.
    for annotate in (label_days, label_months):
        annotate(ax, dates, rows, cols, grid)
    ax.figure.colorbar(image)

def label_days(ax, dates, i, j, calendar):
    """Write the day-of-month number into every occupied calendar cell.

    Parameters
    ----------
    ax
        Axes-like object; ``text``, ``set`` and ``xaxis.tick_top`` are called.
    dates
        Sequence of objects with a ``day`` attribute.
    i, j
        Row and column index of every date on the grid.
    calendar
        2-D array whose first dimension gives the number of grid rows.
    """
    n_rows, _ = calendar.shape
    day_grid = np.full((n_rows, 7), np.nan)
    day_grid[i, j] = [d.day for d in dates]

    # Visit the filled cells in row-major order; text() takes x (column)
    # before y (row).
    for row, col in zip(*np.nonzero(np.isfinite(day_grid))):
        ax.text(int(col), int(row), int(day_grid[row, col]),
                ha='center', va='center')

    # Weekday initials along the top edge of the grid.
    weekday_initials = ['M', 'D', 'M', 'D', 'F', 'S', 'S']
    ax.set(xticks=np.arange(7), xticklabels=weekday_initials)
    ax.xaxis.tick_top()

def label_months(ax, dates, i, j, calendar):
    """Label the y-axis with month abbreviations.

    Each month that occurs in ``dates`` gets one tick, placed at the mean row
    index of that month's dates, with a label rotated by 90 degrees.
    ``i`` must be a NumPy array of row indices aligned with ``dates``;
    ``j`` and ``calendar`` are accepted but not used.
    """
    month_labels = np.array(['Jan', 'Feb', 'Mär', 'Apr', 'Mai', 'Jun', 'Jul',
                             'Aug', 'Sep', 'Okt', 'Nov', 'Dez'])
    months = np.array([d.month for d in dates])

    yticks, labels = [], []
    # np.unique yields the occurring month numbers in ascending order.
    for m in np.unique(months):
        yticks.append(i[months == m].mean())
        labels.append(month_labels[m - 1])

    ax.set(yticks=yticks)
    ax.set_yticklabels(labels, rotation=90)



Kalender ausgeben

In [114]:
# Build the date and value lists for each station from its CSV and draw the
# station's calendar.
for station in stationen:
    df_station = pd.read_csv("stationen_csv/"+station+"_sum.csv", sep=",", names = ["m", "d", "r"])
    # Both lists are rebuilt for every station. Initialising them once before
    # the loop would leave the entries of all earlier stations in them, so
    # each calendar would be drawn from an ever-growing list with duplicate
    # dates.
    # The CSV stores only month and day, so the year is fixed to 2018.
    dates = [dt.datetime(2018, int(m), int(d), 0, 0)
             for m, d in zip(df_station['m'], df_station['d'])]
    data = df_station['r'].tolist()
    # ... and draw the calendar.
    main(dates, data, station)

