## Neuronales Netz zur Schätzung der Temperatur
Es werden zwei verschiedene Datentypen verwendet:
Verarbeitung der Zeitreihe (6 Messwerte), Verarbeitung der Mittelwerte über die Wochen
### 1) Laden der Daten

In [1]:
import pandas as pd
from math import ceil

pd.set_option('display.max_columns', 6)

data_url=r'https://github.com/tplusone/hanser_ml_zeitreihen/blob/master/Daten/jena_climate_complete_hourly.csv?raw=true'
df = pd.read_csv(data_url)
df['Date Time'] = pd.to_datetime(df['Date Time'])
df = df.set_index('Date Time')
df.head()

Unnamed: 0_level_0,p (mbar),T (degC),Tpot (K),...,wv (m/s),max. wv (m/s),wd (deg)
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2009-01-01 00:00:00,996.528,-8.304,265.118,...,0.52,1.002,174.46
2009-01-01 01:00:00,996.525,-8.065,265.361667,...,0.316667,0.711667,172.416667
2009-01-01 02:00:00,996.745,-8.763333,264.645,...,0.248333,0.606667,196.816667
2009-01-01 03:00:00,996.986667,-8.896667,264.491667,...,0.176667,0.606667,157.083333
2009-01-01 04:00:00,997.158333,-9.348333,264.026667,...,0.29,0.67,150.093333


### 2) X/y-Daten extrahieren

In [2]:
X = df
y = df[['T (degC)']]
X.shape, y.shape

((70129, 14), (70129, 1))

### 3) Trainings/Test-Daten separieren

In [3]:
## 2. Schritt: Train/Test-Split
train_end = ceil(len(X) * 0.8)
X_train = X.iloc[:train_end]
y_train = y.iloc[:train_end]

X_test = X.iloc[train_end:]
y_test = y.iloc[train_end:]

X_train.shape, X_test.shape

((56104, 14), (14025, 14))

### 4) Datums-Index herauslösen und speichern und Spaltenbezeichnungen speichern

In [4]:
time_train = X_train.index
time_test = X_test.index
cols_train = X_train.columns

### 5) Standardisieren von X- und y-Daten

In [5]:
from sklearn.preprocessing import StandardScaler

scaler_x = StandardScaler()
scaler_x.fit(X_train)
X_train_sd = scaler_x.transform(X_train)
X_test_sd = scaler_x.transform(X_test)

scaler_y = StandardScaler()
scaler_y.fit(y_train)
y_train = scaler_y.transform(y_train)
y_test = scaler_y.transform(y_test)

### 6) Erzeugung de Mittelwertstabelle über die Kalenderwochen

In [6]:
week_means = pd.DataFrame(X_train_sd, 
                    index=time_train,
                    columns=cols_train) 

week_means = week_means \
                .groupby(week_means.index.week) \
                [week_means.columns].mean()
week_means.head().round(3)

Unnamed: 0_level_0,p (mbar),T (degC),Tpot (K),...,wv (m/s),max. wv (m/s),wd (deg)
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.116,-0.236,-0.244,...,0.052,0.044,-0.042
2,0.26,-0.03,-0.051,...,-0.055,-0.07,-0.043
3,0.018,-1.052,-1.048,...,-0.003,-0.04,-0.098
4,-0.374,-1.36,-1.324,...,0.077,-0.015,-0.494
5,-0.178,-0.811,-0.792,...,0.209,0.183,-0.191


### 7) Data Generator

In [7]:
import numpy as np
import random

def data_generator( X: np.array, y: np.array, 
                    mean_tab: pd.DataFrame, y_index: pd.Index, 
                    window=144, horizon=24, season=24, 
                    batch_size=1, epochs=10):
    for epoch in range(epochs):
        X_s, X_m, y_ = [], [], []
        batch_counter = 0
        last_val = len(X) - (window + horizon)
        index_range = list(range(last_val))
        random.shuffle(index_range)
        for idx in index_range:
            season_range = range(window+idx-1, idx-1, -season)
            X_s.append([X[i] for i in season_range])
            y_date = y_index[idx+window+horizon]
            y_mean = mean_tab.loc[y_date.week]
            X_m.append(y_mean.values)
            y_.append(y[idx+window+horizon])
            batch_counter += 1
            if (batch_counter == batch_size or 
                index_range[-1] == idx):
                yield ([np.array(X_s), np.array(X_m)], 
                        np.array(y_))
                X_s, X_m, y_ = [], [], []
                batch_counter = 0


test_gen = data_generator(X_train_sd, y_train, 
                mean_tab=week_means, y_index=time_train)
(x_, xs_), y_ = next(test_gen)
print(x_.shape, xs_.shape, y_.shape)

(1, 6, 14) (1, 14) (1, 1)


### 8) Aufbau des Modells

In [8]:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (Flatten,  
                                    Input, Dense, Dropout,                                     GRU, Bidirectional,
                                    concatenate, GRU)
from tensorflow.keras.regularizers import l2

batch_size = 128
epochs = 50
window = 144
horizon = 24
indicators = 14
num_season = 6

reg = l2(0.0001)

input_sn = Input(shape=(num_season, indicators), name='in_sn')
input_mn = Input(shape=(indicators), name='in_mn') 

sn = GRU(units=32, dropout=0.3, go_backwards=True,
         kernel_regularizer=reg, name='sn_gru')(input_sn)
mn = Dense(units=14, activation='tanh', 
        kernel_regularizer=reg)(input_mn)
mn = Dropout(0.2)(mn)
comp = concatenate([sn, mn], axis=-1)
comp = Dense(units=1, kernel_regularizer=reg)(comp)

model = Model([input_sn, input_mn], comp)
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
in_mn (InputLayer)              [(None, 14)]         0                                            
__________________________________________________________________________________________________
in_sn (InputLayer)              [(None, 6, 14)]      0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 14)           210         in_mn[0][0]                      
__________________________________________________________________________________________________
sn_gru (GRU)                    (None, 32)           4608        in_sn[0][0]                      
______________________________________________________________________________________________

### 9) Callbacks definieren

In [9]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

early = EarlyStopping(monitor='val_loss', patience=10)
check = ModelCheckpoint(filepath='climate_model_seasonal_means.h5', 
                        monitor='val_loss', save_best_only=True)

### 10) Generatoren zum Anlernen organsisieren

In [10]:
steps_train = ceil(len(X_train-(window+horizon))/batch_size)
steps_test = ceil(len(X_test-(window+horizon))/batch_size)

gen_train = data_generator(X_train_sd, y_train, 
                        mean_tab=week_means, 
                        y_index=time_train, 
                        batch_size=batch_size,
                        epochs=epochs)
gen_test = data_generator(X_test_sd, y_test, 
                        mean_tab=week_means, 
                        y_index=time_test,
                        batch_size=batch_size,
                        epochs=epochs)

### 11) Modell anlernen

In [11]:
history = model.fit(gen_train, epochs=epochs, 
            steps_per_epoch=steps_train, 
            validation_data=(gen_test), 
            validation_steps=steps_test,
            callbacks=[early, check], verbose=1)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 439 steps, validate for 110 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50


### 12) Funktion zur Berechnung von y_true, y_pred & simple predicitions über Testdaten

In [12]:
def predictions_vs_true(X_pred, y_true, mean_tab, y_index, 
                        model, scaler, window=144, horizon=24, 
                        season=24, num_pred=24*14):
    X_m_temp = []
    X_s_temp = []
    y_temp = []
    y_temp_simple = []
    if num_pred=='complete':
        num_pred = len(X_pred)-(window+horizon)
    for idx in range(num_pred):
        season_range = range(window+idx, idx, -season)
        X_s_temp.append([X_pred[i] for i in season_range])
        y_date = y_index[idx+window+horizon]
        y_mean = mean_tab.loc[y_date.week]
        X_m_temp.append(y_mean.values)
        y_idx = y_true[idx+window+horizon]
        y_temp.append(y_idx)
        y_temp_simple.append(y_true[idx+window])
    X_m_temp = np.array(X_m_temp)
    X_s_temp = np.array(X_s_temp)
    y_pred = model.predict([X_s_temp, X_m_temp])
    y_pred = scaler.inverse_transform(y_pred)
    y_temp = scaler.inverse_transform(y_temp)
    y_temp_simple = scaler.inverse_transform(y_temp_simple)
    return y_temp, y_pred, y_temp_simple

### 13) Mittlerer absolute Fehler für Modellschätzungen und naives Schätzverfahren erzeugen

In [13]:
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_error

model = load_model('climate_model_seasonal_means.h5')
y_true, y_pred, y_temp_simple = predictions_vs_true(X_test_sd, 
                                    y_test, 
                                    mean_tab=week_means, 
                                    y_index=time_test,
                                    model=model, 
                                    scaler=scaler_y,
                                    num_pred='complete')

print('mae, predictions:', mean_absolute_error(y_true, y_pred))
print('mae, simple pred:', mean_absolute_error(y_true, y_temp_simple))

mae, predictions: 3.811684467660458
mae, simple pred: 4.209516369584565
