# Keras Model: Recurrent Neural Network (RNN)

### This script contains the following:

#### 1. Importing Libraries and Data
#### 2. Data Wrangling
#### 3. Data Split
#### 4. Keras Model and Confusion Matrices

## 1. Importing Libraries and Data

In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

In [5]:
# Define path
path = r'C:\Users\16307\Desktop\CareerFoundry\Tasks - Machine Learning\ClimateWins Analysis\Data Sets'

In [6]:
# Import cleaned data
X = pd.read_csv(os.path.join(path, 'unscaled_cleaned.csv'), index_col = False)

In [7]:
# Read in the pleasant weather data
pleasant = pd.read_csv(os.path.join(path, 'Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'), index_col = False)
pleasant

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,20221027,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22946,20221028,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22947,20221029,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22948,20221030,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [8]:
X.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,1,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,6,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,6,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,8,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [9]:
pleasant.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### 2. Data Wrangling

In [11]:
# Drop date column for pleasant weather dataset
pleasant.drop(columns = 'DATE', inplace = True)

### 3. Data Split 

In [13]:
y = pleasant

In [14]:
X.shape # dataset has the correct shape

(22950, 135)

In [15]:
y.shape # dataset has the correct shape

(22950, 15)

In [16]:
# Turn X and y from a df to arrays
X = np.array(X)
y = np.array(y)

In [17]:
X = X.reshape(-1, 15 ,9)

In [18]:
# Verify shape
X

array([[[  7.    ,   0.85  ,   1.018 , ...,   6.5   ,   0.8   ,
          10.9   ],
        [  1.    ,   0.81  ,   1.0195, ...,   3.7   ,  -0.9   ,
           7.9   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.4   ,  -0.4   ,
           5.1   ],
        ...,
        [  4.    ,   0.73  ,   1.0304, ...,  -5.9   ,  -8.5   ,
          -3.2   ],
        [  5.    ,   0.98  ,   1.0114, ...,   4.2   ,   2.2   ,
           4.9   ],
        [  5.    ,   0.88  ,   1.0003, ...,   8.5   ,   6.    ,
          10.9   ]],

       [[  6.    ,   0.84  ,   1.018 , ...,   6.1   ,   3.3   ,
          10.1   ],
        [  6.    ,   0.84  ,   1.0172, ...,   2.9   ,   2.2   ,
           4.4   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.3   ,   1.4   ,
           3.1   ],
        ...,
        [  6.    ,   0.97  ,   1.0292, ...,  -9.5   , -10.5   ,
          -8.5   ],
        [  5.    ,   0.62  ,   1.0114, ...,   4.    ,   3.    ,
           5.    ],
        [  7.    ,   0.91  ,   1.0007, ...,   8.

In [19]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [20]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212, 15)
(5738, 15, 9) (5738, 15)


### 4. Keras Model and Confusion Matrices

In [22]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

#### Softmax Activation Type (Starting Point)

In [24]:
# softmax activation type

epochs = 30
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(MaxPooling1D())
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax'))

In [25]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [26]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 7s - 6ms/step - accuracy: 0.0908 - loss: 10.0187
Epoch 2/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0989 - loss: 10.3482
Epoch 3/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0987 - loss: 10.6836
Epoch 4/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0974 - loss: 11.0768
Epoch 5/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0939 - loss: 11.5840
Epoch 6/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0918 - loss: 12.1246
Epoch 7/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0855 - loss: 12.5173
Epoch 8/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0855 - loss: 13.1087
Epoch 9/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0830 - loss: 13.5861
Epoch 10/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0801 - loss: 14.1354
Epoch 11/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0781 - loss: 14.6894
Epoch 12/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0780 - loss: 15.2040
Epoch 13/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0713 - loss: 15.6861
Epoch 14/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0702 - l

<keras.src.callbacks.history.History at 0x250402f8e90>

In [27]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Pred        BASEL  BELGRADE  BUDAPEST  HEATHROW  MADRID  MUNCHENB  OSLO
True                                                                   
BASEL           0        22         3         1    1395         0  2261
BELGRADE        0        20        17         0     965         0    90
BUDAPEST        0         4         2         0     198         1     9
DEBILT          0         2         0         0      80         0     0
DUSSELDORF      0         1         0         0      25         0     3
HEATHROW        0         2         0         0      66         1    13
KASSEL          0         0         1         0      10         0     0
LJUBLJANA       0         5         1         0      46         1     8
MAASTRICHT      0         3         0         0       1         0     5
MADRID          1        18         6         0     212         4   217
MUNCHENB        0         0         0         0       4      

#### Softmax Activation Type (2nd test)

In [29]:
# softmax activation type with new hyperparameters

epochs = 30
batch_size = 32
n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(MaxPooling1D())
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax'))

In [30]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [31]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
538/538 - 14s - 26ms/step - accuracy: 0.1013 - loss: 12.3041
Epoch 2/30
538/538 - 10s - 19ms/step - accuracy: 0.1035 - loss: 13.7960
Epoch 3/30
538/538 - 10s - 19ms/step - accuracy: 0.1020 - loss: 15.3491
Epoch 4/30
538/538 - 10s - 19ms/step - accuracy: 0.1017 - loss: 16.3781
Epoch 5/30
538/538 - 11s - 20ms/step - accuracy: 0.1013 - loss: 17.1794
Epoch 6/30
538/538 - 10s - 19ms/step - accuracy: 0.0959 - loss: 18.7908
Epoch 7/30
538/538 - 10s - 19ms/step - accuracy: 0.1021 - loss: 19.6232
Epoch 8/30
538/538 - 10s - 19ms/step - accuracy: 0.0956 - loss: 20.6829
Epoch 9/30
538/538 - 10s - 19ms/step - accuracy: 0.0939 - loss: 21.7530
Epoch 10/30
538/538 - 10s - 19ms/step - accuracy: 0.0992 - loss: 22.8438
Epoch 11/30
538/538 - 10s - 19ms/step - accuracy: 0.0968 - loss: 23.9401
Epoch 12/30
538/538 - 10s - 19ms/step - accuracy: 0.0927 - loss: 25.2506
Epoch 13/30
538/538 - 10s - 19ms/step - accuracy: 0.0943 - loss: 26.5640
Epoch 14/30
538/538 - 10s - 19ms/step - accuracy: 0.0948 - l

<keras.src.callbacks.history.History at 0x25046f8ac10>

In [32]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Pred        BUDAPEST  KASSEL  MADRID  OSLO  STOCKHOLM  VALENTIA
True                                                           
BASEL              0       1    1489    46          1      2145
BELGRADE           0       0    1041    13          1        37
BUDAPEST           0       0     213     1          0         0
DEBILT             0       0      82     0          0         0
DUSSELDORF         0       0      29     0          0         0
HEATHROW           0       0      77     2          0         3
KASSEL             0       0      11     0          0         0
LJUBLJANA          0       0      61     0          0         0
MAASTRICHT         0       0       6     1          0         2
MADRID             1       1     331    38          0        87
MUNCHENB           0       1       7     0          0         0
OSLO               0       0       5     0          0         0
STOCKHOLM          0       0 

#### Sigmoid Activation Type (3rd test)

In [34]:
# sigmoid activation type

epochs = 30
batch_size = 32
n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(MaxPooling1D())
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='sigmoid'))

In [35]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [36]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
538/538 - 14s - 27ms/step - accuracy: 0.1175 - loss: 12.1950
Epoch 2/30
538/538 - 11s - 20ms/step - accuracy: 0.1049 - loss: 13.3608
Epoch 3/30
538/538 - 11s - 20ms/step - accuracy: 0.1128 - loss: 14.3885
Epoch 4/30
538/538 - 12s - 22ms/step - accuracy: 0.1102 - loss: 15.2795
Epoch 5/30
538/538 - 11s - 21ms/step - accuracy: 0.1106 - loss: 16.2752
Epoch 6/30
538/538 - 11s - 20ms/step - accuracy: 0.1128 - loss: 17.3793
Epoch 7/30
538/538 - 11s - 20ms/step - accuracy: 0.1067 - loss: 18.2874
Epoch 8/30
538/538 - 11s - 21ms/step - accuracy: 0.1070 - loss: 19.7069
Epoch 9/30
538/538 - 11s - 20ms/step - accuracy: 0.1015 - loss: 20.5744
Epoch 10/30
538/538 - 11s - 21ms/step - accuracy: 0.1036 - loss: 22.1492
Epoch 11/30
538/538 - 11s - 20ms/step - accuracy: 0.1013 - loss: 22.7938
Epoch 12/30
538/538 - 11s - 20ms/step - accuracy: 0.1032 - loss: 24.0678
Epoch 13/30
538/538 - 13s - 23ms/step - accuracy: 0.1030 - loss: 25.3325
Epoch 14/30
538/538 - 13s - 25ms/step - accuracy: 0.0964 - l

<keras.src.callbacks.history.History at 0x25049052b50>

In [37]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Pred        BASEL  MADRID
True                     
BASEL        3682       0
BELGRADE     1092       0
BUDAPEST      214       0
DEBILT         82       0
DUSSELDORF     29       0
HEATHROW       82       0
KASSEL         11       0
LJUBLJANA      61       0
MAASTRICHT      9       0
MADRID        455       3
MUNCHENB        8       0
OSLO            5       0
STOCKHOLM       4       0
VALENTIA        1       0


#### Tanh Activation Type (4th test)

In [39]:
# tanh activation type

epochs = 30
batch_size = 32
n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(MaxPooling1D())
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='tanh'))

In [40]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [41]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
538/538 - 15s - 29ms/step - accuracy: 0.1486 - loss: 25.1670
Epoch 2/30
538/538 - 11s - 21ms/step - accuracy: 0.0291 - loss: 22.7342
Epoch 3/30
538/538 - 11s - 21ms/step - accuracy: 0.0117 - loss: 25.7377
Epoch 4/30
538/538 - 11s - 20ms/step - accuracy: 0.0293 - loss: 23.7311
Epoch 5/30
538/538 - 12s - 22ms/step - accuracy: 0.0429 - loss: 23.0740
Epoch 6/30
538/538 - 11s - 21ms/step - accuracy: 0.0338 - loss: 22.2165
Epoch 7/30
538/538 - 11s - 21ms/step - accuracy: 0.0366 - loss: 22.6949
Epoch 8/30
538/538 - 11s - 20ms/step - accuracy: 0.0747 - loss: 23.2015
Epoch 9/30
538/538 - 11s - 20ms/step - accuracy: 0.0647 - loss: 23.8382
Epoch 10/30
538/538 - 11s - 20ms/step - accuracy: 0.0655 - loss: 24.1238
Epoch 11/30
538/538 - 11s - 21ms/step - accuracy: 0.0730 - loss: 22.8802
Epoch 12/30
538/538 - 11s - 21ms/step - accuracy: 0.0679 - loss: 22.2949
Epoch 13/30
538/538 - 11s - 21ms/step - accuracy: 0.0670 - loss: 22.3015
Epoch 14/30
538/538 - 11s - 21ms/step - accuracy: 0.0780 - l

<keras.src.callbacks.history.History at 0x25050dc3f50>

In [42]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Pred        BELGRADE
True                
BASEL           3682
BELGRADE        1092
BUDAPEST         214
DEBILT            82
DUSSELDORF        29
HEATHROW          82
KASSEL            11
LJUBLJANA         61
MAASTRICHT         9
MADRID           458
MUNCHENB           8
OSLO               5
STOCKHOLM          4
VALENTIA           1
