#  Keras layered model: RNN / LSTM model

## Table of Contents

#### 1. Importing Libraries and Data
#### 2. Data Wrangling
#### 3. Reshaping for modeling
#### 4. Data Split
#### 5. Creating Keras Model
#### 6. Compiling and Running
#### 7. Creating Confusion Matrix

## 01. Importing Libraries and Data

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

In [2]:
### Import Data
# Define a path for easier importing 
path = r'C:\Users\DELL\Documents\Climatewins\Data'

In [3]:
# Set display options to show all columns

pd.set_option('display.max_columns', None)

In [4]:
# Import climate_cleaned data

climate = pd.read_csv(os.path.join(path, 'Prepared/climate_cleaned.csv'), index_col = False)

In [5]:
### Import the  weather data
weather = pd.read_csv(os.path.join(path, 'Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'))

In [6]:
climate.shape

(22950, 135)

In [7]:
weather.shape

(22950, 16)

## 02. Data Wrangling

In [8]:
# Drop DATE column from answers

weather.drop(columns = 'DATE', inplace = True)

In [9]:
weather.shape # predictions dataset has the correct shape

(22950, 15)

In [10]:
climate.shape # observations dataset has the correct shape

(22950, 135)

## 03. Reshaping for modeling
- Ensure the layers can be fed to the deep learning model correctly.
- You’ll need to split the observations (X) into 15 groups of 9 types of observations, and your labels (y) should also be in 15 groups (it doesn’t need to be transformed or reshaped).
- The final shapes should be X = (22950, 15, 9) and y = (22950, 15).

In [11]:
X = climate

In [12]:
y = weather

In [13]:
# Turn X and y from a df to arrays

X = np.array(X)
y = np.array(y)

In [14]:
X = X.reshape(-1,15,9)

In [15]:
# Verify shape
X

array([[[  7.    ,   0.85  ,   1.018 , ...,   6.5   ,   0.8   ,
          10.9   ],
        [  1.    ,   0.81  ,   1.0195, ...,   3.7   ,  -0.9   ,
           7.9   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.4   ,  -0.4   ,
           5.1   ],
        ...,
        [  4.    ,   0.73  ,   1.0304, ...,  -5.9   ,  -8.5   ,
          -3.2   ],
        [  5.    ,   0.98  ,   1.0114, ...,   4.2   ,   2.2   ,
           4.9   ],
        [  5.    ,   0.88  ,   1.0003, ...,   8.5   ,   6.    ,
          10.9   ]],

       [[  6.    ,   0.84  ,   1.018 , ...,   6.1   ,   3.3   ,
          10.1   ],
        [  6.    ,   0.84  ,   1.0172, ...,   2.9   ,   2.2   ,
           4.4   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.3   ,   1.4   ,
           3.1   ],
        ...,
        [  6.    ,   0.97  ,   1.0292, ...,  -9.5   , -10.5   ,
          -8.5   ],
        [  5.    ,   0.62  ,   1.0114, ...,   4.    ,   3.    ,
           5.    ],
        [  7.    ,   0.91  ,   1.0007, ...,   8.

## 04. Data Split

In [16]:
# Split data into train and test sets

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [17]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212, 15)
(5738, 15, 9) (5738, 15)


## 05. Creating Keras Model

In [18]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(MaxPooling1D())
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='tanh')) # Don't use relu here!

In [19]:
model.summary()

## 06. Compiling and Running

In [20]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [21]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 22s - 20ms/step - accuracy: 0.0782 - loss: 24.5252
Epoch 2/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0658 - loss: 24.4837
Epoch 3/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0881 - loss: 25.0586
Epoch 4/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0736 - loss: 24.7445
Epoch 5/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0511 - loss: 25.1092
Epoch 6/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0510 - loss: 24.8289
Epoch 7/30
1076/1076 - 12s - 11ms/step - accuracy: 0.0267 - loss: 24.5668
Epoch 8/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0280 - loss: 24.8879
Epoch 9/30
1076/1076 - 12s - 11ms/step - accuracy: 0.0130 - loss: 24.8415
Epoch 10/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0107 - loss: 25.0344
Epoch 11/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0078 - loss: 24.7568
Epoch 12/30
1076/1076 - 21s - 19ms/step - accuracy: 0.0098 - loss: 24.5851
Epoch 13/30
1076/1076 - 13s - 12ms/step - accuracy: 0.0150 - loss: 24.6720
Epoch 14/30
1076/1076 - 13s - 12ms

<keras.src.callbacks.history.History at 0x1a052a23a50>

## 07. Creating Confusion Matrix

In [22]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

In [23]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [24]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step
Pred        DEBILT  DUSSELDORF  MAASTRICHT
True                                      
BASEL           26           5        3651
BELGRADE         0           0        1092
BUDAPEST         0           0         214
DEBILT           0           0          82
DUSSELDORF       0           0          29
HEATHROW         0           0          82
KASSEL           0           0          11
LJUBLJANA        0           0          61
MAASTRICHT       0           0           9
MADRID           0           0         458
MUNCHENB         0           0           8
OSLO             0           0           5
STOCKHOLM        0           0           4
VALENTIA         0           0           1
