# Keras Model: Convolution Neural Network (CNN)

### This script contains the following:

#### 1. Importing Libraries and Data
#### 2. Data Wrangling
#### 3. Data Split
#### 4. Keras Model and Confusion Matrices

## 1. Importing Libraries and Data

In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

In [5]:
# Define path
path = r'C:\Users\16307\Desktop\CareerFoundry\Tasks - Machine Learning\ClimateWins Analysis\Data Sets'

In [6]:
# Read in the European weather data
unscaled = pd.read_csv(os.path.join(path, 'Dataset-weather-prediction-dataset-processed.csv'))
unscaled

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.0180,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.0180,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.90,1.0180,0.18,0.30,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.0180,0.58,0.00,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.0180,0.65,0.14,0,5.4,...,3,0.80,1.0328,0.46,0.00,0,5.7,5.7,3.0,8.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,20221027,10,1,2.1,0.79,1.0248,1.34,0.22,0,7.7,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22946,20221028,10,6,2.1,0.77,1.0244,1.34,0.22,0,5.4,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22947,20221029,10,4,2.1,0.76,1.0227,1.34,0.22,0,6.1,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22948,20221030,10,5,2.1,0.80,1.0212,1.34,0.22,0,5.8,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5


In [7]:
# Read in the pleasant weather data
pleasant = pd.read_csv(os.path.join(path, 'Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'), index_col = False)
pleasant

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,20221027,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22946,20221028,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22947,20221029,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22948,20221030,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### 2. Data Wrangling
- Drop 3 weather stations (Tours, Gdansk and Rome) from the unscaled dataset for a total of 15 weather stations.
- Remove 2 observation types (columns): wind_speed and snow_depth
- Fill in three individual observations. Assume nearby stations have similar weather, then pick one to copy the data from (Ljubljana is near Kassel, Sonnblick is near Munchen, and Olso is close enough to Stockholm).

In [9]:
# Drop the columns related to Tours, Gdansk and Rome from the unscaled dataset
unscaled = unscaled.drop(
    ['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation', 'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min', 'GDANSK_temp_max',
    'ROMA_cloud_cover', 'ROMA_wind_speed', 'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
    'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure', 'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean', 'TOURS_temp_min', 'TOURS_temp_max'], axis=1)

In [10]:
# Drop date and month columns for unscaled dataset
unscaled.drop(['DATE', 'MONTH'], axis=1, inplace=True)
unscaled

Unnamed: 0,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,2.1,0.85,1.0180,0.32,0.09,0,0.7,6.5,0.8,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,6,2.1,0.84,1.0180,0.36,1.05,0,1.1,6.1,3.3,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,8,2.1,0.90,1.0180,0.18,0.30,0,0.0,8.5,5.1,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,3,2.1,0.92,1.0180,0.58,0.00,0,4.1,6.3,3.8,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,6,2.1,0.95,1.0180,0.65,0.14,0,5.4,3.0,-0.7,...,3,0.80,1.0328,0.46,0.00,0,5.7,5.7,3.0,8.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,1,2.1,0.79,1.0248,1.34,0.22,0,7.7,15.9,11.4,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22946,6,2.1,0.77,1.0244,1.34,0.22,0,5.4,16.7,14.3,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22947,4,2.1,0.76,1.0227,1.34,0.22,0,6.1,16.7,13.1,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5
22948,5,2.1,0.80,1.0212,1.34,0.22,0,5.8,15.4,11.6,...,5,0.82,1.0142,1.13,0.41,0,3.4,10.7,7.9,13.5


In [11]:
# Extract the different observation types
observation_types = ['cloud_cover', 'wind_speed', 'humidity', 'pressure',
                     'global_radiation', 'precipitation', 'snow_depth', 
                     'sunshine', 'temp_mean', 'temp_min', 'temp_max']

In [12]:
# Create a dictionary to store the count of stations for each observation type
station_counts = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Count the number of stations (i.e., the number of columns) for the current observation type
    station_counts[obs] = len(columns)

# Print the count of stations for each observation type
print("Number of stations covered by each observation type:")
for obs, count in station_counts.items():
    print(f"{obs}: {count} stations")

Number of stations covered by each observation type:
cloud_cover: 14 stations
wind_speed: 9 stations
humidity: 14 stations
pressure: 14 stations
global_radiation: 15 stations
precipitation: 15 stations
snow_depth: 6 stations
sunshine: 15 stations
temp_mean: 15 stations
temp_min: 15 stations
temp_max: 15 stations


- Missing 1 station for each observation type: cloud_cover, humidity, and pressure (imputation)
- Missing 6 stations for wind_speed (drop column)
- Missing 9 stations for snow_depth (drop column)

#### Drop Columns

In [15]:
# Drop wind_speed and snow_depth columns from the dataset because they only have data for 9 stations and 6 stations respectively
columns_to_drop = unscaled.filter(regex='(_wind_speed|_snow_depth)$').columns
columns_to_drop

Index(['BASEL_wind_speed', 'BASEL_snow_depth', 'DEBILT_wind_speed',
       'DUSSELDORF_wind_speed', 'DUSSELDORF_snow_depth', 'HEATHROW_snow_depth',
       'KASSEL_wind_speed', 'LJUBLJANA_wind_speed', 'MAASTRICHT_wind_speed',
       'MADRID_wind_speed', 'MUNCHENB_snow_depth', 'OSLO_wind_speed',
       'OSLO_snow_depth', 'SONNBLICK_wind_speed', 'VALENTIA_snow_depth'],
      dtype='object')

In [16]:
unscaled = unscaled.drop(columns=columns_to_drop)

In [17]:
unscaled.shape

(22950, 132)

#### Imputation

In [19]:
# Create a list of all unique station names in the dataset
all_stations = set([col.split('_')[0] for col in unscaled.columns if '_' in col])
all_stations

{'BASEL',
 'BELGRADE',
 'BUDAPEST',
 'DEBILT',
 'DUSSELDORF',
 'HEATHROW',
 'KASSEL',
 'LJUBLJANA',
 'MAASTRICHT',
 'MADRID',
 'MUNCHENB',
 'OSLO',
 'SONNBLICK',
 'STOCKHOLM',
 'VALENTIA'}

In [20]:
observation_types = ['cloud_cover', 'humidity', 'pressure']

missing_stations_by_observation = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Extract station names by removing the observation type from the column names
    station_names = set([col.replace(f'_{obs}', '') for col in columns])
    
    # Identify stations that are in all_stations but missing from the current observation type
    missing_stations = all_stations - station_names
    
    # Store the missing station names in the dictionary
    missing_stations_by_observation[obs] = missing_stations

# Print the missing station names for each observation type
for obs, missing_stations in missing_stations_by_observation.items():
    print(f"\nStations missing from {obs}:")
    if missing_stations:
        for station in missing_stations:
            print(station)
    else:
        print("None")


Stations missing from cloud_cover:
KASSEL

Stations missing from humidity:
STOCKHOLM

Stations missing from pressure:
MUNCHENB


In [21]:
# Get the positions of nearby columns to see where we need to position the new columns

print(unscaled.columns.get_loc('HEATHROW_temp_max')) # +1
print(unscaled.columns.get_loc('STOCKHOLM_cloud_cover')) # +2
print(unscaled.columns.get_loc('MUNCHENB_humidity')) # +2

53
115
90


In [22]:
# Insert new columns into "unscaled" at specific positions.
# The data for these new columns is copied from other existing columns:
# Kassel_cloud_cover with Ljubljana_cloud_cover
# Stockholm_humidity with Oslo_humidity
# Munchenb_pressure with Sonnblick_pressure

unscaled.insert(54,'KASSEL_cloud_cover', unscaled['LJUBLJANA_cloud_cover'])
unscaled.insert(117, 'STOCKHOLM_humidity', unscaled['OSLO_humidity'])
unscaled.insert(92,'MUNCHENB_pressure',unscaled['SONNBLICK_pressure'])

In [23]:
# Verify corret placement
unscaled.columns.tolist()

['BASEL_cloud_cover',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_sunshine',
 'DUSSELDORF_temp_mean',


In [24]:
unscaled.shape # dataset has the correct shape

(22950, 135)

In [25]:
# Drop date column for pleasant weather dataset
pleasant.drop(columns = 'DATE', inplace = True)
pleasant

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22947,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22948,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
pleasant.shape # dataset has the correct shape

(22950, 15)

In [27]:
# Export cleaned dataset
unscaled.to_csv(os.path.join(path, 'unscaled_cleaned.csv'), index=False)

### 3. Data Split 

In [29]:
X = pd.read_csv(os.path.join(path, 'unscaled_cleaned.csv'), index_col = False)

In [30]:
X

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.0180,0.32,0.09,0.7,6.5,0.8,10.9,1,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.0180,0.36,1.05,1.1,6.1,3.3,10.1,6,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.90,1.0180,0.18,0.30,0.0,8.5,5.1,9.9,6,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.0180,0.58,0.00,4.1,6.3,3.8,10.6,8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.0180,0.65,0.14,5.4,3.0,-0.7,6.0,8,...,4.3,3,0.80,1.0328,0.46,0.00,5.7,5.7,3.0,8.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,1,0.79,1.0248,1.34,0.22,7.7,15.9,11.4,21.4,2,...,14.2,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5
22946,6,0.77,1.0244,1.34,0.22,5.4,16.7,14.3,21.9,0,...,14.3,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5
22947,4,0.76,1.0227,1.34,0.22,6.1,16.7,13.1,22.4,2,...,14.4,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5
22948,5,0.80,1.0212,1.34,0.22,5.8,15.4,11.6,21.1,1,...,12.4,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5


In [31]:
y = pleasant

In [32]:
X.shape

(22950, 135)

In [33]:
y.shape

(22950, 15)

In [34]:
# Turn X and y from a df to arrays
X = np.array(X)
y = np.array(y)

In [35]:
X = X.reshape(-1, 15 ,9)

In [36]:
# Verify shape
X

array([[[  7.    ,   0.85  ,   1.018 , ...,   6.5   ,   0.8   ,
          10.9   ],
        [  1.    ,   0.81  ,   1.0195, ...,   3.7   ,  -0.9   ,
           7.9   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.4   ,  -0.4   ,
           5.1   ],
        ...,
        [  4.    ,   0.73  ,   1.0304, ...,  -5.9   ,  -8.5   ,
          -3.2   ],
        [  5.    ,   0.98  ,   1.0114, ...,   4.2   ,   2.2   ,
           4.9   ],
        [  5.    ,   0.88  ,   1.0003, ...,   8.5   ,   6.    ,
          10.9   ]],

       [[  6.    ,   0.84  ,   1.018 , ...,   6.1   ,   3.3   ,
          10.1   ],
        [  6.    ,   0.84  ,   1.0172, ...,   2.9   ,   2.2   ,
           4.4   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.3   ,   1.4   ,
           3.1   ],
        ...,
        [  6.    ,   0.97  ,   1.0292, ...,  -9.5   , -10.5   ,
          -8.5   ],
        [  5.    ,   0.62  ,   1.0114, ...,   4.    ,   3.    ,
           5.    ],
        [  7.    ,   0.91  ,   1.0007, ...,   8.

In [37]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [38]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212, 15)
(5738, 15, 9) (5738, 15)


### 4. Keras Model and Confusion Matrices

In [40]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

#### Softmax Activation Type (Starting Point)

In [41]:
# softmax activation type

epochs = 30
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax

In [42]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [43]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 4s - 4ms/step - accuracy: 0.0885 - loss: 4987.0640
Epoch 2/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1160 - loss: 57739.6758
Epoch 3/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1247 - loss: 183645.0625
Epoch 4/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1251 - loss: 400452.7812
Epoch 5/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1281 - loss: 729672.1250
Epoch 6/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1271 - loss: 1130762.1250
Epoch 7/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1286 - loss: 1655683.0000
Epoch 8/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1291 - loss: 2306743.0000
Epoch 9/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1299 - loss: 3029225.0000
Epoch 10/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1287 - loss: 3962931.7500
Epoch 11/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1296 - loss: 4993978.0000
Epoch 12/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1236 - loss: 6275000.0000
Epoch 13/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1272 - loss: 7609879.5000
Epo

<keras.src.callbacks.history.History at 0x2227cd06250>

In [45]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Pred        BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                   
BASEL             26       483      95          30       605     198   
BELGRADE           1       161      16           0       128      10   
BUDAPEST           0        24       1           0        26       0   
DEBILT             0         8       0           0        12       1   
DUSSELDORF         0         2       0           0         8       0   
HEATHROW           0         1       1           0        18       2   
KASSEL             0         3       0           0         2       1   
LJUBLJANA          0        13       1           0         4       0   
MAASTRICHT         0         1       0           0         2       0   
MADRID             0        15       5           0        95      12   
MUNCHENB           0         2       0           0         0 

#### Softmax Activation Type (2nd test)

In [46]:
# softmax activation type with new hyperparameters

epochs = 30
batch_size = 32
n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax

In [47]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [48]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
538/538 - 3s - 6ms/step - accuracy: 0.1096 - loss: 5969.6279
Epoch 2/30
538/538 - 1s - 2ms/step - accuracy: 0.1272 - loss: 59415.4766
Epoch 3/30
538/538 - 1s - 3ms/step - accuracy: 0.1403 - loss: 205887.9062
Epoch 4/30
538/538 - 1s - 2ms/step - accuracy: 0.1362 - loss: 467178.4062
Epoch 5/30
538/538 - 1s - 2ms/step - accuracy: 0.1379 - loss: 812838.0625
Epoch 6/30
538/538 - 1s - 2ms/step - accuracy: 0.1324 - loss: 1341529.2500
Epoch 7/30
538/538 - 1s - 2ms/step - accuracy: 0.1292 - loss: 1964332.2500
Epoch 8/30
538/538 - 1s - 3ms/step - accuracy: 0.1286 - loss: 2687705.2500
Epoch 9/30
538/538 - 1s - 2ms/step - accuracy: 0.1265 - loss: 3626188.5000
Epoch 10/30
538/538 - 1s - 2ms/step - accuracy: 0.1229 - loss: 4576819.0000
Epoch 11/30
538/538 - 1s - 2ms/step - accuracy: 0.1208 - loss: 5808037.0000
Epoch 12/30
538/538 - 1s - 2ms/step - accuracy: 0.1221 - loss: 7097421.0000
Epoch 13/30
538/538 - 1s - 3ms/step - accuracy: 0.1146 - loss: 8541833.0000
Epoch 14/30
538/538 - 1s - 2m

<keras.src.callbacks.history.History at 0x2220835e950>

In [50]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Pred        BELGRADE  DEBILT  DUSSELDORF  HEATHROW  KASSEL  MAASTRICHT  \
True                                                                     
BASEL           1345      43          18       544     576          51   
BELGRADE         565       1           0       163     161           1   
BUDAPEST          75       1           0        39      34           1   
DEBILT            27       0           0        12      22           0   
DUSSELDORF         5       0           0         8       4           0   
HEATHROW          10       2           0        20      18           0   
KASSEL             5       0           0         2       3           0   
LJUBLJANA         24       0           0        10       7           0   
MAASTRICHT         3       0           0         2       1           0   
MADRID            90       4           0       126      69           0   
MUNCHENB           5       0         

#### Sigmoid Activation Type (3rd test)

In [51]:
# sigmoid activation type

epochs = 30
batch_size = 32
n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='sigmoid')) # Options: sigmoid, tanh, softmax

In [52]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [53]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
538/538 - 3s - 6ms/step - accuracy: 0.1014 - loss: 4949.0942
Epoch 2/30
538/538 - 1s - 2ms/step - accuracy: 0.1139 - loss: 50004.9648
Epoch 3/30
538/538 - 1s - 2ms/step - accuracy: 0.1165 - loss: 168698.8438
Epoch 4/30
538/538 - 1s - 2ms/step - accuracy: 0.1180 - loss: 376577.6562
Epoch 5/30
538/538 - 1s - 2ms/step - accuracy: 0.1279 - loss: 688947.9375
Epoch 6/30
538/538 - 1s - 2ms/step - accuracy: 0.1291 - loss: 1095589.0000
Epoch 7/30
538/538 - 1s - 2ms/step - accuracy: 0.1250 - loss: 1642799.1250
Epoch 8/30
538/538 - 1s - 2ms/step - accuracy: 0.1218 - loss: 2288133.2500
Epoch 9/30
538/538 - 1s - 2ms/step - accuracy: 0.1214 - loss: 3067105.0000
Epoch 10/30
538/538 - 1s - 2ms/step - accuracy: 0.1196 - loss: 4052366.5000
Epoch 11/30
538/538 - 1s - 2ms/step - accuracy: 0.1207 - loss: 5020673.5000
Epoch 12/30
538/538 - 1s - 2ms/step - accuracy: 0.1165 - loss: 6155960.0000
Epoch 13/30
538/538 - 1s - 2ms/step - accuracy: 0.1174 - loss: 7573433.5000
Epoch 14/30
538/538 - 1s - 2m

<keras.src.callbacks.history.History at 0x2220b126510>

In [55]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Pred        BASEL
True             
BASEL        3682
BELGRADE     1092
BUDAPEST      214
DEBILT         82
DUSSELDORF     29
HEATHROW       82
KASSEL         11
LJUBLJANA      61
MAASTRICHT      9
MADRID        458
MUNCHENB        8
OSLO            5
STOCKHOLM       4
VALENTIA        1


#### Tanh Activation Type (4th test)

In [56]:
# tanh activation type

epochs = 30
batch_size = 32
n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax

In [57]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [58]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
538/538 - 3s - 6ms/step - accuracy: 0.0235 - loss: 25.1596
Epoch 2/30
538/538 - 1s - 2ms/step - accuracy: 0.0291 - loss: 25.5718
Epoch 3/30
538/538 - 1s - 2ms/step - accuracy: 0.0379 - loss: 27.1401
Epoch 4/30
538/538 - 1s - 2ms/step - accuracy: 0.0378 - loss: 27.1373
Epoch 5/30
538/538 - 1s - 2ms/step - accuracy: 0.0375 - loss: 27.1363
Epoch 6/30
538/538 - 1s - 2ms/step - accuracy: 0.0370 - loss: 27.1363
Epoch 7/30
538/538 - 1s - 2ms/step - accuracy: 0.0369 - loss: 27.1354
Epoch 8/30
538/538 - 1s - 2ms/step - accuracy: 0.0363 - loss: 27.1326
Epoch 9/30
538/538 - 1s - 2ms/step - accuracy: 0.0360 - loss: 27.1326
Epoch 10/30
538/538 - 1s - 2ms/step - accuracy: 0.0357 - loss: 27.1326
Epoch 11/30
538/538 - 1s - 2ms/step - accuracy: 0.0358 - loss: 27.1316
Epoch 12/30
538/538 - 1s - 2ms/step - accuracy: 0.0342 - loss: 27.1307
Epoch 13/30
538/538 - 1s - 2ms/step - accuracy: 0.0332 - loss: 27.1260
Epoch 14/30
538/538 - 1s - 2ms/step - accuracy: 0.0325 - loss: 27.1223
Epoch 15/30
538

<keras.src.callbacks.history.History at 0x2220b169510>

In [60]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Pred        BELGRADE  BUDAPEST  HEATHROW  MUNCHENB  SONNBLICK
True                                                         
BASEL             16      1257      2377        24          8
BELGRADE           9       695       388         0          0
BUDAPEST           0       136        78         0          0
DEBILT             0        77         5         0          0
DUSSELDORF         0        24         5         0          0
HEATHROW           0        43        39         0          0
KASSEL             0         8         3         0          0
LJUBLJANA          0        16        45         0          0
MAASTRICHT         0         1         8         0          0
MADRID             0        69       389         0          0
MUNCHENB           0         0         8         0          0
OSLO               0         2         3         0          0
STOCKHOLM          0         4         0         0       