## This Script Contains the Following Points:
### 1. Importing Libraries & Data
### 2. Data Wrangling
### 3. Reshaping for mMdeling
### 4. Data Split
### 5. Creating Keras Model
### 6. Compiling and Running
### 7. Creating Confusion Matrix
### 8. Keras Model Retrials

# 1. Importing Libraries & Data

In [6]:
# Installing tensorflow
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp311-cp311-macosx_12_0_arm64.whl.metadata (4.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-24.12.23-py2.py3-none-any.whl.metadata (876 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting termcolor>=1.1.0 (from tensorflow)
  Downlo

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Creating folder path to project folder
path = r'/Users/C SaiVishwanath/Desktop/ClimateWins'

In [4]:
# importing unscaled dataset
unscaled = pd.read_csv(os.path.join(path, '01 Data/Original Data/weather_prediction.csv'))

In [5]:
# importing pleasant weather dataset
pleasant = pd.read_csv(os.path.join(path, '01 Data/Original Data/Pleasant_Weather.csv'))

In [6]:
unscaled.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.018,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.9,1.018,0.18,0.3,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.018,0.58,0.0,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.018,0.65,0.14,0,5.4,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


In [7]:
unscaled.shape

(22950, 170)

In [8]:
pleasant.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [9]:
pleasant.shape

(22950, 16)

# 2. Data Wrangling

In [11]:
# To ensure data is structured with correct shape to feed the deep learning model completing the following:

# Dropping 3 weather stations not included in 'pleasant'.
# Removing 2 types of observations (columns) missing multiple entries for most stations.
# Filling in 3 individual observations assuming nearby stations have similar weather.
# Dropping DATE and MONTH from observations and DATE from unscaled df. 
# Checking X shape should be (22950, 135) and y shape should be (22950, 15).
# Exporting dataset as "Cleaned" version.

In [12]:
# Dropping the columns related to Tours, Gdansk and Rome from the unscaled dataset

unscaled = unscaled.drop(['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation', 'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min', 'GDANSK_temp_max',
                        'ROMA_cloud_cover', 'ROMA_wind_speed', 'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
                        'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure', 'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean', 'TOURS_temp_min', 'TOURS_temp_max'], axis=1)

In [13]:
unscaled.shape

(22950, 149)

In [14]:
# Extracting the different observation types

observation_types = ['cloud_cover', 'wind_speed', 'humidity', 'pressure',
                     'global_radiation', 'precipitation', 'snow_depth', 
                     'sunshine', 'temp_mean', 'temp_min', 'temp_max']

In [15]:
# Creating a dictionary to store the count of stations for each observation type
station_counts = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Count the number of stations (i.e., the number of columns) for the current observation type
    station_counts[obs] = len(columns)

# Print the count of stations for each observation type
print("Number of stations covered by each observation type:")
for obs, count in station_counts.items():
    print(f"{obs}: {count} stations")

Number of stations covered by each observation type:
cloud_cover: 14 stations
wind_speed: 9 stations
humidity: 14 stations
pressure: 14 stations
global_radiation: 15 stations
precipitation: 15 stations
snow_depth: 6 stations
sunshine: 15 stations
temp_mean: 15 stations
temp_min: 15 stations
temp_max: 15 stations


In [16]:
# The two columns missing multiple entries for most stations are: wind_speed (only 9 stations) and snow_depth (only 6 stations).
# Dropping columns that end with wind_speed and snow_depth from the dataset

columns_to_drop = unscaled.filter(regex='(_wind_speed|_snow_depth)$').columns
columns_to_drop

Index(['BASEL_wind_speed', 'BASEL_snow_depth', 'DEBILT_wind_speed',
       'DUSSELDORF_wind_speed', 'DUSSELDORF_snow_depth', 'HEATHROW_snow_depth',
       'KASSEL_wind_speed', 'LJUBLJANA_wind_speed', 'MAASTRICHT_wind_speed',
       'MADRID_wind_speed', 'MUNCHENB_snow_depth', 'OSLO_wind_speed',
       'OSLO_snow_depth', 'SONNBLICK_wind_speed', 'VALENTIA_snow_depth'],
      dtype='object')

In [17]:
unscaled = unscaled.drop(columns=columns_to_drop)

In [18]:
unscaled.shape

(22950, 134)

In [19]:
# Looking for remaining missing entry
# Creating a list of all unique station names in the dataset

all_stations = set([col.split('_')[0] for col in unscaled.columns if '_' in col])
all_stations

{'BASEL',
 'BELGRADE',
 'BUDAPEST',
 'DEBILT',
 'DUSSELDORF',
 'HEATHROW',
 'KASSEL',
 'LJUBLJANA',
 'MAASTRICHT',
 'MADRID',
 'MUNCHENB',
 'OSLO',
 'SONNBLICK',
 'STOCKHOLM',
 'VALENTIA'}

In [20]:
observation_types = ['cloud_cover', 'humidity', 'pressure']

missing_stations_by_observation = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Extract station names by removing the observation type from the column names
    station_names = set([col.replace(f'_{obs}', '') for col in columns])
    
    # Identify stations that are in all_stations but missing from the current observation type
    missing_stations = all_stations - station_names
    
    # Store the missing station names in the dictionary
    missing_stations_by_observation[obs] = missing_stations

# Print the missing station names for each observation type
for obs, missing_stations in missing_stations_by_observation.items():
    print(f"\nStations missing from {obs}:")
    if missing_stations:
        for station in missing_stations:
            print(station)
    else:
        print("None")


Stations missing from cloud_cover:
KASSEL

Stations missing from humidity:
STOCKHOLM

Stations missing from pressure:
MUNCHENB


In [21]:
# Finding the position of HEATHROW_temp_max to see where to position the new KASSEL_cloud_cover  (+1 next to it)

unscaled.columns.get_loc('HEATHROW_temp_max')

55

In [22]:
unscaled.columns.get_loc('STOCKHOLM_cloud_cover')

117

In [23]:
unscaled.columns.get_loc('MUNCHENB_humidity')

92

In [24]:
# Inserting new columns into "unscaled" at specific positions.
# Copying data from other existing columns:
# Kassel_cloud_cover with Dusseldorf_cloud_cover
# Stockholm_humidity with Oslo_humidity
# Munchenb_pressure with Basel_pressure

unscaled.insert(56,'KASSEL_cloud_cover', unscaled['DUSSELDORF_cloud_cover'])
unscaled.insert(119, 'STOCKHOLM_humidity', unscaled['OSLO_humidity'])
unscaled.insert(94,'MUNCHENB_pressure',unscaled['BASEL_pressure'])

In [25]:
unscaled.columns.tolist()

['DATE',
 'MONTH',
 'BASEL_cloud_cover',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_sunshine',
 'DUSS

In [26]:
# Dropping unnecessary columns

unscaled.drop(['DATE', 'MONTH'], axis=1, inplace=True)

In [27]:
unscaled.shape

(22950, 135)

In [28]:
# Correct shape achieved

In [29]:
pleasant.drop(columns = 'DATE', inplace = True)

In [30]:
pleasant.shape

(22950, 15)

In [31]:
# Correct shape achieved

In [32]:
# Exporting cleaned dataset

unscaled.to_csv(os.path.join(path, '01 Data/Prepared Data/cleaned.csv'), index=False)

# 3. Reshaping for Modeling

In [34]:
# To ensure the layers can be fed to the deep learning model correctly, completing the following:

# Splitting the observations (X) into 15 groups of 9 types of observations, and (y) should be in 15 groups 
# Checking the final shapes are:  X = (22950, 15, 9) and y = (22950, 15)

In [35]:
clean = pd.read_csv(os.path.join(path, '01 Data/Prepared Data/cleaned.csv'), index_col = False)

In [36]:
clean.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,1,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,6,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,6,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,8,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [37]:
X = clean

In [38]:
X.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,1,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,6,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,6,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,8,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [39]:
Y = pleasant

In [40]:
Y.head()

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [41]:
X.shape

(22950, 135)

In [42]:
# Turning X and y from a df to arrays

X = np.array(X)
Y = np.array(Y)

In [43]:
X = X.reshape(-1,15,9)

In [44]:
# checking shape

X

array([[[  7.    ,   0.85  ,   1.018 , ...,   6.5   ,   0.8   ,
          10.9   ],
        [  1.    ,   0.81  ,   1.0195, ...,   3.7   ,  -0.9   ,
           7.9   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.4   ,  -0.4   ,
           5.1   ],
        ...,
        [  4.    ,   0.73  ,   1.0304, ...,  -5.9   ,  -8.5   ,
          -3.2   ],
        [  5.    ,   0.98  ,   1.0114, ...,   4.2   ,   2.2   ,
           4.9   ],
        [  5.    ,   0.88  ,   1.0003, ...,   8.5   ,   6.    ,
          10.9   ]],

       [[  6.    ,   0.84  ,   1.018 , ...,   6.1   ,   3.3   ,
          10.1   ],
        [  6.    ,   0.84  ,   1.0172, ...,   2.9   ,   2.2   ,
           4.4   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.3   ,   1.4   ,
           3.1   ],
        ...,
        [  6.    ,   0.97  ,   1.0292, ...,  -9.5   , -10.5   ,
          -8.5   ],
        [  5.    ,   0.62  ,   1.0114, ...,   4.    ,   3.    ,
           5.    ],
        [  7.    ,   0.91  ,   1.0007, ...,   8.

# 4. Data Split

In [46]:
# Splitting data into train and test sets

X_train, X_test, Y_train, Y_test = train_test_split(X,Y,random_state = 42)

In [47]:
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(17212, 15, 9) (17212, 15)
(5738, 15, 9) (5738, 15)


# 5. Creating Keras Model

In [49]:
epochs = 30
batch_size = 32
n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(Y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

In [50]:
model.summary()

# 6. Compiling and Running

In [52]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [53]:
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
538/538 - 1s - 2ms/step - accuracy: 0.1597 - loss: 6711.8203
Epoch 2/30
538/538 - 1s - 1ms/step - accuracy: 0.1414 - loss: 59757.5117
Epoch 3/30
538/538 - 1s - 1ms/step - accuracy: 0.1414 - loss: 193704.8594
Epoch 4/30
538/538 - 1s - 1ms/step - accuracy: 0.1382 - loss: 422084.7188
Epoch 5/30
538/538 - 1s - 1ms/step - accuracy: 0.1405 - loss: 785951.9375
Epoch 6/30
538/538 - 1s - 1ms/step - accuracy: 0.1384 - loss: 1284460.0000
Epoch 7/30
538/538 - 1s - 1ms/step - accuracy: 0.1344 - loss: 1900663.1250
Epoch 8/30
538/538 - 1s - 1ms/step - accuracy: 0.1372 - loss: 2696174.0000
Epoch 9/30
538/538 - 1s - 1ms/step - accuracy: 0.1353 - loss: 3596585.5000
Epoch 10/30
538/538 - 1s - 1ms/step - accuracy: 0.1366 - loss: 4701590.0000
Epoch 11/30
538/538 - 1s - 1ms/step - accuracy: 0.1367 - loss: 5901306.5000
Epoch 12/30
538/538 - 1s - 1ms/step - accuracy: 0.1331 - loss: 7287593.0000
Epoch 13/30
538/538 - 1s - 1ms/step - accuracy: 0.1330 - loss: 8919533.0000
Epoch 14/30
538/538 - 1s - 1m

<keras.src.callbacks.history.History at 0x17cc1cbd0>

# 7. Creating Confusion Matrix

In [55]:
# Defining the list of stations 

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

In [56]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [57]:
# Evaluating

print(confusion_matrix(Y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 548us/step
Pred        BASEL  BUDAPEST  DEBILT  HEATHROW  KASSEL  MAASTRICHT  MADRID  \
True                                                                        
BASEL        1458       726     327        10       3         384     213   
BELGRADE      585       273      61         1       0          36      11   
BUDAPEST      121        40      12         0       0           5       2   
DEBILT         35        12      10         0       0           1       0   
DUSSELDORF     10         4       6         0       0           0       0   
HEATHROW       35         3      21         0       0           0       5   
KASSEL          3         4       3         0       0           0       0   
LJUBLJANA      34        15       2         1       0           0       6   
MAASTRICHT      5         1       0         0       0           0       0   
MADRID        191        53      42         6       0          35      80   

# 8. Keras Model Retrials

In [110]:
epochs = 30
batch_size = 16
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(Y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

In [112]:
model.summary()

In [114]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [118]:
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 1s - 793us/step - accuracy: 0.1027 - loss: 256.9754
Epoch 2/30
1076/1076 - 1s - 513us/step - accuracy: 0.1278 - loss: 2114.7234
Epoch 3/30
1076/1076 - 1s - 505us/step - accuracy: 0.1424 - loss: 6771.0864
Epoch 4/30
1076/1076 - 1s - 514us/step - accuracy: 0.1437 - loss: 15563.2510
Epoch 5/30
1076/1076 - 1s - 517us/step - accuracy: 0.1404 - loss: 28021.3809
Epoch 6/30
1076/1076 - 1s - 489us/step - accuracy: 0.1380 - loss: 44682.3008
Epoch 7/30
1076/1076 - 1s - 489us/step - accuracy: 0.1396 - loss: 67023.3516
Epoch 8/30
1076/1076 - 1s - 536us/step - accuracy: 0.1404 - loss: 94656.1641
Epoch 9/30
1076/1076 - 1s - 494us/step - accuracy: 0.1385 - loss: 127959.3516
Epoch 10/30
1076/1076 - 1s - 532us/step - accuracy: 0.1340 - loss: 165738.8281
Epoch 11/30
1076/1076 - 1s - 490us/step - accuracy: 0.1317 - loss: 215610.2812
Epoch 12/30
1076/1076 - 1s - 511us/step - accuracy: 0.1329 - loss: 268283.2812
Epoch 13/30
1076/1076 - 1s - 492us/step - accuracy: 0.1337 - loss: 327731

<keras.src.callbacks.history.History at 0x17cc86d10>

In [120]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [122]:
# Evaluating

print(confusion_matrix(Y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 434us/step
Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                          
BASEL         524      1152       280      76         235       134     142   
BELGRADE        1       821        10      19          76         1      18   
BUDAPEST        0       129         0       7          28         0       7   
DEBILT          0        53         0       2          11         0       7   
DUSSELDORF      0        11         0       1           8         0       1   
HEATHROW        0        24         0       6          10         0       9   
KASSEL          0         8         0       1           0         0       1   
LJUBLJANA       0        22         0       1          11         0       3   
MAASTRICHT      0         3         0       0           2         0       0   
MADRID         10        71         0      29         

In [124]:
# Softmax is not producing good results, trial with tanh, sigmoid, and relu

In [126]:
epochs = 30
batch_size = 16
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(Y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

In [128]:
model.summary()

In [130]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [132]:
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 1s - 953us/step - accuracy: 0.1750 - loss: 28.2067
Epoch 2/30
1076/1076 - 1s - 685us/step - accuracy: 0.1778 - loss: 28.1448
Epoch 3/30
1076/1076 - 1s - 650us/step - accuracy: 0.1782 - loss: 28.1120
Epoch 4/30
1076/1076 - 1s - 637us/step - accuracy: 0.1794 - loss: 28.0137
Epoch 5/30
1076/1076 - 1s - 639us/step - accuracy: 0.1797 - loss: 27.9885
Epoch 6/30
1076/1076 - 1s - 682us/step - accuracy: 0.1802 - loss: 27.9641
Epoch 7/30
1076/1076 - 1s - 635us/step - accuracy: 0.1802 - loss: 27.9388
Epoch 8/30
1076/1076 - 1s - 629us/step - accuracy: 0.1803 - loss: 27.9341
Epoch 9/30
1076/1076 - 1s - 640us/step - accuracy: 0.1803 - loss: 27.9341
Epoch 10/30
1076/1076 - 1s - 627us/step - accuracy: 0.1803 - loss: 27.9341
Epoch 11/30
1076/1076 - 1s - 629us/step - accuracy: 0.1803 - loss: 27.9341
Epoch 12/30
1076/1076 - 1s - 632us/step - accuracy: 0.1803 - loss: 27.9341
Epoch 13/30
1076/1076 - 1s - 632us/step - accuracy: 0.1803 - loss: 27.9341
Epoch 14/30
1076/1076 - 1s - 634us

<keras.src.callbacks.history.History at 0x307d584d0>

In [134]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [138]:
# Evaluating

print(confusion_matrix(Y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 526us/step
Pred        BELGRADE  DEBILT  HEATHROW  KASSEL  MAASTRICHT  MADRID  STOCKHOLM
True                                                                         
BASEL           1180      31         7      21           8     762       1673
BELGRADE         708      49         0       9           1     314         11
BUDAPEST         120      14         0       6           0      74          0
DEBILT            62       4         0       4           0      12          0
DUSSELDORF        18       2         0       1           0       8          0
HEATHROW          30       1         0       4           3      42          2
KASSEL             5       2         0       0           0       4          0
LJUBLJANA          6       3         0       6           0      46          0
MAASTRICHT         1       0         0       0           0       6          2
MADRID            63       3         0       6           1     31

In [140]:
# Better loss but accuracy is low, will try adjusting layers

In [142]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(Y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

In [144]:
model.summary()

In [146]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [148]:
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 1s - 1ms/step - accuracy: 0.3471 - loss: 25.0556
Epoch 2/30
1076/1076 - 1s - 687us/step - accuracy: 0.4555 - loss: 22.9356
Epoch 3/30
1076/1076 - 1s - 669us/step - accuracy: 0.4762 - loss: 22.8567
Epoch 4/30
1076/1076 - 1s - 658us/step - accuracy: 0.4664 - loss: 22.8614
Epoch 5/30
1076/1076 - 1s - 653us/step - accuracy: 0.4619 - loss: 22.8624
Epoch 6/30
1076/1076 - 1s - 693us/step - accuracy: 0.4568 - loss: 22.8624
Epoch 7/30
1076/1076 - 1s - 649us/step - accuracy: 0.4542 - loss: 22.8624
Epoch 8/30
1076/1076 - 1s - 645us/step - accuracy: 0.4529 - loss: 22.8624
Epoch 9/30
1076/1076 - 1s - 640us/step - accuracy: 0.4511 - loss: 22.8633
Epoch 10/30
1076/1076 - 1s - 645us/step - accuracy: 0.4498 - loss: 22.8633
Epoch 11/30
1076/1076 - 1s - 655us/step - accuracy: 0.4477 - loss: 22.8642
Epoch 12/30
1076/1076 - 1s - 647us/step - accuracy: 0.4422 - loss: 22.8642
Epoch 13/30
1076/1076 - 1s - 648us/step - accuracy: 0.4422 - loss: 22.8642
Epoch 14/30
1076/1076 - 1s - 650us/s

<keras.src.callbacks.history.History at 0x311dded10>

In [150]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [152]:
# Evaluating

print(confusion_matrix(Y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 576us/step
Pred        BASEL  BUDAPEST  DEBILT  HEATHROW  MUNCHENB  STOCKHOLM  VALENTIA
True                                                                        
BASEL        2511       510     282        19       118        237         5
BELGRADE      819       206      44         2        21          0         0
BUDAPEST      156        43       8         0         7          0         0
DEBILT         76         6       0         0         0          0         0
DUSSELDORF     24         4       0         0         1          0         0
HEATHROW       62        19       0         0         0          0         1
KASSEL          9         2       0         0         0          0         0
LJUBLJANA      35        22       1         0         3          0         0
MAASTRICHT      3         5       0         0         1          0         0
MADRID        251       161      22         0        15          9         0

In [154]:
# Loss is not as stable but not as low as preferred
# Accuracy is much higher
# Will try with sigmoid and relu

In [156]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(Y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='sigmoid')) # Options: sigmoid, tanh, softmax, relu

In [158]:
model.summary()

In [160]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [162]:
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 1s - 970us/step - accuracy: 0.1401 - loss: 8183.4829
Epoch 2/30
1076/1076 - 1s - 719us/step - accuracy: 0.1471 - loss: 82414.1875
Epoch 3/30
1076/1076 - 1s - 638us/step - accuracy: 0.1445 - loss: 265926.6875
Epoch 4/30
1076/1076 - 1s - 642us/step - accuracy: 0.1469 - loss: 587712.8750
Epoch 5/30
1076/1076 - 1s - 657us/step - accuracy: 0.1375 - loss: 1072629.3750
Epoch 6/30
1076/1076 - 1s - 679us/step - accuracy: 0.1388 - loss: 1684067.5000
Epoch 7/30
1076/1076 - 1s - 625us/step - accuracy: 0.1352 - loss: 2440198.0000
Epoch 8/30
1076/1076 - 1s - 623us/step - accuracy: 0.1347 - loss: 3453624.7500
Epoch 9/30
1076/1076 - 1s - 623us/step - accuracy: 0.1302 - loss: 4595881.5000
Epoch 10/30
1076/1076 - 1s - 623us/step - accuracy: 0.1278 - loss: 5990125.0000
Epoch 11/30
1076/1076 - 1s - 630us/step - accuracy: 0.1310 - loss: 7627711.5000
Epoch 12/30
1076/1076 - 1s - 627us/step - accuracy: 0.1268 - loss: 9572574.0000
Epoch 13/30
1076/1076 - 1s - 622us/step - accuracy: 0.13

<keras.src.callbacks.history.History at 0x311ddead0>

In [164]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [166]:
# Evaluating

print(confusion_matrix(Y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 486us/step
Pred        BASEL
True             
BASEL        3682
BELGRADE     1092
BUDAPEST      214
DEBILT         82
DUSSELDORF     29
HEATHROW       82
KASSEL         11
LJUBLJANA      61
MAASTRICHT      9
MADRID        458
MUNCHENB        8
OSLO            5
STOCKHOLM       4
VALENTIA        1


In [168]:
# Loss and accuracy not as good as they were with tahn
# Will try with relu

In [170]:
epochs = 15
batch_size = 4
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(Y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='relu')) # Options: sigmoid, tanh, softmax, relu

In [172]:
model.summary()

In [174]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [176]:
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/15
4303/4303 - 2s - 541us/step - accuracy: 0.2970 - loss: nan
Epoch 2/15
4303/4303 - 2s - 449us/step - accuracy: 0.6440 - loss: nan
Epoch 3/15
4303/4303 - 2s - 463us/step - accuracy: 0.6440 - loss: nan
Epoch 4/15
4303/4303 - 2s - 450us/step - accuracy: 0.6440 - loss: nan
Epoch 5/15
4303/4303 - 2s - 449us/step - accuracy: 0.6440 - loss: nan
Epoch 6/15
4303/4303 - 2s - 451us/step - accuracy: 0.6440 - loss: nan
Epoch 7/15
4303/4303 - 2s - 458us/step - accuracy: 0.6440 - loss: nan
Epoch 8/15
4303/4303 - 2s - 459us/step - accuracy: 0.6440 - loss: nan
Epoch 9/15
4303/4303 - 2s - 449us/step - accuracy: 0.6440 - loss: nan
Epoch 10/15
4303/4303 - 2s - 449us/step - accuracy: 0.6440 - loss: nan
Epoch 11/15
4303/4303 - 2s - 448us/step - accuracy: 0.6440 - loss: nan
Epoch 12/15
4303/4303 - 2s - 448us/step - accuracy: 0.6440 - loss: nan
Epoch 13/15
4303/4303 - 2s - 448us/step - accuracy: 0.6440 - loss: nan
Epoch 14/15
4303/4303 - 2s - 448us/step - accuracy: 0.6440 - loss: nan
Epoch 15/15
430

<keras.src.callbacks.history.History at 0x307a11650>

In [178]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [180]:
# Evaluating

print(confusion_matrix(Y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 422us/step
Pred        BASEL
True             
BASEL        3682
BELGRADE     1092
BUDAPEST      214
DEBILT         82
DUSSELDORF     29
HEATHROW       82
KASSEL         11
LJUBLJANA      61
MAASTRICHT      9
MADRID        458
MUNCHENB        8
OSLO            5
STOCKHOLM       4
VALENTIA        1
