## Import Libraries and Data

In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout, LSTM
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [10]:
path = r'C:\Users\sstorer\OneDrive\ML Specialization'

In [12]:
climate = pd.read_csv(os.path.join(path, 'Data Sets', 'weather.csv'))

In [14]:
pleasantweather = pd.read_csv(os.path.join(path, 'Data Sets', 'Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'))

## Data Cleaning for Deep Learning

In [17]:
climate.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.018,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.9,1.018,0.18,0.3,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.018,0.58,0.0,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.018,0.65,0.14,0,5.4,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


In [19]:
pleasantweather.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [25]:
climate.shape

(22950, 168)

In [27]:
pleasantweather.shape

(22950, 16)

In [21]:
# Drop date and month columns from climate
climate = climate.drop(columns=['DATE', 'MONTH'], axis=1)
climate.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,2.1,0.85,1.018,0.32,0.09,0,0.7,6.5,0.8,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,6.1,3.3,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,8,2.1,0.9,1.018,0.18,0.3,0,0.0,8.5,5.1,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,3,2.1,0.92,1.018,0.58,0.0,0,4.1,6.3,3.8,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,6,2.1,0.95,1.018,0.65,0.14,0,5.4,3.0,-0.7,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


In [29]:
# Drop date column from pleasantweather
pleasantweather = pleasantweather.drop(columns=['DATE'], axis=1)
pleasantweather.head()

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [31]:
# Drop all Gdansk, Roma,Tours columns from climate since they are not included in pleasant weather data
cols_to_drop = [col for col in climate.columns if col.startswith(('GDANSK', 'ROMA', 'TOURS'))]
climate = climate.drop(columns=cols_to_drop)

In [33]:
climate.shape

(22950, 147)

In [35]:
# Trying to find out all the different measurement types for each location

# Extract location names 
locations = set([col.split('_')[0] for col in climate.columns])

# Create a dictionary to store measurement counts for each location
measurement_counts = {location: {} for location in locations}

# Count occurrences of each measurement type for each location
for col in climate.columns:
    parts = col.split('_') 
    location = parts[0] 
    measurement = '_'.join(parts[1:])  # Join remaining parts if there are more than two

    if measurement not in measurement_counts[location]:
        measurement_counts[location][measurement] = 1
    else:
        measurement_counts[location][measurement] += 1

# Print the measurement counts for each location
for location, measurements in measurement_counts.items():
    print(f"Location: {location}")
    for measurement, count in measurements.items():
        print(f"  - {measurement}: {count}")
    print()

Location: DUSSELDORF
  - cloud_cover: 1
  - wind_speed: 1
  - humidity: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - snow_depth: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: STOCKHOLM
  - cloud_cover: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: MADRID
  - cloud_cover: 1
  - wind_speed: 1
  - humidity: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: BASEL
  - cloud_cover: 1
  - wind_speed: 1
  - humidity: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - snow_depth: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: LJUBLJANA
  - cloud_cover: 1
  - wind_speed: 1
  - humidity: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Loca

In [37]:
# Drop columns for wind_speed and snow_depth measurements since they are missing so often

# Create a list of columns to drop
cols_to_drop = [col for col in climate.columns if col.endswith(('wind_speed', 'snow_depth'))]

# Dropping
climate = climate.drop(columns=cols_to_drop)

In [39]:
# There are missing measurements for Kassel's cloud cover, Stockholm's humidity, and Munchenb's pressure
# We know that Ljubljana is near Kassel, Sonnblick is near Munchenb, and Olso is close enough to Stockholm

# Define relationships between locations
location_pairs = {
    'KASSEL': 'LJUBLJANA',
    'STOCKHOLM': 'OSLO',
    'MUNCHENB': 'SONNBLICK'
}

# Define the desired order of measurements
measurement_order = ['cloud_cover', 'humidity', 'pressure', 'global_radiation', 
                     'precipitation', 'sunshine', 'temp_mean', 'temp_min', 'temp_max']

# Function to fill missing values and insert in correct position
def fill_missing_values(climate, location, measurement, neighbor):
    """
    Fills missing values for a given location and measurement using data from a neighbor location.
    Inserts the new column in the correct position based on the measurement order.

    Args:
        climate: The DataFrame containing the weather data.
        location: The location with missing values.
        measurement: The measurement with missing values.
        neighbor: The neighboring location to use for filling.

    Returns:
        The updated DataFrame with filled missing values and columns in the correct order.
    """
    source_col = f'{neighbor}_{measurement}'
    target_col = f'{location}_{measurement}'

    # Determine the insertion index 
    if measurement == measurement_order[0]:  # If it's the first measurement for the location
        # Find the index of the first column for the location (or 0 if no location columns exist)
        location_columns = [col for col in climate.columns if col.startswith(location)]
        if location_columns:
            insert_index = climate.columns.get_loc(location_columns[0]) 
        else:
            insert_index = 0
    else:
        insert_index = climate.columns.get_loc(f'{location}_{measurement_order[measurement_order.index(measurement) - 1]}') + 1 

    # Create the new column with missing values and insert it at the correct position
    climate.insert(insert_index, target_col, np.nan) 

    # Fill missing values in the new column
    climate[target_col].fillna(climate[source_col], inplace=True) 

    return climate

# Fill missing values for each location and measurement
for location, neighbor in location_pairs.items():
    for measurement in measurement_order:
        if f'{location}_{measurement}' not in climate.columns:  # Check if column already exists
            climate = fill_missing_values(climate, location, measurement, neighbor)

# Checking new columns for existance and location
selected_columns = [col for col in climate.columns if col.startswith(('KASSEL', 'STOCKHOLM', 'MUNCHENB'))]
print(climate[selected_columns])

       KASSEL_cloud_cover  KASSEL_humidity  KASSEL_pressure  \
0                     8.0             0.82           1.0094   
1                     6.0             0.86           1.0086   
2                     8.0             0.91           1.0129   
3                     6.0             0.87           1.0290   
4                     7.0             0.86           1.0262   
...                   ...              ...              ...   
22945                 4.0             0.77           1.0161   
22946                 3.0             0.77           1.0161   
22947                 3.0             0.77           1.0161   
22948                 3.0             0.77           1.0161   
22949                 3.0             0.77           1.0161   

       KASSEL_global_radiation  KASSEL_precipitation  KASSEL_sunshine  \
0                         0.28                  0.48              1.6   
1                         0.12                  0.27              0.0   
2                       

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  climate[target_col].fillna(climate[source_col], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  climate[target_col].fillna(climate[source_col], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object 

In [41]:
climate.shape

(22950, 135)

In [43]:
pleasantweather.shape

(22950, 15)

In [45]:
# Export cleaned weather data
climate.to_csv(os.path.join(path, 'Data Sets', 'weather_cleaned.csv'), index=False)

## Reshaping

In [48]:
# Rename dataframes
X = climate
y = pleasantweather

In [50]:
# Convert to arrays
X = np.array(X)
y = np.array(y)

In [52]:
# Reshaping X as a 3D object
X = X.reshape(-1,15,9)

In [54]:
X.shape

(22950, 15, 9)

In [56]:
y.shape

(22950, 15)

## Splitting Data into Testing and Training Sets

In [59]:
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

## RNN Model

In [62]:
epochs = 20
batch_size = 16
n_hidden = 16

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='sigmoid'))

  super().__init__(**kwargs)


In [64]:
model.summary()

In [66]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [68]:
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - accuracy: 0.1515 - loss: 9.7477 - val_accuracy: 0.0422 - val_loss: 8.7401
Epoch 2/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.1004 - loss: 10.0803 - val_accuracy: 0.0777 - val_loss: 8.9277
Epoch 3/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.0849 - loss: 10.2032 - val_accuracy: 0.0800 - val_loss: 9.1776
Epoch 4/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.0793 - loss: 10.2366 - val_accuracy: 0.0802 - val_loss: 9.1496
Epoch 5/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.0853 - loss: 9.9961 - val_accuracy: 0.0802 - val_loss: 9.1804
Epoch 6/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.0835 - loss: 10.7334 - val_accuracy: 0.0410 - val_loss: 9.6245
Epoc

<keras.src.callbacks.history.History at 0x204d081e830>

### Confusion Matrix of Model Results

In [71]:
# Define list of stations names
stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'
}

In [73]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [75]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Pred        BASEL  BUDAPEST  MADRID
True                               
BASEL        3681         1       0
BELGRADE     1090         2       0
BUDAPEST      212         0       2
DEBILT         82         0       0
DUSSELDORF     29         0       0
HEATHROW       81         0       1
KASSEL         11         0       0
LJUBLJANA      61         0       0
MAASTRICHT      9         0       0
MADRID        455         0       3
MUNCHENB        8         0       0
OSLO            5         0       0
STOCKHOLM       4         0       0
VALENTIA        1         0       0


### Adjusting Hyperparameters

In [78]:
epochs = 30
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='sigmoid'))

  super().__init__(**kwargs)


In [80]:
model.summary()

In [82]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [84]:
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 9ms/step - accuracy: 0.0922 - loss: 10.6418 - val_accuracy: 0.0504 - val_loss: 9.0690
Epoch 2/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0783 - loss: 10.9792 - val_accuracy: 0.0251 - val_loss: 9.4380
Epoch 3/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0800 - loss: 11.0539 - val_accuracy: 0.0385 - val_loss: 9.7108
Epoch 4/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0809 - loss: 11.0800 - val_accuracy: 0.0441 - val_loss: 9.9823
Epoch 5/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0776 - loss: 11.2000 - val_accuracy: 0.0303 - val_loss: 10.4279
Epoch 6/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0747 - loss: 11.5713 - val_accuracy: 0.0425 - val_loss: 10.7741
Epoch 7

<keras.src.callbacks.history.History at 0x204d6ffb7c0>

In [86]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [88]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Pred        BASEL  LJUBLJANA
True                        
BASEL        3682          0
BELGRADE     1091          1
BUDAPEST      214          0
DEBILT         82          0
DUSSELDORF     29          0
HEATHROW       82          0
KASSEL         11          0
LJUBLJANA      61          0
MAASTRICHT      9          0
MADRID        458          0
MUNCHENB        8          0
OSLO            5          0
STOCKHOLM       4          0
VALENTIA        1          0


### Adjusting Hyperparameters and Changing Activation Type

In [91]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='tanh'))

  super().__init__(**kwargs)


In [93]:
model.summary()

In [95]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [97]:
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 9ms/step - accuracy: 0.0847 - loss: 24.8205 - val_accuracy: 0.1727 - val_loss: 25.6061
Epoch 2/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0994 - loss: 25.5715 - val_accuracy: 0.0155 - val_loss: 27.3377
Epoch 3/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.1142 - loss: 25.1129 - val_accuracy: 0.2342 - val_loss: 23.2789
Epoch 4/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0675 - loss: 25.1905 - val_accuracy: 0.1872 - val_loss: 22.8916
Epoch 5/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.1105 - loss: 24.8118 - val_accuracy: 0.1720 - val_loss: 26.9098
Epoch 6/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0531 - loss: 24.5261 - val_accuracy: 0.2121 - val_loss: 24.7811
Epo

<keras.src.callbacks.history.History at 0x204d80219c0>

In [99]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [101]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Pred        DUSSELDORF  KASSEL  STOCKHOLM  VALENTIA
True                                               
BASEL                1     173       3507         1
BELGRADE             0      11       1081         0
BUDAPEST             0       0        214         0
DEBILT               0       0         82         0
DUSSELDORF           0       0         29         0
HEATHROW             0       0         82         0
KASSEL               0       0         11         0
LJUBLJANA            0       0         61         0
MAASTRICHT           0       0          9         0
MADRID               0       1        457         0
MUNCHENB             0       0          8         0
OSLO                 0       0          5         0
STOCKHOLM            0       0          4         0
VALENTIA             0       0          1         0


## CNN Model

In [107]:
epochs = 10
batch_size = 4
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [109]:
model.summary()

In [111]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [113]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1336 - loss: 28208.8418
Epoch 2/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1304 - loss: 245570.5781
Epoch 3/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1346 - loss: 797000.2500
Epoch 4/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1357 - loss: 1824339.7500
Epoch 5/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1368 - loss: 3410285.7500
Epoch 6/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1324 - loss: 5716430.5000
Epoch 7/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1384 - loss: 8799454.0000
Epoch 8/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1364 - loss: 12934822.0000
Epoch 9/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1372 - loss: 17995124.0000
Epoch 10/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1346 - loss: 24199484.0000


<keras.src.callbacks.history.History at 0x204db241db0>

In [115]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [117]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BASEL  BELGRADE  DEBILT  DUSSELDORF  HEATHROW  LJUBLJANA  MADRID  \
True                                                                           
BASEL         259        14      19        1044         2         32    2266   
BELGRADE       72         0       0          14         0          0    1004   
BUDAPEST       11         0       0           1         0          0     201   
DEBILT          5         0       0           0         0          0      77   
DUSSELDORF      1         0       0           0         0          0      28   
HEATHROW        1         0       0           2         0          0      79   
KASSEL          1         0       0           0         0          0      10   
LJUBLJANA       7         0       0           0         0          0      54   
MAASTRICHT      2         0       0           0         0          0       7   
MADRID          4         0       0          

### Adjusting Hyperparameters

In [120]:
epochs = 15
batch_size = 8
n_hidden = 8

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [122]:
model.summary()

In [124]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [126]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/15
2152/2152 - 4s - 2ms/step - accuracy: 0.1117 - loss: 9217.1299
Epoch 2/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1188 - loss: 90244.0234
Epoch 3/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1212 - loss: 299616.3438
Epoch 4/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1232 - loss: 643961.6250
Epoch 5/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1236 - loss: 1182601.1250
Epoch 6/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1262 - loss: 1911028.1250
Epoch 7/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1266 - loss: 2849412.2500
Epoch 8/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1274 - loss: 4057620.0000
Epoch 9/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1296 - loss: 5516591.0000
Epoch 10/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1254 - loss: 7352393.0000
Epoch 11/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1282 - loss: 9489660.0000
Epoch 12/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1289 - loss: 11970567.0000
Epoch 13/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1296 - loss: 14919623.0000


<keras.src.callbacks.history.History at 0x204dd432260>

In [128]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [130]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BASEL  BELGRADE  DEBILT  HEATHROW  KASSEL  LJUBLJANA  MAASTRICHT  \
True                                                                           
BASEL           9      1399      18        87      29        716          78   
BELGRADE        0       837       0         1       0        126           1   
BUDAPEST        0       136       0         1       0         38           0   
DEBILT          0        45       0         0       0         20           0   
DUSSELDORF      0        12       0         0       0          8           0   
HEATHROW        0        20       0         0       0          8           0   
KASSEL          0         9       0         0       0          1           0   
LJUBLJANA       0        18       0         0       0         25           0   
MAASTRICHT      0         3       0         0       0          4           0   
MADRID          0        91       0         3