# Keras Layered Model (CNN)

## Contents:
##### 1. Importing libraries and data¶
##### 2. Data wrangling
##### 3. Data reshaping
##### 4. Splitting training and testing data
##### 5. Creating Keras model
##### 6. Compiling and running the model
##### 7. Creating confusion matrix
##### 8. CNN retrials

### 1. Import libraries and data

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Set display options to show all columns

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
# Create Path

path = r'C:\Users\lisad\OneDrive\Homework\Machine Learning with Python\Achievement 2'

In [4]:
# Import unscaled dataset

unscaled = pd.read_csv(os.path.join(path, 'Data Sets', 'Dataset-weather-prediction-dataset-processed.csv'))

In [5]:
# check shape
unscaled.shape

(22950, 170)

In [6]:
# Import pleasant weather dataset

prediction = pd.read_csv(os.path.join(path, 'Data Sets', 'Pleasant_Weather.csv'))

In [7]:
# check shape
prediction.shape

(22950, 16)

### 2. Data wrangling

In [8]:
# Remove weather stations not included in "pleasant weather" answers

unscaled = unscaled.drop(['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation', 'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min', 'GDANSK_temp_max',
                        'ROMA_cloud_cover', 'ROMA_wind_speed', 'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
                        'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure', 'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean', 'TOURS_temp_min', 'TOURS_temp_max'], axis=1)

In [9]:
# confirm
unscaled.shape

(22950, 149)

In [10]:
unscaled.isnull().sum()

DATE                           0
MONTH                          0
BASEL_cloud_cover              0
BASEL_wind_speed               0
BASEL_humidity                 0
BASEL_pressure                 0
BASEL_global_radiation         0
BASEL_precipitation            0
BASEL_snow_depth               0
BASEL_sunshine                 0
BASEL_temp_mean                0
BASEL_temp_min                 0
BASEL_temp_max                 0
BELGRADE_cloud_cover           0
BELGRADE_humidity              0
BELGRADE_pressure              0
BELGRADE_global_radiation      0
BELGRADE_precipitation         0
BELGRADE_sunshine              0
BELGRADE_temp_mean             0
BELGRADE_temp_min              0
BELGRADE_temp_max              0
BUDAPEST_cloud_cover           0
BUDAPEST_humidity              0
BUDAPEST_pressure              0
BUDAPEST_global_radiation      0
BUDAPEST_precipitation         0
BUDAPEST_sunshine              0
BUDAPEST_temp_mean             0
BUDAPEST_temp_min              0
BUDAPEST_t

In [11]:
# Extract the different observation types

observation_types = ['cloud_cover', 'wind_speed', 'humidity', 'pressure',
                     'global_radiation', 'precipitation', 'snow_depth', 
                     'sunshine', 'temp_mean', 'temp_min', 'temp_max']

In [12]:
# Create a dictionary to store the count of stations for each observation type
station_counts = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Count the number of stations (i.e., the number of columns) for the current observation type
    station_counts[obs] = len(columns)

# Print the count of stations for each observation type
print("Number of stations covered by each observation type:")
for obs, count in station_counts.items():
    print(f"{obs}: {count} stations")


Number of stations covered by each observation type:
cloud_cover: 14 stations
wind_speed: 9 stations
humidity: 14 stations
pressure: 14 stations
global_radiation: 15 stations
precipitation: 15 stations
snow_depth: 6 stations
sunshine: 15 stations
temp_mean: 15 stations
temp_min: 15 stations
temp_max: 15 stations


##### wind_speed and snow_depth are the 2 with multiple missing entries. They will be removed.

In [13]:
# Get a list of columns containing 'wind_speed' or 'snow_depth'

cols_to_drop = [col for col in unscaled.columns if '_wind_speed' in col or '_snow_depth' in col]

# Drop the columns
unscaled = unscaled.drop(cols_to_drop, axis=1)

In [14]:
unscaled.shape 

(22950, 134)

##### There is one missing entry for each of cloud_cover, humidity, and pressure. 

In [15]:
# Find the stations with the above entries missing
# Get all column names
all_columns = unscaled.columns.tolist()
# Exclude 'DATE' and 'MONTH' columns
all_columns = [col for col in all_columns if col not in ['DATE', 'MONTH']]  
# Extract unique weather station names
weather_stations = set()  # Use a set to automatically store only unique values
for col in all_columns:
    station_name = col.split('_')[0]  # Split the column name at the underscore and take the first part
    weather_stations.add(station_name)

# Print the list of weather stations
print(weather_stations)

{'MUNCHENB', 'BELGRADE', 'OSLO', 'SONNBLICK', 'VALENTIA', 'HEATHROW', 'KASSEL', 'BUDAPEST', 'DEBILT', 'BASEL', 'LJUBLJANA', 'MAASTRICHT', 'MADRID', 'STOCKHOLM', 'DUSSELDORF'}


In [16]:
# Find stations missing observation types
observation_types = ['cloud_cover', 'humidity', 'pressure']

missing_stations_by_observation = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Extract station names by removing the observation type from the column names
    station_names = set([col.replace(f'_{obs}', '') for col in columns])
    
    # Identify stations that are in all_stations but missing from the current observation type
    missing_stations = weather_stations - station_names
    
    # Store the missing station names in the dictionary
    missing_stations_by_observation[obs] = missing_stations

# Print the missing station names for each observation type
for obs, missing_stations in missing_stations_by_observation.items():
    print(f"\nStations missing from {obs}:")
    if missing_stations:
        for station in missing_stations:
            print(station)
    else:
        print("None")


Stations missing from cloud_cover:
KASSEL

Stations missing from humidity:
STOCKHOLM

Stations missing from pressure:
MUNCHENB


In [17]:
unscaled.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,BELGRADE_humidity,BELGRADE_pressure,BELGRADE_global_radiation,BELGRADE_precipitation,BELGRADE_sunshine,BELGRADE_temp_mean,BELGRADE_temp_min,BELGRADE_temp_max,BUDAPEST_cloud_cover,BUDAPEST_humidity,BUDAPEST_pressure,BUDAPEST_global_radiation,BUDAPEST_precipitation,BUDAPEST_sunshine,BUDAPEST_temp_mean,BUDAPEST_temp_min,BUDAPEST_temp_max,DEBILT_cloud_cover,DEBILT_humidity,DEBILT_pressure,DEBILT_global_radiation,DEBILT_precipitation,DEBILT_sunshine,DEBILT_temp_mean,DEBILT_temp_min,DEBILT_temp_max,DUSSELDORF_cloud_cover,DUSSELDORF_humidity,DUSSELDORF_pressure,DUSSELDORF_global_radiation,DUSSELDORF_precipitation,DUSSELDORF_sunshine,DUSSELDORF_temp_mean,DUSSELDORF_temp_min,DUSSELDORF_temp_max,HEATHROW_cloud_cover,HEATHROW_humidity,HEATHROW_pressure,HEATHROW_global_radiation,HEATHROW_precipitation,HEATHROW_sunshine,HEATHROW_temp_mean,HEATHROW_temp_min,HEATHROW_temp_max,KASSEL_humidity,KASSEL_pressure,KASSEL_global_radiation,KASSEL_precipitation,KASSEL_sunshine,KASSEL_temp_mean,KASSEL_temp_min,KASSEL_temp_max,LJUBLJANA_cloud_cover,LJUBLJANA_humidity,LJUBLJANA_pressure,LJUBLJANA_global_radiation,LJUBLJANA_precipitation,LJUBLJANA_sunshine,LJUBLJANA_temp_mean,LJUBLJANA_temp_min,LJUBLJANA_temp_max,MAASTRICHT_cloud_cover,MAASTRICHT_humidity,MAASTRICHT_pressure,MAASTRICHT_global_radiation,MAASTRICHT_precipitation,MAASTRICHT_sunshine,MAASTRICHT_temp_mean,MAASTRICHT_temp_min,MAASTRICHT_temp_max,MADRID_cloud_cover,MADRID_humidity,MADRID_pressure,MADRID_global_radiation,MADRID_precipitation,MADRID_sunshine,MADRID_temp_mean,MADRID_temp_min,MADRID_temp_max,MUNCHENB_cloud_cover,MUNCHENB_humidity,MUNCHENB_global_radiation,MUNCHENB_precipitation,MUNCHENB_sunshine,MUNCHENB_temp_mean,MUNCHENB_temp_min,MUNCHENB_temp_max,OSLO_cloud_cover,OSLO_humidity,OSLO_pressure,OSLO_global_radiation,OSLO_precipitation,OSLO_sunshine,OSLO_temp_mean,OSLO_temp_min,OSLO_temp_max,SONNBLICK_cloud_cover,SONNBLICK_humidity,SONNBLICK_pressure,SONNBLICK_global_radiation,SONNBLICK_precipitation,SONNBLICK_sunshine,SONNBLICK_temp_mean,SONNBLICK_temp_min,SONNBLICK_temp_max,STOCKHOLM_cloud_cover,STOCKHOLM_pressure,STOCKHOLM_global_radiation,STOCKHOLM_precipitation,STOCKHOLM_sunshine,STOCKHOLM_temp_mean,STOCKHOLM_temp_min,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,1,0.81,1.0195,0.88,0.0,7.0,3.7,-0.9,7.9,4,0.67,1.017,0.44,0.01,2.3,2.4,-0.4,5.1,7,0.85,1.0032,0.07,0.25,0.0,9.3,7.4,11.0,8,0.83,1.0161,0.12,0.08,0.0,10.0,7.0,11.5,7,0.91,1.001,0.13,0.22,0.0,10.6,9.4,8.3,0.82,1.0094,0.28,0.48,1.6,7.9,3.9,9.4,8,1.0,1.0173,0.2,0.0,0.0,-0.6,-1.9,0.5,7,0.83,1.0063,0.22,0.32,1.0,9.5,8.5,11.1,6,0.92,1.026,0.53,0.0,1.4,7.6,4.4,10.8,5,0.67,0.2,0.1,0.0,6.9,1.1,10.4,8,0.98,0.9978,0.04,1.14,0.0,4.9,3.8,5.9,4,0.73,1.0304,0.48,0.01,2.3,-5.9,-8.5,-3.2,5,1.0114,0.05,0.32,0.0,4.2,2.2,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,19600102,1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,6,0.84,1.0172,0.25,0.0,0.0,2.9,2.2,4.4,4,0.67,1.017,0.18,0.31,0.0,2.3,1.4,3.1,8,0.9,1.0056,0.14,0.06,0.1,7.7,6.4,8.3,8,0.89,1.0161,0.18,0.66,0.5,8.2,7.4,11.0,7,0.98,1.0051,0.13,0.23,0.0,6.1,3.9,10.6,0.86,1.0086,0.12,0.27,0.0,7.7,6.8,9.1,6,0.94,1.0173,0.56,0.13,3.2,2.1,-1.3,5.5,8,0.92,1.0062,0.17,1.34,0.4,8.6,7.5,9.9,7,0.86,1.0254,0.46,0.0,0.9,9.8,7.4,12.2,6,0.72,0.61,0.3,5.1,6.2,4.2,10.2,8,0.62,1.0139,0.04,0.0,0.0,3.4,2.8,4.9,6,0.97,1.0292,0.21,0.61,0.0,-9.5,-10.5,-8.5,5,1.0114,0.05,0.06,0.0,4.0,3.0,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,19600103,1,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,6,0.77,1.0179,0.67,0.0,3.5,3.1,-0.5,6.4,4,0.67,1.017,0.3,0.0,0.6,2.7,1.7,5.3,6,0.92,1.0165,0.28,0.01,3.0,6.8,4.6,9.9,7,0.95,1.0161,0.12,0.07,0.0,7.1,6.9,9.1,8,0.96,1.0166,0.15,0.07,0.1,8.4,6.1,12.2,0.91,1.0129,0.12,0.6,0.0,6.5,6.0,8.0,8,0.96,1.0173,0.2,0.12,0.0,4.6,0.9,6.3,7,0.97,1.0167,0.12,0.46,0.0,6.9,5.5,9.9,5,0.9,1.0287,0.63,0.0,2.3,8.6,6.4,10.8,6,0.91,0.2,0.3,0.0,5.8,4.0,8.0,8,0.69,1.0234,0.04,0.08,0.0,1.9,0.6,3.1,8,0.93,1.032,0.21,3.2,0.0,-9.5,-10.0,-8.9,5,1.0114,0.05,0.02,0.0,2.4,1.3,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,19600104,1,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,8,0.93,1.0268,0.25,0.0,0.0,2.0,-2.0,3.0,4,0.67,1.017,0.19,0.0,0.0,2.0,0.4,4.4,8,0.95,1.0265,0.08,0.09,0.0,6.7,3.6,10.1,8,0.86,1.0161,0.12,0.02,0.0,6.8,3.6,8.0,8,0.98,1.023,0.13,0.0,0.0,9.4,6.7,8.9,0.87,1.029,0.12,0.0,0.0,5.8,5.2,6.5,6,0.94,1.0173,0.49,0.0,2.2,3.2,1.0,7.0,7,0.89,1.0277,0.16,0.0,0.3,7.0,3.0,10.0,0,0.75,1.0281,1.16,0.0,8.7,10.3,4.5,16.1,6,0.9,0.2,0.01,0.0,3.9,3.2,5.4,8,0.98,1.0244,0.04,0.35,0.0,3.0,0.4,4.9,5,0.93,1.0443,0.22,1.1,0.0,-11.5,-12.9,-10.0,5,1.0114,0.05,0.0,0.0,1.2,0.4,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,19600105,1,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,8,0.99,1.0286,0.25,0.06,0.0,2.0,0.7,2.8,4,0.67,1.017,0.19,0.0,0.0,2.5,1.1,5.3,6,0.9,1.0243,0.04,0.39,0.0,8.0,2.4,11.2,7,0.92,1.0161,0.12,0.62,0.0,7.7,6.2,11.0,5,0.84,1.0275,0.3,0.0,2.1,8.9,8.9,7.2,0.86,1.0262,0.13,0.71,0.0,5.4,3.7,6.0,7,0.94,1.0173,0.2,0.0,0.0,3.6,0.4,4.8,7,0.92,1.0259,0.12,0.56,0.0,8.1,2.5,11.1,2,0.64,1.0269,1.1,0.0,7.8,12.1,8.2,16.0,5,0.85,0.65,0.96,5.6,1.8,-3.0,6.0,8,0.96,1.0092,0.05,0.26,0.0,3.7,2.9,4.9,2,0.75,1.043,0.72,0.01,6.1,-9.3,-12.0,-6.5,5,1.0114,0.05,1.32,0.0,3.3,0.8,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [18]:
# Cloud cover is the start of a stations data, Kassel is next to Heathrow, find the position of Heathrow_temp_max for the insertion of Kassel_cloud_cover
unscaled.columns.get_loc('HEATHROW_temp_max')

55

In [19]:
# Find the position for insertion of Stockholm humidity
unscaled.columns.get_loc('STOCKHOLM_cloud_cover') #humidity is 1 after cloud cover so (result +1)

117

In [20]:
# Find position for Munchenb pressure
unscaled.columns.get_loc('MUNCHENB_cloud_cover') # pressure is 2 after cloud cover so (result +2)

91

In [21]:
# Insert new columns into "unscaled" at specific positions.
# The data for these new columns is taken from weather stations they are close to

unscaled.insert(56,'KASSEL_cloud_cover', unscaled['LJUBLJANA_cloud_cover'])
unscaled.insert(119, 'STOCKHOLM_humidity', unscaled['OSLO_humidity'])
unscaled.insert(94,'MUNCHENB_pressure',unscaled['SONNBLICK_pressure'])

In [22]:
unscaled.columns.tolist()

['DATE',
 'MONTH',
 'BASEL_cloud_cover',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_sunshine',
 'DUSS

In [24]:
unscaled.shape

(22950, 137)

In [26]:
# Export cleaned dataset with date and month included

unscaled.to_csv(os.path.join(path, 'Data Sets', 'unscaled_weather_cleaned_withdate.csv'), index=False)

In [27]:
# Drop unnecessary columns
unscaled.drop(['DATE', 'MONTH'], axis=1, inplace=True)

In [28]:
# confirm drop
unscaled.shape

(22950, 135)

In [29]:

prediction.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [30]:
# Export cleaned dataset without date and month

unscaled.to_csv(os.path.join(path, 'Data Sets', 'unscaled_weather_cleaned_withoutdate.csv'), index=False)

In [33]:
# drop unneeded column from pleasant weather dataset
prediction.drop(columns = 'DATE', inplace = True)

In [34]:
prediction.shape

(22950, 15)

### 3. Data reshaping

In [35]:
# Creat an 'X' matrix by reloading and naming our data 'X'
X=pd.read_csv(os.path.join(path, 'Data Sets', 'unscaled_weather_cleaned.csv'), index_col=False)

In [36]:
X.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,BELGRADE_humidity,BELGRADE_pressure,BELGRADE_global_radiation,BELGRADE_precipitation,BELGRADE_sunshine,BELGRADE_temp_mean,BELGRADE_temp_min,BELGRADE_temp_max,BUDAPEST_cloud_cover,BUDAPEST_humidity,BUDAPEST_pressure,BUDAPEST_global_radiation,BUDAPEST_precipitation,BUDAPEST_sunshine,BUDAPEST_temp_mean,BUDAPEST_temp_min,BUDAPEST_temp_max,DEBILT_cloud_cover,DEBILT_humidity,DEBILT_pressure,DEBILT_global_radiation,DEBILT_precipitation,DEBILT_sunshine,DEBILT_temp_mean,DEBILT_temp_min,DEBILT_temp_max,DUSSELDORF_cloud_cover,DUSSELDORF_humidity,DUSSELDORF_pressure,DUSSELDORF_global_radiation,DUSSELDORF_precipitation,DUSSELDORF_sunshine,DUSSELDORF_temp_mean,DUSSELDORF_temp_min,DUSSELDORF_temp_max,HEATHROW_cloud_cover,HEATHROW_humidity,HEATHROW_pressure,HEATHROW_global_radiation,HEATHROW_precipitation,HEATHROW_sunshine,HEATHROW_temp_mean,HEATHROW_temp_min,HEATHROW_temp_max,KASSEL_cloud_cover,KASSEL_humidity,KASSEL_pressure,KASSEL_global_radiation,KASSEL_precipitation,KASSEL_sunshine,KASSEL_temp_mean,KASSEL_temp_min,KASSEL_temp_max,LJUBLJANA_cloud_cover,LJUBLJANA_humidity,LJUBLJANA_pressure,LJUBLJANA_global_radiation,LJUBLJANA_precipitation,LJUBLJANA_sunshine,LJUBLJANA_temp_mean,LJUBLJANA_temp_min,LJUBLJANA_temp_max,MAASTRICHT_cloud_cover,MAASTRICHT_humidity,MAASTRICHT_pressure,MAASTRICHT_global_radiation,MAASTRICHT_precipitation,MAASTRICHT_sunshine,MAASTRICHT_temp_mean,MAASTRICHT_temp_min,MAASTRICHT_temp_max,MADRID_cloud_cover,MADRID_humidity,MADRID_pressure,MADRID_global_radiation,MADRID_precipitation,MADRID_sunshine,MADRID_temp_mean,MADRID_temp_min,MADRID_temp_max,MUNCHENB_cloud_cover,MUNCHENB_humidity,MUNCHENB_pressure,MUNCHENB_global_radiation,MUNCHENB_precipitation,MUNCHENB_sunshine,MUNCHENB_temp_mean,MUNCHENB_temp_min,MUNCHENB_temp_max,OSLO_cloud_cover,OSLO_humidity,OSLO_pressure,OSLO_global_radiation,OSLO_precipitation,OSLO_sunshine,OSLO_temp_mean,OSLO_temp_min,OSLO_temp_max,SONNBLICK_cloud_cover,SONNBLICK_humidity,SONNBLICK_pressure,SONNBLICK_global_radiation,SONNBLICK_precipitation,SONNBLICK_sunshine,SONNBLICK_temp_mean,SONNBLICK_temp_min,SONNBLICK_temp_max,STOCKHOLM_cloud_cover,STOCKHOLM_humidity,STOCKHOLM_pressure,STOCKHOLM_global_radiation,STOCKHOLM_precipitation,STOCKHOLM_sunshine,STOCKHOLM_temp_mean,STOCKHOLM_temp_min,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,1,0.81,1.0195,0.88,0.0,7.0,3.7,-0.9,7.9,4,0.67,1.017,0.44,0.01,2.3,2.4,-0.4,5.1,7,0.85,1.0032,0.07,0.25,0.0,9.3,7.4,11.0,8,0.83,1.0161,0.12,0.08,0.0,10.0,7.0,11.5,7,0.91,1.001,0.13,0.22,0.0,10.6,9.4,8.3,8,0.82,1.0094,0.28,0.48,1.6,7.9,3.9,9.4,8,1.0,1.0173,0.2,0.0,0.0,-0.6,-1.9,0.5,7,0.83,1.0063,0.22,0.32,1.0,9.5,8.5,11.1,6,0.92,1.026,0.53,0.0,1.4,7.6,4.4,10.8,5,0.67,1.0304,0.2,0.1,0.0,6.9,1.1,10.4,8,0.98,0.9978,0.04,1.14,0.0,4.9,3.8,5.9,4,0.73,1.0304,0.48,0.01,2.3,-5.9,-8.5,-3.2,5,0.98,1.0114,0.05,0.32,0.0,4.2,2.2,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,6,0.84,1.0172,0.25,0.0,0.0,2.9,2.2,4.4,4,0.67,1.017,0.18,0.31,0.0,2.3,1.4,3.1,8,0.9,1.0056,0.14,0.06,0.1,7.7,6.4,8.3,8,0.89,1.0161,0.18,0.66,0.5,8.2,7.4,11.0,7,0.98,1.0051,0.13,0.23,0.0,6.1,3.9,10.6,6,0.86,1.0086,0.12,0.27,0.0,7.7,6.8,9.1,6,0.94,1.0173,0.56,0.13,3.2,2.1,-1.3,5.5,8,0.92,1.0062,0.17,1.34,0.4,8.6,7.5,9.9,7,0.86,1.0254,0.46,0.0,0.9,9.8,7.4,12.2,6,0.72,1.0292,0.61,0.3,5.1,6.2,4.2,10.2,8,0.62,1.0139,0.04,0.0,0.0,3.4,2.8,4.9,6,0.97,1.0292,0.21,0.61,0.0,-9.5,-10.5,-8.5,5,0.62,1.0114,0.05,0.06,0.0,4.0,3.0,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,6,0.77,1.0179,0.67,0.0,3.5,3.1,-0.5,6.4,4,0.67,1.017,0.3,0.0,0.6,2.7,1.7,5.3,6,0.92,1.0165,0.28,0.01,3.0,6.8,4.6,9.9,7,0.95,1.0161,0.12,0.07,0.0,7.1,6.9,9.1,8,0.96,1.0166,0.15,0.07,0.1,8.4,6.1,12.2,8,0.91,1.0129,0.12,0.6,0.0,6.5,6.0,8.0,8,0.96,1.0173,0.2,0.12,0.0,4.6,0.9,6.3,7,0.97,1.0167,0.12,0.46,0.0,6.9,5.5,9.9,5,0.9,1.0287,0.63,0.0,2.3,8.6,6.4,10.8,6,0.91,1.032,0.2,0.3,0.0,5.8,4.0,8.0,8,0.69,1.0234,0.04,0.08,0.0,1.9,0.6,3.1,8,0.93,1.032,0.21,3.2,0.0,-9.5,-10.0,-8.9,5,0.69,1.0114,0.05,0.02,0.0,2.4,1.3,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,8,0.93,1.0268,0.25,0.0,0.0,2.0,-2.0,3.0,4,0.67,1.017,0.19,0.0,0.0,2.0,0.4,4.4,8,0.95,1.0265,0.08,0.09,0.0,6.7,3.6,10.1,8,0.86,1.0161,0.12,0.02,0.0,6.8,3.6,8.0,8,0.98,1.023,0.13,0.0,0.0,9.4,6.7,8.9,6,0.87,1.029,0.12,0.0,0.0,5.8,5.2,6.5,6,0.94,1.0173,0.49,0.0,2.2,3.2,1.0,7.0,7,0.89,1.0277,0.16,0.0,0.3,7.0,3.0,10.0,0,0.75,1.0281,1.16,0.0,8.7,10.3,4.5,16.1,6,0.9,1.0443,0.2,0.01,0.0,3.9,3.2,5.4,8,0.98,1.0244,0.04,0.35,0.0,3.0,0.4,4.9,5,0.93,1.0443,0.22,1.1,0.0,-11.5,-12.9,-10.0,5,0.98,1.0114,0.05,0.0,0.0,1.2,0.4,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,8,0.99,1.0286,0.25,0.06,0.0,2.0,0.7,2.8,4,0.67,1.017,0.19,0.0,0.0,2.5,1.1,5.3,6,0.9,1.0243,0.04,0.39,0.0,8.0,2.4,11.2,7,0.92,1.0161,0.12,0.62,0.0,7.7,6.2,11.0,5,0.84,1.0275,0.3,0.0,2.1,8.9,8.9,7.2,7,0.86,1.0262,0.13,0.71,0.0,5.4,3.7,6.0,7,0.94,1.0173,0.2,0.0,0.0,3.6,0.4,4.8,7,0.92,1.0259,0.12,0.56,0.0,8.1,2.5,11.1,2,0.64,1.0269,1.1,0.0,7.8,12.1,8.2,16.0,5,0.85,1.043,0.65,0.96,5.6,1.8,-3.0,6.0,8,0.96,1.0092,0.05,0.26,0.0,3.7,2.9,4.9,2,0.75,1.043,0.72,0.01,6.1,-9.3,-12.0,-6.5,5,0.96,1.0114,0.05,1.32,0.0,3.3,0.8,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [37]:
y = prediction

In [38]:
X.shape

(22950, 135)

In [39]:
# Turn X and y into arrays
X = np.array(X)
y = np.array(y)
X

array([[ 7.    ,  0.85  ,  1.018 , ...,  8.5   ,  6.    , 10.9   ],
       [ 6.    ,  0.84  ,  1.018 , ...,  8.9   ,  5.6   , 12.1   ],
       [ 8.    ,  0.9   ,  1.018 , ..., 10.5   ,  8.1   , 12.9   ],
       ...,
       [ 4.    ,  0.76  ,  1.0227, ..., 10.7   ,  7.9   , 13.5   ],
       [ 5.    ,  0.8   ,  1.0212, ..., 10.7   ,  7.9   , 13.5   ],
       [ 5.    ,  0.84  ,  1.0193, ..., 10.7   ,  7.9   , 13.5   ]],
      shape=(22950, 135))

In [40]:
X = X.reshape(-1,15,9)

In [41]:
# Verify Shape
X.shape

(22950, 15, 9)

In [42]:
# Verify Shape
y.shape

(22950, 15)

In [43]:
X

array([[[  7.    ,   0.85  ,   1.018 , ...,   6.5   ,   0.8   ,
          10.9   ],
        [  1.    ,   0.81  ,   1.0195, ...,   3.7   ,  -0.9   ,
           7.9   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.4   ,  -0.4   ,
           5.1   ],
        ...,
        [  4.    ,   0.73  ,   1.0304, ...,  -5.9   ,  -8.5   ,
          -3.2   ],
        [  5.    ,   0.98  ,   1.0114, ...,   4.2   ,   2.2   ,
           4.9   ],
        [  5.    ,   0.88  ,   1.0003, ...,   8.5   ,   6.    ,
          10.9   ]],

       [[  6.    ,   0.84  ,   1.018 , ...,   6.1   ,   3.3   ,
          10.1   ],
        [  6.    ,   0.84  ,   1.0172, ...,   2.9   ,   2.2   ,
           4.4   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.3   ,   1.4   ,
           3.1   ],
        ...,
        [  6.    ,   0.97  ,   1.0292, ...,  -9.5   , -10.5   ,
          -8.5   ],
        [  5.    ,   0.62  ,   1.0114, ...,   4.    ,   3.    ,
           5.    ],
        [  7.    ,   0.91  ,   1.0007, ...,   8.

### 4. Splitting training and testing data

In [44]:
# Split data into train and test sets

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [45]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212, 15)
(5738, 15, 9) (5738, 15)


### 5. Creating Keras model

In [53]:
epochs = 10
batch_size = 4
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

In [54]:
model.summary()

### 6. Compiling and running the model

In [55]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [56]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/10
4303/4303 - 8s - 2ms/step - accuracy: 0.1286 - loss: 13106.0039
Epoch 2/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1317 - loss: 118195.1328
Epoch 3/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1323 - loss: 382819.1875
Epoch 4/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1334 - loss: 873586.6250
Epoch 5/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1371 - loss: 1616127.1250
Epoch 6/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1369 - loss: 2724686.2500
Epoch 7/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1285 - loss: 4204708.0000
Epoch 8/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1302 - loss: 6051809.5000
Epoch 9/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1336 - loss: 8456878.0000
Epoch 10/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1366 - loss: 11333088.0000


<keras.src.callbacks.history.History at 0x1520be7e490>

### 7. Creating confusion matrix

In [57]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

In [58]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [59]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  HEATHROW  MAASTRICHT  MADRID  \
True                                                                          
BASEL           2      1322        75      18      1650           6     144   
BELGRADE        0       790         0       0       145           0      37   
BUDAPEST        0       123         0       0        52           0      21   
DEBILT          0        38         0       0        33           0       1   
DUSSELDORF      0        11         0       0        13           0       2   
HEATHROW        0        15         0       0        51           0      10   
KASSEL          0         7         0       0         4           0       0   
LJUBLJANA       0        29         0       0        14           0      18   
MAASTRICHT      0         2         0       0         7           0       0   
MADRID          1       104         1       0       206 

### 8. CNN retrials

In [60]:
epochs = 20
batch_size = 8
n_hidden = 8

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

In [61]:
model.summary()

In [62]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [63]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/20
2152/2152 - 4s - 2ms/step - accuracy: 0.1147 - loss: 10313.1494
Epoch 2/20
2152/2152 - 3s - 2ms/step - accuracy: 0.1236 - loss: 90579.9688
Epoch 3/20
2152/2152 - 3s - 2ms/step - accuracy: 0.1318 - loss: 311653.8438
Epoch 4/20
2152/2152 - 3s - 2ms/step - accuracy: 0.1237 - loss: 632103.5000
Epoch 5/20
2152/2152 - 3s - 2ms/step - accuracy: 0.1301 - loss: 1170589.5000
Epoch 6/20
2152/2152 - 4s - 2ms/step - accuracy: 0.1291 - loss: 1914333.3750
Epoch 7/20
2152/2152 - 4s - 2ms/step - accuracy: 0.1317 - loss: 2862101.0000
Epoch 8/20
2152/2152 - 3s - 2ms/step - accuracy: 0.1320 - loss: 4103921.7500
Epoch 9/20
2152/2152 - 3s - 2ms/step - accuracy: 0.1307 - loss: 5585829.5000
Epoch 10/20
2152/2152 - 4s - 2ms/step - accuracy: 0.1301 - loss: 7424013.0000
Epoch 11/20
2152/2152 - 4s - 2ms/step - accuracy: 0.1298 - loss: 9596111.0000
Epoch 12/20
2152/2152 - 4s - 2ms/step - accuracy: 0.1328 - loss: 12271355.0000
Epoch 13/20
2152/2152 - 4s - 2ms/step - accuracy: 0.1274 - loss: 14934322.0000

<keras.src.callbacks.history.History at 0x1520be7f890>

In [64]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [65]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Pred        BELGRADE  DUSSELDORF  HEATHROW  KASSEL  LJUBLJANA  MAASTRICHT  \
True                                                                        
BASEL            214        2018        13      89         19          73   
BELGRADE          79         579         1      13          0           0   
BUDAPEST           7         117         1       0          0           0   
DEBILT             0          69         0       0          0           0   
DUSSELDORF         0          22         0       0          0           0   
HEATHROW           1          46         3       0          0           0   
KASSEL             1           7         0       0          0           0   
LJUBLJANA          5          12         0       0          0           0   
MAASTRICHT         0           6         0       0          0           0   
MADRID             4         135         1       4          0           2   
M

##### Trying with tanh, sigmoid, and relu; keeping epochs, batch size and hidden layers the same.

In [66]:
# tanh
epochs = 20
batch_size = 8
n_hidden = 8

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

In [67]:
model.summary()

In [68]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [69]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/20
2152/2152 - 4s - 2ms/step - accuracy: 0.0507 - loss: 22.9417
Epoch 2/20
2152/2152 - 3s - 2ms/step - accuracy: 0.0528 - loss: 25.6904
Epoch 3/20
2152/2152 - 4s - 2ms/step - accuracy: 0.0446 - loss: 25.8009
Epoch 4/20
2152/2152 - 4s - 2ms/step - accuracy: 0.0357 - loss: 25.8140
Epoch 5/20
2152/2152 - 4s - 2ms/step - accuracy: 0.0357 - loss: 25.8140
Epoch 6/20
2152/2152 - 4s - 2ms/step - accuracy: 0.0356 - loss: 25.8140
Epoch 7/20
2152/2152 - 3s - 2ms/step - accuracy: 0.0356 - loss: 25.8140
Epoch 8/20
2152/2152 - 4s - 2ms/step - accuracy: 0.0356 - loss: 25.8140
Epoch 9/20
2152/2152 - 4s - 2ms/step - accuracy: 0.0356 - loss: 25.8140
Epoch 10/20
2152/2152 - 3s - 2ms/step - accuracy: 0.0357 - loss: 25.8140
Epoch 11/20
2152/2152 - 3s - 2ms/step - accuracy: 0.0357 - loss: 25.8140
Epoch 12/20
2152/2152 - 3s - 2ms/step - accuracy: 0.0359 - loss: 25.8140
Epoch 13/20
2152/2152 - 3s - 2ms/step - accuracy: 0.0358 - loss: 25.8159
Epoch 14/20
2152/2152 - 3s - 2ms/step - accuracy: 0.0359 - l

<keras.src.callbacks.history.History at 0x15201b688a0>

In [70]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [71]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BUDAPEST  DEBILT  KASSEL  MAASTRICHT  MADRID  MUNCHENB  VALENTIA
True                                                                        
BASEL           1554      43     202         676    1128        74         5
BELGRADE         971      15      65          21      19         1         0
BUDAPEST         203       3       7           1       0         0         0
DEBILT            78       1       3           0       0         0         0
DUSSELDORF        26       0       3           0       0         0         0
HEATHROW          61       9      10           2       0         0         0
KASSEL            10       1       0           0       0         0         0
LJUBLJANA         46       3      11           1       0         0         0
MAASTRICHT         2       1       6           0       0         0         0
MADRID           157      25     127         132      17         0         0
M

In [72]:
# sigmoid
epochs = 20
batch_size = 8
n_hidden = 8

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='sigmoid')) # Options: sigmoid, tanh, softmax, relu

In [73]:
model.summary()

In [74]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [75]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6250 - loss: 8055.6646
Epoch 2/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6434 - loss: 82826.2812
Epoch 3/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6433 - loss: 261887.5469
Epoch 4/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6432 - loss: 567314.2500
Epoch 5/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6434 - loss: 1029808.5625
Epoch 6/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6433 - loss: 1637887.0000
Epoch 7/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6434 - loss: 2485490.0000
Epoch 8/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6434 - loss: 3553445.7500
Epoch 9/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6434 - loss: 4816032.0000
Epoch 10/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6434 - loss: 6419264.5000
Epoch 11/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6434 - loss: 8249144.0000
Epoch 12/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6434 - loss: 10473919.0000
Epoch 13/20
2152/2152 - 3s - 2ms/step - accuracy: 0.6434 - loss: 12977730.0000


<keras.src.callbacks.history.History at 0x15201b6a2c0>

In [76]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [77]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BASEL  VALENTIA
True                       
BASEL        3678         4
BELGRADE     1092         0
BUDAPEST      214         0
DEBILT         82         0
DUSSELDORF     29         0
HEATHROW       82         0
KASSEL         11         0
LJUBLJANA      61         0
MAASTRICHT      9         0
MADRID        458         0
MUNCHENB        8         0
OSLO            5         0
STOCKHOLM       4         0
VALENTIA        1         0


In [78]:
# relu
epochs = 20
batch_size = 8
n_hidden = 8

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='relu')) # Options: sigmoid, tanh, softmax, relu

In [79]:
model.summary()

In [80]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [81]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/20
2152/2152 - 5s - 2ms/step - accuracy: 0.0845 - loss: 12.6905
Epoch 2/20
2152/2152 - 4s - 2ms/step - accuracy: 0.2092 - loss: nan
Epoch 3/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 4/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 5/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 6/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 7/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 8/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 9/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 10/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 11/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 12/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 13/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 14/20
2152/2152 - 4s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 15/20
2152/2152 - 3s - 2ms/step -

<keras.src.callbacks.history.History at 0x15210167d10>

In [82]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [83]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BASEL
True             
BASEL        3682
BELGRADE     1092
BUDAPEST      214
DEBILT         82
DUSSELDORF     29
HEATHROW       82
KASSEL         11
LJUBLJANA      61
MAASTRICHT      9
MADRID        458
MUNCHENB        8
OSLO            5
STOCKHOLM       4
VALENTIA        1


##### Softmax recognized the highest number of stations, so I'm going back to that activation and adjusting numbers to explore further. 

In [84]:
epochs = 30
batch_size = 16
n_hidden = 16

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

In [85]:
model.summary()

In [86]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [87]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 3s - 2ms/step - accuracy: 0.1016 - loss: 1594.1324
Epoch 2/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1269 - loss: 18389.1621
Epoch 3/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1304 - loss: 62678.9258
Epoch 4/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1354 - loss: 140029.5625
Epoch 5/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1362 - loss: 258640.5312
Epoch 6/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1366 - loss: 398247.4688
Epoch 7/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1379 - loss: 600168.9375
Epoch 8/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1382 - loss: 829549.3750
Epoch 9/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1321 - loss: 1116144.8750
Epoch 10/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1350 - loss: 1462159.5000
Epoch 11/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1328 - loss: 1864353.2500
Epoch 12/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1345 - loss: 2312617.5000
Epoch 13/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1332 - loss: 2840677.0000
Epoch 1

<keras.src.callbacks.history.History at 0x152101b1040>

In [88]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [89]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                   
BASEL              1      1496      40         919        18     285   
BELGRADE           0       571       1         300         5      47   
BUDAPEST           0        89       2          62         0      15   
DEBILT             0        26       3          28         0       9   
DUSSELDORF         0         5       0          14         1       2   
HEATHROW           0         9       1          23         1      17   
KASSEL             0         5       0           3         0       2   
LJUBLJANA          0        26       1           8         0       2   
MAASTRICHT         0         4       0           3         0       0   
MADRID             0       101      11          99        10      45   
MUNCHENB           0         7       0           0         0 

In [90]:
epochs = 30
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

In [91]:
model.summary()

In [92]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [93]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 3s - 3ms/step - accuracy: 0.1162 - loss: 5014.6328
Epoch 2/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1260 - loss: 51841.8945
Epoch 3/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1276 - loss: 168736.2500
Epoch 4/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1291 - loss: 373501.0000
Epoch 5/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1280 - loss: 654931.7500
Epoch 6/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1293 - loss: 1047205.0625
Epoch 7/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1297 - loss: 1519238.0000
Epoch 8/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1331 - loss: 2112881.7500
Epoch 9/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1336 - loss: 2863987.2500
Epoch 10/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1338 - loss: 3644012.7500
Epoch 11/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1307 - loss: 4593299.5000
Epoch 12/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1294 - loss: 5780297.5000
Epoch 13/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1292 - loss: 7081434.0000
Epo

<keras.src.callbacks.history.History at 0x152101b38a0>

In [94]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [95]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                   
BASEL           1566        94     155          54        22     250   
BELGRADE         745         0       6           0         0       6   
BUDAPEST         111         0       7           0         0       0   
DEBILT            40         0       2           0         0       1   
DUSSELDORF         7         0       0           0         0       0   
HEATHROW          13         0       3           1         0       0   
KASSEL             7         0       1           0         0       0   
LJUBLJANA         28         0       1           0         0       0   
MAASTRICHT         4         0       0           0         0       0   
MADRID            81         3      16           2         0      10   
MUNCHENB           8         0       0           0         0 

In [102]:
epochs = 30
batch_size = 32
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='relu')) # Options: sigmoid, tanh, softmax, relu

In [103]:
model.summary()

In [104]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [105]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
538/538 - 2s - 4ms/step - accuracy: 0.0894 - loss: 19.8430
Epoch 2/30
538/538 - 1s - 2ms/step - accuracy: 0.5012 - loss: nan
Epoch 3/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 4/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 5/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 6/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 7/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 8/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 9/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 10/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 11/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 12/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 13/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 14/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan
Epoch 15/30
538/538 - 1s - 2ms/step - accuracy: 0.6440 - loss: nan


<keras.src.callbacks.history.History at 0x15212068550>

In [106]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [107]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BASEL
True             
BASEL        3682
BELGRADE     1092
BUDAPEST      214
DEBILT         82
DUSSELDORF     29
HEATHROW       82
KASSEL         11
LJUBLJANA      61
MAASTRICHT      9
MADRID        458
MUNCHENB        8
OSLO            5
STOCKHOLM       4
VALENTIA        1
