# 1. Importing data and libraries

In [81]:
import os
import time
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# TensorFlow and Keras imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical

# Scikit-learn imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.tree import plot_tree
from scipy.stats import randint

# Bayesian Optimization
from bayes_opt import BayesianOptimization

# Keras layers
from keras.layers import LeakyReLU

# Set default LeakyReLU
LeakyReLU = LeakyReLU(negative_slope=0.1)

# Ignore warnings
warnings.filterwarnings('ignore')

# Data processing and other utilities
from numpy import argmax, reshape, unique

In [82]:
# Create a path
path = r'/Users/marcela/Library/CloudStorage/OneDrive-Personal/CF/Machine Learning/Achievement/Data Sets'

In [83]:
# Delimiting columns displayed
pd.options.display.max_columns = None

In [84]:
# Import pleasant weather answers data set
df_pleasant_weather = pd.read_csv(os.path.join(path, 'Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'), index_col = False)

In [85]:
# Import unscaled mean temperatures data set
df_unscaled = pd.read_csv(os.path.join(path, 'Dataset-weather-prediction-dataset-processed.csv'), index_col = False)

In [86]:
df_unscaled.shape

(22950, 170)

In [87]:
df_pleasant_weather.shape

(22950, 16)

# 2. Data Wrangling

In [88]:
# Dropping all columns regarding Gdansk, Roma, Tours from unscaled, as they aren't included in pleasant_weather

columns_to_drop = ['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation', 
                   'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min', 
                   'GDANSK_temp_max', 'ROMA_cloud_cover', 'ROMA_wind_speed', 
                   'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
                   'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure',
                   'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean', 
                   'TOURS_temp_min', 'TOURS_temp_max']

# Using the drop() method to drop the specified columns
df_unscaled.drop(columns=columns_to_drop, inplace=True)

In [89]:
df_unscaled.shape 

(22950, 149)

In [90]:
df_unscaled.drop(columns = ['DATE', 'MONTH'], inplace = True)

In [91]:
df_pleasant_weather.drop(columns = 'DATE', inplace = True) 

In [92]:
df_pleasant_weather.shape

(22950, 15)

In [93]:
further_drops = ['BASEL_snow_depth',  'DUSSELDORF_snow_depth', 'HEATHROW_snow_depth',
                  'MUNCHENB_snow_depth', 'OSLO_snow_depth',  'VALENTIA_snow_depth',
                 'BASEL_wind_speed', 'DEBILT_wind_speed', 'DUSSELDORF_wind_speed',
                  'KASSEL_wind_speed', 'LJUBLJANA_wind_speed',  'MAASTRICHT_wind_speed',
                  'MADRID_wind_speed', 'OSLO_wind_speed','SONNBLICK_wind_speed',]

df_unscaled.drop(columns=further_drops, inplace=True)

In [94]:
# Filling missing observations using nearby stations with similar weather:
# Ljubljana -> Kassel
# Sonnblick -> Munchen
# Oslo -> Stockholm

df_unscaled.columns.get_loc('HEATHROW_temp_max')

53

In [95]:
df_unscaled.columns.get_loc('MUNCHENB_humidity')

90

In [96]:
df_unscaled.columns.get_loc('STOCKHOLM_cloud_cover')

115

In [97]:
df_unscaled.insert(54,'KASSEL_cloud_cover', df_unscaled['LJUBLJANA_cloud_cover'])
df_unscaled.insert(92,'MUNCHENB_pressure', df_unscaled['SONNBLICK_pressure'])
df_unscaled.insert(118, 'STOCKHOLM_humidity', df_unscaled['OSLO_humidity'])
df_unscaled.columns.tolist()

['BASEL_cloud_cover',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_sunshine',
 'DUSSELDORF_temp_mean',


In [98]:
df_unscaled.shape

(22950, 135)

In [99]:
df_unscaled.to_pickle(os.path.join(path, 'X_cleaned.pkl'))

# 3. Reshaping for ML modeling

In [139]:
X = pd.read_pickle(os.path.join(path, 'X_cleaned.pkl'))

In [140]:
y = df_pleasant_weather

In [141]:
X.shape

(22950, 135)

In [142]:
y.shape

(22950, 15)

In [144]:
# Turning X and y from a df to arrays
X = np.array(X)
y = np.array(y)

In [145]:
# Reshape X to (22950, 15, 9)
X = X.reshape(-1, 15, 9)
print(X.shape)

(22950, 15, 9)


In [146]:
# Reshape y to (22950,)
y = np.argmax(y, axis=1)
print("y shape after argmax:", y.shape)

y shape after argmax: (22950,)


In [147]:
# Check unique values in y to ensure it's correct
print("Unique values in y:", np.unique(y))

Unique values in y: [ 0  1  2  3  4  5  6  7  8  9 10 11 13 14]


In [148]:
# Check shape
y.shape

(22950,)

# 4. Splitting data (training and test sets)

In [149]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 127)

In [150]:
type_of_target(y)

'multiclass'

# 5. Bayesian hyperparameter optimization

In [151]:
# Determine the number of time steps for the input data
timesteps = X_train.shape[1]

# Determine the dimensionality of the input data
input_dim = X_train.shape[2]

# Specify the number of classes for the target variable
n_classes = 15  

# Create a scorer for accuracy
score_acc = make_scorer(accuracy_score)

In [152]:
# Create function
def bay_area(neurons, activation, kernel, optimizer, learning_rate, batch_size, epochs, layers1, layers2, normalization, dropout, dropout_rate):
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl', 'SGD']
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', LeakyReLU, 'relu']
    
    neurons = round(neurons)
    kernel = round(kernel)
    activation = activationL[round(activation)]
    optimizer_name = optimizerL[round(optimizer)]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)

    def cnn_model():
        model = Sequential()
        model.add(Input(shape=(timesteps, input_dim)))
        model.add(Conv1D(neurons, kernel_size=kernel, activation=activation))
        if normalization > 0.5:
            model.add(BatchNormalization())
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation))
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=127))
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation))
        model.add(MaxPooling1D())
        model.add(Flatten())
        model.add(Dense(n_classes, activation='softmax'))  # sigmoid softmax
        
        # Create a new optimizer instance for each iteration
        if optimizer_name == 'Adam':
            optimizer_instance = Adam(learning_rate=learning_rate)
        elif optimizer_name == 'SGD':
            optimizer_instance = SGD(learning_rate=learning_rate)
        elif optimizer_name == 'RMSprop':
            optimizer_instance = RMSprop(learning_rate=learning_rate)
        elif optimizer_name == 'Adadelta':
            optimizer_instance = Adadelta(learning_rate=learning_rate)
        elif optimizer_name == 'Adagrad':
            optimizer_instance = Adagrad(learning_rate=learning_rate)
        elif optimizer_name == 'Adamax':
            optimizer_instance = Adamax(learning_rate=learning_rate)
        elif optimizer_name == 'Nadam':
            optimizer_instance = Nadam(learning_rate=learning_rate)
        elif optimizer_name == 'Ftrl':
            optimizer_instance = Ftrl(learning_rate=learning_rate)
        
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer_instance, metrics=['accuracy'])
        return model

    # K-fold cross-validation
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=1, patience=20)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=127)
    results = []
    for train, test in kfold.split(X, y):
        model = cnn_model()
        model.fit(X[train], y[train], epochs=epochs, batch_size=batch_size, verbose=0, callbacks=[es])
        scores = model.evaluate(X[test], y[test], verbose=1)
        results.append(scores[1])  # Assuming accuracy is the second metric
    return np.mean(results)

In [153]:
# Start timing the Bayesian Optimization process
start = time.time()

# Define the hyperparameter space for Bayesian Optimization
params = {
    'neurons': (10, 100),
    'kernel': (1, 3),
    'activation': (0, 9),  # 9
    'optimizer': (0, 7),  # 7
    'learning_rate': (0.001, 1),
    'batch_size': (200, 1000), #(10, 50), #
    'epochs': (20, 100),
    'layers1': (1, 3),
    'layers2': (1, 3),
    'normalization': (0, 1),
    'dropout': (0, 1),
    'dropout_rate': (0.3, 0.5)
}

# Run Bayesian Optimization
nn_opt = BayesianOptimization(bay_area, params, random_state=127)
nn_opt.maximize(init_points=15, n_iter=4)  # 25
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 22: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 880us/step - accuracy: 0.0710 - loss: 72.6419
Epoch 26: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 903us/step - accuracy: 0.6552 - loss: 33.4591
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0738 - loss: 96.0564  
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 914us/step - accuracy: 0.6617 - loss: 72.2257
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 

In [154]:
best_params = nn_opt.max['params']
best_score = nn_opt.max['target']

print(f"Best Parameters: {best_params}")
print(f"Highest Accuracy: {best_score}")

Best Parameters: {'activation': 3.238346333286747, 'batch_size': 858.0863899431488, 'dropout': 0.40228837505867143, 'dropout_rate': 0.393459528897498, 'epochs': 60.44669476125228, 'kernel': 2.1120927218648693, 'layers1': 2.1169196169026234, 'layers2': 2.3365351568358195, 'learning_rate': 0.06152500093002812, 'neurons': 12.581466622198223, 'normalization': 0.45787219367534804, 'optimizer': 0.902427852988836}
Highest Accuracy: 0.8280609965324401


In [155]:
# Retrieve the best parameters from the optimization result
optimum = nn_opt.max['params']

# Assign the best parameters to their respective variables
learning_rate = optimum['learning_rate']
activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', 'LeakyReLU', 'relu']
activation = activationL[round(optimum['activation'])]

# Convert the hyperparameters to their integer form where necessary
optimum['batch_size'] = round(optimum['batch_size'])
optimum['epochs'] = round(optimum['epochs'])
optimum['layers1'] = round(optimum['layers1'])
optimum['layers2'] = round(optimum['layers2'])
optimum['neurons'] = round(optimum['neurons'])
optimum['kernel'] = round(optimum['kernel'])

optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl']
optimizerD = {
    'SGD': SGD(learning_rate=learning_rate),
    'Adam': Adam(learning_rate=learning_rate),
    'RMSprop': RMSprop(learning_rate=learning_rate),
    'Adadelta': Adadelta(learning_rate=learning_rate),
    'Adagrad': Adagrad(learning_rate=learning_rate),
    'Adamax': Adamax(learning_rate=learning_rate),
    'Nadam': Nadam(learning_rate=learning_rate),
    'Ftrl': Ftrl(learning_rate=learning_rate)
}

# Retrieve the optimizer name
optimizer_name = optimizerL[round(optimum['optimizer'])]

# Print the optimum parameters in a readable format
print(f"Best Parameters: ")
print(f"Activation: {activation}")
print(f"Batch Size: {optimum['batch_size']}")
print(f"Dropout Rate: {optimum['dropout_rate']:.4f}")
print(f"Epochs: {optimum['epochs']}")
print(f"Kernel Size: {optimum['kernel']}")
print(f"Layers1: {optimum['layers1']}")
print(f"Layers2: {optimum['layers2']}")
print(f"Learning Rate: {optimum['learning_rate']:.4f}")
print(f"Neurons: {optimum['neurons']}")
print(f"Normalization: {optimum['normalization']:.4f}")
print(f"Optimizer: {optimizer_name}")

Best Parameters: 
Activation: softsign
Batch Size: 858
Dropout Rate: 0.3935
Epochs: 60
Kernel Size: 2
Layers1: 2
Layers2: 2
Learning Rate: 0.0615
Neurons: 13
Normalization: 0.4579
Optimizer: Adam


# 6. Building the CNN Model with Optimized Hyperparameters

In [156]:
# Best parameters from optimization
best_params = {
    'neurons': 13,
    'kernel': 3,
    'activation': 'softsign',
    'optimizer': 'Adam',
    'learning_rate': 0.0615,
    'batch_size': 858,
    'epochs':60,
    'layers1': 2,
    'layers2': 2,
    'normalization': 0.4579,
    'dropout': 0.40228837505867143,
    'dropout_rate':  0.3935
}

# Initialize optimizer with learning rate
optimizers = {
    'Adam': Adam(learning_rate=best_params['learning_rate']),
    'SGD': SGD(learning_rate=best_params['learning_rate']),
    'RMSprop': RMSprop(learning_rate=best_params['learning_rate']),
    'Adadelta': Adadelta(learning_rate=best_params['learning_rate']),
    'Adagrad': Adagrad(learning_rate=best_params['learning_rate']),
    'Adamax': Adamax(learning_rate=best_params['learning_rate']),
    'Nadam': Nadam(learning_rate=best_params['learning_rate']),
    'Ftrl': Ftrl(learning_rate=best_params['learning_rate'])
}

optimizer = optimizers[best_params['optimizer']]

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
#n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(best_params['neurons'], kernel_size=best_params['kernel'], activation=best_params['activation'], input_shape=(15,9)))

if best_params['normalization'] > 0.5:
    model.add(BatchNormalization())

for _ in range(best_params['layers1']):
    model.add(Dense(best_params['neurons'], activation=best_params['activation']))

if best_params['dropout'] > 0.5:
    model.add(Dropout(best_params['dropout_rate'], seed=123))

for _ in range(best_params['layers2']):
    model.add(Dense(best_params['neurons'], activation=best_params['activation']))

model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # sigmoid, tanh, softmax

model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train the model with the optimized parameters
model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=2)

Epoch 1/60
21/21 - 1s - 30ms/step - accuracy: 0.5729 - loss: 1.3001
Epoch 2/60
21/21 - 0s - 4ms/step - accuracy: 0.6226 - loss: 1.0316
Epoch 3/60
21/21 - 0s - 4ms/step - accuracy: 0.6335 - loss: 0.9759
Epoch 4/60
21/21 - 0s - 4ms/step - accuracy: 0.6455 - loss: 0.9579
Epoch 5/60
21/21 - 0s - 4ms/step - accuracy: 0.6576 - loss: 0.9309
Epoch 6/60
21/21 - 0s - 4ms/step - accuracy: 0.6860 - loss: 0.8844
Epoch 7/60
21/21 - 0s - 4ms/step - accuracy: 0.6957 - loss: 0.8563
Epoch 8/60
21/21 - 0s - 4ms/step - accuracy: 0.7110 - loss: 0.8137
Epoch 9/60
21/21 - 0s - 4ms/step - accuracy: 0.7264 - loss: 0.7814
Epoch 10/60
21/21 - 0s - 4ms/step - accuracy: 0.7357 - loss: 0.7773
Epoch 11/60
21/21 - 0s - 4ms/step - accuracy: 0.7426 - loss: 0.7524
Epoch 12/60
21/21 - 0s - 4ms/step - accuracy: 0.7436 - loss: 0.7301
Epoch 13/60
21/21 - 0s - 4ms/step - accuracy: 0.7487 - loss: 0.7176
Epoch 14/60
21/21 - 0s - 4ms/step - accuracy: 0.7479 - loss: 0.7248
Epoch 15/60
21/21 - 0s - 4ms/step - accuracy: 0.7401 - l

<keras.src.callbacks.history.History at 0x42629bfb0>

In [157]:
model.summary()

In [158]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [159]:
# Assuming X_train and y_train are your input data and labels
# One-hot encode y_train
y_train_one_hot = to_categorical(y_train, num_classes=15)

In [160]:
# Check shapes
print(f'X_train shape: {X_train.shape}')
print(f'y_train_one_hot shape: {y_train_one_hot.shape}')

X_train shape: (17212, 15, 9)
y_train_one_hot shape: (17212, 15)


In [161]:
model.fit(X_train, y_train_one_hot, batch_size=744, epochs=97, verbose=2)

Epoch 1/97
24/24 - 1s - 24ms/step - accuracy: 0.8232 - loss: 0.4904
Epoch 2/97
24/24 - 0s - 4ms/step - accuracy: 0.8290 - loss: 0.4755
Epoch 3/97
24/24 - 0s - 4ms/step - accuracy: 0.8294 - loss: 0.4694
Epoch 4/97
24/24 - 0s - 4ms/step - accuracy: 0.8310 - loss: 0.4645
Epoch 5/97
24/24 - 0s - 4ms/step - accuracy: 0.8317 - loss: 0.4612
Epoch 6/97
24/24 - 0s - 4ms/step - accuracy: 0.8328 - loss: 0.4579
Epoch 7/97
24/24 - 0s - 4ms/step - accuracy: 0.8342 - loss: 0.4552
Epoch 8/97
24/24 - 0s - 4ms/step - accuracy: 0.8353 - loss: 0.4524
Epoch 9/97
24/24 - 0s - 4ms/step - accuracy: 0.8351 - loss: 0.4508
Epoch 10/97
24/24 - 0s - 4ms/step - accuracy: 0.8374 - loss: 0.4485
Epoch 11/97
24/24 - 0s - 4ms/step - accuracy: 0.8384 - loss: 0.4464
Epoch 12/97
24/24 - 0s - 4ms/step - accuracy: 0.8388 - loss: 0.4445
Epoch 13/97
24/24 - 0s - 4ms/step - accuracy: 0.8394 - loss: 0.4415
Epoch 14/97
24/24 - 0s - 4ms/step - accuracy: 0.8406 - loss: 0.4384
Epoch 15/97
24/24 - 0s - 4ms/step - accuracy: 0.8404 - l

<keras.src.callbacks.history.History at 0x42629bf20>

In [162]:
# Change this to Weather true/false
stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'
}

In [163]:
print("Unique classes in y_test:", np.unique(y_test))

Unique classes in y_test: [ 0  1  2  3  4  5  6  7  8  9 10 11 13 14]


In [164]:
def confusion_matrix(y_true, y_pred, stations):
    # Check if y_true and y_pred are one-hot encoded or already class indices
    if y_true.ndim == 1:
        y_true_labels = y_true
    else:
        y_true_labels = np.argmax(y_true, axis=1)
    
    if y_pred.ndim == 1:
        y_pred_labels = y_pred
    else:
        y_pred_labels = np.argmax(y_pred, axis=1)
        
    # Map numeric labels to activity names
    y_true_series = pd.Series([stations[y] for y in y_true_labels])
    y_pred_series = pd.Series([stations[y] for y in y_pred_labels])
    
    return pd.crosstab(y_true_series, y_pred_series, rownames=['True'], colnames=['Pred'])

In [165]:
# Before making predictions, convert y_test to one-hot format
y_test_one_hot = to_categorical(y_test, num_classes=15)

In [166]:
y_pred = model.predict(X_test)

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 389us/step


In [167]:
y_test_labels = np.argmax(y_test_one_hot, axis=1)  
y_pred_labels = np.argmax(y_pred, axis=1)  

# Manually calculate accuracy
correct_predictions = np.sum(y_test_labels == y_pred_labels)
total_samples = len(y_test_labels)
accuracy = correct_predictions / total_samples

print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 87.00%


In [168]:
cm = confusion_matrix(y_test, y_pred, stations)
print(cm)

Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                          
BASEL        3489       132        14       7           4        13       0   
BELGRADE      157       915        14       3           1         1       0   
BUDAPEST       43        47       118       3           2         4       0   
DEBILT         21         2        12      39           1         3       0   
DUSSELDORF      4         4         5       6          10         3       0   
HEATHROW       11         6         5       4           2        54       0   
KASSEL          3         4         2       2           0         1       2   
LJUBLJANA       6         4         4       0           0         3       1   
MAASTRICHT      2         0         0       0           0         1       1   
MADRID         13        23         9       2           1        19       0   
MUNCHENB       13         1         1       0       