# 1. Importing data and libraries

In [192]:
import os
import time
import warnings
import operator
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# TensorFlow and Keras Imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout, LeakyReLU, LSTM
)
from tensorflow.keras.optimizers import (
    Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from scikeras.wrappers import KerasClassifier

# Scikit-learn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import (
    train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV, StratifiedKFold
)
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets, metrics, tree
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.tree import plot_tree
from sklearn.utils.multiclass import type_of_target

# Bayesian Optimization Import
from bayes_opt import BayesianOptimization

# Additional Imports
from math import floor
from scipy.stats import randint
from numpy import argmax, unique, reshape

# Set LeakyReLU default
LeakyReLU = LeakyReLU(negative_slope=0.1)

# Warning Management
warnings.filterwarnings('ignore')

In [193]:
# Create a path
path = r'/Users/marcela/Library/CloudStorage/OneDrive-Personal/CF/Machine Learning/Achievement/Data Sets'

In [194]:
# Delimiting columns displayed
pd.options.display.max_columns = None

In [195]:
# Import pleasant weather answers data set
df_pleasant_weather = pd.read_csv(os.path.join(path, 'Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'), index_col = False)

In [196]:
# Import unscaled mean temperatures data set
df_unscaled = pd.read_csv(os.path.join(path, 'Dataset-weather-prediction-dataset-processed.csv'), index_col = False)

In [197]:
df_unscaled.shape

(22950, 170)

In [198]:
df_pleasant_weather.shape

(22950, 16)

# 2. Data Wrangling

In [199]:
# Dropping all columns regarding Gdansk, Roma, Tours from unscaled, as they aren't included in pleasant_weather
columns_to_drop = ['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation', 
                   'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min', 
                   'GDANSK_temp_max', 'ROMA_cloud_cover', 'ROMA_wind_speed', 
                   'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
                   'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure',
                   'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean', 
                   'TOURS_temp_min', 'TOURS_temp_max']

# Using the drop() method to drop the specified columns
df_unscaled.drop(columns=columns_to_drop, inplace=True)

In [200]:
df_unscaled.shape 

(22950, 149)

In [201]:
df_unscaled.drop(columns = ['DATE', 'MONTH'], inplace = True)

In [202]:
df_pleasant_weather.drop(columns = 'DATE', inplace = True) 

In [203]:
df_pleasant_weather.shape

(22950, 15)

In [204]:
further_drops = ['BASEL_snow_depth',  'DUSSELDORF_snow_depth', 'HEATHROW_snow_depth',
                  'MUNCHENB_snow_depth', 'OSLO_snow_depth',  'VALENTIA_snow_depth',
                 'BASEL_wind_speed', 'DEBILT_wind_speed', 'DUSSELDORF_wind_speed',
                  'KASSEL_wind_speed', 'LJUBLJANA_wind_speed',  'MAASTRICHT_wind_speed',
                  'MADRID_wind_speed', 'OSLO_wind_speed','SONNBLICK_wind_speed',]

df_unscaled.drop(columns=further_drops, inplace=True)

In [205]:
# Filling missing observations using nearby stations with similar weather:
# Ljubljana -> Kassel
# Sonnblick -> Munchen
# Oslo -> Stockholm

df_unscaled.columns.get_loc('HEATHROW_temp_max')

53

In [206]:
df_unscaled.columns.get_loc('MUNCHENB_humidity')

90

In [207]:
df_unscaled.columns.get_loc('STOCKHOLM_cloud_cover')

115

In [208]:
df_unscaled.insert(54,'KASSEL_cloud_cover', df_unscaled['LJUBLJANA_cloud_cover'])
df_unscaled.insert(92,'MUNCHENB_pressure', df_unscaled['SONNBLICK_pressure'])
df_unscaled.insert(118, 'STOCKHOLM_humidity', df_unscaled['OSLO_humidity'])
df_unscaled.columns.tolist()

['BASEL_cloud_cover',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_sunshine',
 'DUSSELDORF_temp_mean',


In [209]:
df_unscaled.shape

(22950, 135)

In [210]:
df_unscaled.to_pickle(os.path.join(path, 'X_cleaned.pkl'))

# 3. Reshaping for ML modeling

In [211]:
X = pd.read_pickle(os.path.join(path, 'X_cleaned.pkl'))

In [212]:
y = df_pleasant_weather

In [213]:
X.shape

(22950, 135)

In [214]:
# Turning X and y from a df to arrays
X = np.array(X)
y = np.array(y)

In [215]:
X = X.reshape(-1,15,9)

In [216]:
y = np.argmax(y, axis = 1)

In [217]:
# Verifying array shape
y

array([0, 0, 0, ..., 0, 0, 0])

# 4. Splitting data (training and test sets)

In [218]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 39)

# 5. Bayesian hyperparameter optimization

In [219]:
# Ensure global variables are defined
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(np.unique(y_train))  

# Define score function
score_acc = 'accuracy'

In [220]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 15 #_count_classes(Y_train) # Number of weather stations
# Make scorer accuracy
score_acc = make_scorer(accuracy_score)

In [221]:
# Create function
def bay_area_rnn(neurons, activation, kernel, optimizer, learning_rate, batch_size, epochs,
                 layers1, layers2, normalization, dropout, dropout_rate): 
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl', 'SGD']
    #optimizerD= {'Adam':Adam(lr=learning_rate), 'SGD':SGD(lr=learning_rate),
                 #'RMSprop':RMSprop(lr=learning_rate), 'Adadelta':Adadelta(lr=learning_rate),
                 #'Adagrad':Adagrad(lr=learning_rate), 'Adamax':Adamax(lr=learning_rate),
                 #'Nadam':Nadam(lr=learning_rate), 'Ftrl':Ftrl(lr=learning_rate)}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential', LeakyReLU, 'relu']
    
    neurons = round(neurons)
    kernel = round(kernel)  # This won't be used in RNN but kept for consistency
    activation = activationL[round(activation)]
    optimizer = optimizerL[round(optimizer)] #optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    
    def rnn_model():
        model = Sequential()
        model.add(LSTM(neurons, input_shape=(timesteps, input_dim), activation=activation))
        
        if normalization > 0.5:
            model.add(BatchNormalization())
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation))
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=123))
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation))
        model.add(Flatten())
        model.add(Dense(n_classes, activation='softmax'))  # Don't use relu here
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        return model

    es = EarlyStopping(monitor='accuracy', mode='max', verbose=2, patience=20)
    nn = KerasClassifier(build_fn=rnn_model, epochs=epochs, batch_size=batch_size, verbose=2)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    score = cross_val_score(nn, X_train, y_train, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score


In [241]:
from tensorflow.keras.layers import SimpleRNN

neurons = 64  
kernel = 3  
activation = 'relu'  
normalization = 1  
layers1 = 2  
layers2 = 2  
dropout = 1  
dropout_rate = 0.4  
optimizer_name = 'Adam'  
learning_rate = 0.001  

model = rnn_model(neurons, kernel, activation, normalization, layers1, layers2, dropout, dropout_rate, optimizer_name, learning_rate)
es = EarlyStopping(monitor='accuracy', mode='max', verbose=1, patience=20)
results = []
for train, test in kfold.split(X, y):
    model.fit(X[train], y[train], epochs=epochs, batch_size=batch_size, verbose=0, 
              callbacks=[es], class_weight=class_weights)
    scores = model.evaluate(X[test], y[test], verbose=1)
    results.append(scores[1])  

print(f"Mean accuracy: {np.mean(results):.4f}")

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7235 - loss: 0.7940
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 884us/step - accuracy: 0.8292 - loss: 0.4501
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 871us/step - accuracy: 0.8875 - loss: 0.3289
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 886us/step - accuracy: 0.8559 - loss: 0.4485
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 877us/step - accuracy: 0.9441 - loss: 0.1361
Mean accuracy: 0.8454


In [242]:
# Start timing the Bayesian Optimization process
start = time.time()

# Define the hyperparameter space for Bayesian Optimization
params = {
    'neurons': (10, 50),
    'kernel': (1, 3),
    'activation': (0, 9),  # 9
    'optimizer': (0, 7),  # 7
    'learning_rate': (0.001, 0.1),  # Reduce the range for a faster initial test
    'batch_size': (200, 500),
    'epochs': (10, 50),
    'layers1': (1, 2),  # Limit the depth initially
    'layers2': (1, 2),
    'normalization': (0, 1),
    'dropout': (0, 1),
    'dropout_rate': (0.3, 0.5)
}

# Run Bayesian Optimization
nn_opt = BayesianOptimization(bay_area_rnn, params, random_state=127)
nn_opt.maximize(init_points=5, n_iter=3)  # Initial test with fewer iterations
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 1/32
65/65 - 1s - 16ms/step - accuracy: 0.5054 - loss: 1.6696
Epoch 2/32
65/65 - 0s - 4ms/step - accuracy: 0.6849 - loss: 0.9112
Epoch 3/32
65/65 - 0s - 4ms/step - accuracy: 0.7035 - loss: 0.8341
Epoch 4/32
65/65 - 0s - 4ms/step - accuracy: 0.7128 - loss: 0.8013
Epoch 5/32
65/65 - 0s - 4ms/step - accuracy: 0.7275 - loss: 0.7705
Epoch 6/32
65/65 - 0s - 4ms/step - accuracy: 0.7358 - loss: 0.7425
Epoch 7/32
65/65 - 0s - 4ms/step - accuracy: 0.7446 - loss: 0.7176
Epoch 8/32
65/65 - 0s - 4ms/step - accuracy: 0.7506 - loss: 0.7084
Epoch 9/32
65/65 - 0s - 4ms/step - accuracy: 0.7608 - loss: 0.6876
Epoch 10/32
65/65 - 0s - 4ms/step - accuracy: 0.7658 - l

In [243]:
# Start timing the Bayesian Optimization process
start = time.time()

# Define the hyperparameter space for Bayesian Optimization
params = {
    'neurons': (10, 100),
    'kernel': (1, 3),
    'activation': (0, 9),  # 9
    'optimizer': (0, 7),  # 7
    'learning_rate': (0.001, 1),
    'batch_size': (200, 1000), #(10, 50), #
    'epochs': (20, 100),
    'layers1': (1, 3),
    'layers2': (1, 3),
    'normalization': (0, 1),
    'dropout': (0, 1),
    'dropout_rate': (0.3, 0.5)
}

# Run Bayesian Optimization
nn_opt = BayesianOptimization(bay_area_rnn, params, random_state=127)
nn_opt.maximize(init_points=15, n_iter=4)  # 25
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 1/64
60/60 - 1s - 23ms/step - accuracy: 0.6207 - loss: 1.2639
Epoch 2/64
60/60 - 1s - 9ms/step - accuracy: 0.7156 - loss: 0.8213
Epoch 3/64
60/60 - 1s - 9ms/step - accuracy: 0.7421 - loss: 0.7489
Epoch 4/64
60/60 - 1s - 10ms/step - accuracy: 0.7589 - loss: 0.6975
Epoch 5/64
60/60 - 1s - 11ms/step - accuracy: 0.7710 - loss: 0.6593
Epoch 6/64
60/60 - 1s - 11ms/step - accuracy: 0.7806 - loss: 0.6366
Epoch 7/64
60/60 - 1s - 11ms/step - accuracy: 0.7889 - loss: 0.6095
Epoch 8/64
60/60 - 1s - 11ms/step - accuracy: 0.7956 - loss: 0.5926
Epoch 9/64
60/60 - 1s - 11ms/step - accuracy: 0.7968 - loss: 0.5825
Epoch 10/64
60/60 - 1s - 11ms/step - accuracy: 0.8

In [244]:
best_params = nn_opt.max['params']
best_score = nn_opt.max['target']

print(f"Best Parameters: {best_params}")
print(f"Highest Accuracy: {best_score}")

Best Parameters: {'activation': 2.2290229350486315, 'batch_size': 222.41518042081844, 'dropout': 0.21503440566464227, 'dropout_rate': 0.48631029033793277, 'epochs': 95.59733857114313, 'kernel': 2.074386512917631, 'layers1': 1.0069196116945347, 'layers2': 2.593736552905681, 'learning_rate': 0.21689906644083645, 'neurons': 85.73445902439704, 'normalization': 0.3765696099390927, 'optimizer': 1.2422905178635517}
Highest Accuracy: 0.8693361953608895


In [245]:
activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', 'LeakyReLU', 'relu']
optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl', 'SGD']

# Best parameters from optimization
activation_index = round(best_params['activation'])
optimizer_index = round(best_params['optimizer'])

best_params['activation'] = activationL[activation_index]
best_params['optimizer'] = optimizerL[optimizer_index]
best_params['neurons'] = round(best_params['neurons'])
best_params['kernel'] = round(best_params['kernel'])
best_params['batch_size'] = round(best_params['batch_size'])
best_params['epochs'] = round(best_params['epochs'])
best_params['layers1'] = round(best_params['layers1'])
best_params['layers2'] = round(best_params['layers2'])

print(f"Best Parameters (interpreted): {best_params}")

Best Parameters (interpreted): {'activation': 'softplus', 'batch_size': 222, 'dropout': 0.21503440566464227, 'dropout_rate': 0.48631029033793277, 'epochs': 96, 'kernel': 2, 'layers1': 1, 'layers2': 3, 'learning_rate': 0.21689906644083645, 'neurons': 86, 'normalization': 0.3765696099390927, 'optimizer': 'Adam'}


### 6. Keras Model

In [247]:
# Check unique labels in your training and validation data
unique_labels_train = np.unique(y_train)
unique_labels_test = np.unique(y_test)

print("Unique labels in training data:", unique_labels_train)
print("Unique labels in validation data:", unique_labels_test)

# Ensure all labels are in the range [0, 13]
y_train_fixed = np.where(y_train >= 14, 13, y_train)
y_test_fixed = np.where(y_test >= 14, 13, y_test)

unique_labels_train_fixed = np.unique(y_train_fixed)
unique_labels_test_fixed = np.unique(y_test_fixed)

print("Fixed unique labels in training data:", unique_labels_train_fixed)
print("Fixed unique labels in validation data:", unique_labels_test_fixed)

# Now use y_train_fixed and y_test_fixed for training
model.fit(X_train, y_train_fixed, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=2, validation_data=(X_test, y_test_fixed))

Unique labels in training data: [ 0  1  2  3  4  5  6  7  8  9 10 11 13 14]
Unique labels in validation data: [ 0  1  2  3  4  5  6  7  8  9 10 11 13 14]
Fixed unique labels in training data: [ 0  1  2  3  4  5  6  7  8  9 10 11 13]
Fixed unique labels in validation data: [ 0  1  2  3  4  5  6  7  8  9 10 11 13]
Epoch 1/96
78/78 - 1s - 19ms/step - accuracy: 0.9783 - loss: 0.0720 - val_accuracy: 0.9800 - val_loss: 0.0537
Epoch 2/96
78/78 - 1s - 13ms/step - accuracy: 0.9824 - loss: 0.0521 - val_accuracy: 0.9716 - val_loss: 0.0739
Epoch 3/96
78/78 - 1s - 13ms/step - accuracy: 0.9835 - loss: 0.0453 - val_accuracy: 0.9775 - val_loss: 0.0580
Epoch 4/96
78/78 - 1s - 13ms/step - accuracy: 0.9809 - loss: 0.0537 - val_accuracy: 0.9516 - val_loss: 0.4125
Epoch 5/96
78/78 - 1s - 13ms/step - accuracy: 0.9720 - loss: 0.1188 - val_accuracy: 0.9641 - val_loss: 0.0963
Epoch 6/96
78/78 - 1s - 13ms/step - accuracy: 0.9843 - loss: 0.0433 - val_accuracy: 0.9822 - val_loss: 0.0493
Epoch 7/96
78/78 - 1s - 14

<keras.src.callbacks.history.History at 0x40f4bcd40>

In [248]:
# Assuming the labels should be in the range of 0 to 13
y_train_corrected = np.where(y_train == 14, 13, y_train)  # Replace 14 with 13
unique_labels_corrected = np.unique(y_train_corrected)
print("Corrected unique labels in y_train:", unique_labels_corrected)

Corrected unique labels in y_train: [ 0  1  2  3  4  5  6  7  8  9 10 11 13]


In [249]:
# Best parameters from optimization
best_params = {
    'neurons': 38,
    'kernel': 2,
    'activation': 'softplus',
    'optimizer': 'Adam',
    'learning_rate': 0.7175216868627151,
    'batch_size': 670,
    'epochs': 64,
    'layers1': 2,
    'layers2': 1,
    'normalization': 0.8666093251432194,
    'dropout': 0.8046122554274766,
    'dropout_rate':  0.04147123592409228,
}

# Initialize optimizer with learning rate
optimizers = {
    'Adam': Adam(learning_rate=best_params['learning_rate']),
    'SGD': SGD(learning_rate=best_params['learning_rate']),
    'RMSprop': RMSprop(learning_rate=best_params['learning_rate']),
    'Adadelta': Adadelta(learning_rate=best_params['learning_rate']),
    'Adagrad': Adagrad(learning_rate=best_params['learning_rate']),
    'Adamax': Adamax(learning_rate=best_params['learning_rate']),
    'Nadam': Nadam(learning_rate=best_params['learning_rate']),
    'Ftrl': Ftrl(learning_rate=best_params['learning_rate'])
}

optimizer = optimizers[best_params['optimizer']]

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(set(y_train))  # Ensure n_classes is correctly calculated

model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))
model.add(Conv1D(best_params['neurons'], kernel_size=best_params['kernel'], activation=best_params['activation']))

if best_params['normalization'] > 0.5:
    model.add(BatchNormalization())

for _ in range(best_params['layers1']):
    model.add(Dense(best_params['neurons'], activation=best_params['activation']))

if best_params['dropout'] > 0.5:
    model.add(Dropout(best_params['dropout_rate']))

for _ in range(best_params['layers2']):
    model.add(Dense(best_params['neurons'], activation=best_params['activation']))

model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))  # Ensure n_classes is correctly calculated

model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train the model with the optimized parameters
model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=2)

Epoch 1/64


2024-08-13 04:21:56.130862: W tensorflow/core/framework/op_kernel.cc:1840] OP_REQUIRES failed at sparse_xent_op.cc:103 : INVALID_ARGUMENT: Received a label value of 14 which is outside the valid range of [0, 14).  Label values: 0 0 0 0 1 1 0 1 1 0 9 0 0 9 0 0 1 0 1 1 2 0 3 1 1 0 0 0 0 0 0 1 0 1 1 1 9 4 0 0 9 9 0 0 0 0 0 1 0 0 0 0 0 1 9 0 0 0 0 1 1 0 0 0 6 1 0 0 0 0 0 1 1 13 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 2 9 0 0 9 0 0 0 0 0 0 3 0 0 0 9 3 0 1 0 0 0 0 0 2 1 1 0 0 0 5 0 0 0 0 0 0 0 0 1 0 0 0 9 9 3 0 0 0 0 0 0 9 1 0 0 0 1 0 0 0 1 1 0 0 2 0 0 2 9 0 0 0 0 0 1 0 5 9 1 9 0 14 1 1 9 0 0 0 0 0 1 9 0 0 0 0 0 0 9 0 1 3 9 0 0 2 0 0 0 1 0 0 0 0 0 1 1 0 0 0 9 1 0 0 9 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 9 2 0 0 2 5 0 1 1 0 1 0 0 0 0 0 0 1 0 2 1 0 0 0 1 0 9 2 9 9 0 1 0 2 0 0 7 0 0 0 0 0 9 0 1 0 0 0 0 0 0 9 3 9 1 9 1 0 9 0 1 1 5 0 0 1 0 2 3 9 0 1 1 0 0 0 0 9 1 1 0 9 0 1 0 0 0 0 0 0 9 0 0 0 0 0 9 0 0 0 0 0 0 0 0 1 0 2 0 0 5 0 0 0 5 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 5 

InvalidArgumentError: Graph execution error:

Detected at node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 701, in start

  File "/opt/anaconda3/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/opt/anaconda3/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/opt/anaconda3/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/opt/anaconda3/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in dispatch_queue

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 523, in process_one

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 429, in dispatch_shell

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 767, in execute_request

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/var/folders/gz/793390fs5lz_gt8d667yd0qw0000gn/T/ipykernel_85667/4109303546.py", line 58, in <module>

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 54, in train_step

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/trainers/trainer.py", line 357, in _compute_loss

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/trainers/trainer.py", line 325, in compute_loss

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/trainers/compile_utils.py", line 609, in __call__

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/trainers/compile_utils.py", line 645, in call

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/losses/loss.py", line 43, in __call__

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/losses/losses.py", line 27, in call

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/losses/losses.py", line 1853, in sparse_categorical_crossentropy

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/ops/nn.py", line 1567, in sparse_categorical_crossentropy

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/nn.py", line 645, in sparse_categorical_crossentropy

Received a label value of 14 which is outside the valid range of [0, 14).  Label values: 0 0 0 0 1 1 0 1 1 0 9 0 0 9 0 0 1 0 1 1 2 0 3 1 1 0 0 0 0 0 0 1 0 1 1 1 9 4 0 0 9 9 0 0 0 0 0 1 0 0 0 0 0 1 9 0 0 0 0 1 1 0 0 0 6 1 0 0 0 0 0 1 1 13 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 2 9 0 0 9 0 0 0 0 0 0 3 0 0 0 9 3 0 1 0 0 0 0 0 2 1 1 0 0 0 5 0 0 0 0 0 0 0 0 1 0 0 0 9 9 3 0 0 0 0 0 0 9 1 0 0 0 1 0 0 0 1 1 0 0 2 0 0 2 9 0 0 0 0 0 1 0 5 9 1 9 0 14 1 1 9 0 0 0 0 0 1 9 0 0 0 0 0 0 9 0 1 3 9 0 0 2 0 0 0 1 0 0 0 0 0 1 1 0 0 0 9 1 0 0 9 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 9 2 0 0 2 5 0 1 1 0 1 0 0 0 0 0 0 1 0 2 1 0 0 0 1 0 9 2 9 9 0 1 0 2 0 0 7 0 0 0 0 0 9 0 1 0 0 0 0 0 0 9 3 9 1 9 1 0 9 0 1 1 5 0 0 1 0 2 3 9 0 1 1 0 0 0 0 9 1 1 0 9 0 1 0 0 0 0 0 0 9 0 0 0 0 0 9 0 0 0 0 0 0 0 0 1 0 2 0 0 5 0 0 0 5 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 5 1 0 0 0 0 0 0 0 0 0 0 9 1 9 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 2 1 1 1 1 0 0 0 0 0 11 9 0 1 0 2 1 1 0 0 1 0 1 9 0 9 0 0 0 1 9 8 9 1 0 1 0 0 0 0 0 0 0 0 2 1 0 0 0 0 1 0 5 9 0 0 0 9 9 0 0 9 9 0 0 1 0 0 0 9 0 1 9 0 1 0 0 0 0 9 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 9 10 0 1 0 0 9 0 1 0 1 9 0 0 2 0 9 0 5 0 0 1 10 9 7 0 0 1 1 1 0 9 0 9 0 0 1 9 0 1 0 1 0 0 0 0 1 0 0 1 0 1 2 1 1 0 0 0 1 0 1 0 0 0 0 1 0 1 9 0 0 0 0 9 3 0 0 0 0 9 0 0 0 2 0 0 2 0 0 0 0 0 1 0 1 0 5 0 1 0 0 1 0 0 0 1 0 1 1 5 0 9 0 0 1 0 0 0 0 0 9 9 0 0 0 0 0 0 0 9 1 1 7 0 0 0 0 0 0 9
	 [[{{node compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_one_step_on_iterator_4135001]

In [250]:
y_train_fixed = np.where(y_train >= 14, 13, y_train)
y_test_fixed = np.where(y_test >= 14, 13, y_test)

In [251]:
# Train the model with the corrected labels
model.fit(X_train, y_train_fixed, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=2, validation_data=(X_test, y_test_fixed))

Epoch 1/64
26/26 - 0s - 17ms/step - accuracy: 0.5704 - loss: 422.8434 - val_accuracy: 0.6070 - val_loss: 5.3139
Epoch 2/64
26/26 - 0s - 13ms/step - accuracy: 0.6425 - loss: 1.1885 - val_accuracy: 0.6452 - val_loss: 1.2291
Epoch 3/64
26/26 - 0s - 13ms/step - accuracy: 0.6425 - loss: 1.1616 - val_accuracy: 0.6460 - val_loss: 1.1779
Epoch 4/64
26/26 - 0s - 13ms/step - accuracy: 0.6425 - loss: 1.1598 - val_accuracy: 0.6460 - val_loss: 1.1790
Epoch 5/64
26/26 - 0s - 13ms/step - accuracy: 0.6425 - loss: 1.1597 - val_accuracy: 0.6460 - val_loss: 1.1771
Epoch 6/64
26/26 - 0s - 14ms/step - accuracy: 0.6425 - loss: 1.1594 - val_accuracy: 0.6460 - val_loss: 1.1748
Epoch 7/64
26/26 - 0s - 14ms/step - accuracy: 0.6425 - loss: 1.1619 - val_accuracy: 0.6460 - val_loss: 1.1775
Epoch 8/64
26/26 - 0s - 13ms/step - accuracy: 0.6425 - loss: 1.1617 - val_accuracy: 0.6460 - val_loss: 1.1814
Epoch 9/64
26/26 - 0s - 13ms/step - accuracy: 0.6425 - loss: 1.1602 - val_accuracy: 0.6460 - val_loss: 1.1780
Epoch 10

<keras.src.callbacks.history.History at 0x40d937860>

In [252]:
model.summary()

In [253]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [254]:
# Assuming X_train and y_train are your input data and labels
# One-hot encode y_train
y_train_one_hot = to_categorical(y_train, num_classes=15)

In [255]:
# Check shapes
print(f'X_train shape: {X_train.shape}')
print(f'y_train_one_hot shape: {y_train_one_hot.shape}')

X_train shape: (17212, 15, 9)
y_train_one_hot shape: (17212, 15)


In [256]:
n_classes_one_hot = y_train_one_hot.shape[1]
print("Number of classes in y_train_one_hot:", n_classes_one_hot)

Number of classes in y_train_one_hot: 15


In [257]:
# Best parameters from optimization
best_params = {
    'neurons': 38,
    'kernel': 2,
    'activation': 'softplus',
    'optimizer': 'Adam',
    'learning_rate': 0.7175216868627151,
    'batch_size': 670,
    'epochs': 64,
    'layers1': 2,
    'layers2': 1,
    'normalization': 0.8666093251432194,
    'dropout': 0.8046122554274766,
    'dropout_rate':  0.04147123592409228,
}

# Initialize optimizer with learning rate
optimizers = {
    'Adam': Adam(learning_rate=best_params['learning_rate']),
    'SGD': SGD(learning_rate=best_params['learning_rate']),
    'RMSprop': RMSprop(learning_rate=best_params['learning_rate']),
    'Adadelta': Adadelta(learning_rate=best_params['learning_rate']),
    'Adagrad': Adagrad(learning_rate=best_params['learning_rate']),
    'Adamax': Adamax(learning_rate=best_params['learning_rate']),
    'Nadam': Nadam(learning_rate=best_params['learning_rate']),
    'Ftrl': Ftrl(learning_rate=best_params['learning_rate'])
}

optimizer = optimizers[best_params['optimizer']]

# Verify the correct number of classes in the one-hot encoded labels
n_classes_one_hot = y_train_one_hot.shape[1]
print("Number of classes in y_train_one_hot:", n_classes_one_hot)

# Input variables
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])

model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))
model.add(Conv1D(best_params['neurons'], kernel_size=best_params['kernel'], activation=best_params['activation']))

if best_params['normalization'] > 0.5:
    model.add(BatchNormalization())

for _ in range(best_params['layers1']):
    model.add(Dense(best_params['neurons'], activation=best_params['activation']))

if best_params['dropout'] > 0.5:
    model.add(Dropout(best_params['dropout_rate']))

for _ in range(best_params['layers2']):
    model.add(Dense(best_params['neurons'], activation=best_params['activation']))

model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes_one_hot, activation='softmax'))  

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train the model with the optimized parameters and y_train_one_hot
model.fit(X_train, y_train_one_hot, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=2)

Number of classes in y_train_one_hot: 15
Epoch 1/64
26/26 - 1s - 33ms/step - accuracy: 0.4847 - loss: 450.9380
Epoch 2/64
26/26 - 0s - 11ms/step - accuracy: 0.5598 - loss: 1.4272
Epoch 3/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1800
Epoch 4/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1611
Epoch 5/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1593
Epoch 6/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1614
Epoch 7/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1610
Epoch 8/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1603
Epoch 9/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1607
Epoch 10/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1622
Epoch 11/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1615
Epoch 12/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1603
Epoch 13/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1610
Epoch 14/64
26/26 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1600


<keras.src.callbacks.history.History at 0x3224435f0>

In [258]:
model.fit(X_train, y_train_one_hot, batch_size=460, epochs=91, verbose=2)

Epoch 1/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - loss: 1.1657
Epoch 2/91
38/38 - 0s - 8ms/step - accuracy: 0.6425 - loss: 1.1688
Epoch 3/91
38/38 - 0s - 8ms/step - accuracy: 0.6425 - loss: 1.1747
Epoch 4/91
38/38 - 0s - 8ms/step - accuracy: 0.6425 - loss: 1.1742
Epoch 5/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - loss: 1.1756
Epoch 6/91
38/38 - 0s - 8ms/step - accuracy: 0.6425 - loss: 1.1686
Epoch 7/91
38/38 - 0s - 8ms/step - accuracy: 0.6425 - loss: 1.1654
Epoch 8/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - loss: 1.1662
Epoch 9/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - loss: 1.1679
Epoch 10/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - loss: 1.1651
Epoch 11/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - loss: 1.1669
Epoch 12/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - loss: 1.1676
Epoch 13/91
38/38 - 0s - 11ms/step - accuracy: 0.6425 - loss: 1.1710
Epoch 14/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - loss: 1.1674
Epoch 15/91
38/38 - 0s - 9ms/step - accuracy: 0.6425 - l

<keras.src.callbacks.history.History at 0x47df5bc50>

In [259]:
# Change this to Weather true/false
stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'
}

In [260]:
def confusion_matrix(y_true, y_pred, stations):
    # Check if y_true and y_pred are one-hot encoded or already class indices
    if y_true.ndim == 1:
        y_true_labels = y_true
    else:
        y_true_labels = np.argmax(y_true, axis=1)
    
    if y_pred.ndim == 1:
        y_pred_labels = y_pred
    else:
        y_pred_labels = np.argmax(y_pred, axis=1)
        
    # Map numeric labels to activity names
    y_true_series = pd.Series([stations[y] for y in y_true_labels])
    y_pred_series = pd.Series([stations[y] for y in y_pred_labels])
    
    return pd.crosstab(y_true_series, y_pred_series, rownames=['True'], colnames=['Pred'])

In [267]:
# Before making predictions, convert y_test to one-hot format
y_test_one_hot = to_categorical(y_test, num_classes=15)

In [268]:
y_pred = model.predict(X_test)

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 620us/step


In [269]:
y_test_labels = np.argmax(y_test_one_hot, axis=1)  
y_pred_labels = np.argmax(y_pred, axis=1)  

# Manually calculate accuracy
correct_predictions = np.sum(y_test_labels == y_pred_labels)
total_samples = len(y_test_labels)
accuracy = correct_predictions / total_samples

print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 64.60%


In [270]:
cm = confusion_matrix(y_test, y_pred, stations)
print(cm)

Pred        BASEL
True             
BASEL        3707
BELGRADE     1081
BUDAPEST      196
DEBILT         89
DUSSELDORF     33
HEATHROW      107
KASSEL         15
LJUBLJANA      69
MAASTRICHT     10
MADRID        409
MUNCHENB       10
OSLO            7
STOCKHOLM       1
VALENTIA        4
