# 2.4.2 Evaluating hyperparameters - Keras - CNN
### The following script contains the following:

#### 1. Import data libraries, additional requirements
#### 2. File directory, read the data
#### 3. Data preprocessing
#### 4. Bayesian optimization function
#### 5. Build and run CNN keras model
#### 6. Run confusion matrix
        - check accuracy and loss
---------------------------------------------------------------------------------------------------------------------------
## 1. Import data libraries, additional requirements
---------------------------------------------------------------------------------------------------------------------------

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import time
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from numpy import unique
from numpy import reshape
import tensorflow.keras as keras
from keras.models import Sequential
from sklearn.model_selection import cross_val_score
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from tensorflow.keras.utils import to_categorical
from keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras.callbacks import EarlyStopping, ModelCheckpoint
from scikeras.wrappers import KerasClassifier
from math import floor
from sklearn.metrics import make_scorer, accuracy_score
from bayes_opt import BayesianOptimization
from sklearn.model_selection import StratifiedKFold
from keras.layers import LeakyReLU
LeakyReLU = LeakyReLU(negative_slope=0.1)
import warnings

In [3]:
# Turning off warning feature
warnings.filterwarnings('ignore')

# Set display options to show all columns without truncation
pd.set_option('display.max_columns', None)

---------------------------------------------------------------------------------------------------------------------------
## 2. File directory, read the data

In [4]:
path = r'climatewins-ml'

In [5]:
weather = pd.read_csv(os.path.join(path, '02 Data Sets', 'Unsupervised', 'weather-data-cleaned.csv'))
weather

Unnamed: 0,BASEL_cloud_cover,BASEL_global_radiation,BASEL_humidity,BASEL_precipitation,BASEL_pressure,BASEL_sunshine,BASEL_temp_max,BASEL_temp_mean,BASEL_temp_min,BELGRADE_cloud_cover,BELGRADE_global_radiation,BELGRADE_humidity,BELGRADE_precipitation,BELGRADE_pressure,BELGRADE_sunshine,BELGRADE_temp_max,BELGRADE_temp_mean,BELGRADE_temp_min,BUDAPEST_cloud_cover,BUDAPEST_global_radiation,BUDAPEST_humidity,BUDAPEST_precipitation,BUDAPEST_pressure,BUDAPEST_sunshine,BUDAPEST_temp_max,BUDAPEST_temp_mean,BUDAPEST_temp_min,DEBILT_cloud_cover,DEBILT_global_radiation,DEBILT_humidity,DEBILT_precipitation,DEBILT_pressure,DEBILT_sunshine,DEBILT_temp_max,DEBILT_temp_mean,DEBILT_temp_min,DUSSELDORF_cloud_cover,DUSSELDORF_global_radiation,DUSSELDORF_humidity,DUSSELDORF_precipitation,DUSSELDORF_pressure,DUSSELDORF_sunshine,DUSSELDORF_temp_max,DUSSELDORF_temp_mean,DUSSELDORF_temp_min,HEATHROW_cloud_cover,HEATHROW_global_radiation,HEATHROW_humidity,HEATHROW_precipitation,HEATHROW_pressure,HEATHROW_sunshine,HEATHROW_temp_max,HEATHROW_temp_mean,HEATHROW_temp_min,KASSEL_cloud_cover,KASSEL_global_radiation,KASSEL_humidity,KASSEL_precipitation,KASSEL_pressure,KASSEL_sunshine,KASSEL_temp_max,KASSEL_temp_mean,KASSEL_temp_min,LJUBLJANA_cloud_cover,LJUBLJANA_global_radiation,LJUBLJANA_humidity,LJUBLJANA_precipitation,LJUBLJANA_pressure,LJUBLJANA_sunshine,LJUBLJANA_temp_max,LJUBLJANA_temp_mean,LJUBLJANA_temp_min,MAASTRICHT_cloud_cover,MAASTRICHT_global_radiation,MAASTRICHT_humidity,MAASTRICHT_precipitation,MAASTRICHT_pressure,MAASTRICHT_sunshine,MAASTRICHT_temp_max,MAASTRICHT_temp_mean,MAASTRICHT_temp_min,MADRID_cloud_cover,MADRID_global_radiation,MADRID_humidity,MADRID_precipitation,MADRID_pressure,MADRID_sunshine,MADRID_temp_max,MADRID_temp_mean,MADRID_temp_min,MUNCHENB_cloud_cover,MUNCHENB_global_radiation,MUNCHENB_humidity,MUNCHENB_precipitation,MUNCHENB_pressure,MUNCHENB_sunshine,MUNCHENB_temp_max,MUNCHENB_temp_mean,MUNCHENB_temp_min,OSLO_cloud_cover,OSLO_global_radiation,OSLO_humidity,OSLO_precipitation,OSLO_pressure,OSLO_sunshine,OSLO_temp_max,OSLO_temp_mean,OSLO_temp_min,SONNBLICK_cloud_cover,SONNBLICK_global_radiation,SONNBLICK_humidity,SONNBLICK_precipitation,SONNBLICK_pressure,SONNBLICK_sunshine,SONNBLICK_temp_max,SONNBLICK_temp_mean,SONNBLICK_temp_min,STOCKHOLM_cloud_cover,STOCKHOLM_global_radiation,STOCKHOLM_humidity,STOCKHOLM_precipitation,STOCKHOLM_pressure,STOCKHOLM_sunshine,STOCKHOLM_temp_max,STOCKHOLM_temp_mean,STOCKHOLM_temp_min,VALENTIA_cloud_cover,VALENTIA_global_radiation,VALENTIA_humidity,VALENTIA_precipitation,VALENTIA_pressure,VALENTIA_sunshine,VALENTIA_temp_max,VALENTIA_temp_mean,VALENTIA_temp_min
0,7,0.32,0.85,0.09,1.02,0.70,10.90,6.50,0.80,1,0.88,0.81,0.00,1.02,7.00,7.90,3.70,-0.90,4,0.44,0.67,0.01,1.02,2.30,5.10,2.40,-0.40,7,0.07,0.85,0.25,1.00,0.00,11.00,9.30,7.40,8,0.12,0.83,0.08,1.02,0.00,11.50,10.00,7.00,7,0.13,0.91,0.22,1.00,0.00,8.30,10.60,9.40,8,0.28,0.82,0.48,1.01,1.60,9.40,7.90,3.90,8,0.20,1.00,0.00,1.02,0.00,0.50,-0.60,-1.90,7,0.22,0.83,0.32,1.01,1.00,11.10,9.50,8.50,6,0.53,0.92,0.00,1.03,1.40,10.80,7.60,4.40,5,0.20,0.67,0.10,1.03,0.00,10.40,6.90,1.10,8,0.04,0.98,1.14,1.00,0.00,5.90,4.90,3.80,4,0.48,0.73,0.01,1.03,2.30,-3.20,-5.90,-8.50,5,0.05,0.98,0.32,1.01,0.00,4.90,4.20,2.20,5,0.45,0.88,0.34,1.00,4.70,10.90,8.50,6.00
1,6,0.36,0.84,1.05,1.02,1.10,10.10,6.10,3.30,6,0.25,0.84,0.00,1.02,0.00,4.40,2.90,2.20,4,0.18,0.67,0.31,1.02,0.00,3.10,2.30,1.40,8,0.14,0.90,0.06,1.01,0.10,8.30,7.70,6.40,8,0.18,0.89,0.66,1.02,0.50,11.00,8.20,7.40,7,0.13,0.98,0.23,1.01,0.00,10.60,6.10,3.90,6,0.12,0.86,0.27,1.01,0.00,9.10,7.70,6.80,6,0.56,0.94,0.13,1.02,3.20,5.50,2.10,-1.30,8,0.17,0.92,1.34,1.01,0.40,9.90,8.60,7.50,7,0.46,0.86,0.00,1.03,0.90,12.20,9.80,7.40,6,0.61,0.72,0.30,1.03,5.10,10.20,6.20,4.20,8,0.04,0.62,0.00,1.01,0.00,4.90,3.40,2.80,6,0.21,0.97,0.61,1.03,0.00,-8.50,-9.50,-10.50,5,0.05,0.62,0.06,1.01,0.00,5.00,4.00,3.00,7,0.25,0.91,0.84,1.00,0.70,12.10,8.90,5.60
2,8,0.18,0.90,0.30,1.02,0.00,9.90,8.50,5.10,6,0.67,0.77,0.00,1.02,3.50,6.40,3.10,-0.50,4,0.30,0.67,0.00,1.02,0.60,5.30,2.70,1.70,6,0.28,0.92,0.01,1.02,3.00,9.90,6.80,4.60,7,0.12,0.95,0.07,1.02,0.00,9.10,7.10,6.90,8,0.15,0.96,0.07,1.02,0.10,12.20,8.40,6.10,8,0.12,0.91,0.60,1.01,0.00,8.00,6.50,6.00,8,0.20,0.96,0.12,1.02,0.00,6.30,4.60,0.90,7,0.12,0.97,0.46,1.02,0.00,9.90,6.90,5.50,5,0.63,0.90,0.00,1.03,2.30,10.80,8.60,6.40,6,0.20,0.91,0.30,1.03,0.00,8.00,5.80,4.00,8,0.04,0.69,0.08,1.02,0.00,3.10,1.90,0.60,8,0.21,0.93,3.20,1.03,0.00,-8.90,-9.50,-10.00,5,0.05,0.69,0.02,1.01,0.00,4.10,2.40,1.30,7,0.17,0.91,0.08,1.01,0.10,12.90,10.50,8.10
3,3,0.58,0.92,0.00,1.02,4.10,10.60,6.30,3.80,8,0.25,0.93,0.00,1.03,0.00,3.00,2.00,-2.00,4,0.19,0.67,0.00,1.02,0.00,4.40,2.00,0.40,8,0.08,0.95,0.09,1.03,0.00,10.10,6.70,3.60,8,0.12,0.86,0.02,1.02,0.00,8.00,6.80,3.60,8,0.13,0.98,0.00,1.02,0.00,8.90,9.40,6.70,6,0.12,0.87,0.00,1.03,0.00,6.50,5.80,5.20,6,0.49,0.94,0.00,1.02,2.20,7.00,3.20,1.00,7,0.16,0.89,0.00,1.03,0.30,10.00,7.00,3.00,0,1.16,0.75,0.00,1.03,8.70,16.10,10.30,4.50,6,0.20,0.90,0.01,1.04,0.00,5.40,3.90,3.20,8,0.04,0.98,0.35,1.02,0.00,4.90,3.00,0.40,5,0.22,0.93,1.10,1.04,0.00,-10.00,-11.50,-12.90,5,0.05,0.98,0.00,1.01,0.00,2.30,1.20,0.40,7,0.13,0.86,0.98,1.02,0.00,10.60,7.40,7.30
4,6,0.65,0.95,0.14,1.02,5.40,6.00,3.00,-0.70,8,0.25,0.99,0.06,1.03,0.00,2.80,2.00,0.70,4,0.19,0.67,0.00,1.02,0.00,5.30,2.50,1.10,6,0.04,0.90,0.39,1.02,0.00,11.20,8.00,2.40,7,0.12,0.92,0.62,1.02,0.00,11.00,7.70,6.20,5,0.30,0.84,0.00,1.03,2.10,7.20,8.90,8.90,7,0.13,0.86,0.71,1.03,0.00,6.00,5.40,3.70,7,0.20,0.94,0.00,1.02,0.00,4.80,3.60,0.40,7,0.12,0.92,0.56,1.03,0.00,11.10,8.10,2.50,2,1.10,0.64,0.00,1.03,7.80,16.00,12.10,8.20,5,0.65,0.85,0.96,1.04,5.60,6.00,1.80,-3.00,8,0.05,0.96,0.26,1.01,0.00,4.90,3.70,2.90,2,0.72,0.75,0.01,1.04,6.10,-6.50,-9.30,-12.00,5,0.05,0.96,1.32,1.01,0.00,4.30,3.30,0.80,3,0.46,0.80,0.00,1.03,5.70,8.40,5.70,3.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,1,1.34,0.79,0.22,1.02,7.70,21.40,15.90,11.40,2,1.57,0.68,0.18,1.03,5.70,24.40,18.20,12.10,4,1.41,0.67,0.14,1.02,5.40,16.20,11.70,7.90,8,1.13,0.84,0.22,1.02,2.80,19.40,15.70,12.80,8,1.13,0.75,0.20,1.02,6.40,21.40,17.80,13.60,5,1.18,0.87,0.16,1.01,1.90,18.90,16.40,11.90,4,1.14,0.77,0.19,1.02,4.00,13.10,9.10,5.40,4,1.35,0.80,0.37,1.03,5.90,21.10,14.70,12.10,8,1.17,0.67,0.20,1.02,5.30,22.60,18.60,14.10,8,1.89,0.52,0.12,1.02,5.30,23.90,20.00,16.20,2,1.37,0.76,0.26,1.03,9.70,22.20,14.30,8.30,8,1.06,0.98,0.21,1.01,0.10,12.00,9.70,5.80,2,1.56,0.84,0.47,1.03,4.70,2.60,0.60,-1.40,5,1.11,0.98,0.14,1.02,3.20,14.20,11.50,8.20,5,1.13,0.82,0.41,1.01,3.40,13.50,10.70,7.90
22946,6,1.34,0.77,0.22,1.02,5.40,21.90,16.70,14.30,0,1.57,0.68,0.18,1.03,5.70,21.20,15.90,10.60,4,1.41,0.67,0.14,1.02,5.40,16.20,11.70,7.90,8,1.13,0.84,0.22,1.02,3.50,20.50,16.00,10.30,7,1.13,0.71,0.20,1.02,4.90,23.90,19.40,15.40,4,1.18,0.82,0.16,1.02,4.20,21.80,15.80,12.70,3,1.14,0.77,0.19,1.02,4.00,13.10,9.10,5.40,3,1.35,0.82,0.37,1.03,4.50,19.80,12.90,9.80,7,1.17,0.70,0.20,1.02,5.00,23.50,18.90,15.80,8,1.89,0.51,0.12,1.02,3.90,23.50,19.10,14.70,6,1.37,0.70,0.26,1.03,7.70,26.10,16.10,8.90,8,1.06,1.00,0.21,1.01,0.00,11.70,10.90,8.80,5,1.56,0.84,0.47,1.03,4.70,4.00,2.30,0.60,5,1.11,1.00,0.14,1.01,0.80,14.30,12.50,11.00,5,1.13,0.82,0.41,1.01,3.40,13.50,10.70,7.90
22947,4,1.34,0.76,0.22,1.02,6.10,22.40,16.70,13.10,2,1.57,0.68,0.18,1.03,5.70,18.20,13.40,8.60,4,1.41,0.67,0.14,1.02,5.40,16.20,11.70,7.90,8,1.13,0.86,0.22,1.02,3.30,21.10,15.80,9.30,8,1.13,0.73,0.20,1.02,4.00,22.00,18.20,13.40,7,1.18,0.85,0.16,1.01,4.20,17.00,16.50,11.20,3,1.14,0.77,0.19,1.02,4.00,13.10,9.10,5.40,3,1.35,0.81,0.37,1.03,5.10,20.70,13.20,10.20,8,1.17,0.69,0.20,1.02,3.20,24.30,18.20,13.70,8,1.89,0.46,0.12,1.02,8.10,22.60,19.00,15.40,7,1.37,0.64,0.26,1.03,6.80,26.20,17.40,11.20,3,1.06,0.85,0.21,1.01,6.80,14.20,9.70,7.70,3,1.56,0.84,0.47,1.03,4.70,4.50,3.30,2.10,5,1.11,0.85,0.14,1.01,6.90,14.40,13.10,12.10,5,1.13,0.82,0.41,1.01,3.40,13.50,10.70,7.90
22948,5,1.34,0.80,0.22,1.02,5.80,21.10,15.40,11.60,1,1.57,0.68,0.18,1.02,5.70,20.90,15.00,9.10,4,1.41,0.67,0.14,1.02,5.40,16.20,11.70,7.90,8,1.13,0.87,0.22,1.02,6.00,20.20,14.40,10.30,7,1.13,0.73,0.20,1.02,6.90,21.10,16.70,11.90,5,1.18,0.86,0.16,1.01,0.60,17.50,15.20,13.40,3,1.14,0.77,0.19,1.02,4.00,13.10,9.10,5.40,3,1.35,0.77,0.37,1.02,5.70,23.10,14.00,10.00,8,1.17,0.73,0.20,1.02,6.80,21.40,16.30,12.80,5,1.89,0.66,0.12,1.02,3.10,18.30,15.70,13.10,6,1.37,0.75,0.26,1.03,8.30,23.50,14.50,9.20,5,1.06,0.94,0.21,1.01,2.90,8.10,5.90,2.10,3,1.56,0.84,0.47,1.03,4.70,4.10,3.40,2.70,5,1.11,0.94,0.14,1.02,8.40,12.40,7.50,5.10,5,1.13,0.82,0.41,1.01,3.40,13.50,10.70,7.90


In [6]:
outcomes = pd.read_csv(os.path.join(path, '02 Data Sets', 'Unsupervised', 'outcomes-cleaned.csv'))
outcomes

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22947,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22948,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
outcomes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22950 entries, 0 to 22949
Data columns (total 15 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   BASEL_pleasant_weather       22950 non-null  int64
 1   BELGRADE_pleasant_weather    22950 non-null  int64
 2   BUDAPEST_pleasant_weather    22950 non-null  int64
 3   DEBILT_pleasant_weather      22950 non-null  int64
 4   DUSSELDORF_pleasant_weather  22950 non-null  int64
 5   HEATHROW_pleasant_weather    22950 non-null  int64
 6   KASSEL_pleasant_weather      22950 non-null  int64
 7   LJUBLJANA_pleasant_weather   22950 non-null  int64
 8   MAASTRICHT_pleasant_weather  22950 non-null  int64
 9   MADRID_pleasant_weather      22950 non-null  int64
 10  MUNCHENB_pleasant_weather    22950 non-null  int64
 11  OSLO_pleasant_weather        22950 non-null  int64
 12  SONNBLICK_pleasant_weather   22950 non-null  int64
 13  STOCKHOLM_pleasant_weather   22950 non-null  i

---------------------------------------------------------------------------------------------------------------------------
## 3. Data preprocessing

In [7]:
# List of weather stations prefixes/ need help defining these as we have 135 total observations of 15 weather station groups
#by sets of 9 observations per group.
observations = [
    'BASEL_',
    'BELGRADE_',
    'BUDAPEST_',
    'DEBILT_',
    'DUSSELDORF_',
    'HEATHROW_',
    'KASSEL_',
    'LJUBLJANA_',
    'MAASTRICHT_',
    'MADRID_',
    'MUNCHENB_',
    'OSLO_',
    'SONNBLICK_',
    'STOCKHOLM_',
    'VALENTIA_'
]

In [8]:
# Dictionary mapping outcomes to weather stations for confusion matrix
labels = {
    0: 'BASEL_pleasant_weather',
    1: 'BELGRADE_pleasant_weather',
    2: 'BUDAPEST_pleasant_weather',
    3: 'DEBILT_pleasant_weather',
    4: 'DUSSELDORF_pleasant_weather',
    5: 'HEATHROW_pleasant_weather',
    6: 'KASSEL_pleasant_weather',
    7: 'LJUBLJANA_pleasant_weather',
    8: 'MAASTRICHT_pleasant_weather',
    9: 'MADRID_pleasant_weather',
    10: 'MUNCHENB_pleasant_weather',
    11: 'OSLO_pleasant_weather',
    12: 'SONNBLICK_pleasant_weather',
    13: 'STOCKHOLM_pleasant_weather',
    14: 'VALENTIA_pleasant_weather'
}

In [9]:
# Load data, process it, and format it appropriately for training a machine learning model.

In [15]:
# Drop rows containing NaN values in y dataset
#y_cleaned = outcomes[~np.isnan(outcomes).any(axis=1)]
outcomes[~np.isnan(outcomes).any(axis=1)]
# Check the shape of the cleaned y dataset
#print("Shape of cleaned y dataset:", y_cleaned.shape)

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22947,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22948,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [16]:
# Reshape X dataset
X = weather.values.reshape(-1, 15, 9)

# Reshape y dataset 
y = y_cleaned.values.reshape(-1, 15)

# Turn into 1D array
###y = np.argmax(outcomes, axis = 1)

X_train, X_test, y2_train, y2_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
print(X_train.shape, y2_train.shape)
print(X_test.shape, y2_test.shape)

(18360, 15, 9) (18360, 15)
(4590, 15, 9) (4590, 15)


In [18]:
X_train

array([[[ 7.  ,  1.41,  0.72, ..., 25.8 , 21.2 , 17.6 ],
        [ 3.  ,  2.76,  0.49, ..., 28.3 , 23.  , 13.8 ],
        [ 7.  ,  1.2 ,  0.64, ..., 23.2 , 19.5 , 15.5 ],
        ...,
        [ 7.  ,  1.65,  0.88, ...,  5.3 ,  3.5 ,  1.7 ],
        [ 6.  ,  1.8 ,  0.65, ..., 19.7 , 15.1 , 12.3 ],
        [ 7.  ,  1.39,  0.88, ..., 18.8 , 16.  , 13.2 ]],

       [[ 2.  ,  3.23,  0.62, ..., 25.6 , 18.5 , 10.7 ],
        [ 0.  ,  3.38,  0.48, ..., 24.  , 19.  , 14.  ],
        [ 3.  ,  2.96,  0.52, ..., 23.2 , 18.3 , 12.8 ],
        ...,
        [ 2.  ,  3.76,  0.87, ...,  5.8 ,  0.9 , -4.  ],
        [ 4.  ,  2.7 ,  0.6 , ..., 21.  , 15.6 , 11.7 ],
        [ 6.  ,  1.56,  0.91, ..., 16.1 , 14.2 , 12.4 ]],

       [[ 5.  ,  2.38,  0.84, ..., 25.9 , 18.3 , 12.4 ],
        [ 2.  ,  2.73,  0.68, ..., 28.3 , 22.7 , 18.5 ],
        [ 2.  ,  2.94,  0.5 , ..., 30.  , 23.9 , 17.8 ],
        ...,
        [ 5.  ,  3.02,  0.74, ...,  8.4 ,  4.3 ,  0.2 ],
        [ 5.  ,  2.89,  0.54, ..., 16.7 , 12.

In [19]:
len(X_train[0])

15

In [20]:
len(X_train[0][0])

9

In [21]:
y2_train

array([[0, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0]], dtype=int64)

In [22]:
from sklearn.utils.multiclass import type_of_target
type_of_target(y2_train)

'multilabel-indicator'

In [23]:
#Use argmax to get rid of one-hot encoding and supply the numerical value.
y_train = np.argmax(y2_train, axis = 1)
print(y_train.shape)
y_train

#Turn y_test into one-hot format
#y_train=np_utils.to_categorical(y_test)
#print(y_train)

(18360,)


array([1, 0, 2, ..., 1, 9, 1], dtype=int64)

In [24]:
type_of_target(y_train)

'multiclass'

---------------------------------------------------------------------------------------------------------------------------
## 4. Bayesian optimization function

In [25]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 15 #_count_classes(y_train)
# Make scorer accuracy
score_acc = make_scorer(accuracy_score)

In [26]:
# Create function
def bay_area(neurons, activation, kernel, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate): 
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','SGD']
    optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                 'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                 'Adagrad':Adagrad(learning_rate=learning_rate), 'Adamax':Adamax(learning_rate=learning_rate),
                 'Nadam':Nadam(learning_rate=learning_rate), 'Ftrl':Ftrl(learning_rate=learning_rate)}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential', LeakyReLU,'relu']
    
    neurons = round(neurons)
    kernel = round(kernel)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    
    def cnn_model():
        model = Sequential()
        model.add(Conv1D(neurons, kernel_size=kernel,activation=activation, input_shape=(timesteps, input_dim)))
        #model.add(Conv1D(32, kernel_size=1,activation='relu', input_shape=(timesteps, input_dim)))
        
        if normalization > 0.5:
            model.add(BatchNormalization())
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation)) #(neurons, activation=activation))
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=123))
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation))
        model.add(MaxPooling1D())
        model.add(Flatten())
        model.add(Dense(n_classes, activation='softmax')) #sigmoid softmax
        #model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        return model
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=2, patience=20)
    nn = KerasClassifier(build_fn=cnn_model, epochs=epochs, batch_size=batch_size, verbose=2)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    score = cross_val_score(nn, X_train, y_train, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [27]:
start = time.time()
params ={
    'neurons': (10, 50),            # Reduced upper limit
    'kernel': (1, 3),
    'activation':(0, 9), 
    'optimizer':(0,7), 
    'learning_rate':(0.001, 0.1),   # Reduced range
    'batch_size': (200, 500),       # Reduced upper limit
    'epochs':(20, 50),              # Reduced upper limit
    'layers1':(1,2),                # Simplified to 1 or 2 layers
    'layers2':(1,2),                # Simplified to 1 or 2 layers
    'normalization':(0,1),
    'dropout':(0,1),
    'dropout_rate':(0,0.3)
}
# Run Bayesian Optimization
nn_opt = BayesianOptimization(bay_area, params, random_state=42)
nn_opt.maximize(init_points=15, n_iter=100)
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 1/25
31/31 - 3s - 96ms/step - accuracy: 0.6228 - loss: 1.6613
Epoch 2/25
31/31 - 1s - 16ms/step - accuracy: 0.6433 - loss: 1.1927
Epoch 3/25
31/31 - 1s - 17ms/step - accuracy: 0.6433 - loss: 1.1770
Epoch 4/25
31/31 - 0s - 15ms/step - accuracy: 0.6433 - loss: 1.1712
Epoch 5/25
31/31 - 0s - 13ms/step - accuracy: 0.6433 - loss: 1.1687
Epoch 6/25
31/31 - 0s - 15ms/step - accuracy: 0.6433 - loss: 1.1671
Epoch 7/25
31/31 - 0s - 16ms/step - accuracy: 0.6433 - loss: 1.1663
Epoch 8/25
31/31 - 1s - 17ms/step - accuracy: 0.6433 - loss: 1.1658
Epoch 9/25
31/31 - 1s - 17ms/step - accuracy: 0.6433 - loss: 1.1654
Epoch 10/25
31/31 - 0s - 15ms/step - accuracy: 0

ValueError: Input y contains NaN.

In [29]:
# Display the best parameters
optimum = nn_opt.max['params']
learning_rate = optimum['learning_rate']
activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
               'elu', 'exponential', LeakyReLU,'relu']
optimum['activation'] = activationL[round(optimum['activation'])]
optimum['batch_size'] = round(optimum['batch_size']) 
optimum['epochs'] = round(optimum['epochs'])
optimum['layers1'] = round(optimum['layers1'])
optimum['layers2'] = round(optimum['layers2'])
optimum['neurons'] = round(optimum['neurons'])
optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','Adam']
optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
             'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
             'Adagrad':Adagrad(learning_rate=learning_rate), 'Adamax':Adamax(learning_rate=learning_rate),
             'Nadam':Nadam(learning_rate=learning_rate), 'Ftrl':Ftrl(learning_rate=learning_rate)}
optimum['optimizer'] = optimizerD[optimizerL[round(optimum['optimizer'])]]
optimum

{'activation': 'softsign',
 'batch_size': 485,
 'dropout': 0.7319939418114051,
 'dropout_rate': 0.17959754525911098,
 'epochs': 25,
 'kernel': 1.3119890406724053,
 'layers1': 1,
 'layers2': 2,
 'learning_rate': 0.06051038616257767,
 'neurons': 38,
 'normalization': 0.020584494295802447,
 'optimizer': <keras.src.optimizers.ftrl.Ftrl at 0x2432775c450>}

**Optimum parameters:**

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |

**last used params**

| 1         | nan       | 3.371     | 485.2     | 0.732     | 0.1796    | 24.68     | 1.312     | 1.058     | 1.866     | 0.06051   | 38.32     | 0.02058   | 6.789     |

**best params**

| 3         | nan       | 4.105     | 435.6     | 0.1997    | 0.1543    | 37.77     | 1.093     | 1.608     | 1.171     | 0.00744   | 47.96     | 0.9656    | 5.659     |

---------------------------------------------------------------------------------------------------------------------------
## 5. Build and run CNN keras model

In [30]:
# Adjust model hyperparameters
epochs = 38
batch_size = 435
#n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 15 #_count_classes(y_train)
layers1 = 2
layers2 = 1
learning_rate = 0.00744
activation = 'softsign'
kernel = 1
neurons = 48
normalization = 0.9656
dropout = 0.1997
dropout_rate = 0.1543
optimizer = 'Ftrl'

# Implement complex layers
model = Sequential()
model.add(Conv1D(neurons, kernel_size=kernel, activation=activation, input_shape=(timesteps, input_dim)))
if normalization > 0.5:
    model.add(BatchNormalization())
for i in range(layers1):
    model.add(Dense(neurons, activation=activation))
if dropout > 0.5:
    model.add(Dropout(dropout_rate, seed=123))
for i in range(layers2):
    model.add(Dense(neurons, activation=activation))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) #softmax sigmoid

# Build the model
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) #binary_crossentropy

In [31]:
# Run the model
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/38
43/43 - 4s - 89ms/step - accuracy: 0.6282 - loss: 2.7057
Epoch 2/38
43/43 - 1s - 16ms/step - accuracy: 0.6433 - loss: 2.6924
Epoch 3/38
43/43 - 1s - 17ms/step - accuracy: 0.6433 - loss: 2.6862
Epoch 4/38
43/43 - 1s - 32ms/step - accuracy: 0.6433 - loss: 2.6811
Epoch 5/38
43/43 - 1s - 16ms/step - accuracy: 0.6433 - loss: 2.6766
Epoch 6/38
43/43 - 1s - 16ms/step - accuracy: 0.6433 - loss: 2.6725
Epoch 7/38
43/43 - 1s - 16ms/step - accuracy: 0.6433 - loss: 2.6686
Epoch 8/38
43/43 - 1s - 16ms/step - accuracy: 0.6433 - loss: 2.6650
Epoch 9/38
43/43 - 1s - 15ms/step - accuracy: 0.6433 - loss: 2.6614
Epoch 10/38
43/43 - 1s - 15ms/step - accuracy: 0.6433 - loss: 2.6577
Epoch 11/38
43/43 - 1s - 16ms/step - accuracy: 0.6433 - loss: 2.6539
Epoch 12/38
43/43 - 1s - 15ms/step - accuracy: 0.6433 - loss: 2.6498
Epoch 13/38
43/43 - 1s - 15ms/step - accuracy: 0.6433 - loss: 2.6453
Epoch 14/38
43/43 - 1s - 14ms/step - accuracy: 0.6433 - loss: 2.6400
Epoch 15/38
43/43 - 1s - 14ms/step - accura

<keras.src.callbacks.history.History at 0x2432c150dd0>

- **Notes:**
- In the optimum parameters the highest accuracy score was around 97% with loss converging to a minimum but these optimal values were not printed out due to stop  iteration.
  
- Lower loss but stagnant accuracy can indicate **overfitting** of the data.

- Stop iteration occurs for init_point=3 and n_iterations=100 as well; optimum values are shown and losses are not all NANS.
- The input data shape is correct for both data sets.

---------------------------------------------------------------------------------------------------------------------------
## 6. Run confusion matrix
        - check accuracy and loss

In [32]:
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([labels[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([labels[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

In [33]:
# Evaluate
print(confusion_matrix(y2_test, model.predict(X_test)))

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Pred                         BASEL_pleasant_weather
True                                               
BASEL_pleasant_weather                         2955
BELGRADE_pleasant_weather                       879
BUDAPEST_pleasant_weather                       162
DEBILT_pleasant_weather                          64
DUSSELDORF_pleasant_weather                      25
HEATHROW_pleasant_weather                        67
KASSEL_pleasant_weather                           9
LJUBLJANA_pleasant_weather                       46
MAASTRICHT_pleasant_weather                       7
MADRID_pleasant_weather                         360
MUNCHENB_pleasant_weather                         8
OSLO_pleasant_weather                             4
STOCKHOLM_pleasant_weather                        3
VALENTIA_pleasant_weather                         1


In [34]:
model.summary()

---------------------------------------------------------------------------------------------------------------------------