# Setup environment

In [1]:
# Data reading in Dataframe format and data preprocessing
import pandas as pd
from pandas import read_csv
from pandas import DataFrame
from pandas import concat

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Linear algebra operations
import numpy as np

# Machine learning models and preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing

# Deep learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Sequential, layers, callbacks
from tensorflow.keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional

# Epiweek
from epiweeks import Week, Year

# Date
from datetime import date as convert_to_date

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Input locations, relative to the notebook's working directory.
# `embeddings` is the CSV handed to read_features().
embeddings = 'Embeddings/embeddings_autoencoder_1024features.csv'
# `labels` is the CSV handed to read_labels(); the 'Label_CSV_All_Municipality'
# substring in its name is what selects the long-format branch of that function.
labels = 'Tabular_data/Label_CSV_All_Municipality.csv'
# Municipality used to filter both the feature rows and the label rows.
MUNICIPALITY = 'Ibagué'

# Read Data

In [4]:
def epiweek_from_date(image_date):
    """Convert a dash-separated date string into an integer epiweek.

    The first three ``-``-separated fields are read as year, month and day.
    Non-digit characters inside each field are discarded before parsing.

    Parameters
    ----------
    image_date : str
        Date string such as ``'2016-01-04'``.

    Returns
    -------
    int
        ``int(str(Week.fromdate(date)))`` for the parsed calendar date.
    """
    fields = image_date.split('-')

    # Keep only the digit characters of each field, in year / month / day order.
    year, month, day = (
        int(''.join(ch for ch in fields[position] if str.isdigit(ch)))
        for position in range(3)
    )

    # Build the calendar date and let epiweeks resolve the epidemiological week.
    calendar_day = convert_to_date(year, month, day)
    return int(str(Week.fromdate(calendar_day)))

### 1. Features

In [5]:
def read_features(path, Municipality = None):
    """Load the embedding features and index them by integer epiweek.

    Parameters
    ----------
    path : str
        CSV file with a ``Date`` column and, when filtering is requested, a
        ``Municipality Code`` column.
    Municipality : str, optional
        When given, only rows whose ``Municipality Code`` equals this value are
        kept and that column is removed from the result.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by epiweek, with the epiweek as an unnamed index.
    """
    df = pd.read_csv(path)

    if Municipality:
        # Report the municipality actually requested (the message used to
        # always say "Medellin").
        print(f'Obtaining dataframe for the city of {Municipality} only...')
        # Explicit copy so later column assignments do not act on a slice.
        df = df[df['Municipality Code'] == Municipality].copy()

    # Replace the raw date strings with integer epiweeks.
    df['Date'] = df['Date'].apply(epiweek_from_date)

    df = df.sort_values(by=['Date']).set_index('Date')

    if Municipality:
        df = df.drop(columns=['Municipality Code'])

    df.index.name = None
    return df

In [6]:
# Load the embeddings of the selected municipality, indexed by integer epiweek.
features_df = read_features(path=embeddings, Municipality=MUNICIPALITY)
features_df

Obtaining dataframe for the city of Medellin only...


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
201544,0.0,0.000000,0.0,0.0,6.194829,0.0,29.783459,0.0,29.06569,14.480082,...,0.000000,182.28792,0.0,6.770391,0.0,0.0,3.103770,22.228394,14.962168,0.0
201545,0.0,0.000000,0.0,0.0,6.194829,0.0,29.783459,0.0,29.06569,14.480082,...,0.000000,182.28792,0.0,6.770391,0.0,0.0,3.103770,22.228394,14.962168,0.0
201546,0.0,0.000000,0.0,0.0,6.194829,0.0,29.783459,0.0,29.06569,14.480082,...,0.000000,182.28792,0.0,6.770391,0.0,0.0,3.103770,22.228394,14.962168,0.0
201547,0.0,0.000000,0.0,0.0,6.194829,0.0,29.783459,0.0,29.06569,14.480082,...,0.000000,182.28792,0.0,6.770391,0.0,0.0,3.103770,22.228394,14.962168,0.0
201548,0.0,0.000000,0.0,0.0,6.194829,0.0,29.783459,0.0,29.06569,14.480082,...,0.000000,182.28792,0.0,6.770391,0.0,0.0,3.103770,22.228394,14.962168,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201848,0.0,162.968550,0.0,0.0,0.000000,0.0,0.000000,0.0,0.00000,0.000000,...,0.000000,483.99040,0.0,0.000000,0.0,0.0,37.917923,194.997240,0.000000,0.0
201849,0.0,98.883835,0.0,0.0,0.000000,0.0,0.000000,0.0,0.00000,0.000000,...,60.541855,93.76406,0.0,0.000000,0.0,0.0,31.353489,95.447860,0.000000,0.0
201850,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.00000,0.000000,...,0.000000,393.65164,0.0,0.000000,0.0,0.0,0.000000,47.705810,0.000000,0.0
201851,0.0,0.000000,0.0,0.0,20.526907,0.0,49.732930,0.0,48.92910,23.647943,...,0.000000,300.74010,0.0,14.985338,0.0,0.0,2.885499,10.678081,32.098557,0.0


### 2. Labels

In [7]:
def get_epiweek(name):
    """Turn a ``'<year>/w<week>'`` label into an integer epiweek.

    Parameters
    ----------
    name : str
        Text whose first ``/``-separated field is the year and whose second
        field is the week number, optionally containing the letter ``w``.

    Returns
    -------
    int
        ``int(str(Week(year, week)))``.
    """
    parts = name.split('/')

    # The week field is parsed first, after removing every 'w' character.
    week_number = int(parts[1].replace('w', ''))
    year_number = int(parts[0])

    return int(str(Week(year_number, week_number)))

In [8]:
def read_labels(path, Municipality = None):
    """Load the label table and optionally extract one municipality's series.

    Two input layouts are handled:

    * Files with more than 678 columns: the two municipality columns plus the
      last 676 columns are kept, and those column names are converted with
      ``get_epiweek``. (NOTE(review): 678/676 look tied to one specific
      file layout - confirm before reusing with other files.)
    * Files whose path contains ``'Label_CSV_All_Municipality'``: long format
      with ``epiweek`` and ``final_cases_label`` columns, pivoted to one
      column per epiweek. Labels are coded Stable = 0, Increased = 1,
      Decreased = 2.

    Parameters
    ----------
    path : str
        CSV file to read.
    Municipality : str, optional
        Value of the ``Municipality`` column to select. When given, the result
        is a single-column frame named ``Labels`` with one row per week.

    Returns
    -------
    pandas.DataFrame
        The (possibly reshaped) label table.

    Raises
    ------
    ValueError
        If ``Municipality`` is given and does not match exactly one row.
    """
    key_cols = ['Municipality code', 'Municipality']
    df = pd.read_csv(path)

    if df.shape[1] > 678:
        df = pd.concat([df[key_cols], df.iloc[:, -676:]], axis=1)
        week_cols = df.columns[2:]
        df = df.rename(columns={col: get_epiweek(col) for col in week_cols})

    if 'Label_CSV_All_Municipality' in path:
        df = df[['epiweek'] + key_cols + ['final_cases_label']]

        # Convert the epiweek text to its integer form; assign() returns a new
        # frame, so nothing is written into a slice of the raw data.
        df = df.assign(epiweek=df['epiweek'].apply(get_epiweek))

        # Keep the first row of every (epiweek, municipality) combination.
        df = df.drop_duplicates(subset=['epiweek'] + key_cols)

        # Stable = 0, Increased = 1, Decreased = 2
        df = df.assign(
            final_cases_label=df['final_cases_label'].replace(
                {'Stable': 0, 'Increased': 1, 'Decreased': 2}
            )
        )

        # One row per municipality, one column per epiweek.
        df = df.pivot(index=key_cols, columns='epiweek', values='final_cases_label')
        df = df.reset_index()

    if Municipality:
        selected = df[df['Municipality'] == Municipality]
        if selected.shape[0] != 1:
            # Fail with a readable message instead of a column-length mismatch.
            raise ValueError(
                f"Expected exactly one row for municipality {Municipality!r}, "
                f"found {selected.shape[0]}."
            )

        df = (
            selected
            .drop(columns=['Municipality code'])
            .rename(columns={'Municipality': 'Municipality Code'})
            .set_index('Municipality Code')
            .T
        )

        df.index.name = None
        df.columns = ['Labels']

    return df

In [9]:
# Weekly label codes of the selected municipality (single 'Labels' column).
labels_df = read_labels(path=labels, Municipality=MUNICIPALITY)
# Keep a handle on the integer-coded labels; the class-weight cell reads it later.
labels_df_orig = labels_df
# One-hot encode the codes: one indicator column per distinct label value.
labels_df = pd.get_dummies(labels_df['Labels'])
labels_df

Unnamed: 0,0,1,2
201601,1,0,0
201602,1,0,0
201603,0,1,0
201604,0,0,1
201605,0,1,0
...,...,...,...
201848,0,1,0
201849,0,0,1
201850,1,0,0
201851,1,0,0


# Data preparation

In [10]:
# Number of one-hot label columns; later cells use it to tell label columns
# (the trailing ones after the merge) apart from feature columns.
n_labels = labels_df.shape[1]

In [11]:
# Inner-join features and one-hot labels on their shared epiweek index; weeks
# present in only one of the two frames are dropped. Label columns end up last.
dengue_df = features_df.merge(labels_df, how='inner', left_index=True, right_index=True)
dengue_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1017,1018,1019,1020,1021,1022,1023,0.1,1.1,2.1
201601,0.0,128.888630,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.0,0.0,64.878235,52.031696,0.000000,0.0,1,0,0
201602,0.0,128.888630,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.0,0.0,64.878235,52.031696,0.000000,0.0,1,0,0
201603,0.0,32.973410,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.0,0.0,12.834430,16.112963,0.000000,0.0,0,1,0
201604,0.0,124.197480,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.0,0.0,27.064438,17.626286,0.000000,0.0,0,0,1
201605,0.0,0.000000,0.0,0.0,7.445062,0.0,32.734035,0.0,30.212196,19.450012,...,8.168530,0.0,0.0,12.651873,23.180506,15.441554,0.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201848,0.0,162.968550,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.0,0.0,37.917923,194.997240,0.000000,0.0,0,1,0
201849,0.0,98.883835,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.0,0.0,31.353489,95.447860,0.000000,0.0,0,0,1
201850,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.0,0.0,0.000000,47.705810,0.000000,0.0,1,0,0
201851,0.0,0.000000,0.0,0.0,20.526907,0.0,49.732930,0.0,48.929100,23.647943,...,14.985338,0.0,0.0,2.885499,10.678081,32.098557,0.0,1,0,0


### Train Test split

In [12]:
def train_test_split(df, train_percentage = 80):
    """Split a time-ordered frame into leading train rows and trailing test rows.

    The split is positional (no shuffling) because the rows form a sequence.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows in chronological order.
    train_percentage : int or float, default 80
        Percentage of rows, counted from the top, assigned to the train set.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, test_df)``, returned as independent copies so that later
        in-place scaling cannot write through to ``df``.
    """
    size = df.shape[0]
    # Multiply before dividing: ``size * (pct / 100)`` can land just below the
    # intended integer (100 rows at 29% evaluates to 28.999...) and truncate
    # to one row too few.
    split = int(size * train_percentage // 100)

    # First `split` rows for training, the remainder for testing.
    train_df = df.iloc[:split, :].copy()
    test_df = df.iloc[split:, :].copy()

    print(f'The train shape is: {train_df.shape}')
    print(f'The test shape is: {test_df.shape}')

    return train_df, test_df

In [13]:
# Chronological split: the first 80% of the weeks train the model, the rest test it.
train_df, test_df = train_test_split(dengue_df, train_percentage = 80)

The train shape is: (124, 1027)
The test shape is: (32, 1027)


### Normalize features

In [14]:
# Normalize train data and create the scaler
def normalize_train_features(df, feature_range=(-1, 1), n_labels=None):
    """Fit one MinMaxScaler per feature column and scale the training frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data; when ``n_labels`` is given, the last ``n_labels``
        columns are treated as labels and left unscaled.
    feature_range : tuple, default (-1, 1)
        Target interval handed to every ``MinMaxScaler``.
    n_labels : int, optional
        Number of trailing label columns to skip. ``None`` (or 0) scales
        every column.

    Returns
    -------
    tuple
        ``(scaled_df, scalers)`` where ``scalers`` maps ``'scaler_<column>'``
        to the fitted scaler of that column.
    """
    # Work on a copy: the caller usually passes a slice of a larger frame, and
    # writing scaled columns into it would mutate (or warn about) that frame.
    df = df.copy()

    n_features = max(df.shape[1] - n_labels, 0) if n_labels else df.shape[1]

    scalers = {}
    for column in df.columns[:n_features]:
        # Scalers expect a 2-D array with a single feature column.
        values = df[column].values.reshape(-1, 1)
        scaler = MinMaxScaler(feature_range=feature_range)
        # Fit on this column only, then flatten back to 1-D for assignment.
        df[column] = scaler.fit_transform(values).ravel()
        # Same key as 'scaler_' + column for string labels; also works for
        # non-string column labels.
        scalers[f'scaler_{column}'] = scaler

    print(f' Min values are: ')
    print(df.min())
    print(f' Max values are: ')
    print(df.max())

    return df, scalers


""" If you want to use the same scaler used in train, you can use this function"""
def normalize_test_features(df, scalers=None, n_labels=None):
    """Scale a frame with scalers that were already fitted on the train set.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to scale; when ``n_labels`` is given, the last ``n_labels``
        columns are treated as labels and left unscaled.
    scalers : dict
        Mapping ``'scaler_<column>'`` -> fitted scaler exposing
        ``transform``, as returned by ``normalize_train_features``.
    n_labels : int, optional
        Number of trailing label columns to skip. ``None`` (or 0) scales
        every column.

    Returns
    -------
    pandas.DataFrame
        A scaled copy of ``df``; the input frame is not modified.

    Raises
    ------
    TypeError
        If ``scalers`` is missing or empty.
    KeyError
        If a feature column has no matching scaler.
    """
    if not scalers:
        raise TypeError("You should provide a dict of scalers keyed as 'scaler_<column>'.")

    # Work on a copy so the caller's frame (often a slice) is never mutated.
    df = df.copy()

    n_features = max(df.shape[1] - n_labels, 0) if n_labels else df.shape[1]

    for column in df.columns[:n_features]:
        # Scalers expect a 2-D array with a single feature column.
        values = df[column].values.reshape(-1, 1)
        scaler = scalers[f'scaler_{column}']
        # Only transform: the scaler keeps the range learned from training data.
        scaled_column = scaler.transform(values)
        df[column] = np.reshape(scaled_column, len(scaled_column))

    print(f' Min values are: ')
    print(df.min())
    print(f' Max values are: ')
    print(df.max())

    return df

In [15]:
# Target interval for every scaled feature column.
feature_range = (-1, 1)

# Fit one scaler per feature column on the training rows only; the trailing
# n_labels one-hot columns are skipped. `scalers` is reused for the test set.
train_df, scalers = normalize_train_features(train_df, feature_range=feature_range, n_labels=n_labels)

#print(f'The scalers are: {scalers}')

train_df.head()

 Min values are: 
0      -1.0
1      -1.0
2      -1.0
3      -1.0
4      -1.0
       ... 
1022   -1.0
1023   -1.0
0       0.0
1       0.0
2       0.0
Length: 1027, dtype: float64
 Max values are: 
0      -1.0
1       1.0
2      -1.0
3      -1.0
4       1.0
       ... 
1022    1.0
1023   -1.0
0       1.0
1       1.0
2       1.0
Length: 1027, dtype: float64


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1017,1018,1019,1020,1021,1022,1023,0.1,1.1,2.1
201601,-1.0,0.325929,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.028346,-0.400411,-1.0,-1.0,1,0,0
201602,-1.0,0.325929,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.028346,-0.400411,-1.0,-1.0,1,0,0
201603,-1.0,-0.660789,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.807784,-0.814322,-1.0,-1.0,0,1,0
201604,-1.0,0.277669,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.594667,-0.796883,-1.0,-1.0,0,0,1
201605,-1.0,-1.0,-1.0,-1.0,-0.180936,-1.0,0.297773,-1.0,0.195502,0.466559,...,-0.045788,-1.0,-1.0,-0.810518,-0.732879,-0.032108,-1.0,0,1,0


In [16]:
# Scale the test rows with the scalers fitted on train (no re-fitting), so test
# values can fall outside feature_range when they exceed the range seen in training.
test_df = normalize_test_features(test_df, scalers=scalers, n_labels=n_labels)
test_df.head()

 Min values are: 
0      -1.0
1      -1.0
2      -1.0
3      -1.0
4      -1.0
       ... 
1022   -1.0
1023   -1.0
0       0.0
1       0.0
2       0.0
Length: 1027, dtype: float64
 Max values are: 
0      -1.000000
1       0.676522
2      -1.000000
3      -1.000000
4       1.258256
          ...   
1022    1.011970
1023   -1.000000
0       1.000000
1       1.000000
2       1.000000
Length: 1027, dtype: float64


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1017,1018,1019,1020,1021,1022,1023,0.1,1.1,2.1
201821,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.867197,-0.637486,-1.0,-1.0,1,0,0
201822,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.806547,-0.826607,-1.0,-1.0,0,1,0
201823,-1.0,-0.949499,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.934207,-0.864672,-1.0,-1.0,1,0,0
201824,-1.0,-0.266053,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.98659,-0.400142,-1.0,-1.0,0,1,0
201825,-1.0,0.01053,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-0.835434,-1.0,-1.0,-1.0,0,0,1


### Prepare data for time series supervised learning (function to create sliding window)

In [17]:
# prepare data for time series

# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a sliding-window supervised-learning table.

    Parameters
    ----------
    data : list or 2-D array-like / DataFrame
        Observations in time order; a plain list counts as one variable.
    n_in : int, default 1
        Number of lagged steps (t-n_in ... t-1) placed first in each row.
    n_out : int, default 1
        Number of forecast steps (t ... t+n_out-1) placed after the lags.
    dropnan : bool, default True
        Drop the rows that contain NaN values introduced by shifting.

    Returns
    -------
    pandas.DataFrame
        Columns named ``var<k>(t-<i>)``, ``var<k>(t)`` and ``var<k>(t+<i>)``.
    """
    n_vars = data.shape[1] if type(data) is not list else 1
    frame = DataFrame(data)
    shifted_frames = []
    column_labels = []

    # Lagged inputs, oldest lag first: (t-n_in, ..., t-1).
    for lag in range(n_in, 0, -1):
        shifted_frames.append(frame.shift(lag))
        column_labels.extend(('var%d(t-%d)' % (j+1, lag)) for j in range(n_vars))

    # Current and future steps: (t, t+1, ..., t+n_out-1).
    for step in range(0, n_out):
        shifted_frames.append(frame.shift(-step))
        if step == 0:
            column_labels.extend(('var%d(t)' % (j+1)) for j in range(n_vars))
        else:
            column_labels.extend(('var%d(t+%d)' % (j+1, step)) for j in range(n_vars))

    # Place all shifted copies side by side under the generated names.
    combined = concat(shifted_frames, axis=1)
    combined.columns = column_labels

    # Shifting leaves NaN at the edges of the series; optionally discard those rows.
    if dropnan:
        combined.dropna(inplace=True)
    return combined

In [18]:
# Length of the look-back window, in rows of the series.
# NOTE(review): rows are indexed by epiweek, so this looks like 10 weeks rather
# than 10 days - confirm. reshape_tensor() also reads this global.
days = 10

# Frame train and test separately as supervised-learning tables: each row holds
# the previous `days` rows followed by the current row.
train = series_to_supervised(train_df, n_in=days)
test = series_to_supervised(test_df, n_in=days)

DataFrame(train)

Unnamed: 0,var1(t-10),var2(t-10),var3(t-10),var4(t-10),var5(t-10),var6(t-10),var7(t-10),var8(t-10),var9(t-10),var10(t-10),...,var1018(t),var1019(t),var1020(t),var1021(t),var1022(t),var1023(t),var1024(t),var1025(t),var1026(t),var1027(t)
201611,-1.0,0.325929,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000,...,-1.0,-1.0,-1.0,-1.000000,0.620904,-1.0,-1.0,1,0,0
201612,-1.0,0.325929,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000,...,-1.0,-1.0,-1.0,-1.000000,0.620904,-1.0,-1.0,0,0,1
201613,-1.0,-0.660789,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000,...,-1.0,-1.0,-1.0,-1.000000,0.620904,-1.0,-1.0,0,1,0
201614,-1.0,0.277669,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000,...,-1.0,-1.0,-1.0,-1.000000,0.620904,-1.0,-1.0,0,0,1
201615,-1.0,-1.000000,-1.0,-1.0,-0.180936,-1.0,0.297773,-1.0,0.195502,0.466559,...,-1.0,-1.0,-1.0,-0.578439,-1.000000,-1.0,-1.0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201816,-1.0,0.479237,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000,...,-1.0,-1.0,-1.0,-0.995322,-0.673927,-1.0,-1.0,1,0,0
201817,-1.0,1.000000,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000,...,-1.0,-1.0,-1.0,-0.668477,-1.000000,-1.0,-1.0,0,1,0
201818,-1.0,0.312099,-1.0,-1.0,-1.000000,-1.0,-1.000000,-1.0,-1.000000,-1.000000,...,-1.0,-1.0,-1.0,-0.808112,-0.683993,-1.0,-1.0,1,0,0
201819,-1.0,-1.000000,-1.0,-1.0,-0.882780,-1.0,-0.011168,-1.0,-0.059641,-0.009608,...,-1.0,-1.0,-1.0,0.272854,-0.963339,-1.0,-1.0,1,0,0


### Features and Labels Set

In [19]:
def features_labels_set(timeseries_data, original_df, n_labels):
    """Split a sliding-window table into model inputs and targets.

    Parameters
    ----------
    timeseries_data : pandas.DataFrame
        Output of ``series_to_supervised``: lagged blocks followed by the
        block of the current week.
    original_df : pandas.DataFrame
        Frame the windows were built from; its column count (features plus
        labels) is the width of one time step.
    n_labels : int
        Number of trailing label columns in the current-week block.

    Returns
    -------
    tuple
        ``(features_set, labels_set, n_features)`` with both sets as NumPy arrays.
    """
    # Width of one time step: every column of the original frame, labels included.
    n_features = original_df.shape[1]
    window_values = timeseries_data.values

    # Inputs: everything except the current-week block. The other variables of
    # week t are left out so the array can later be reshaped into a 3D tensor.
    features_set = DataFrame(window_values[:, :-n_features]).to_numpy()
    print(f'The shape of the features is {features_set.shape}')

    # Targets: the label columns of the current week.
    labels_set = DataFrame(window_values[:, -n_labels:]).to_numpy()
    print(f'The shape of the labels is {labels_set.shape}')

    return features_set, labels_set, n_features

In [20]:
# Train inputs/targets: lagged blocks as inputs, current-week label columns as targets.
print('Train:')
train_X, train_y, n_features = features_labels_set(timeseries_data=train, original_df=dengue_df, n_labels=n_labels)

# Same split for the test windows; n_features is identical in both calls because
# it is taken from dengue_df.
print('Test:')
test_X, test_y, n_features = features_labels_set(timeseries_data=test, original_df=dengue_df, n_labels=n_labels)

Train:
The shape of the features is (114, 10270)
The shape of the labels is (114, 3)
Test:
The shape of the features is (22, 10270)
The shape of the labels is (22, 3)


# Modeling

In [21]:
def reshape_tensor(train_X, test_X, n_features, timesteps=None):
    """Reshape flat window arrays into 3D tensors [samples, timesteps, features].

    Parameters
    ----------
    train_X, test_X : numpy.ndarray
        2-D arrays of shape ``(samples, timesteps * n_features)``.
    n_features : int
        Number of values per time step.
    timesteps : int, optional
        Window length. When omitted it is inferred from ``train_X`` as
        ``train_X.shape[1] // n_features`` instead of being read from a
        notebook-level global.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_X, test_X)`` with shape ``(samples, timesteps, n_features)``.

    Raises
    ------
    ValueError
        If ``n_features`` is not positive, or the width of an array is not
        ``timesteps * n_features``.
    """
    if n_features <= 0:
        raise ValueError('n_features must be a positive integer.')

    if timesteps is None:
        # Derive the window length from the data so the function does not
        # depend on hidden notebook state.
        timesteps, remainder = divmod(train_X.shape[1], n_features)
        if remainder:
            raise ValueError(
                f'Cannot split {train_X.shape[1]} columns into steps of {n_features} features.'
            )

    print('The initial shapes are:')
    print(f'The train shape is {train_X.shape}')
    print(f'The test shape is {test_X.shape}')

    # reshape input to be 3D [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], timesteps, n_features))
    test_X = test_X.reshape((test_X.shape[0], timesteps, n_features))

    print('-----------------------')
    print('The Final shapes are:')
    print(f'The train shape is {train_X.shape}')
    print(f'The test shape is {test_X.shape}')

    return train_X, test_X

In [22]:
# Turn the flat windows into 3D tensors [samples, timesteps, features] for the
# recurrent layers; n_features comes from the features_labels_set() cell.
train_X, test_X = reshape_tensor(train_X, test_X, n_features)

The initial shapes are:
The train shape is (114, 10270)
The test shape is (22, 10270)
-----------------------
The Final shapes are:
The train shape is (114, 10, 1027)
The test shape is (22, 10, 1027)


# Define the Model

In [23]:
# Set Seed
#tf.random.set_seed(0)

def create_model():
    """Build and compile the stacked-LSTM classifier.

    The input shape is read from the notebook-level ``train_X`` tensor
    (``[samples, timesteps, features]``). The network is two LSTM layers
    (120 and 240 units, dropout 0.1), a 60-unit dense layer and a 3-way
    softmax output, compiled with Adam and categorical cross-entropy.

    Returns
    -------
    keras.Sequential
        A freshly initialised, compiled model reporting the metrics
        ``auc``, ``acc`` and ``entropy``.
    """
    timesteps, n_inputs = train_X.shape[1], train_X.shape[2]

    # design network
    network = Sequential([
        # The first LSTM returns the full sequence so the second can consume it.
        LSTM(120, dropout=0.1, input_shape=(timesteps, n_inputs), return_sequences=True),
        LSTM(240, dropout=0.1, input_shape=(timesteps, 120)),
        Dense(60),
        Dense(3, activation='softmax'),
    ])

    # Compile the model:
    network.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.Adam(),
        metrics=[
            tf.keras.metrics.AUC(name='auc', multi_label=False),
            tf.keras.metrics.CategoricalAccuracy(name='acc'),
            tf.keras.metrics.CategoricalCrossentropy(name='entropy'),
        ],
    )

    return network

### Train the model

In [24]:
from tensorflow.keras.callbacks import EarlyStopping

# EarlyStopping: stop once val_loss has failed to improve by at least 1e-3 for
# 20 consecutive epochs, keeping the best weights seen.
# NOTE(review): train_model() defaults to epochs=20, so with patience=20 this
# callback presumably never triggers in those runs - confirm. Whether the best
# weights are still restored when training is not stopped early depends on the
# Keras version.
# NOTE(review): train_model() passes the test arrays as validation_data, so
# val_loss here is measured on the test set.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=20, 
        verbose=1, mode='auto', restore_best_weights=True)

In [25]:
### Imbalanced data
# Count how often each label code (0, 1, 2) occurs in the integer-coded labels.
# NOTE(review): labels_df_orig holds every labelled week, not only the training
# split - confirm that computing the weights on all rows is intended.
n_zeros = (labels_df_orig.to_numpy() == 0).sum()
n_ones = (labels_df_orig.to_numpy() == 1).sum()
n_twos = (labels_df_orig.to_numpy() == 2).sum()
n_total = n_zeros + n_ones + n_twos

# Inverse-frequency class weights: total / count, so rarer classes weigh more.
# A class with zero occurrences would make its denominator zero.
weights = {0: n_total/n_zeros, 1: n_total/n_ones, 2: n_total/n_twos}
print(f'zeros: {n_zeros}, ones: {n_ones}, twos: {n_twos}, total: {n_total}')
weights

zeros: 104, ones: 23, twos: 29, total: 156


{0: 1.5, 1: 6.782608695652174, 2: 5.379310344827586}

In [26]:
# fit network
def train_model(model, monitor, weights, plot=None, epochs=20):
    """Fit ``model`` on the notebook-level train arrays.

    Reads the globals ``train_X``, ``train_y``, ``test_X`` and ``test_y``;
    the test arrays are passed to Keras as validation data.

    Parameters
    ----------
    model : keras.Model
        Compiled model to train.
    monitor : keras callback or None
        Added to ``callbacks`` when truthy (e.g. an ``EarlyStopping``).
    weights : dict or None
        Passed as ``class_weight`` when truthy.
    plot : bool, optional
        When truthy, plot train and validation loss per epoch.
    epochs : int, default 20
        Maximum number of epochs.

    Returns
    -------
    keras.callbacks.History
        The object returned by ``model.fit`` (previously discarded).
    """
    # Options common to every run; the optional ones are added only when
    # provided, replacing four near-identical fit() calls.
    fit_options = dict(
        epochs=epochs,
        batch_size=16,
        validation_data=(test_X, test_y),
        verbose=2,
        shuffle=False,  # keep the chronological order of the windows
    )
    if monitor:
        fit_options['callbacks'] = [monitor]
    if weights:
        fit_options['class_weight'] = weights

    history = model.fit(train_X, train_y, **fit_options)

    if plot:
        # plot history
        plt.plot(history.history['loss'], label='train')
        plt.plot(history.history['val_loss'], label='validation')
        plt.legend()
        plt.show()

    return history
        

# Test the model

# AUC

In [27]:
# You can also evaluate or predict on a dataset.
def evaluate(model, verbose = None):
    """Evaluate ``model`` on the notebook-level ``test_X`` / ``test_y`` arrays.

    Parameters
    ----------
    model : keras.Model
        Compiled (and normally trained) model.
    verbose : bool, optional
        When truthy, print a header and every metric value.

    Returns
    -------
    dict
        Metric name -> value, including ``'loss'`` and the compiled metrics.
    """
    if verbose:
        print('Evaluate: ')

    # Ask Keras for a name -> value mapping instead of pairing
    # model.metrics_names with the returned list by position, which breaks
    # when the two do not line up.
    stored_results = dict(model.evaluate(test_X, test_y, return_dict=True))

    if verbose:
        for metric, value in stored_results.items():
            print(f'{metric}: {value}')

    return stored_results

# Calculate Mean and SD

In [28]:
def calculate_mean_std(weights, n_runs=5):
    """Train and evaluate several fresh models and summarise their metrics.

    Each run builds a new model with ``create_model``, trains it with
    ``train_model`` (using the notebook-level ``monitor`` callback) and
    evaluates it with ``evaluate``.

    Parameters
    ----------
    weights : dict or None
        Class weights forwarded to ``train_model``; ``None`` disables them.
    n_runs : int, default 5
        Number of independent train/evaluate repetitions.

    Returns
    -------
    dict
        ``{'auc': [...], 'acc': [...], 'entropy': [...]}`` with one value
        per run (previously nothing was returned).
    """
    metrics = {
        "auc": [],
        "acc": [],
        "entropy": []
    }

    for _ in range(n_runs):
        model = create_model()
        train_model(model=model, monitor=monitor, weights=weights)
        stored_results = evaluate(model=model)

        # Collect only the metrics that are summarised below.
        for key in metrics:
            metrics[key].append(stored_results[key])

    # Mean and (population) standard deviation over the runs.
    for key, results in metrics.items():
        print(key, f": average={np.average(results):.3f}, std={np.std(results):.3f}")

    return metrics


In [29]:
# Baseline: repeated training runs without class weighting.
calculate_mean_std(weights=None)

Epoch 1/20
8/8 - 3s - loss: 1.0155 - auc: 0.7452 - acc: 0.5965 - entropy: 1.0155 - val_loss: 0.7166 - val_auc: 0.8678 - val_acc: 0.8182 - val_entropy: 0.7166
Epoch 2/20
8/8 - 0s - loss: 1.2054 - auc: 0.6686 - acc: 0.6667 - entropy: 1.2054 - val_loss: 0.7180 - val_auc: 0.8626 - val_acc: 0.8182 - val_entropy: 0.7180
Epoch 3/20
8/8 - 0s - loss: 0.8606 - auc: 0.7864 - acc: 0.6667 - entropy: 0.8606 - val_loss: 0.6270 - val_auc: 0.8580 - val_acc: 0.8182 - val_entropy: 0.6270
Epoch 4/20
8/8 - 0s - loss: 0.8965 - auc: 0.7349 - acc: 0.6667 - entropy: 0.8965 - val_loss: 0.6172 - val_auc: 0.8574 - val_acc: 0.8182 - val_entropy: 0.6172
Epoch 5/20
8/8 - 0s - loss: 0.8942 - auc: 0.6789 - acc: 0.6667 - entropy: 0.8942 - val_loss: 0.6357 - val_auc: 0.8853 - val_acc: 0.8182 - val_entropy: 0.6357
Epoch 6/20
8/8 - 0s - loss: 0.8762 - auc: 0.7079 - acc: 0.6667 - entropy: 0.8762 - val_loss: 0.6309 - val_auc: 0.8926 - val_acc: 0.8182 - val_entropy: 0.6309
Epoch 7/20
8/8 - 0s - loss: 0.8705 - auc: 0.7509 - a

In [30]:
# Same experiment with the inverse-frequency class weights from the `weights` dict.
calculate_mean_std(weights=weights)

Epoch 1/20
8/8 - 2s - loss: 4.0411 - auc: 0.5148 - acc: 0.3684 - entropy: 1.5389 - val_loss: 0.6097 - val_auc: 0.8755 - val_acc: 0.8182 - val_entropy: 0.6097
Epoch 2/20
8/8 - 0s - loss: 4.7786 - auc: 0.4866 - acc: 0.2281 - entropy: 1.3298 - val_loss: 1.3445 - val_auc: 0.1544 - val_acc: 0.0909 - val_entropy: 1.3445
Epoch 3/20
8/8 - 0s - loss: 3.2772 - auc: 0.3429 - acc: 0.1754 - entropy: 1.1896 - val_loss: 1.0117 - val_auc: 0.5067 - val_acc: 0.0909 - val_entropy: 1.0117
Epoch 4/20
8/8 - 0s - loss: 3.3584 - auc: 0.7278 - acc: 0.6491 - entropy: 1.0380 - val_loss: 0.9382 - val_auc: 0.8688 - val_acc: 0.8182 - val_entropy: 0.9382
Epoch 5/20
8/8 - 0s - loss: 3.3807 - auc: 0.7142 - acc: 0.6579 - entropy: 1.0369 - val_loss: 0.9925 - val_auc: 0.8724 - val_acc: 0.8182 - val_entropy: 0.9925
Epoch 6/20
8/8 - 0s - loss: 3.3297 - auc: 0.7332 - acc: 0.6667 - entropy: 1.0530 - val_loss: 1.0104 - val_auc: 0.8678 - val_acc: 0.8182 - val_entropy: 1.0104
Epoch 7/20
8/8 - 0s - loss: 3.3119 - auc: 0.7264 - a