### Optimizing RNN using Genetic Algorithm 

#### Importing required packages

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split as split

from keras.layers import LSTM, Input, Dense
from keras.models import Model

from deap import base, creator, tools, algorithms
from scipy.stats import bernoulli
from bitstring import BitArray
from keras.models import Sequential
from keras.layers import Dense,LSTM
import matplotlib.pyplot as plt

np.random.seed(1120)

In [3]:
df_data = pd.read_csv('train.csv',index_col=0)

In [4]:
df_data = df_data[['wp7','wp6','wp5','wp4','wp3','wp2','wp1']]

In [5]:
df_data

Unnamed: 0_level_0,wp7,wp6,wp5,wp4,wp3,wp2,wp1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2009070100,0.051,0.118,0.056,0.105,0.494,0.233,0.045
2009070101,0.051,0.066,0.066,0.105,0.257,0.249,0.085
2009070102,0.000,0.026,0.015,0.033,0.178,0.175,0.020
2009070103,0.000,0.013,0.010,0.022,0.109,0.085,0.060
2009070104,0.000,0.000,0.010,0.039,0.079,0.032,0.045
...,...,...,...,...,...,...,...
2012062608,0.101,0.132,0.339,0.149,0.119,0.138,0.170
2012062609,0.076,0.132,0.359,0.193,0.208,0.106,0.211
2012062610,0.076,0.132,0.329,0.132,0.247,0.090,0.251
2012062611,0.076,0.105,0.293,0.061,0.356,0.058,0.301


#### convert time series data to supervised format

In [6]:
def to_supervised(data,dropNa = True,lag = 1):
    df = pd.DataFrame(data)
    column = []
    column.append(df)
    for i in range(1,lag+1):
        column.append(df.shift(-i))
    df = pd.concat(column,axis=1)
    df.dropna(inplace = True)
    features = data.shape[1]
    df = df.values
    supervised_data = df[:,:features*lag]
    supervised_data = np.column_stack( [supervised_data, df[:,features*lag]])
    return supervised_data

In [7]:
timeSteps = 1 #Y shift parameter

supervised = to_supervised(df_data,lag=timeSteps)
pd.DataFrame(supervised).head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.051,0.118,0.056,0.105,0.494,0.233,0.045,0.051
1,0.051,0.066,0.066,0.105,0.257,0.249,0.085,0.0
2,0.0,0.026,0.015,0.033,0.178,0.175,0.02,0.0
3,0.0,0.013,0.01,0.022,0.109,0.085,0.06,0.0
4,0.0,0.0,0.01,0.039,0.079,0.032,0.045,0.0


##### Train/Test split and reshape data for LSTM

In [8]:
features = df_data.shape[1]
train_days = 17257 # trainning/test split
X = supervised[:,:features*timeSteps]
y = supervised[:,features*timeSteps]

x_train = X[:train_days,:]
x_test = X[train_days:,:]
y_train = y[:train_days]
y_test = y[train_days:]

print(x_train.shape,x_test.shape,y_train.shape,y_test.shape)

(17257, 7) (1499, 7) (17257,) (1499,)


In [9]:
x_train = x_train.reshape(x_train.shape[0], timeSteps, features)
x_test = x_test.reshape(x_test.shape[0], timeSteps, features)

print(x_train.shape,x_test.shape)

(17257, 1, 7) (1499, 1, 7)


#### Define model and evaluate function

In [9]:
def train_evaluate(ga_individual_solution):   
    # Decode GA solution to integer for window_size and num_units
    #window_size_bits = BitArray(ga_individual_solution[0:6])
    num_units_bits = BitArray(ga_individual_solution[0:4]) 
    epochs_bits = BitArray(ga_individual_solution[2:4]) 
    batch_bits = BitArray(ga_individual_solution[0:4]) 
    
    #window_size = window_size_bits.uint
    num_units = num_units_bits.uint
    epochs = epochs_bits.uint
    batch_size = batch_bits.uint

    print('\epochs: ', epochs, ', Num of Units: ', num_units,' ,batch size: ', batch_size)
    
    # Return fitness score of 100 if window_size or num_unit is zero
#     if epochs == 0 or num_units == 0 or batch_size == 0:
#         return 100, 
    
    #Train LSTM model and predict on validation set
    model = Sequential()
    model.add(LSTM(num_units, input_shape = ( timeSteps,x_train.shape[2])))
    model.add(Dense(1))
    model.compile(loss = "mae", optimizer = "adam")
    model.fit(x_train,y_train, validation_data = (x_test,y_test), epochs = epochs , batch_size = batch_size, verbose = 1, shuffle = False)
    y_pred = model.predict(x_test)
    x_test_val = x_test.reshape(x_test.shape[0],x_test.shape[2]*x_test.shape[1])
    inv_new = np.concatenate( (y_pred, x_test_val[:,-6:] ) , axis =1)
    final_pred = inv_new[:,0]
    y_test_val = y_test.reshape( len(y_test), 1)
    inv_new = np.concatenate( (y_test_val, x_test_val[:,-6:] ) ,axis = 1)
    actual_pred = inv_new[:,0]

    mae = mean_squared_error(final_pred,actual_pred)
    print('Validation MAE: ', mae,'\n')
    
    return mae,

#### GA parameters setting from DEAP package

In [10]:
population_size = 4
num_generations = 4
gene_length = 10

# As we are trying to minimize the RMSE score, that's why using -1.0. 
# In case, when you want to maximize accuracy for instance, use 1.0
creator.create('FitnessMax', base.Fitness, weights = (-1.0,))
creator.create('Individual', list , fitness = creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register('binary', bernoulli.rvs, 0.5)
toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.binary, n = gene_length)
toolbox.register('population', tools.initRepeat, list , toolbox.individual)

toolbox.register('mate', tools.cxOrdered)
toolbox.register('mutate', tools.mutShuffleIndexes, indpb = 0.6)
toolbox.register('select', tools.selRoulette)
toolbox.register('evaluate', train_evaluate)

population = toolbox.population(n = population_size)
r = algorithms.eaSimple(population, toolbox, cxpb = 0.3, mutpb = 0.2, ngen = num_generations, verbose = False)

\epochs:  1 , Num of Units:  9  ,batch size:  9
Validation MAE:  0.012811239957227083 

\epochs:  2 , Num of Units:  14  ,batch size:  14
Epoch 1/2
Epoch 2/2
Validation MAE:  0.012683266412281267 

\epochs:  3 , Num of Units:  15  ,batch size:  15
Epoch 1/3
Epoch 2/3
Epoch 3/3
Validation MAE:  0.013007623985819013 

\epochs:  0 , Num of Units:  12  ,batch size:  12
Validation MAE:  0.24108955835496987 

\epochs:  0 , Num of Units:  12  ,batch size:  12
Validation MAE:  0.36311542648584855 

\epochs:  0 , Num of Units:  12  ,batch size:  12
Validation MAE:  0.21854096296301037 

\epochs:  0 , Num of Units:  12  ,batch size:  12
Validation MAE:  0.3034914051009952 

\epochs:  0 , Num of Units:  12  ,batch size:  12
Validation MAE:  0.34882705393755453 

\epochs:  0 , Num of Units:  4  ,batch size:  4
Validation MAE:  0.46917014072922336 

\epochs:  1 , Num of Units:  13  ,batch size:  13
Validation MAE:  0.013103195789408346 

\epochs:  3 , Num of Units:  15  ,batch size:  15
Epoch 1/3
E

#### Get best paramters from GA-LSTM

In [11]:
best_individuals = tools.selBest(population,k = 1)
best_epochs = None
best_num_units = None
best_batch_size = None

for bi in best_individuals:
    num_units_bits = BitArray(bi[0:4]) 
    epochs_bits = BitArray(bi[4:]) 
    batch_bits = BitArray(bi[0:4]) 
    
    best_epochs = epochs_bits.uint
    best_num_units = num_units_bits.uint
    best_batch_size = batch_bits.uint
    
    print('Best Parameters - epochs: ', best_epochs, ', Num of Units: ', best_num_units,' ,batch size: ', best_batch_size)

Best Parameters - epochs:  25 , Num of Units:  15  ,batch size:  15
