#### 1. Import the necessary modules

---





In [None]:
!pip install deap

In [None]:
!pip install bitstring

In [3]:
import random

import numpy as np
import pandas as pd
from bitstring import BitArray
from deap import base, creator, tools, algorithms
from keras.layers import LSTM, Input, Dense
from keras.models import Model
from matplotlib import pyplot as plt
from scipy.stats import bernoulli
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split as split
from sklearn.preprocessing import StandardScaler
np.random.seed(1120)

#### 2. Import time series data

In [None]:
# Read the semicolon-separated CSV into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer='/content/sample_data/desempleo.csv', sep=';')
print(df.head(n=5))

In [None]:
# Select the model features by position: columns 2..5 of the loaded frame.
cols = list(df)[2:6]
print(cols)
df_train = df[cols]

# Number of leading rows used for training; the remaining rows are held out
# as the test set.  (Presumably the rows are in chronological order - verify
# against the CSV.)
n_train_rows = 194

# Fit the scaler on the training rows only, so the statistics of the held-out
# rows do not leak into the preprocessing; then apply that same transformation
# to the whole series.
scaler = StandardScaler()
scaler.fit(df_train.iloc[:n_train_rows])
df_train_scaled = scaler.transform(df_train)

train_data = df_train_scaled[:n_train_rows]
test_data = df_train_scaled[n_train_rows:]

#### 3. Prepare the data set according to the chosen window size (window_size)

In [14]:
def prepare_dataset(df_train_scaled, window_size, n_future=1,
                    number_of_features=4, target_col=3):
    """Slice a 2-D series into sliding input windows and their targets.

    For every position ``i`` from ``window_size`` up to
    ``len(data) - n_future`` (inclusive) one sample is produced:

    * input:  rows ``i - window_size .. i - 1``, columns
      ``0 .. number_of_features - 1``
    * target: row ``i + n_future - 1``, column ``target_col``

    Parameters
    ----------
    df_train_scaled : array-like, 2-D (rows, columns)
        The series to window; converted with ``np.asarray``.
    window_size : int
        Number of consecutive rows in each input window.
    n_future : int, default 1
        How many rows ahead of the window the target lies (1 = the row
        immediately after the window).
    number_of_features : int, default 4
        Number of leading columns used as input features.
    target_col : int, default 3
        Column index of the value to predict.

    Returns
    -------
    X : np.ndarray, shape (n_samples, window_size, number_of_features)
    Y : np.ndarray, shape (n_samples, 1)
        When the series is too short to yield any window, correctly shaped
        empty arrays (``n_samples == 0``) are returned.
    """
    data = np.asarray(df_train_scaled)
    X_train = []
    Y_train = []

    for i in range(window_size, len(data) - n_future + 1):
        X_train.append(data[i - window_size:i, 0:number_of_features])
        # Slice (rather than index) the row so every target has shape (1,).
        Y_train.append(data[i + n_future - 1:i + n_future, target_col])

    if not X_train:
        # Keep the rank of the outputs stable even when no window fits.
        n_cols = min(number_of_features, data.shape[1]) if data.ndim == 2 else 0
        return np.empty((0, max(window_size, 0), n_cols)), np.empty((0, 1))

    return np.array(X_train), np.array(Y_train)

#### 4. The train_evaluate function builds and trains the LSTM network for a given individual and returns its validation RMSE.


In [62]:
def train_evaluate(ga_individual_solution):
    """Fitness function: train the LSTM encoded by a chromosome and score it.

    The first 5 bits of ``ga_individual_solution`` are decoded (unsigned) as
    the window size and the remaining bits as the number of LSTM units.  The
    network is trained on windows built from the global ``train_data`` and
    scored on a random 10% hold-out of those windows.

    Parameters
    ----------
    ga_individual_solution : sequence of 0/1 values
        The chromosome produced by the genetic algorithm.

    Returns
    -------
    tuple
        One-element tuple holding the validation RMSE (lower is better), or
        the penalty value ``100`` when the chromosome decodes to an unusable
        configuration (zero window size, zero units, or too few windows to
        form a train/validation split).
    """
    window_size = BitArray(ga_individual_solution[0:5]).uint
    num_units = BitArray(ga_individual_solution[5:]).uint
    print('\nWindow Size: ', window_size, ', Num of Units: ', num_units)
    print (ga_individual_solution)

    # A zero-length window or a layer with no units cannot be trained.
    if window_size == 0 or num_units == 0:
        return 100,

    X, Y = prepare_dataset(train_data, window_size)
    # At least two windows are needed to split into train and validation.
    if len(X) < 2:
        return 100,

    X_train, X_val, y_train, y_val = split(X, Y, test_size=0.10, random_state=1120)

    # The Input layer already fixes the shape, so the LSTM needs no
    # input_shape argument; the feature count is taken from the data.
    inputs = Input(shape=(window_size, X.shape[-1]))
    x = LSTM(num_units)(inputs)
    predictions = Dense(1, activation='linear')(x)
    model = Model(inputs=inputs, outputs=predictions)
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=5, batch_size=10, shuffle=True)

    y_pred = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    print('Validation RMSE: ', rmse,'\n')
    return rmse,

#### 5. Use DEAP to define the individual (the chromosome is a 9-bit string, so each gene is drawn from a Bernoulli distribution). Create the population, and use ordered crossover for mating, mutShuffleIndexes for mutation, and roulette-wheel selection for parent selection.

In [None]:
population_size = 4
num_generations = 10
gene_length = 9  # 5 bits for the window size + 4 bits for the number of units

# DEAP's crossover, mutation and variation operators draw from Python's
# `random` module, so seed it here to make the search repeatable.
random.seed(1120)

# weights=(-1.0,) makes this a single-objective *minimisation* (of the RMSE
# returned by train_evaluate), despite the historical class name 'FitnessMax'.
creator.create('FitnessMax', base.Fitness, weights = (-1.0,))
creator.create('Individual', list , fitness = creator.FitnessMax)


def sel_roulette_min(individuals, k, fit_attr='fitness'):
    """Roulette-wheel selection for a minimised, non-negative fitness.

    ``tools.selRoulette`` samples proportionally to the raw fitness value and
    would therefore favour the individuals with the *highest* RMSE.  Here each
    individual's slice of the wheel is proportional to the inverse of its
    fitness value, so a lower RMSE means a higher chance of being selected.

    Returns a list of ``k`` individuals drawn with replacement (references,
    not copies, like the DEAP selection operators).
    """
    inverse_fitness = [1.0 / (getattr(ind, fit_attr).values[0] + 1e-12)
                       for ind in individuals]
    return random.choices(individuals, weights=inverse_fitness, k=k)


toolbox = base.Toolbox()
toolbox.register('binary', bernoulli.rvs, 0.5)
toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.binary, n = gene_length)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
# NOTE(review): cxOrdered and mutShuffleIndexes are designed for permutation
# chromosomes; on a bit string they only rearrange existing bits - confirm
# this is the intended search behaviour.
toolbox.register('mate', tools.cxOrdered)
toolbox.register('mutate', tools.mutShuffleIndexes, indpb = 0.6)
toolbox.register('select', sel_roulette_min)
toolbox.register('evaluate', train_evaluate)
population = toolbox.population(n = population_size)
r = algorithms.eaSimple(population, toolbox, cxpb = 0.6, mutpb = 0.4, ngen = num_generations, verbose = False)

#### 6. Get the best solution

In [None]:
# Pick the fittest individual of the population and decode its chromosome:
# the first 5 bits give the window size, the remaining bits the LSTM units.
best_individuals = tools.selBest(population, k=1)
best_window_size, best_num_units = None, None

for bi in best_individuals:
    best_window_size = BitArray(bi[:5]).uint
    best_num_units = BitArray(bi[5:]).uint
    print(best_individuals, '\n')
    print('\nWindow Size: ', best_window_size, ', Num of Units: ', best_num_units)
    print('\n')

#### 7. Implement the best solution

In [None]:
# Training windows are built from the training split only.
X_train, y_train = prepare_dataset(train_data, best_window_size)

# Prepend the last `best_window_size` training rows as input context, so that
# every test row gets a prediction and the test set cannot come out empty
# when it is shorter than the window.  Only test rows are used as targets.
context_start = len(train_data) - best_window_size
test_with_context = np.concatenate([train_data[context_start:], test_data])
X_test, y_test = prepare_dataset(test_with_context, best_window_size)

# The Input layer already fixes the shape, so the LSTM needs no input_shape
# argument; the feature count is taken from the data.
inputs = Input(shape=(best_window_size, X_train.shape[-1]))
x = LSTM(best_num_units)(inputs)
predictions = Dense(1, activation='linear')(x)
model = Model(inputs = inputs, outputs = predictions)
# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()
model.compile(optimizer='adam',loss='mean_squared_error')
history = model.fit(X_train, y_train, epochs=5, batch_size=10, validation_split=0.1, verbose=1, shuffle=True)

# Score the model on the held-out rows (still in scaled units).
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('Test RMSE: ', rmse)

In [None]:
# Plot the per-epoch training and validation loss recorded by model.fit.
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.legend()
plt.title('Training and validation loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.show()

#### 8. Graph the solution

In [None]:
# The scaler was fitted on all feature columns, so the single target column is
# replicated across that many columns before inverting the scaling; column 3
# of the result is the target in its original units.
# NOTE: this cell overwrites y_test / y_pred, so re-run the previous section
# before executing it a second time.
n_features = df_train.shape[1]
y_test_copies = np.repeat(y_test, n_features, axis=-1)
y_test = scaler.inverse_transform(y_test_copies)[:,3]
y_pred_copies = np.repeat(y_pred, n_features, axis=-1)
y_pred = scaler.inverse_transform(y_pred_copies)[:,3]
y_pred = np.round(y_pred,2)

# Label the x axis with the dates of the last len(y_pred) rows of the frame.
train_dates = pd.to_datetime(df['date'])
lim_batch = len(y_pred)
fin = len(train_dates)
inicio = fin - lim_batch
# A comprehension keeps the temporaries local, so the GA result stored in the
# global `r` is no longer overwritten by a loop variable.
lista = [train_dates[j].strftime('%Y-%m') for j in range(inicio, fin)]

mapped = range(len(lista))
plt.figure(figsize=(15,6))
plt.plot(y_test, label="Real values")
plt.plot(y_pred, label="Predicted values")
# Rotate the month labels so neighbouring ticks do not overlap.
plt.xticks(mapped, lista, rotation=45)
lim_final = len(lista)
plt.xlim(0, lim_final)

# Upper y limit: the larger of the two rounded maxima, plus one unit of headroom.
max1 = np.round(np.max(y_test), 0)
max2 = np.round(np.max(y_pred), 0)
limy = max(max1, max2) + 1

plt.ylim(0, limy)
plt.title("Prediction of the unemployment rate in Ecuador")
plt.xlabel("Months")
plt.ylabel("Unemployment rate (%)")
plt.legend()
plt.show()