# Train IPC prediction NN using genetic algorithm

Investigate the effectiveness of a genetic algorithm for training a "Back Propagation" neural network (NN) in the task of macroeconomic forecasting.

Five indicators of the Ukrainian economy (M0, M2, IOC, IPC and KVVE) are presented in the form of time series. The data was gathered over the period from 01/01/1995 to 12/01/1997. IPC is selected as the predicted value.

In [1]:
import numpy as np
import pandas as pd

from lib.ml.layer.layer_def import Dense, Input
from lib.ml.util.loss_function import MEAN_SQUARED_ERROR
from lib.ml.model.seq_model import SeqNet
from lib.ml.optimizer.genetic_optimizer import GeneticAlgorithmNeuralNetOptimizer
from lib.ml.util.progress_tracker import PrintProgressTracker
from src.data.economic.process_raw_economic_dataset import (
    ECONOMIC_DATASET_FILENAME,
    process_economic_raw_dataset,
)
from src.definitions import RAW_DATA_FOLDER

## Part 1: Train NN on all features

1. Load dataset.

In [2]:

# Read the raw, tab-separated economic dataset and preview its first four rows.
dataset = pd.read_csv(
    filepath_or_buffer=RAW_DATA_FOLDER / ECONOMIC_DATASET_FILENAME,
    sep="\t",
)
dataset.head(n=4)

Unnamed: 0,M0,M2,IOC,IPC,KVVE
0,4.7,2.0,29.2,21.2,2.1
1,15.4,13.2,11.4,18.1,13.4
2,18.7,8.0,9.3,11.4,10.1
3,29.9,7.9,5.1,5.8,11.9


2. Prepare dataset.

In [3]:

# Turn the raw table into the modelling table. Judging by the displayed
# columns, the result holds lagged features (e.g. M0(-7)) plus the
# next-step target IPC(+1) as the last column.
processed_dataset = process_economic_raw_dataset(dataset)
processed_dataset.head(n=4)

Unnamed: 0,M0(-7),M2(-7),IOC(0),IPC(0),KVVE(-7),IPC(+1)
7,4.7,2.0,6.5,4.6,2.1,14.2
8,15.4,13.2,9.9,14.2,13.4,9.1
9,18.7,8.0,8.4,9.1,10.1,6.2
10,29.9,7.9,4.2,6.2,11.9,4.6


3. Define a neural network with 5 inputs, 10 units in a hidden layer and 1 output. For optimization, use a genetic algorithm with a population size of 200, a mutation rate of 0.5 and a mutation decay of 0.05.

In [7]:
# Network topology: 5 input features -> 10 hidden units -> 1 output.
model = SeqNet(
    layers=[
        Input(5),
        Dense(10),
        Dense(1),
    ]
)

# Genetic-algorithm optimizer used for fitting the network weights.
opt = GeneticAlgorithmNeuralNetOptimizer(
    population_size=200,
    mutation_rate=0.5,
    mutation_decay=0.05,
)

# The tracker argument (100) matches the logging interval seen in the
# training output: one cost line every 100 iterations.
compiled_model = model.compile(
    optimizer=opt, loss=MEAN_SQUARED_ERROR, progress_tracker=PrintProgressTracker(100)
)

4. Train neural network for 1000 epochs.

In [8]:

# Every column except the last is a feature; the last column is the target.
# The feature matrix is transposed, so features run along rows and samples
# along columns when passed to `fit`.
x = processed_dataset.iloc[:, :-1].to_numpy().T
y = processed_dataset.iloc[:, -1].to_numpy()

# Third argument is the number of training iterations (1000 epochs).
trained_model = compiled_model.fit(x, y, 1000)

# The model is compiled with MEAN_SQUARED_ERROR, and in the recorded run the
# reported metric equals the final logged cost, so the value is labelled
# MSE (it was previously mislabelled "MRE").
print("MSE = " + str(trained_model.metrics().accuracy))

Iteration # [ 0 ] cost is: 1546.1475267141705
Iteration # [ 100 ] cost is: 7.453231780524159
Iteration # [ 200 ] cost is: 6.895954547527106
Iteration # [ 300 ] cost is: 6.605532561037771
Iteration # [ 400 ] cost is: 6.571417673321669
Iteration # [ 500 ] cost is: 6.479779241891645
Iteration # [ 600 ] cost is: 6.469538453405551
Iteration # [ 700 ] cost is: 6.469538453398005
Iteration # [ 800 ] cost is: 6.469538453397992
Iteration # [ 900 ] cost is: 6.4695384533979885
MRE = 6.469538453397983


5. Check result and build report.

In [9]:
# Flatten predictions and targets to 1-D so they line up element by element.
y_predicted = trained_model.predict(x).reshape(-1)
y_true = y.reshape(-1)
delta = y_true - y_predicted

# Per-sample comparison table: target, prediction, error and squared error.
report_columns = {
    "IPC RO": y_true,
    "IPC PO": y_predicted,
    "DELTA": delta,
    "DELTA^2": np.square(delta),
}
report = pd.DataFrame(report_columns)

display(report)

# Aggregate error totals over all samples.
print(f"Delta total = {report['DELTA'].sum()!s}")
print(f"Delta^2 total = {report['DELTA^2'].sum()!s}")

Unnamed: 0,IPC RO,IPC PO,DELTA,DELTA^2
0,14.2,7.851271,6.348729,40.306358
1,9.1,11.357726,-2.257726,5.097327
2,6.2,9.18955,-2.98955,8.937411
3,4.6,3.855783,0.744217,0.553859
4,9.4,6.650974,2.749026,7.557146
5,7.4,4.947756,2.452244,6.013499
6,3.0,4.956296,-1.956296,3.827095
7,2.4,3.899016,-1.499016,2.24705
8,0.7,2.686803,-1.986803,3.947385
9,0.1,2.266038,-2.166038,4.691721


Delta total = 1.374602318748635
Delta^2 total = 103.51261525436773


## Part 2: Train NN on a subset of features (IOC(0), IPC(0) and KVVE(-7))

1. Remove redundant columns from the preprocessed dataset.

In [10]:
# Keep only IOC(0), IPC(0), KVVE(-7) and the target by dropping the two
# money-supply columns; `processed_dataset` itself is left unmodified.
reduced_dataset = processed_dataset.drop(["M0(-7)", "M2(-7)"], axis="columns")

2. Create and train model.

In [11]:
# Same topology as Part 1, but with 3 inputs because two of the five feature
# columns were dropped from the dataset.
reduces_model = SeqNet(layers=[Input(3), Dense(10), Dense(1)])

# NOTE(review): `opt` is the very optimizer instance that was already used to
# train the Part 1 model. If the optimizer keeps internal state between fits
# (e.g. an already decayed mutation rate), the two experiments are not
# independent -- confirm, and create a fresh optimizer here if so.
compiled_reduces_model = reduces_model.compile(
    optimizer=opt,
    loss=MEAN_SQUARED_ERROR,
    progress_tracker=PrintProgressTracker(100),
)

# NOTE: `x` and `y` are rebound to the reduced feature set; the report cell
# that follows reads them, and re-running the Part 1 report cell after this
# point would pick up these arrays instead of the 5-feature ones.
x = reduced_dataset.iloc[:, :-1].to_numpy().T
y = reduced_dataset.iloc[:, -1].to_numpy()

# Third argument is the number of training iterations (1000 epochs).
trained_reduces_model = compiled_reduces_model.fit(x, y, 1000)

# The model is compiled with MEAN_SQUARED_ERROR, and in the recorded run the
# reported metric equals the final logged cost, so the value is labelled
# MSE (it was previously mislabelled "MRE").
print("MSE = " + str(trained_reduces_model.metrics().accuracy))

Iteration # [ 0 ] cost is: 65.98133638416924
Iteration # [ 100 ] cost is: 7.898262251919611
Iteration # [ 200 ] cost is: 7.7650790386228365
Iteration # [ 300 ] cost is: 7.761869918306806
Iteration # [ 400 ] cost is: 7.761838955549166
Iteration # [ 500 ] cost is: 7.761838948196414
Iteration # [ 600 ] cost is: 7.76183894813824
Iteration # [ 700 ] cost is: 7.7618389481379095
Iteration # [ 800 ] cost is: 7.761838948137906
Iteration # [ 900 ] cost is: 7.761838948137906
MRE = 7.761838948137904


3. Check result and build report.

In [12]:
# Flatten predictions and targets to 1-D so they line up element by element.
y_predicted = trained_reduces_model.predict(x).reshape(-1)
y_true = y.reshape(-1)
delta = y_true - y_predicted

# Per-sample comparison table: target, prediction, error and squared error.
report_columns = {
    "IPC RO": y_true,
    "IPC PO": y_predicted,
    "DELTA": delta,
    "DELTA^2": np.square(delta),
}
report = pd.DataFrame(report_columns)

display(report)

# Aggregate error totals over all samples.
print(f"Delta total = {report['DELTA'].sum()!s}")
print(f"Delta^2 total = {report['DELTA^2'].sum()!s}")

Unnamed: 0,IPC RO,IPC PO,DELTA,DELTA^2
0,14.2,8.246639,5.953361,35.442503
1,9.1,10.470804,-1.370804,1.879103
2,6.2,9.928966,-3.728966,13.905186
3,4.6,5.930831,-1.330831,1.771112
4,9.4,4.207047,5.192953,26.966765
5,7.4,4.501986,2.898014,8.398485
6,3.0,4.097636,-1.097636,1.204804
7,2.4,3.404771,-1.004771,1.009566
8,0.7,2.97956,-2.27956,5.196395
9,0.1,2.904507,-2.804507,7.865257


Delta total = 0.49873614522929266
Delta^2 total = 124.18942317020647
