# Train an IPC prediction NN using a genetic algorithm

Investigate the effectiveness of a genetic algorithm for training a "Back Propagation" neural network on a macroeconomic forecasting task.

Five indicators of the Ukrainian economy are presented in the form of time series. The data covers the period from 01/01/1995 to 12/01/1997. IPC is selected as the value to be predicted.

In [1]:
import numpy as np
import pandas as pd

from lib.ml.layer.layer_def import Dense, Input
from lib.ml.loss.loss_function import MEAN_SQUARED_ERROR
from lib.ml.model.seq_model import SeqNet
from lib.ml.optimizer.genetic_optimizer import GeneticAlgorithmNeuralNetOptimizer
from lib.ml.util.progress_tracker import PrintProgressTracker
from src.data.economic.process_raw_economic_dataset import (
    ECONOMIC_DATASET_FILENAME,
    process_economic_raw_dataset,
)
from src.definitions import RAW_DATA_FOLDER

## Part 1: Train NN on all features

1. Load dataset.

In [2]:

# Read the tab-separated raw dataset from the project's raw-data folder and
# preview its first four rows.
raw_dataset_path = RAW_DATA_FOLDER / ECONOMIC_DATASET_FILENAME
dataset = pd.read_csv(raw_dataset_path, sep="\t")
dataset.head(n=4)

Unnamed: 0,M0,M2,IOC,IPC,KVVE
0,4.7,2.0,29.2,21.2,2.1
1,15.4,13.2,11.4,18.1,13.4
2,18.7,8.0,9.3,11.4,10.1
3,29.9,7.9,5.1,5.8,11.9


2. Prepare dataset.

In [3]:

# Run the project's preprocessing step on the raw frame, then preview the
# first four rows of the result (its last column is the IPC(+1) target).
processed_dataset = process_economic_raw_dataset(dataset)
processed_dataset.head(n=4)

Unnamed: 0,M0(-7),M2(-7),IOC(0),IPC(0),KVVE(-7),IPC(+1)
7,4.7,2.0,6.5,4.6,2.1,14.2
8,15.4,13.2,9.9,14.2,13.4,9.1
9,18.7,8.0,8.4,9.1,10.1,6.2
10,29.9,7.9,4.2,6.2,11.9,4.6


3. Define a neural network with 5 inputs, 10 units in a hidden layer, and 1 output. For optimization, use a genetic algorithm with a population of 40 and a mutation rate of 320.

In [4]:
# Topology: 5 input features -> one hidden layer of 10 units -> 1 output.
layers = [Input(5), Dense(10), Dense(1)]
model = SeqNet(layers=layers)

# Genetic-algorithm optimizer; its hyperparameters are collected in one mapping.
ga_settings = {"population_size": 40, "mutation_rate": 320, "alpha": 0.5}
opt = GeneticAlgorithmNeuralNetOptimizer(**ga_settings)

# The tracker is constructed with 100; the training log further down prints
# the cost once every 100 iterations.
progress_tracker = PrintProgressTracker(100)

compiled_model = model.compile(
    optimizer=opt,
    loss=MEAN_SQUARED_ERROR,
    progress_tracker=progress_tracker,
)

4. Train neural network for 1000 epochs.

In [5]:

# Features are every column except the last, transposed so that each column
# of `x` is one sample; the target `y` is the last column (IPC(+1)).
x = processed_dataset.iloc[:, :-1].to_numpy().T
y = processed_dataset.iloc[:, -1].to_numpy()

# Third argument is the number of training iterations (1000 epochs).
trained_model = compiled_model.fit(x, y, 1000)

# The model is compiled with MEAN_SQUARED_ERROR and the value reported here
# is identical to the final cost in the training log, so label it as MSE
# (the previous "MRE" label did not describe the printed quantity).
print(f"MSE = {trained_model.metrics().accuracy}")

Iteration # [ 0 ] cost is: 9732.657835764667
Iteration # [ 100 ] cost is: 6.636644618772129
Iteration # [ 200 ] cost is: 6.6337675889592225
Iteration # [ 300 ] cost is: 6.6337675889592225
Iteration # [ 400 ] cost is: 6.6337675889592225
Iteration # [ 500 ] cost is: 6.6337675889592225
Iteration # [ 600 ] cost is: 6.6337675889592225
Iteration # [ 700 ] cost is: 6.6337675889592225
Iteration # [ 800 ] cost is: 6.6337675889592225
Iteration # [ 900 ] cost is: 6.6337675889592225
MRE = 6.6337675889592225


5. Check result and build report.

In [6]:
# Rebuild the inputs from `processed_dataset` instead of reading the globals
# `x` / `y`: Part 2 rebinds those names to the reduced feature set, so this
# cell would otherwise feed 3-feature inputs to the 5-input model when it is
# re-run after Part 2.
features = processed_dataset.iloc[:, :-1].to_numpy().T
y_true = processed_dataset.iloc[:, -1].to_numpy().reshape(-1)
y_predicted = trained_model.predict(features).reshape(-1)
delta = y_true - y_predicted

# One row per sample: true target ("IPC RO"), model prediction ("IPC PO"),
# signed error and squared error.
report = pd.DataFrame(
    {
        "IPC RO": y_true,
        "IPC PO": y_predicted,
        "DELTA": delta,
        "DELTA^2": np.square(delta),
    }
)

display(report)

# Totals over all samples: signed errors may cancel out, squared errors do not.
print(f"Delta total = {report['DELTA'].sum()}")
print(f"Delta^2 total = {report['DELTA^2'].sum()}")

Unnamed: 0,IPC RO,IPC PO,DELTA,DELTA^2
0,14.2,7.916075,6.283925,39.48771
1,9.1,11.918169,-2.818169,7.942076
2,6.2,9.499791,-3.299791,10.888623
3,4.6,3.934887,0.665113,0.442376
4,9.4,6.212444,3.187556,10.160514
5,7.4,4.92272,2.47728,6.136916
6,3.0,4.822986,-1.822986,3.323279
7,2.4,3.625703,-1.225703,1.502348
8,0.7,2.467604,-1.767604,3.124423
9,0.1,1.830618,-1.730618,2.995038


## Part 2: Train NN on a subset of features (IOC(0), IPC(0) and KVVE(-7))

1. Remove redundant columns from the preprocessed dataset.

In [11]:
# Columns left out of the reduced experiment; `drop` returns a new frame, so
# `processed_dataset` itself is not modified.
redundant_columns = ["M0(-7)", "M2(-7)"]
reduced_dataset = processed_dataset.drop(columns=redundant_columns)

2. Create and train model.

In [12]:
# Same topology as Part 1 except for the input width: 3 feature columns
# remain once the two redundant columns have been dropped.
reduces_model = SeqNet(layers=[Input(3), Dense(10), Dense(1)])

# Use a dedicated optimizer (same hyperparameters as Part 1) rather than the
# `opt` instance that already trained the 5-input model. Whether the
# optimizer keeps state between fits is not visible from this notebook, so a
# fresh instance keeps the two experiments independent of each other.
reduced_opt = GeneticAlgorithmNeuralNetOptimizer(
    population_size=40, mutation_rate=320, alpha=0.5
)

compiled_reduces_model = reduces_model.compile(
    optimizer=reduced_opt,
    loss=MEAN_SQUARED_ERROR,
    progress_tracker=PrintProgressTracker(100),
)

# NOTE: `x` / `y` are rebound here to the reduced feature set; the report
# cell that follows reads them.
x = reduced_dataset.iloc[:, :-1].to_numpy().T
y = reduced_dataset.iloc[:, -1].to_numpy()

trained_reduces_model = compiled_reduces_model.fit(x, y, 1000)

# The printed value is the metric of a model compiled with
# MEAN_SQUARED_ERROR and matches the final logged cost, so label it as MSE
# (the previous "MRE" label did not describe the printed quantity).
print(f"MSE = {trained_reduces_model.metrics().accuracy}")

Iteration # [ 0 ] cost is: 1708.8985112430623
Iteration # [ 100 ] cost is: 7.823174314355579
Iteration # [ 200 ] cost is: 7.8012982755615345
Iteration # [ 300 ] cost is: 7.8012982755615345
Iteration # [ 400 ] cost is: 7.8012982755615345
Iteration # [ 500 ] cost is: 7.8012982755615345
Iteration # [ 600 ] cost is: 7.8012982755615345
Iteration # [ 700 ] cost is: 7.8012982755615345
Iteration # [ 800 ] cost is: 7.8012982755615345
Iteration # [ 900 ] cost is: 7.8012982755615345
MRE = 7.8012982755615345


3. Check result and build report.

In [14]:
# Flatten both the predictions of the reduced model and the targets to 1-D
# so they can be compared element-wise.
y_predicted = trained_reduces_model.predict(x).reshape(-1)
y_true = y.reshape(-1)
delta = y_true - y_predicted

# One row per sample: true target, prediction, signed error, squared error.
report_columns = {
    "IPC RO": y_true,
    "IPC PO": y_predicted,
    "DELTA": delta,
    "DELTA^2": np.square(delta),
}
report = pd.DataFrame(report_columns)

display(report)

# Print the column totals for the signed and the squared errors.
for label, column in (("Delta", "DELTA"), ("Delta^2", "DELTA^2")):
    print(f"{label} total = {report[column].sum()}")

Unnamed: 0,IPC RO,IPC PO,DELTA,DELTA^2
0,14.2,8.178937,6.021063,36.253205
1,9.1,10.212034,-1.112034,1.236619
2,6.2,9.93064,-3.73064,13.917673
3,4.6,6.161125,-1.561125,2.437113
4,9.4,4.376315,5.023685,25.237408
5,7.4,4.69375,2.70625,7.323789
6,3.0,4.230248,-1.230248,1.513509
7,2.4,3.452436,-1.052436,1.107623
8,0.7,2.999922,-2.299922,5.289641
9,0.1,3.195977,-3.095977,9.585076


Delta sum = 0.16778585473940666
Delta^2 sum = 124.82077240898455
