# Notebook for model selection in lab 6

Tested: MLP with 1,2,3 hidden layers and activations: sigmoid, tanh, relu, linear

In [37]:
import MultiLayerPerceptron as mlp
import pandas as pd

## Dataset: multimodal-large

In [38]:
df_train = pd.read_csv('data/regression/multimodal-large-test.csv')
df_test = pd.read_csv('data/regression/multimodal-large-training.csv')
print(df_train.head())

x_train = [[x] for x in df_train.loc[:,"x"]]
y_train = [[y] for y in df_train.loc[:,"y"]]
x_test = [[x] for x in df_test.loc[:,"x"]]
y_test = [[y] for y in df_test.loc[:,"y"]]

          x           y
0  0.493292  -98.208166
1 -0.470203  -55.283891
2  1.869983  100.299997
3 -1.040446    2.720629
4 -0.616507  -75.991636


## Model training

In [39]:
RELU = mlp.ActivationReLU()
SIGMOID = mlp.ActivationSigmoid()
TANH = mlp.ActivationTanh()
LINEAR = mlp.ActivationLinear()

# to be tested:
activations = [RELU, SIGMOID, TANH, LINEAR]
hidden_layers = [1,2,3]
hidden_neurons_total = 120
epochs = 200

In [41]:
results = []
def train_net(activation, hidden, lr):
    net = mlp.NeuralNetwork()
    net.add(mlp.Layer(1)) # only 1 input value
    neurons_per_hidden = int(hidden_neurons_total / hidden)
    for i in range(hidden):
        net.add(mlp.Layer(neurons_per_hidden, activation))
    net.add(mlp.Layer(1,add_bias=False)) # only 1 output value
    net.train(x_train, y_train,x_test,y_test, epochs=200, learning_rate=lr, batch_size=1)
    hist = net.get_training_history()
    return [[str(activation), hidden,lr, hist['loss_train'][-1], hist['loss_test'][-1]]]

#### ReLu

In [44]:
res1 = train_net(RELU, 1, 0.0003)

Epoch:    1/200,  MSE loss train: 3197.281,  test: 3161.066
Epoch:   21/200,  MSE loss train: 1711.915,  test: 1691.843
Epoch:   41/200,  MSE loss train: 1635.916,  test: 1601.014
Epoch:   61/200,  MSE loss train: 1624.463,  test: 1587.807
Epoch:   81/200,  MSE loss train:  1619.69,  test:  1582.38
Epoch:  101/200,  MSE loss train: 1617.251,  test: 1579.957
Epoch:  121/200,  MSE loss train: 1615.511,  test: 1578.464
Epoch:  141/200,  MSE loss train: 1612.151,  test:  1575.54
Epoch:  161/200,  MSE loss train: 1608.605,  test: 1572.133
Epoch:  181/200,  MSE loss train: 1605.546,  test: 1569.075
Epoch:  200/200,  MSE loss train: 1602.898,  test: 1566.403


In [45]:
res1

[['relu', 1, 0.0003, 1602.89814861013, 1566.403175085824]]

In [53]:
res2 = train_net(RELU, 2, 0.0003)

Epoch:    1/200,  MSE loss train: 2409.433,  test: 2380.421
Epoch:   21/200,  MSE loss train:  527.541,  test:  549.613
Epoch:   41/200,  MSE loss train:  258.209,  test:  266.184
Epoch:   61/200,  MSE loss train:   216.32,  test:  223.703
Epoch:   81/200,  MSE loss train:  218.044,  test:   230.15
Epoch:  101/200,  MSE loss train:  152.827,  test:  153.564
Epoch:  121/200,  MSE loss train:  146.183,  test:  146.152
Epoch:  141/200,  MSE loss train:  174.895,  test:  177.554
Epoch:  161/200,  MSE loss train:  171.835,  test:  172.955
Epoch:  181/200,  MSE loss train:  175.183,  test:  175.699
Epoch:  200/200,  MSE loss train:  144.594,  test:  143.694


In [54]:
res2

[['relu', 2, 0.0003, 144.59368000950653, 143.6938414980373]]

In [51]:
res3 = train_net(RELU, 3, 0.0001)

Epoch:    1/200,  MSE loss train: 3269.361,  test: 3204.873
Epoch:   21/200,  MSE loss train:  326.488,  test:  331.381
Epoch:   41/200,  MSE loss train:   122.84,  test:  124.515
Epoch:   61/200,  MSE loss train:   54.082,  test:   70.508
Epoch:   81/200,  MSE loss train:   73.761,  test:   81.681
Epoch:  101/200,  MSE loss train:   21.031,  test:   28.074
Epoch:  121/200,  MSE loss train:    5.801,  test:   11.397
Epoch:  141/200,  MSE loss train:   10.369,  test:   15.194
Epoch:  161/200,  MSE loss train:   13.163,  test:   18.045
Epoch:  181/200,  MSE loss train:    8.175,  test:   12.774
Epoch:  200/200,  MSE loss train:    7.607,  test:   12.496


In [52]:
res3

[['relu', 3, 0.0001, 7.606620362241119, 12.495676041805275]]

#### Linear

In [55]:
res4 = train_net(LINEAR, 1, 0.0001)

Epoch:    1/200,  MSE loss train: 4471.382,  test: 4437.484
Epoch:   21/200,  MSE loss train:  4455.72,  test: 4420.794
Epoch:   41/200,  MSE loss train: 4455.477,  test: 4420.318
Epoch:   61/200,  MSE loss train: 4455.821,  test: 4420.575
Epoch:   81/200,  MSE loss train:   4456.3,  test: 4421.017
Epoch:  101/200,  MSE loss train:  4456.83,  test: 4421.527
Epoch:  121/200,  MSE loss train: 4457.386,  test:  4422.07
Epoch:  141/200,  MSE loss train: 4457.956,  test: 4422.632
Epoch:  161/200,  MSE loss train: 4458.538,  test: 4423.207
Epoch:  181/200,  MSE loss train: 4459.127,  test: 4423.791
Epoch:  200/200,  MSE loss train: 4459.693,  test: 4424.354


In [56]:
res4

[['linear', 1, 0.0001, 4459.693008558467, 4424.354227295575]]

In [57]:
res5 = train_net(LINEAR, 2, 0.0001)

Epoch:    1/200,  MSE loss train: 4568.883,  test:  4536.53
Epoch:   21/200,  MSE loss train: 4535.169,  test: 4500.582
Epoch:   41/200,  MSE loss train: 4549.059,  test: 4514.327
Epoch:   61/200,  MSE loss train: 4559.022,  test:  4524.24
Epoch:   81/200,  MSE loss train: 4565.931,  test: 4531.118
Epoch:  101/200,  MSE loss train: 4570.833,  test: 4535.991
Epoch:  121/200,  MSE loss train: 4574.462,  test: 4539.586
Epoch:  141/200,  MSE loss train: 4577.282,  test: 4542.369
Epoch:  161/200,  MSE loss train: 4579.581,  test: 4544.631
Epoch:  181/200,  MSE loss train: 4581.539,  test: 4546.552
Epoch:  200/200,  MSE loss train: 4583.189,  test: 4548.168


In [58]:
res5

[['linear', 2, 0.0001, 4583.188540893184, 4548.167808027301]]

In [59]:
res6 = train_net(LINEAR, 3, 0.0001)

Epoch:    1/200,  MSE loss train: 4628.613,  test: 4596.941
Epoch:   21/200,  MSE loss train: 4618.215,  test: 4584.671
Epoch:   41/200,  MSE loss train: 4627.946,  test: 4593.984
Epoch:   61/200,  MSE loss train: 4632.714,  test: 4598.476
Epoch:   81/200,  MSE loss train: 4636.413,  test: 4601.976
Epoch:  101/200,  MSE loss train: 4639.487,  test:   4604.9
Epoch:  121/200,  MSE loss train: 4642.046,  test: 4607.339
Epoch:  141/200,  MSE loss train: 4644.195,  test: 4609.388
Epoch:  161/200,  MSE loss train: 4646.031,  test:  4611.14
Epoch:  181/200,  MSE loss train: 4647.638,  test: 4612.673
Epoch:  200/200,  MSE loss train:  4649.01,  test: 4613.984


In [60]:
res6

[['linear', 3, 0.0001, 4649.009925706688, 4613.9842402455515]]

#### Sigmoid

In [61]:
res7 = train_net(SIGMOID, 1, 0.003)

Epoch:    1/200,  MSE loss train: 2906.766,  test: 2853.649
Epoch:   21/200,  MSE loss train:  222.781,  test:  222.937
Epoch:   41/200,  MSE loss train:  115.218,  test:  116.274
Epoch:   61/200,  MSE loss train:    41.39,  test:   45.647
Epoch:   81/200,  MSE loss train:    18.04,  test:   22.827
Epoch:  101/200,  MSE loss train:   10.465,  test:   15.286
Epoch:  121/200,  MSE loss train:    7.801,  test:   12.684
Epoch:  141/200,  MSE loss train:    6.536,  test:   11.471
Epoch:  161/200,  MSE loss train:    5.702,  test:   10.665
Epoch:  181/200,  MSE loss train:    5.051,  test:   10.025
Epoch:  200/200,  MSE loss train:    4.528,  test:    9.502


In [62]:
res7

[['sigmoid', 1, 0.003, 4.5275544043070735, 9.50229057128129]]

In [66]:
res8 = train_net(SIGMOID, 2, 0.0003)

Epoch:    1/200,  MSE loss train: 5180.808,  test: 5142.655
Epoch:   21/200,  MSE loss train: 1658.218,  test: 1510.964
Epoch:   41/200,  MSE loss train: 1614.489,  test: 1469.816
Epoch:   61/200,  MSE loss train:   1594.8,  test: 1454.243
Epoch:   81/200,  MSE loss train: 1589.222,  test: 1450.222
Epoch:  101/200,  MSE loss train:  1586.72,  test: 1448.342
Epoch:  121/200,  MSE loss train: 1585.297,  test: 1447.198
Epoch:  141/200,  MSE loss train:   1584.4,  test: 1446.446
Epoch:  161/200,  MSE loss train: 1583.781,  test: 1445.959
Epoch:  181/200,  MSE loss train: 1583.333,  test: 1445.546
Epoch:  200/200,  MSE loss train: 1583.002,  test: 1445.242


In [67]:
res8

[['sigmoid', 2, 0.0003, 1583.0022959877779, 1445.2416712105726]]

In [65]:
res9 = train_net(SIGMOID, 3, 0.0003)

Epoch:    1/200,  MSE loss train: 5255.934,  test: 5215.411
Epoch:   21/200,  MSE loss train: 1753.705,  test: 1645.683
Epoch:   41/200,  MSE loss train: 1653.607,  test:  1506.08
Epoch:   61/200,  MSE loss train: 1652.175,  test: 1504.393
Epoch:   81/200,  MSE loss train: 1651.792,  test: 1503.951
Epoch:  101/200,  MSE loss train: 1640.425,  test: 1491.285
Epoch:  121/200,  MSE loss train: 1526.064,  test: 1375.752
Epoch:  141/200,  MSE loss train:   1503.1,  test: 1352.448
Epoch:  161/200,  MSE loss train:  812.289,  test:  759.527
Epoch:  181/200,  MSE loss train:  482.064,  test:   437.79
Epoch:  200/200,  MSE loss train:  416.797,  test:  374.566


In [20]:
res9

[['sigmoid', 3, 0.0003, 1420.809347368804, 1297.8523976115744]]

#### Tahn

In [68]:
res10 = train_net(TANH, 1, 0.001)

Epoch:    1/200,  MSE loss train: 2467.043,  test: 2405.598
Epoch:   21/200,  MSE loss train:   269.19,  test:   262.03
Epoch:   41/200,  MSE loss train:   96.524,  test:   86.455
Epoch:   61/200,  MSE loss train:  162.885,  test:   135.04
Epoch:   81/200,  MSE loss train:    6.805,  test:   12.381
Epoch:  101/200,  MSE loss train:    4.452,  test:    9.872
Epoch:  121/200,  MSE loss train:     3.76,  test:    9.016
Epoch:  141/200,  MSE loss train:    3.527,  test:    8.706
Epoch:  161/200,  MSE loss train:     3.45,  test:    8.559
Epoch:  181/200,  MSE loss train:    3.398,  test:    8.462
Epoch:  200/200,  MSE loss train:    3.369,  test:    8.402


In [69]:
res10

[['tanh', 1, 0.001, 3.3689714949864564, 8.402340618373064]]

In [70]:
res11 = train_net(TANH, 2, 0.0003)

Epoch:    1/200,  MSE loss train: 2873.092,  test: 2788.562
Epoch:   21/200,  MSE loss train: 1715.814,  test: 1563.907
Epoch:   41/200,  MSE loss train: 1713.159,  test: 1561.245
Epoch:   61/200,  MSE loss train: 1568.874,  test: 1416.651
Epoch:   81/200,  MSE loss train:  813.791,  test:  772.874
Epoch:  101/200,  MSE loss train: 1558.712,  test: 1408.689
Epoch:  121/200,  MSE loss train: 1561.372,  test: 1412.274
Epoch:  141/200,  MSE loss train:   874.89,  test:  822.664
Epoch:  161/200,  MSE loss train: 1491.677,  test: 1353.875
Epoch:  181/200,  MSE loss train:  761.416,  test:  728.251
Epoch:  200/200,  MSE loss train:  737.841,  test:  727.638


In [71]:
res11

[['tanh', 2, 0.0003, 737.8413239128622, 727.6379189257589]]

In [72]:
res12 = train_net(TANH, 3, 0.0001)

Epoch:    1/200,  MSE loss train: 3400.103,  test: 3354.086
Epoch:   21/200,  MSE loss train: 1663.841,  test:  1512.57
Epoch:   41/200,  MSE loss train: 1600.237,  test: 1463.604
Epoch:   61/200,  MSE loss train: 1589.917,  test: 1451.635
Epoch:   81/200,  MSE loss train: 1672.395,  test: 1521.431
Epoch:  101/200,  MSE loss train: 1611.322,  test: 1469.661
Epoch:  121/200,  MSE loss train: 1610.511,  test: 1469.796
Epoch:  141/200,  MSE loss train: 1613.646,  test: 1472.302
Epoch:  161/200,  MSE loss train: 1410.788,  test: 1290.554
Epoch:  181/200,  MSE loss train:  838.952,  test:  763.173
Epoch:  200/200,  MSE loss train:  1152.34,  test:  1042.08


In [73]:
res12

[['tanh', 3, 0.0001, 1152.3399181283032, 1042.0796565998414]]

## Summarise results

In [74]:
results = res1  + res2  + res3 +\
          res4  + res5  + res6 +\
          res7  + res8  + res9 +\
          res10 + res11 + res12

In [77]:
df = pd.DataFrame(data=results, columns=["activation", "hidden","learning rate", "loss_train","loss_test"])
df.to_csv("lab6_model_selection_results.csv")
df

Unnamed: 0,activation,hidden,learning rate,loss_train,loss_test
0,relu,1,0.0003,1602.898149,1566.403175
1,relu,2,0.0003,144.59368,143.693841
2,relu,3,0.0001,7.60662,12.495676
3,linear,1,0.0001,4459.693009,4424.354227
4,linear,2,0.0001,4583.188541,4548.167808
5,linear,3,0.0001,4649.009926,4613.98424
6,sigmoid,1,0.003,4.527554,9.502291
7,sigmoid,2,0.0003,1583.002296,1445.241671
8,sigmoid,3,0.0003,416.797194,374.565892
9,tanh,1,0.001,3.368971,8.402341
