# Imports and data

In [259]:
import definitions as d
import neural_network as nn
import numpy as np

from test_case_creator import (
    denormalized,
    get_sets__without_neighbors__one_prediction__without_aggregation,
    get_sets__without_neighbors__one_prediction__with_aggregation,
    get_sets__without_neighbors__24_predictions__without_aggregation,
    get_sets__without_neighbors__8_predictions__with_aggregation
)

In [222]:
# One (train set, test set, params) triple per experiment configuration.
# The params objects are later indexed by "temperature" and handed to
# `denormalized`, so they presumably hold the normalisation parameters.
train_set1, test_set1, params1 = get_sets__without_neighbors__one_prediction__without_aggregation()
train_set2, test_set2, params2 = get_sets__without_neighbors__one_prediction__with_aggregation()
train_set3, test_set3, params3 = get_sets__without_neighbors__24_predictions__without_aggregation()
train_set4, test_set4, params4 = get_sets__without_neighbors__8_predictions__with_aggregation()

# No neighbors, no aggregation, 1 prediction

## Networks

In [260]:
def get_nn_merge_initially(layer_sizes, activations, dropout_rates, loss):
    """Build a network that merges every input before any trainable layer.

    The day inputs "d1", "d2", "d3" are merged into one layer, the city inputs
    "city_one_hot", "date", "coords" into another, and both merges are merged
    again. A stack of fully connected layers is then placed on top.

    Args:
        layer_sizes: Second argument of each nn.FullConnectLayer, in order.
        activations: Activation of each layer; same length as layer_sizes.
        dropout_rates: Fifth argument of each nn.FullConnectLayer; same length
            as layer_sizes.
        loss: Loss handed to nn.NeuralNetwork.

    Returns:
        An nn.NeuralNetwork whose output is the last fully connected layer.
    """
    assert len(layer_sizes) == len(activations) == len(dropout_rates)

    # Fixed seed so that repeated builds start from the same generator state.
    generator = np.random.default_rng(1)

    day_inputs = [nn.InputLayer(120, name) for name in ("d1", "d2", "d3")]
    merged_days = nn.MergeLayer(day_inputs)

    city_inputs = [
        nn.InputLayer(36, "city_one_hot"),
        nn.InputLayer(1, "date"),
        nn.InputLayer(2, "coords"),
    ]
    merged_city = nn.MergeLayer(city_inputs)

    head = nn.MergeLayer([merged_days, merged_city])
    for width, act, rate in zip(layer_sizes, activations, dropout_rates):
        head = nn.FullConnectLayer(head, width, act, generator, rate)
    return nn.NeuralNetwork(head, loss)


def get_nn_merge_after_while(layer_sizes, activations, dropout_rates, loss):
    """Build a network whose input groups get their own layers before merging.

    Each day input passes through its own linear fully connected layer; the
    three results are merged and passed through one more linear layer. The
    city inputs are merged and passed through a linear layer as well. Both
    branches are then merged and followed by the configurable layer stack.

    Args:
        layer_sizes: Second argument of each trailing nn.FullConnectLayer.
        activations: Activation of each trailing layer; same length as
            layer_sizes.
        dropout_rates: Fifth argument of each trailing nn.FullConnectLayer;
            same length as layer_sizes.
        loss: Loss handed to nn.NeuralNetwork.

    Returns:
        An nn.NeuralNetwork whose output is the last trailing layer.
    """
    assert len(layer_sizes) == len(activations) == len(dropout_rates)

    generator = np.random.default_rng(1)

    # Day branch: input -> linear layer for each day, in day order, so the
    # generator is consumed in the same sequence for every build.
    day_branches = []
    for num in (1, 2, 3):
        day_input = nn.InputLayer(120, f"d{num}")
        day_branches.append(nn.FullConnectLayer(day_input, 120, d.linear, generator, 0.8))
    merged_days = nn.MergeLayer(day_branches)
    days_branch = nn.FullConnectLayer(merged_days, 120, d.linear, generator, 0.7)

    # City branch: the three city inputs merged, then one linear layer.
    city_inputs = [
        nn.InputLayer(36, "city_one_hot"),
        nn.InputLayer(1, "date"),
        nn.InputLayer(2, "coords"),
    ]
    merged_city = nn.MergeLayer(city_inputs)
    city_branch = nn.FullConnectLayer(merged_city, 39, d.linear, generator, 0.8)

    head = nn.MergeLayer([days_branch, city_branch])
    for width, act, rate in zip(layer_sizes, activations, dropout_rates):
        head = nn.FullConnectLayer(head, width, act, generator, rate)
    return nn.NeuralNetwork(head, loss)


def get_nn_only_days(layer_sizes, activations, dropout_rates, loss):
    """Build a network that uses only the day inputs "d1", "d2" and "d3".

    Args:
        layer_sizes: Second argument of each nn.FullConnectLayer, in order.
        activations: Activation of each layer; same length as layer_sizes.
        dropout_rates: Fifth argument of each nn.FullConnectLayer; same length
            as layer_sizes.
        loss: Loss handed to nn.NeuralNetwork.

    Returns:
        An nn.NeuralNetwork whose output is the last fully connected layer.
    """
    assert len(layer_sizes) == len(activations) == len(dropout_rates)

    generator = np.random.default_rng(1)

    day_inputs = [nn.InputLayer(120, name) for name in ("d1", "d2", "d3")]

    head = nn.MergeLayer(day_inputs)
    for width, act, rate in zip(layer_sizes, activations, dropout_rates):
        head = nn.FullConnectLayer(head, width, act, generator, rate)
    return nn.NeuralNetwork(head, loss)


def get_nn3(layer_sizes, activations, dropout_rates, loss):
    """Build a network with narrow per-branch layers ahead of the main stack.

    Each day input goes through its own 60-wide linear layer; the three
    results are merged and reduced by another 60-wide linear layer. The merged
    city inputs go through a 20-wide linear layer. Both branches are merged
    and followed by the configurable stack of fully connected layers.

    Args:
        layer_sizes: Second argument of each trailing nn.FullConnectLayer.
        activations: Activation of each trailing layer; same length as
            layer_sizes.
        dropout_rates: Either None, in which case the trailing layers are
            built without a dropout argument (nn.FullConnectLayer's own
            default applies), or one rate per trailing layer, passed as the
            fifth argument exactly like the sibling builders do.
        loss: Loss handed to nn.NeuralNetwork.

    Returns:
        An nn.NeuralNetwork whose output is the last trailing layer.
    """
    assert len(layer_sizes) == len(activations)
    if dropout_rates is not None:
        assert len(layer_sizes) == len(dropout_rates)

    rng = np.random.default_rng(1)

    def get_days_layer():
        ld1 = nn.InputLayer(120, "d1")
        ld2 = nn.InputLayer(120, "d2")
        ld3 = nn.InputLayer(120, "d3")

        ld11 = nn.FullConnectLayer(ld1, 60, d.linear, rng)
        ld22 = nn.FullConnectLayer(ld2, 60, d.linear, rng)
        ld33 = nn.FullConnectLayer(ld3, 60, d.linear, rng)

        l = nn.MergeLayer([ld11, ld22, ld33])
        return nn.FullConnectLayer(l, 60, d.linear, rng)

    def get_city_layer():
        coh = nn.InputLayer(36, "city_one_hot")
        date = nn.InputLayer(1, "date")
        coords = nn.InputLayer(2, "coords")
        l = nn.MergeLayer([coh, date, coords])
        return nn.FullConnectLayer(l, 20, d.linear, rng)

    # Previously dropout_rates was accepted but never used. An empty tuple
    # per layer reproduces the old "no fifth argument" call when it is None.
    if dropout_rates is None:
        extra_args = [()] * len(layer_sizes)
    else:
        extra_args = [(rate,) for rate in dropout_rates]

    ds = get_days_layer()
    c = get_city_layer()
    l = nn.MergeLayer([ds, c])
    for n, activation, extra in zip(layer_sizes, activations, extra_args):
        l = nn.FullConnectLayer(l, n, activation, rng, *extra)
    return nn.NeuralNetwork(l, loss)


def get_nn4(layer_sizes, activations, dropout_rates, loss1, loss2):
    """Build two networks that share a trunk and differ only in their head.

    The trunk consists of the day branch, the city branch and every
    configured layer except the last two. The second-to-last entry of
    layer_sizes/activations defines the head of the first network and the
    last entry the head of the second one; both heads sit on the same trunk
    layer object.

    Args:
        layer_sizes: Second argument of each trunk/head nn.FullConnectLayer;
            needs at least two entries (one per head).
        activations: Activation of each layer; same length as layer_sizes.
        dropout_rates: Either None, in which case no dropout argument is
            passed (nn.FullConnectLayer's own default applies), or one rate
            per entry of layer_sizes, passed as the fifth argument.
        loss1: Loss of the network ending in the second-to-last layer.
        loss2: Loss of the network ending in the last layer.

    Returns:
        A pair (network with loss1, network with loss2).

    Raises:
        ValueError: If fewer than two layers are configured.
    """
    assert len(layer_sizes) == len(activations)
    if dropout_rates is not None:
        assert len(layer_sizes) == len(dropout_rates)
    if len(layer_sizes) < 2:
        # Fail before any layer is built instead of with a late IndexError.
        raise ValueError("get_nn4 needs at least two layers, one per output head")

    rng = np.random.default_rng(1)

    def get_day_layer(num):
        l = nn.InputLayer(120, f"d{num}")
        return nn.FullConnectLayer(l, 60, d.linear, rng)

    def get_days_layer():
        ls = [get_day_layer(1), get_day_layer(2), get_day_layer(3)]
        l = nn.MergeLayer(ls)
        return nn.FullConnectLayer(l, 100, d.linear, rng)

    def get_city_layer():
        coh = nn.InputLayer(36, "city_one_hot")
        date = nn.InputLayer(1, "date")
        coords = nn.InputLayer(2, "coords")
        l = nn.MergeLayer([coh, date, coords])
        return nn.FullConnectLayer(l, 20, d.linear, rng)

    def dense(source, index):
        # Previously dropout_rates was accepted but never used; when it is
        # None the call is identical to the old four-argument form.
        extra = () if dropout_rates is None else (dropout_rates[index],)
        return nn.FullConnectLayer(source, layer_sizes[index], activations[index], rng, *extra)

    ds = get_days_layer()
    c = get_city_layer()
    l = nn.MergeLayer([ds, c])
    for i in range(len(layer_sizes) - 2):
        l = dense(l, i)
    l_temp = dense(l, -2)
    l_wind = dense(l, -1)
    return (nn.NeuralNetwork(l_temp, loss1), nn.NeuralNetwork(l_wind, loss2))


def get_nn_mid_prediction(loss):
    """Build a two-stage network where stage one feeds stage two.

    Stage one merges the inputs "d1", "d2", "d3" with the city inputs and
    reduces them through linear layers of width 300, 140, 50 and 1. Stage two
    uses fresh "d2" and "d3" inputs together with the stage-one output as its
    third "day", merges them with fresh city inputs and applies the same
    300/140/50/1 linear stack.

    Args:
        loss: Loss handed to nn.NeuralNetwork.

    Returns:
        An nn.NeuralNetwork whose output is the last layer of stage two.
    """
    generator = np.random.default_rng(1)

    def city_block():
        # A new set of city input layers on every call, like the original
        # which instantiated them separately for each stage.
        return nn.MergeLayer([
            nn.InputLayer(36, "city_one_hot"),
            nn.InputLayer(1, "date"),
            nn.InputLayer(2, "coords"),
        ])

    def dense_stack(layer):
        for width in (300, 140, 50, 1):
            layer = nn.FullConnectLayer(layer, width, d.linear, generator)
        return layer

    # Stage one: all three days plus city information.
    stage_one_days = nn.MergeLayer([nn.InputLayer(120, name) for name in ("d1", "d2", "d3")])
    stage_one_out = dense_stack(nn.MergeLayer([stage_one_days, city_block()]))

    # Stage two: the two latest days plus the stage-one output.
    stage_two_days = nn.MergeLayer([
        nn.InputLayer(120, "d2"),
        nn.InputLayer(120, "d3"),
        stage_one_out,
    ])
    stage_two_out = dense_stack(nn.MergeLayer([stage_two_days, city_block()]))

    return nn.NeuralNetwork(stage_two_out, loss)

## Best ones yet

In [None]:
# Best configurations recorded for data set 1 (no neighbors, no aggregation,
# one prediction); the trailing comment on each constructor is the success
# rate noted when the configuration was first tried.
#
# The builders now take a dropout_rates argument, which these calls used to
# omit, and the data is bound to train_set1/test_set1 rather than the
# undefined train_set/test_set.
# NOTE(review): a dropout rate of 1 is assumed to mean "no dropout" (the tests
# cell uses 1 for its output layer) -- confirm against neural_network.
# None is passed to get_nn3/get_nn4 so that their layers keep the
# nn.FullConnectLayer default.
rng = np.random.default_rng(1)
no_dropout_5 = [1, 1, 1, 1, 1]

net3 = get_nn_merge_initially([300, 100, 60, 1, 1], [d.relu, d.relu, d.relu, d.relu, d.linear], no_dropout_5, d.l2_loss)  # ~15.74%
net3.train(train_set1, test_set1, 1024, "output_temp", rng, 15)

net3 = get_nn3([300, 100, 60, 1, 1], [d.relu, d.relu, d.relu, d.relu, d.linear], None, d.l2_loss)  # ~15.77% shared weights for days
net3.train(train_set1, test_set1, 1024, "output_temp", rng, 15)

net3 = get_nn_merge_initially([300, 100, 60, 1, 1], [d.sigmoid, d.sigmoid, d.sigmoid, d.sigmoid, d.linear], no_dropout_5, d.l2_loss)  # ~15.91%
net3.train(train_set1, test_set1, 1024, "output_temp", rng, 5)

net3 = get_nn_merge_initially([300, 100, 60, 1, 1], [d.sigmoid, d.sigmoid, d.sigmoid, d.sigmoid, d.linear], no_dropout_5, d.l1_loss)  # ~16.13%
net3.train(train_set1, test_set1, 1024, "output_temp", rng, 5)

net3 = get_nn_merge_initially([300, 100, 60, 1, 1], [d.sigmoid, d.sigmoid, d.sigmoid, d.sigmoid, d.linear], no_dropout_5, d.l1_loss)  # ~16.29% shared weights for days
net3.train(train_set1, test_set1, 1024, "output_temp", rng, 5)

net3 = get_nn_merge_after_while([100, 40, 1], [d.linear, d.linear, d.linear], [1, 1, 1], d.l2_loss)  # ~18.17%
net3.train(train_set1, test_set1, 1024, "output_temp", rng, 5)

net3_wind = get_nn_only_days([80, 40, 2, 2, 2], [d.linear, d.linear, d.linear, d.sigmoid, d.softmax], no_dropout_5, d.cross_entropy_loss)  # 61.49% no matter hinge or cross entropy
net3_wind.train(train_set1, test_set1, 1024, "output_wind", rng, 5)

net3_wind = get_nn_only_days([80, 40, 2, 2, 2], [d.linear, d.linear, d.linear, d.sigmoid, d.softmax], no_dropout_5, d.cross_entropy_loss)  # 61.49% no matter hinge or cross entropy shared weights for days
net3_wind.train(train_set1, test_set1, 1024, "output_wind", rng, 5)

net4_temp, net4_wind = get_nn4([300, 100, 60, 2, 1, 2], [d.relu, d.relu, d.relu, d.relu, d.linear, d.softmax], None, d.l2_loss, d.cross_entropy_loss)  # 15.66% + 61.49%
net4_temp.train(train_set1, test_set1, 1024, "output_temp", rng, 5)
net4_wind.train(train_set1, test_set1, 1024, "output_wind", rng, 5)

## Tests

In [269]:
rng = np.random.default_rng(1)

net3 = get_nn_merge_initially([300, 100, 60, 1, 1], [d.sigmoid, d.sigmoid, d.sigmoid, d.sigmoid, d.sigmoid], [0.8, 0.6, 0.6, 0.5, 1], d.l2_loss)

# Train one epoch at a time and report the denormalized absolute errors on
# both splits after every epoch. The loop has no exit condition: it runs
# until the cell is interrupted by hand.
while True:
    net3.train(train_set1, test_set1, 1024, "output_temp", rng, 1)

    # Same report for both splits; a prediction counts as "good" when its
    # absolute error after denormalization is at most 2.
    for label, dataset in (("train", train_set1), ("test", test_set1)):
        predicted = denormalized(net3.predict(dataset), params1["temperature"])
        expected = denormalized(dataset["output_temp"], params1["temperature"])

        diffs = np.abs(predicted - expected)
        good = np.count_nonzero(diffs <= 2)
        bad = np.count_nonzero(diffs > 2)
        print(f"[{label}] min: {np.min(diffs)}, max: {np.max(diffs)}, mean: {np.mean(diffs)}, median: {np.median(diffs)}")
        print(f"[{label}] Good predictions: {good}, bad predictions: {bad}, success rate: {good / diffs.size * 100 : .2f}%")

Epoch 1/1: train: 0.08742700627252124, test: 0.04041009392645985
[train] min: 0.0017969044552046398, max: 39.60188022308989, mean: 14.23410124706065, median: 14.523046898649397
[train] Good predictions: 2427, bad predictions: 43256, success rate:  5.31%
[test] min: 0.0032031054146273163, max: 36.71096356624719, mean: 13.903480722172882, median: 14.266380257563412
[test] Good predictions: 882, bad predictions: 11529, success rate:  7.11%
Epoch 1/1: train: 0.024133177931014327, test: 0.015305317459894817
[train] min: 0.000511232052701871, max: 38.205198719121285, mean: 8.112455494737139, median: 7.384422337633055
[train] Good predictions: 6398, bad predictions: 39285, success rate:  14.01%
[test] min: 0.0003220596178152846, max: 37.85447622560622, mean: 8.12979117468888, median: 7.620738739232536
[test] Good predictions: 1689, bad predictions: 10722, success rate:  13.61%
Epoch 1/1: train: 0.014723016872641721, test: 0.014286578109348384
[train] min: 5.130460374402901e-05, max: 40.743488

KeyboardInterrupt: 

In [238]:
# Train the `net3` object that already exists in the kernel for 15 epochs.
# The generator and network re-initialisation below is commented out, so the
# current `rng` and `net3` are reused rather than rebuilt.
# NOTE(review): the commented-out constructor call has no dropout_rates
# argument and would need one to match the current builder signature.
# rng = np.random.default_rng(1)

# net3 = get_nn_merge_initially([300, 100, 60, 1, 1], [d.relu, d.relu, d.relu, d.relu, d.linear], d.l2_loss)  # ~15.74%
net3.train(train_set1, test_set1, 1024, "output_temp", rng, 15)

Epoch 1/15: train: 0.0060670772907663924, test: 0.05497609616386401
Epoch 2/15: train: 0.001929655409781732, test: 0.05577942432225474
Epoch 3/15: train: 0.001812920854232353, test: 0.05483961363233612
Epoch 4/15: train: 0.0017941756806227696, test: 0.053025281826781415
Epoch 5/15: train: 0.0017651678758746803, test: 0.05240972642452739
Epoch 6/15: train: 0.001744873728354971, test: 0.05185866461908931
Epoch 7/15: train: 0.0018048783106294605, test: 0.05170222381353928
Epoch 8/15: train: 0.00178732141717938, test: 0.05168748776216245
Epoch 9/15: train: 0.0017694443631623914, test: 0.053164428502811716
Epoch 10/15: train: 0.0017555506643470646, test: 0.05158265230959261
Epoch 11/15: train: 0.0017634852975898273, test: 0.05361702952787019
Epoch 12/15: train: 0.001816516218349976, test: 0.05098295941423114
Epoch 13/15: train: 0.001807289489042563, test: 0.05285344664434598
Epoch 14/15: train: 0.0017283817263267329, test: 0.05231572507352986
Epoch 15/15: train: 0.0017385934560919666, test:

In [239]:
# Report denormalized absolute temperature errors of net3 on both splits.
# A prediction counts as "good" when its absolute error is at most 2.
for label, dataset in (("train", train_set1), ("test", test_set1)):
    predicted = denormalized(net3.predict(dataset), params1["temperature"])
    expected = denormalized(dataset["output_temp"], params1["temperature"])

    diffs = np.abs(predicted - expected)
    good = np.count_nonzero(diffs <= 2)
    bad = np.count_nonzero(diffs > 2)
    print(f"[{label}] min: {np.min(diffs)}, max: {np.max(diffs)}, mean: {np.mean(diffs)}, median: {np.median(diffs)}")
    print(f"[{label}] Good predictions: {good}, bad predictions: {bad}, success rate: {good / diffs.size * 100 : .2f}%")

[train] min: 2.7148055721681885e-05, max: 25.38636795286976, mean: 2.351503048060779, median: 1.6902239127363714
[train] Good predictions: 25781, bad predictions: 19902, success rate:  56.43%
[test] min: 0.0005824673033885119, max: 60.784248791217436, mean: 14.128784784049726, median: 11.564250715051116
[test] Good predictions: 1161, bad predictions: 11250, success rate:  9.35%


In [None]:
# Wind classifier on data set 1. get_nn_only_days requires one dropout rate
# per layer, and the data is bound to train_set1/test_set1.
# NOTE(review): a dropout rate of 1 is assumed to mean "no dropout" (the tests
# cell uses 1 for its output layer) -- confirm against neural_network.
net3_wind = get_nn_only_days([80, 40, 2, 2, 2], [d.linear, d.linear, d.linear, d.sigmoid, d.softmax], [1, 1, 1, 1, 1], d.cross_entropy_loss)  # 61.49% no matter hinge or cross entropy
net3_wind.train(train_set1, test_set1, 1024, "output_wind", rng, 5)

In [None]:
# Inspect the wind classifier on both splits of data set 1 (the sets are
# bound to train_set1/test_set1; plain train_set/test_set are never defined).
for label, dataset in (("train", train_set1), ("test", test_set1)):
    predicted = net3_wind.predict(dataset)
    print(predicted)
    print(np.max(predicted[0, :]), np.min(predicted[1, :]))

    # Round row 0 of the network output and compare it with row 0 of the
    # expected output.
    predicted = np.rint(predicted[0, :])
    expected = dataset["output_wind"][0, :]
    print(predicted)
    print(expected)
    print(np.count_nonzero(predicted == 1))
    print(predicted.size)

    hits = np.count_nonzero(predicted == expected)
    misses = np.count_nonzero(predicted != expected)
    print(f"[{label}] Good predictions: {hits}, bad predictions: {misses}, success_rate: {hits / predicted.size * 100 : .2f}%")

# No neighbors, aggregation, 1 prediction

## Networks

## Best ones yet

In [None]:
# Best configurations recorded for data set 2 (no neighbors, aggregation, one
# prediction); the trailing comment on each constructor is the success rate
# noted when the configuration was first tried.
#
# The builders now take a dropout_rates argument, which these calls used to
# omit, and the data is bound to train_set2/test_set2 rather than the
# undefined train_set/test_set.
# NOTE(review): a dropout rate of 1 is assumed to mean "no dropout" (the tests
# cell uses 1 for its output layer) -- confirm against neural_network.
# None is passed to get_nn3 so that its layers keep the nn.FullConnectLayer
# default.
net3 = get_nn_merge_after_while([100, 40, 1], [d.linear, d.linear, d.linear], [1, 1, 1], d.l2_loss)  # ~12.77%
net3.train(train_set2, test_set2, 1024, "output_temp", rng, 5)

net3 = get_nn3([300, 100, 60, 1, 1], [d.relu, d.relu, d.relu, d.relu, d.linear], None, d.l2_loss)  # ~17.61% shared weights for days
net3.train(train_set2, test_set2, 1, "output_temp", rng, 2)

net3 = get_nn_merge_initially([300, 100, 60, 1, 1], [d.sigmoid, d.sigmoid, d.sigmoid, d.sigmoid, d.linear], [1, 1, 1, 1, 1], d.l1_loss)  # ~17.51% shared weights for days
net3.train(train_set2, test_set2, 1024, "output_temp", rng, 5)

net3_wind = get_nn_only_days([80, 40, 2, 2, 2], [d.linear, d.linear, d.linear, d.sigmoid, d.softmax], [1, 1, 1, 1, 1], d.cross_entropy_loss)  # 53.18% no matter hinge or cross entropy
net3_wind.train(train_set2, test_set2, 1024, "output_wind", rng, 5)

net3_wind = get_nn_only_days([80, 40, 2, 2, 2], [d.linear, d.linear, d.linear, d.sigmoid, d.softmax], [1, 1, 1, 1, 1], d.cross_entropy_loss)  # 53.18% no matter hinge or cross entropy shared weights for days
net3_wind.train(train_set2, test_set2, 1024, "output_wind", rng, 5)

# No neighbors, no aggregation, 24 predictions

In [None]:
def get_nn_24_predictions():
    """Build a network with a 24-wide output layer and an L2 loss.

    The day inputs "d1", "d2", "d3" and the city inputs "city_one_hot",
    "date", "coords" are merged and reduced through linear fully connected
    layers of width 300, 140, 60 and finally 24.

    Returns:
        An nn.NeuralNetwork using d.l2_loss.
    """
    generator = np.random.default_rng(1)

    merged_days = nn.MergeLayer([nn.InputLayer(120, name) for name in ("d1", "d2", "d3")])

    merged_city = nn.MergeLayer([
        nn.InputLayer(36, "city_one_hot"),
        nn.InputLayer(1, "date"),
        nn.InputLayer(2, "coords"),
    ])

    head = nn.MergeLayer([merged_days, merged_city])
    for width in (300, 140, 60, 24):
        head = nn.FullConnectLayer(head, width, d.linear, generator)

    return nn.NeuralNetwork(head, d.l2_loss)

In [None]:
# Train on data set 3 (no neighbors, no aggregation, 24 predictions); the
# sets are bound to train_set3/test_set3, plain train_set/test_set are never
# defined.
net_24pred = get_nn_24_predictions()
net_24pred.train(train_set3, test_set3, 512, "output_temp", rng, 10)  # only works without batching in nn (too big dataset)

In [None]:
# Report denormalized absolute errors of the 24-output network on both splits
# of data set 3 (bound to train_set3/test_set3/params3; the unsuffixed names
# are never defined).
for label, dataset in (("train", train_set3), ("test", test_set3)):
    predicted = denormalized(net_24pred.predict(dataset), params3["temperature"])
    expected = denormalized(dataset["output_temp"], params3["temperature"])

    # Collapse axis 0 before comparing -- presumably the 24 outputs of each
    # sample are averaged into one value; TODO confirm the array layout.
    predicted = np.mean(predicted, axis=0)
    expected = np.mean(expected, axis=0)

    print(predicted)
    print(expected)

    diffs = np.abs(predicted - expected)
    good = np.count_nonzero(diffs <= 2)
    bad = np.count_nonzero(diffs > 2)
    print(f"[{label}] min: {np.min(diffs)}, max: {np.max(diffs)}, mean: {np.mean(diffs)}, median: {np.median(diffs)}")
    print(f"[{label}] Good predictions: {good}, bad predictions: {bad}, success rate: {good / diffs.size * 100 : .2f}%")