In [3]:
import pandas as pd
data = pd.read_csv("clean_weather.csv", index_col=0)
data = data.ffill()

In [30]:
import numpy as np
from sklearn.preprocessing import StandardScaler
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

# Scale our data so relu works better
# All temperature values in the original dataset are over 0, so relu won't do much for several epochs
# Scaling will make some of the input data negative
scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

split_data = np.split(data, [int(.7 * len(data)), int(.85 * len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

  return bound(*args, **kwds)


In [31]:
def init_layers(inputs):
  layers = []
  for i in range(1, len(inputs)):
    layers.append([
      np.random.rand(inputs[i-1], inputs[i]) / 5 -.1,
      np.ones((1, inputs[i]))
    ])
  
  return layers

layers_conf = [3, 10, 10, 1]

layers = init_layers(layers_conf)

In [33]:
def forward(batch, layers):
  hiddens = [batch.copy()]
  for i in range(len(layers)):
    batch = np.matmul(batch, layers[i][0]) + layers[i][1]
    if i < len(layers) -1:
      batch = np.maximum(batch, 0)
    hiddens.append(batch.copy())
  return batch, hiddens

In [None]:
def mse(actual, predicted):
  return (actual - predicted) ** 2

def mse_grad(actual, predicted):
  return predicted - actual

In [36]:
def backward(layers, hidden, grad, lr):
    for i in range(len(layers)-1, -1, -1):
      if i != len(layers) - 1:
        grad = np.multiply(grad, np.heaviside(hidden[i+1], 0))

      w_grad = hidden[i].T @ grad
      b_grad = np.mean(grad, axis=0)

      layers[i][0] -= w_grad * lr
      layers[i][1] -= b_grad * lr
        
      grad = grad @ layers[i][0].T
    return layers

In [37]:
from statistics import mean

lr = 1e-6
epochs=10
batch_size = 8

layers = init_layers(layers_conf)

for epoch in range(epochs):
    epoch_loss = []

    for i in range(0, train_x.shape[0], batch_size):
        x_batch = train_x[i:(i+batch_size)]
        y_batch = train_y[i:(i+batch_size)]
        pred, hidden = forward(x_batch, layers)

        loss = mse_grad(y_batch, pred)
        epoch_loss.append(np.mean(loss ** 2))

        layers = backward(layers, hidden, loss, lr)


    valid_preds, _ = forward(valid_x, layers)

    print(f"Epoch: {epoch} Train MSE: {mean(epoch_loss)} Valid MSE: {np.mean(mse(valid_preds,valid_y))}")

Epoch: 0 Train MSE: 3346.419652661795 Valid MSE: 1793.6336011311687
Epoch: 1 Train MSE: 375.33071223010023 Valid MSE: 26.41047662451915
Epoch: 2 Train MSE: 23.35240154063674 Valid MSE: 21.31667160684363
Epoch: 3 Train MSE: 22.439479552567846 Valid MSE: 20.85441433295814
Epoch: 4 Train MSE: 22.178367842392767 Valid MSE: 20.760556053582338
Epoch: 5 Train MSE: 22.11068383487087 Valid MSE: 20.745627473547604
Epoch: 6 Train MSE: 22.091457360828787 Valid MSE: 20.74549406393329
Epoch: 7 Train MSE: 22.08459319796934 Valid MSE: 20.7469135328254
Epoch: 8 Train MSE: 22.080952319959565 Valid MSE: 20.747276248984548
Epoch: 9 Train MSE: 22.078354440963075 Valid MSE: 20.74700814721253
