In [2]:
import numpy as np
import pandas as pd

PRED = ["tmax", "tmin", "rain"]
TARGET = ["tmax_tomorrow"]

data = pd.read_csv("dataset/clean_weather.csv")
print(data.head())

   Unnamed: 0  tmax  tmin  rain  tmax_tomorrow
0  1970-01-01  60.0  35.0   0.0           52.0
1  1970-01-02  52.0  39.0   0.0           52.0
2  1970-01-03  52.0  35.0   0.0           53.0
3  1970-01-04  53.0  36.0   0.0           52.0
4  1970-01-05  52.0  35.0   0.0           50.0


In [3]:
data.isnull().sum()

Unnamed: 0         0
tmax              11
tmin              14
rain             281
tmax_tomorrow     11
dtype: int64

In [4]:
data.fillna({"tmax": data.tmax.median()}, inplace=True)
data.fillna({"tmin": data.tmin.median()}, inplace=True)
data.fillna({"rain": data.rain.median()}, inplace=True)
data.fillna({"tmax_tomorrow": data.tmax_tomorrow.median()}, inplace=True)

In [5]:
data.isnull().sum()

Unnamed: 0       0
tmax             0
tmin             0
rain             0
tmax_tomorrow    0
dtype: int64

In [6]:
split = np.split(data, {int(len(data) * .7), int(len(data) * .85)})
(train_X, train_y),(valid_X, valid_y), (test_X, test_y) = [[d[PRED].to_numpy(), d[TARGET].to_numpy()] for d in split]
print(train_X.shape, train_y.shape, valid_X.shape, valid_y.shape, test_X.shape, test_y.shape)

(9456, 3) (9456, 1) (2026, 3) (2026, 1) (2027, 3) (2027, 1)


  return bound(*args, **kwds)


In [7]:
def init_params(predictors):
    np.random.seed(0)
    w = np.random.randn(predictors, 1)
    b = np.ones((1, 1))
    params = [w, b]
    return params

In [8]:
def forward(params, X):
    w, b = params
    return  X @ w + b

In [9]:
def mse(actual, predicted):
    return np.mean((actual - predicted) ** 2)

def mse_grad(actual, predicted):
    return predicted - actual

In [10]:
def backward(params, X, gradient, learning_rate):
    w_gradient = (X.T  / X.shape[0]) @ gradient
    b_gradient = np.sum(gradient, axis=0)

    params[0] -= learning_rate * w_gradient
    params[1] -= learning_rate * b_gradient
    return params

In [11]:
learning_rate = 1e-5
epochs = 10000

params = init_params(train_X.shape[1])

for i in range(epochs):
    predictions = forward(params, train_X)
    gradient = mse_grad(train_y, predictions)

    params = backward(params, train_X, gradient, learning_rate)

    if i % 1000 == 0:
        
        loss = mse(valid_y, forward(params, valid_X))
        print(f"Epoch {i}, loss: {loss}")


Epoch 0, loss: 3805.032198160283


Epoch 1000, loss: 34.70961905101897
Epoch 2000, loss: 26.64053996346304
Epoch 3000, loss: 23.67748775994302
Epoch 4000, loss: 22.51708482168482
Epoch 5000, loss: 22.02200027940666
Epoch 6000, loss: 21.790426026125186
Epoch 7000, loss: 21.67362595971429
Epoch 8000, loss: 21.612267830639027
Epoch 9000, loss: 21.580164048281407


In [23]:
def init_layers(inputs):
    layers = []
    for i in range(1, len(inputs)):
        layers.append([
            np.random.randn(inputs[i-1], inputs[i]) / 5 - .1,
            np.ones((1, inputs[i]))
        ])
    return layers

layer_conf = [3, 10, 10, 1]

In [53]:
def forwardnn(batch, layers):
    hidden = [batch.copy()]
    for i in range(len(layers)):

        batch = np.matmul(batch, layers[i][0]) + layers[i][1]
        if i < len(layers) - 1:
            batch = np.maximum(batch, 0)
        # Store the forward pass hidden values for use in backprop
        hidden.append(batch.copy())
    return batch, hidden

In [14]:
def msenn(actual, predicted):
    return (actual - predicted) ** 2

def msenn_gradient(actual, predicted):
    return predicted - actual

In [None]:
def backwardnn(layers, hidden, grad, lr):
    for i in range(len(layers)-1, -1, -1):
        if i != len(layers) - 1:
            grad = np.multiply(grad, np.heaviside(hidden[i+1], 0))

        w_grad = hidden[i].T @ grad
        b_grad = np.mean(grad, axis=0)

        layers[i][0] -= w_grad * lr
        layers[i][1] -= b_grad * lr
        
        grad = grad @ layers[i][0].T
    return layers

In [54]:
from statistics import mean

learning_rate = 1e-5
batch_size = 8
epochs = 10

layers = init_layers(layer_conf)

for epoch in range(epochs):
    epoch_loss = []

    for i in range(0, train_X.shape[0], batch_size):
        batch_X = train_X[i:i+batch_size]
        batch_y = train_y[i:i+batch_size]

        pred, hidden = forwardnn(batch_X, layers)

        print(pred.shape)
        gradient = msenn_gradient(batch_y, pred)
        epoch_loss.append(np.mean(gradient ** 2, axis=0))

        layers = backwardnn(layers, hidden, gradient, learning_rate)

    valid_preds, _ = forwardnn(valid_X, layers)
    print(f"Epoch: {epoch} Train MSE: {mean(epoch_loss)} Valid MSE: {np.mean(msenn(valid_preds,valid_y))}")

(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (8, 10)
(8, 1)
(10, 8) (8, 1)
(10, 8) (8, 10)
(3, 8) (

TypeError: can't convert type 'ndarray' to numerator/denominator