In [5]:
import random
import numpy as np
import matplotlib.pyplot as plt
from lazygrad import MLP, Value
import pandas as pd
import edamame as eda 
%matplotlib inline
np.random.seed(1337)
random.seed(1337)

### Dataset 

[link for the data](https://www.kaggle.com/datasets/quantbruce/real-estate-price-prediction?resource=download)

In [6]:
df_x = pd.read_csv("data/x.csv", sep=";")
print(df_x.shape)
df_x.head()

(414, 6)


Unnamed: 0,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude
0,-0.823683,1.255628,-0.792495,2.007407,1.12543,0.448762
1,-0.823683,0.157086,-0.616612,1.667503,0.912444,0.401139
2,1.541151,-0.387791,-0.414015,0.307885,1.48686,0.688183
3,1.246435,-0.387791,-0.414015,0.307885,1.48686,0.688183
4,-1.121951,-1.117223,-0.549997,0.307885,0.834188,0.592937


In [7]:
# convert pandas dataframe to numpy
x = df_x.to_numpy()
print(x.shape)

(414, 6)


In [8]:
df_y = pd.read_csv("data/y.csv", sep=";")
y = df_y.to_numpy().squeeze()
print(y.shape)

(414,)


### Model

In [9]:
# initialize a model 
model = MLP(6, [16, 16, 1]) # 2-layer neural network
print(model)
print("number of parameters", len(model.parameters()))

MLP of [Layer of [ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6), ReLUNeuron(6)], Layer of [ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16), ReLUNeuron(16)], Layer of [LinearNeuron(16)]]
number of parameters 401


In [13]:
# loss function
def loss(X, y, batch_size=None):
    
    # inline DataLoader :)
    if batch_size is None:
        Xb, yb = X, y
    else:
        ri = np.random.permutation(X.shape[0])[:batch_size]
        Xb, yb = X[ri], y[ri]
    inputs = [list(map(Value, xrow)) for xrow in Xb]
    
    # forward the model to get scores
    scores = list(map(model, inputs))
    
    # mse 
    loss = sum((yout - ygt)**2 for ygt, yout in zip(yb, scores))

    return loss

tot_loss = loss(X=x, y=y, batch_size=128)
print(tot_loss)

Value(data=197264.73659309265, grad=0)


In [14]:
# optimization
for k in range(100):
    
    # forward
    total_loss = loss(X=x, y=y)
    
    # backward
    model.zero_grad()
    total_loss.backward()
    
    # update (sgd)
    learning_rate = 1.0 - 0.9*k/100
    for p in model.parameters():
        p.data -= learning_rate * p.grad
    
    if k % 1 == 0:
        print(f"step {k} loss {total_loss.data}")

step 0 loss 678557.314797544
step 1 loss 2.442258861467567e+30
step 2 loss 1.3004179990342241e+36
step 3 loss 8.576255575075952e+41
step 4 loss 5.552713539313636e+47
step 5 loss 3.528833216162385e+53
step 6 loss 2.2008954040708675e+59
step 7 loss 1.3468918973487576e+65
step 8 loss 8.086346021507796e+70
step 9 loss 4.7618729176690917e+76
