In [20]:
# Import libraries
import numpy as np
import pandas as pd
import torch
from fastai.tabular.all import TabularDataLoaders, Normalize, tabular_learner, R2Score, rmse, Categorify

# clean up data: area, date, floor, price
def load_csv():
  return pd.read_csv('./data/2305_3rm_woodlands.csv')
def price_cleanup(df):
  df['price'] = df.price.apply(lambda x:float(x.replace('$','').replace(',','')) if isinstance(x,str) else x)
  return df
def area_cleanup(df):
  df['area'] = df.area.str.split('\n',expand=True)[0].astype(float) if df.area.dtype != 'float64' else df.area
  return df  
def date_cleanup(df):
  df['date'] = pd.to_datetime(df.date, format="%m/%d/%y")
  df['date'] = df.date.apply(lambda x:float(x.toordinal()) if not isinstance(x,float) else x)
  return df
floor_dict = { f'0{i*3+1} to 0{i*3+3}' if i < 3 else f'{i*3+1} to {i*3+3}': i*3+2  for i in range(11)  }
def floor_cleanup(df):
  df['floor'] = df.floor.apply(lambda x:float(floor_dict[x]) if isinstance(x,str) else x)
  return df

def load_and_cleanup():
  df = load_csv()
  price_cleanup(df)
  area_cleanup(df)
  date_cleanup(df)
  floor_cleanup(df)
  df.drop(["block", "street", "lease"], axis=1, inplace = True)
  return df

def get_xy():
  df = load_and_cleanup()
  y = torch.tensor(df.price)
  df['date'] = df.date - 693595
  means = df.mean()
  stds = df.std()
  df = (df-means)/stds
  xs = torch.tensor(df.drop("price", axis=1).values)
  return (xs, y, means, stds)

def f(x, params, bias):
  return torch.sum(x*params, 1)+bias

def rmse(preds, targets):
  return ((preds-targets)**2).mean().sqrt()

def loss(x, params, bias, targets):
  return rmse(f(x, params,bias), targets)


In [197]:
## initialize params and bias
(xs, y, means, stds) = get_xy()

# take mean as starting point since other params are normalized
bias = y.mean()
# bias_step should be different because it is not normalized
bias_step = y.std()/100
# random initial params
params = torch.rand(4)


In [340]:
## random stepper: takes a random step, and move if the loss is lower
curr_loss = loss(xs,params,bias,y)
print(f"Iteration -1: loss ${curr_loss}")    
n = 10000
for i in range(n):
  new_params = params + torch.rand(4)*.1
  new_bias = bias + torch.rand(1)*.1
  new_loss = loss(xs,new_params,new_bias,y)
  count = 0
  stop = False
  while (new_loss > curr_loss):
    count += 1
    if (count == 1000):
      print(f'100 cycles reached')
      print(f"Iteration{i}: loss ${curr_loss}")   
      stop = True
      break
    new_params = params + torch.rand(4)
    new_bias = bias + torch.rand(1)
    new_loss = loss(xs,new_params,new_bias,y)
  if (stop):
    break
  params = new_params
  bias = new_bias
  curr_loss = new_loss
  if (i > n-5):
    print(f"Iteration{i}: loss ${curr_loss}")   

Iteration -1: loss $17510.27291863599
100 cycles reached
Iteration0: loss $17510.27291863599


In [338]:
# fine-tuning: move the params by specifying the perturbation
curr_loss = loss(xs,params,bias,y)
print(f"Iteration -1: loss ${curr_loss}")    
n = 100
for i in range(n):
  params_up = params + torch.tensor([  0, 0, 0, 0])
  params_down = params - torch.tensor([0, 0, 0, 0])
  loss_up = loss(xs,params_up,new_bias,y)
  loss_down = loss(xs,params_down,new_bias,y)
  if (loss_up < curr_loss):
    params = params_up
    curr_loss = loss_up
  elif (loss_down < curr_loss):
    params = params_down
    curr_loss = loss_down
  else:
    print(f"local min reached at iteration {i}")
    break
  if (i > n-5):
    print(f"Iteration{i}: loss ${curr_loss}")   

Iteration -1: loss $17510.272923675348
local min reached at iteration 2


In [328]:
# fine-tuning the bias
curr_loss = loss(xs,params,bias,y)
print(f"Iteration -1: loss ${curr_loss}")    
n = 100
for i in range(n):
  bias_up = bias +   torch.tensor(1)
  bias_down = bias - torch.tensor(1)
  loss_up = loss(xs,params,  bias_up,y)
  loss_down = loss(xs,params,bias_down,y)
  if (loss_up < curr_loss):
    bias = bias_down
    curr_loss = loss_up
  elif (loss_down < curr_loss):
    bias = bias_down
    curr_loss = loss_down
  else:
    print(f"local min reached at iteration {i}")
    break
  if (i > n-5):
    print(f"Iteration{i}: loss ${curr_loss}")  

Iteration -1: loss $17510.27389588415
local min reached at iteration 1
