In [2]:
# Import libraries
import numpy as np
import pandas as pd
import torch

# clean up data: area, date, floor, price
def load_csv():
  return pd.read_csv('./data/2305_3rm_woodlands.csv')
def price_cleanup(df):
  df['price'] = df.price.apply(lambda x:float(x.replace('$','').replace(',','')) if isinstance(x,str) else x)
  return df
def area_cleanup(df):
  df['area'] = df.area.str.split('\n',expand=True)[0].astype(float) if df.area.dtype != 'float64' else df.area
  return df  
def date_cleanup(df):
  df['date'] = pd.to_datetime(df.date, format="%m/%d/%y")
  df['date'] = df.date.apply(lambda x:float(x.toordinal()) if not isinstance(x,float) else x)
  return df
floor_dict = { f'0{i*3+1} to 0{i*3+3}' if i < 3 else f'{i*3+1} to {i*3+3}': i*3+2  for i in range(11)  }
def floor_cleanup(df):
  df['floor'] = df.floor.apply(lambda x:float(floor_dict[x]) if isinstance(x,str) else x)
  return df

def load_and_cleanup():
  df = load_csv()
  price_cleanup(df)
  area_cleanup(df)
  date_cleanup(df)
  floor_cleanup(df)
  df.drop(["block", "street", "lease"], axis=1, inplace = True)
  return df

def get_xy():
  df = load_and_cleanup()
  y = torch.tensor(df.price)
  df['date'] = df.date - 693595
  means = df.mean()
  stds = df.std()
  df = (df-means)/stds
  xs = torch.tensor(df.drop("price", axis=1).values)
  return (xs, y, means, stds)

def f(x, params, bias):
  return torch.sum(x*params, 1)+bias
def f1(x,params,bias):
  return torch.sum(x*params)+bias

def mse(preds, targets):
  return ((preds-targets)**2).mean()

def loss(x, params, bias, targets):
  return mse(f(x, params,bias), targets)

def grad(xrow,params,bias,target):
  return -2*xrow*(target-f1(xrow,params,bias))

def grad_bias(xrow,params,bias,target):
  return-2*(target-f1(xrow,params,bias))


In [3]:
## initialize params and bias
(xs, y, means, stds) = get_xy()

# take mean as starting point since other params are normalized
bias = y.mean()
# bias_step should be different because it is not normalized
# random initial params
params = torch.rand(4)


In [26]:
## initialize params and bias
(xs, y, means, stds) = get_xy()

# take mean as starting point since other params are normalized
bias = y.mean()
# bias_step should be different because it is not normalized
# random initial params
params = torch.rand(4)

bias_step =   1e-3
params_step = 1e-3

## random stepper: takes a random step, and move if the loss is lower
curr_loss = loss(xs,params,bias,y)
print(f"Iteration -1: loss ${curr_loss.sqrt()}")    
n = 6
for i in range(n):
  for i in range(len(xs)):
    params = params - grad(xs[i], params, bias, y[i])*params_step
    bias = bias - grad_bias(xs[i], params, bias, y[i])*bias_step  
  print(loss(xs,params,bias,y).sqrt())

curr_loss = loss(xs,params,bias,y)
print(f"Iteration {n}: loss ${curr_loss.sqrt()}")
params
bias


Iteration -1: loss $53364.45496608305
tensor(31506.2002, dtype=torch.float64)
tensor(22515.1017, dtype=torch.float64)
tensor(19244.7106, dtype=torch.float64)
tensor(18135.4988, dtype=torch.float64)
tensor(17757.4424, dtype=torch.float64)
tensor(17622.8531, dtype=torch.float64)
Iteration 6: loss $17622.853125920512


tensor(372774.9663, dtype=torch.float64)