In [2]:
# Import libraries
import numpy as np
import pandas as pd
import torch

# clean up data: area, date, floor, price
def load_csv():
  return pd.read_csv('./data/2305_3rm_woodlands.csv')
def price_cleanup(df):
  df['price'] = df.price.apply(lambda x:float(x.replace('$','').replace(',','')) if isinstance(x,str) else x)
  return df
def area_cleanup(df):
  df['area'] = df.area.str.split('\n',expand=True)[0].astype(float) if df.area.dtype != 'float64' else df.area
  return df  
def date_cleanup(df):
  df['date'] = pd.to_datetime(df.date, format="%m/%d/%y")
  df['date'] = df.date.apply(lambda x:float(x.toordinal()) if not isinstance(x,float) else x)
  return df
floor_dict = { f'0{i*3+1} to 0{i*3+3}' if i < 3 else f'{i*3+1} to {i*3+3}': i*3+2  for i in range(11)  }
def floor_cleanup(df):
  df['floor'] = df.floor.apply(lambda x:float(floor_dict[x]) if isinstance(x,str) else x)
  return df

def load_and_cleanup():
  df = load_csv()
  price_cleanup(df)
  area_cleanup(df)
  date_cleanup(df)
  floor_cleanup(df)
  df.drop(["block", "street", "lease"], axis=1, inplace = True)
  return df

def get_xy():
  df = load_and_cleanup()
  y = torch.tensor(df.price)[:,None]
  df['date'] = df.date - 693595
  means = df.mean()
  stds = df.std()
  df = (df-means)/stds
  df['bias'] = 1
  xs = torch.tensor(df.drop("price", axis=1).values).float()
  return (xs, y, means, stds, df)

(xs, y, means, stds, df) = get_xy()


def f(params):
  return (xs @ params) + y.mean()
def mse(params):
  return (torch.square(f(params)-y)).mean()


In [3]:

# initialize params
params = torch.rand(5,1)
params.requires_grad_()

lr = 1e-1

n = 100
for i in range(n):
  loss = mse(params)
  loss.backward()
  print(f'loss{loss.sqrt()} loss rate {lr}')
  if (loss.sqrt() < 19000):
    lr = 1e-4
  elif (loss.sqrt() < 20000):
    lr = 1e-3
  elif (loss.sqrt() < 30000):
    lr = 1e-2
  with torch.no_grad():
    #print(params)
    params.sub_(params.grad * lr)
    #print(params)
    new_loss = mse(params)
  if (new_loss > loss):
    print(f'local min reached at {i}, {loss.sqrt()}')
    params = params + params.grad * lr
    break
  #else:

print(f'loss {loss.sqrt()}')
params* y.std()
params
#y.mean()
#f(params)

loss53364.62395804597 loss rate 0.1
loss43321.32189362336 loss rate 0.1
loss27389.331112736567 loss rate 0.1
loss25776.145884948168 loss rate 0.01
loss24045.88069043632 loss rate 0.01
loss22328.92215681267 loss rate 0.01
loss20781.04895108481 loss rate 0.01
loss19572.880896797327 loss rate 0.01
loss19474.682245904605 loss rate 0.001
loss19376.561349671447 loss rate 0.001
loss19279.15177017224 loss rate 0.001
loss19183.07229833393 loss rate 0.001
loss19088.933331998247 loss rate 0.001
loss18997.320467620008 loss rate 0.001
loss18988.117969904204 loss rate 0.0001
loss18978.544156445558 loss rate 0.0001
loss18968.599918415348 loss rate 0.0001
loss18958.29351878958 loss rate 0.0001
loss18947.628244695177 loss rate 0.0001
loss18936.61189129994 loss rate 0.0001
loss18925.247234493894 loss rate 0.0001
loss18913.542841785908 loss rate 0.0001
loss18901.50483452154 loss rate 0.0001
loss18889.137212655467 loss rate 0.0001
loss18876.44813587793 loss rate 0.0001
loss18863.444274138743 loss rate 0.0

tensor([[ 1.0254e+04],
        [ 1.0053e+04],
        [ 4.6791e+04],
        [ 1.0278e+04],
        [-1.0977e-02]], requires_grad=True)