In [1]:
# Mount Google Drive at /content/drive so files stored under MyDrive can be read from this notebook
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


1. Build a linear regression model that predicts the land price using both the land_area and the distance_to_city features. (land_price_1.csv)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the land-price table from the mounted Drive and keep it as a plain NumPy array
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/fund_ML/land_price_1.csv').to_numpy()

print(data.shape)

# All columns except the last are the input features; the last column is the target.
# (presumably land_area and distance_to_city -> land price, per the task statement)
x, y = data[:, :-1], data[:, -1]

print('x:', x.shape)
print('y:', y.shape)

(30, 3)
x: (30, 2)
y: (30,)


### Feature Scaling

In [3]:
def feature_scaling(x):
  """Mean-normalize every feature (column) of ``x``.

  Each column is shifted by its mean and divided by its range (max - min),
  so the scaled values of a non-constant column span an interval of width 1
  centred on the column mean.

  Args:
    x: array-like of shape (m, n) -- m samples, n features.

  Returns:
    Array of the same shape as ``x`` holding ``(x - mean) / range`` per column.
    A constant column (range 0) is returned as all zeros instead of NaN.
  """
  x = np.asarray(x)
  mu = np.mean(x, axis=0) # np(n)
  sigma = np.max(x, axis=0) - np.min(x, axis=0) # np(n), per-feature range
  # A constant feature has zero range; dividing by it would yield 0/0 = NaN
  # and silently poison the gradient descent that consumes this output.
  # Substituting 1 leaves the (already zero) centred values untouched.
  sigma = np.where(sigma == 0, 1, sigma)
  return (x - mu) / sigma

# Normalize the features and prepend a column of ones, so that the first
# parameter of the linear model plays the role of the intercept.
x_scaled = np.hstack((np.ones((len(x), 1)), feature_scaling(x)))
print(x_scaled.shape)

(30, 3)


### Training

In [4]:
import torch

# Learning rate of the gradient-descent update
alpha = 0.1

# Design matrix (bias column included) and targets as float32 tensors
tx = torch.tensor(x_scaled, dtype=torch.float32) # tensor(30, 3)
ty = torch.tensor(y, dtype=torch.float32) # tensor(30)

# One parameter per design-matrix column, initialised to zero and tracked
# by autograd from the moment it is created.
theta = torch.zeros(tx.shape[1], dtype=torch.float32, requires_grad=True) # tensor(3)

for i in range(1000):
  # Forward pass: predictions, then the mean squared error cost
  tz = torch.matmul(tx, theta.unsqueeze(1)).squeeze(1) # tensor(30)
  J = ((tz - ty) ** 2).mean()

  # Backward pass: fills theta.grad with dJ/dtheta
  J.backward()

  # The parameter step itself must not be recorded in the computational
  # graph, so it runs under no_grad (toggling requires_grad off and on
  # around the update would be the alternative).
  with torch.no_grad():
    theta.sub_(alpha * theta.grad)

  # backward() accumulates into .grad, so clear it before the next iteration
  theta.grad.zero_()

  if i % 10 == 0:
    print(f'{i}:, {J.item()}')




0:, 35052.921875
10:, 5749.13720703125
20:, 3747.7412109375
30:, 2584.1240234375
40:, 1787.1385498046875
50:, 1238.51806640625
60:, 860.0681762695312
70:, 598.46875
80:, 417.2741394042969
90:, 291.5186462402344
100:, 204.06741333007812
110:, 143.13523864746094
120:, 100.60005950927734
130:, 70.85297393798828
140:, 50.01198959350586
150:, 35.38547134399414
160:, 25.10333251953125
170:, 17.863826751708984
180:, 12.758794784545898
190:, 9.153727531433105
200:, 6.6044464111328125
210:, 4.799315452575684
220:, 3.5196099281311035
230:, 2.6112632751464844
240:, 1.9658162593841553
250:, 1.5067545175552368
260:, 1.1799217462539673
270:, 0.9470021724700928
280:, 0.7808814644813538
290:, 0.6623154878616333
300:, 0.5776501297950745
310:, 0.5171316266059875
320:, 0.4738444685935974
330:, 0.4428654909133911
340:, 0.42068564891815186
350:, 0.4047987163066864
360:, 0.3934048116207123
370:, 0.38523736596107483
380:, 0.37938833236694336
390:, 0.37517935037612915
400:, 0.37216517329216003
410:, 0.3700017

### Evaluation

In [5]:
# Predictions of the fitted model on the training inputs
tz = (tx @ theta.unsqueeze(1)).squeeze(1) # tensor(30)
# The final cost could be inspected with torch.mean((tz - ty) ** 2).item()

# Leave autograd and move to NumPy for a row-by-row comparison
z = tz.detach().numpy()

# Positional print arguments are kept on purpose: they render the float32
# predictions via str(), which is what the recorded output shows.
for i, (zi, yi) in enumerate(zip(z, y)):
  print(
    'i:', i,
    'zi:', zi,
    'yi:', yi,
    'diff:', abs(zi - yi),
  )


i: 0 zi: 16.19491 yi: 15.7 diff: 0.4949100494384773
i: 1 zi: 10.244612 yi: 11.3 diff: 1.055388259887696
i: 2 zi: 41.95444 yi: 42.0 diff: 0.045558929443359375
i: 3 zi: 35.08228 yi: 35.0 diff: 0.08227920532226562
i: 4 zi: 38.084732 yi: 37.7 diff: 0.38473205566405966
i: 5 zi: 75.44217 yi: 75.5 diff: 0.057830810546875
i: 6 zi: 77.72384 yi: 77.1 diff: 0.6238388061523494
i: 7 zi: 89.34595 yi: 88.7 diff: 0.6459472656249972
i: 8 zi: 121.073875 yi: 122.0 diff: 0.9261245727539062
i: 9 zi: 118.72926 yi: 119.1 diff: 0.3707366943359318
i: 10 zi: 126.542435 yi: 125.6 diff: 0.9424346923828182
i: 11 zi: 145.3998 yi: 145.5 diff: 0.1002044677734375
i: 12 zi: 149.60367 yi: 150.0 diff: 0.396331787109375
i: 13 zi: 145.94806 yi: 145.0 diff: 0.94805908203125
i: 14 zi: 171.64647 yi: 172.0 diff: 0.3535308837890625
i: 15 zi: 169.26714 yi: 170.0 diff: 0.7328643798828125
i: 16 zi: 176.38484 yi: 177.0 diff: 0.6151580810546875
i: 17 zi: 170.02876 yi: 169.4 diff: 0.6287628173828068
i: 18 zi: 210.512 yi: 211.0 diff: 