In [None]:
import pandas as pd
import numpy as np

path = '/content/land_price_1.csv'

# Read the CSV and work with it as a plain ndarray from here on.
data = pd.read_csv(path).to_numpy()
print(data.shape)

# Every column except the last goes into x; the last column becomes y.
x, y = data[:, :-1], data[:, -1]  # x: np(m, 2), y: np(m)
print('x', x.shape)
print('y', y.shape)

(30, 3)
x (30, 2)
y (30,)


In [None]:
#h(x) = th0*x0+th1*x1+th2*x2
def h(x, theta):
  """
  Linear hypothesis: one prediction per row of x.

  x: np(m, 3)
  theta: np(3)
  return: z: np(m)
  """
  # Turn theta into a column so the product comes out as np(m, 1).
  theta_col = np.reshape(theta, (-1, 1))
  product = np.dot(x, theta_col)
  # Collapse the trailing singleton axis back to a flat np(m) vector.
  return product.flatten()

def cost_func(z, y):
  """
  Mean of the squared differences between z and y.

  z: np(m) predictions
  y: np(m) reference values
  return: J: scalar, sum((z - y)^2) / m with m = y.shape[0]
  """
  residual = z - y
  return np.sum(np.square(residual)) / y.shape[0]

def grad(x, y, theta):
  """
  Gradient of the mean-squared-error cost with respect to theta.

  x: np(m, 3)
  y: np(m)
  theta: np(3)
  return: g: np(3), equal to (2/m) * x^T (h(x, theta) - y)
  """
  n_samples = y.shape[0]
  # Prediction error as a column vector, np(m, 1), so it can be left-multiplied by x^T.
  err_col = (h(x, theta) - y).reshape((-1, 1))
  g_col = np.dot(x.T, err_col) * 2 / n_samples  # np(3, 1)
  return g_col.flatten()  # np(3)

In [None]:
#feature scaling
def feature_scaling(x):
  """
  Mean-normalize each column of x: (x - mean) / (max - min).

  x: np(m, n) feature matrix; it is not modified.
  return: np(m, n) array where every column has zero mean and a value
          range of at most 1.

  A column whose values are all equal has max - min == 0. Dividing by
  that would give 0/0 = NaN, so such columns are divided by 1 instead and
  come out as all zeros.
  """
  mu = np.mean(x, axis=0) #np(n)
  span = np.max(x, axis=0) - np.min(x, axis=0) #np(n)
  # Guard against constant columns (zero range) to avoid NaN/inf output.
  span = np.where(span == 0, 1, span)
  return (x - mu) / span

# Scale the raw features and prepend a column of ones, so that theta[0]
# multiplies a constant 1 in h().
x_scaled = np.hstack((np.ones((x.shape[0], 1)), feature_scaling(x)))
print(x_scaled.shape)

(30, 3)


In [None]:
#train model

theta = np.zeros(3)  # start from all-zero parameters
alpha = 0.1          # step size used in the update below

for i in range(1000):
  # Current predictions and cost, evaluated before this step's update.
  z = h(x_scaled, theta)
  J = cost_func(z, y)
  if not i % 10:
    # Report progress on every 10th iteration only.
    print('i: %d, J: %f' % (i, J))

  # Move theta against the gradient, updating the array in place.
  g = grad(x_scaled, y, theta)
  theta -= alpha * g

i: 0, J: 35052.920667
i: 10, J: 5749.137007
i: 20, J: 3747.741093
i: 30, J: 2584.124131
i: 40, J: 1787.139068
i: 50, J: 1238.518382
i: 60, J: 860.067867
i: 70, J: 598.468865
i: 80, J: 417.274118
i: 90, J: 291.518619
i: 100, J: 204.067320
i: 110, J: 143.135193
i: 120, J: 100.600108
i: 130, J: 70.852867
i: 140, J: 50.011808
i: 150, J: 35.385323
i: 160, J: 25.103288
i: 170, J: 17.863801
i: 180, J: 12.758802
i: 190, J: 9.153746
i: 200, J: 6.604419
i: 210, J: 4.799302
i: 220, J: 3.519567
i: 230, J: 2.611246
i: 240, J: 1.965839
i: 250, J: 1.506775
i: 260, J: 1.179938
i: 270, J: 0.947033
i: 280, J: 0.780925
i: 290, J: 0.662363
i: 300, J: 0.577677
i: 310, J: 0.517147
i: 320, J: 0.473855
i: 330, J: 0.442874
i: 340, J: 0.420692
i: 350, J: 0.404801
i: 360, J: 0.393412
i: 370, J: 0.385246
i: 380, J: 0.379389
i: 390, J: 0.375186
i: 400, J: 0.372170
i: 410, J: 0.370004
i: 420, J: 0.368448
i: 430, J: 0.367331
i: 440, J: 0.366528
i: 450, J: 0.365951
i: 460, J: 0.365536
i: 470, J: 0.365237
i: 480, J: 0

In [None]:
#evaluate model
# Predictions of the fitted model on the same rows it was trained on.
z = h(x_scaled, theta)

# Print each prediction next to its reference value, numbered from 1.
for i, (zi, yi) in enumerate(zip(z, y), start=1):
  print('i: %02d, predict: %f, real: %f' % (i, zi, yi))

i: 01, predict: 16.194543, real: 15.700000
i: 02, predict: 10.244300, real: 11.300000
i: 03, predict: 41.954123, real: 42.000000
i: 04, predict: 35.082056, real: 35.000000
i: 05, predict: 38.084521, real: 37.700000
i: 06, predict: 75.441886, real: 75.500000
i: 07, predict: 77.723605, real: 77.100000
i: 08, predict: 89.345749, real: 88.700000
i: 09, predict: 121.073624, real: 122.000000
i: 10, predict: 118.729084, real: 119.100000
i: 11, predict: 126.542280, real: 125.600000
i: 12, predict: 145.399657, real: 145.500000
i: 13, predict: 149.603541, real: 150.000000
i: 14, predict: 145.948016, real: 145.000000
i: 15, predict: 171.646410, real: 172.000000
i: 16, predict: 169.267210, real: 170.000000
i: 17, predict: 176.384934, real: 177.000000
i: 18, predict: 170.028923, real: 169.400000
i: 19, predict: 210.512037, real: 211.000000
i: 20, predict: 219.907871, real: 220.000000
i: 21, predict: 231.125691, real: 231.400000
i: 22, predict: 216.739518, real: 216.900000
i: 23, predict: 228.558409