In this project I will apply linear regression to analyse the boston housing price, and a way to predict the price based on the parameters given by the dataset in "boston.txt". To achieve this I used linear regression model with SGD optimization and mean square error loss function. All methods provided by the Pytorch engine.

In [166]:
import numpy as np
import numpy.random as rd
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt


In [167]:
#retrieving data
ls = [item for item in range(7, 22)]
file1 = open('boston.txt', 'r')
head = []
for i, line in enumerate(file1):
  try:
    if i in ls:
      head.append(line.strip().split()[0])
  except:
    break
data = pd.read_table("boston.txt", skiprows = 22, sep='\s+', engine="python", header=None)
data = data.to_numpy()
data = np.reshape(data,(506,22))
data = data[0:,:14].astype(np.float32)

#Shuffle the rows from data so I could take arbitrary rows for test and training.
rd.shuffle(data) 
pivo = round(data.shape[0]*0.66)
train = data[0:pivo, :]
test = data[pivo+1:data.shape[0], :]

#training data manipulation
X = train[0:,:13]
Y = train[0:,13:]
Xm = np.mean(X, axis = 0)
Ym = np.mean(Y, axis = 0)
Xs = np.std(X, axis = 0)
Ys = np.std(Y, axis = 0)
im = (X-Xm)/Xs
out = (Y-Ym)/Ys

#test data manpulation
Tx = test[0:,:13]
Ty = test[0:,13:]
testX = (Tx - np.mean(Tx, axis=0))/np.std(Tx, axis=0)
testY = (Ty - np.mean(Ty, axis=0))/np.std(Ty, axis=0)


In [168]:
#creating linear regression model
model = nn.Linear(13,1)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr= 0.01, momentum=0.7)
inputs = torch.from_numpy(im)
targets = torch.from_numpy(out)

#test data
tstX = torch.from_numpy(testX)
tstY = torch.from_numpy(testY)

In [169]:
#training the model

n_epochs = 1000
losses = []
train_losses = []

for it in range(n_epochs):
  optimizer.zero_grad()
  outputs = model(inputs)
  loss = criterion(outputs,targets)
  losses.append(loss.item())
  loss.backward()
  optimizer.step()
  #test loss
  outputs_test = model(tstX)
  loss_test = criterion(outputs_test, tstY)
  train_losses.append(loss_test.item())
  if(it % 50 == 0):
    print(f'Epoch {it+1}/{n_epochs}, Loss: {loss.item():.4f}, Test Loss: {loss_test.item():.4f}')

Epoch 1/1000, Loss: 1.0289, Test Loss: 0.9161
Epoch 51/1000, Loss: 0.3127, Test Loss: 0.2354
Epoch 101/1000, Loss: 0.2905, Test Loss: 0.2237
Epoch 151/1000, Loss: 0.2839, Test Loss: 0.2223
Epoch 201/1000, Loss: 0.2815, Test Loss: 0.2227
Epoch 251/1000, Loss: 0.2806, Test Loss: 0.2231
Epoch 301/1000, Loss: 0.2802, Test Loss: 0.2234
Epoch 351/1000, Loss: 0.2800, Test Loss: 0.2235
Epoch 401/1000, Loss: 0.2799, Test Loss: 0.2235
Epoch 451/1000, Loss: 0.2798, Test Loss: 0.2235
Epoch 501/1000, Loss: 0.2797, Test Loss: 0.2235
Epoch 551/1000, Loss: 0.2797, Test Loss: 0.2235
Epoch 601/1000, Loss: 0.2797, Test Loss: 0.2234
Epoch 651/1000, Loss: 0.2797, Test Loss: 0.2234
Epoch 701/1000, Loss: 0.2797, Test Loss: 0.2234
Epoch 751/1000, Loss: 0.2797, Test Loss: 0.2233
Epoch 801/1000, Loss: 0.2797, Test Loss: 0.2233
Epoch 851/1000, Loss: 0.2797, Test Loss: 0.2233
Epoch 901/1000, Loss: 0.2797, Test Loss: 0.2233
Epoch 951/1000, Loss: 0.2797, Test Loss: 0.2233


In [170]:
w = model.weight.data.numpy()
v = model.bias.data.numpy()
a = w*Ys/Xs
a = a[0]
values = np.dot(X, a.T)
b = np.mean(Y-values)
b_other = Ym - np.sum((Ys*Xm*w)/Xs, axis=1) + v*Ys
head.pop()
str_head = ''.join(f'{str(e)},' for e in head)

#b calculated from bias in pytorch with SGD optim exactly the same as the mean diference
print(f'Linear Coeficient from bias calculated by pytorch: {b_other}')
print(f'Linear Coeficient calculating the mean between the differences (model x targets): {b}')
print(f'Weights or angular coeficients:\n {str_head}\n {a}')
savevals = np.append(a,b_other, axis=0).reshape(1,14)
str_head += 'b'
np.savetxt('boston_housing_price_prediction.csv', savevals, delimiter=',', comments='#Weights for all variables and bias (b)\n', header= str_head)



Linear Coeficient from bias calculated by pytorch: [45.752327]
Linear Coeficient calculating the mean between the differences (model x targets): 45.75232696533203
Weights or angular coeficients:
 CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,
 [-1.19010441e-01  4.46854904e-02 -3.26025896e-02  2.83967948e+00
 -1.72440128e+01  2.93075919e+00 -1.20709585e-02 -1.85048485e+00
  2.92700738e-01 -1.17064910e-02 -1.03114569e+00  9.29468311e-03
 -5.12081861e-01]
