#### let's try to predict housing prices via linear regression
#### dataset obtained from http://lib.stat.cmu.edu/datasets/boston
#### I downloaded and created a csv file


In [1]:
#import all the libs
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split

In [3]:
batch_size=64
learning_rate=1e-7

input_size=13
output_size=1

In [4]:
target = 'medv'

In [5]:
df = pd.read_csv('BostonHousing.csv')

In [6]:
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [8]:
inputs = df.drop('medv',axis=1).values
target = df[['medv']].values
inputs.shape, target.shape

((506, 13), (506, 1))

In [9]:
#convert to tensor dataset
dataset = TensorDataset(torch.tensor(inputs,dtype=torch.float32),torch.tensor(target,dtype=torch.float32))

In [10]:
len(dataset)

506

In [11]:
train_ds, valid_ds = random_split(dataset,[406,100])

In [12]:
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size)

In [52]:
#model class
class HousingModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size,output_size)
        
    def forward(self,X_batch):
        out = self.linear(X_batch)
        return out
    
    def train_step(self,X_batch):
        inputs, targets = X_batch
        out = self(inputs)
        loss = F.mse_loss(out, targets)
        return loss
    
    def valid_step(self,X_batch):
        inputs, targets = X_batch
        out = self(inputs)
        loss = F.mse_loss(out, targets)
        return loss


In [53]:
model = HousingModel()

In [54]:
def evaluate(model,loader):
    outputs = [model.valid_step(X_batch) for X_batch in loader]
    epoch_loss = torch.stack(outputs).mean()
    print(f"Validation loss is {epoch_loss}")

In [56]:
evaluate(model,valid_loader)

Validation loss is 7450.546875


In [57]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.2650,  0.2548,  0.0200,  0.1057,  0.2143,  0.1145,  0.1541, -0.2611,
           0.1231,  0.0810, -0.0394,  0.1775, -0.1201]], requires_grad=True),
 Parameter containing:
 tensor([-0.1497], requires_grad=True)]

In [58]:
def train(model,train_loader,valid_loader,epochs=10,lr=learning_rate,optim_func=torch.optim.SGD):
    optim = optim_func(model.parameters(),lr)
    for epoch in range(epochs):
        for batch in train_loader:
            loss = model.train_step(batch)
            loss.backward()
            optim.step()
            optim.zero_grad()
        evaluate(model,valid_loader)

In [59]:
train(model,train_loader,valid_loader,10,5e-7)

Validation loss is 129.44293212890625
Validation loss is 88.4856948852539
Validation loss is 79.34913635253906
Validation loss is 73.0964584350586
Validation loss is 69.0916748046875
Validation loss is 70.00171661376953
Validation loss is 63.99372100830078
Validation loss is 62.413841247558594
Validation loss is 60.722843170166016
Validation loss is 59.939903259277344


In [60]:
evaluate(model,valid_loader)

Validation loss is 59.939903259277344


In [61]:
torch.save(model.state_dict(),'BostonHousingLinear.pth')