In [43]:
import pandas as pd
import torch
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:
path = '/content/drive/My Drive/Colab Notebooks/Housing.csv'
sample = pd.DataFrame(pd.read_csv(path))
print(sample.shape)
sample.head(3)

(545, 13)


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished


In [45]:
# Converting the strings to values

list1 =  ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
list2 = ['furnishingstatus']

# Defining the map function
def map1(x):
    return x.map({'yes': 1, 'no': 0})

def map2(x):
    return x.map({'furnished': 1, 'semi-furnished': 0})

sample[list1] = sample[list1].apply(map1)
sample[list2] = sample[list2].apply(map2)

sample.head(3)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,1.0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,1.0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,0.0


In [46]:
sample1 = sample[['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']]
sample1.head(3)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
0,13300000,7420,4,2,3,2
1,12250000,8960,4,4,4,3
2,12250000,9960,3,2,2,2


In [47]:
X = sample.iloc[:, 1:5].values
Y = sample.iloc[:, 0].values
X[:1]

array([[7420,    4,    2,    3]])

In [48]:
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X = scale.fit_transform(X)
X[:1]

array([[1.04672629, 1.40341936, 1.42181174, 1.37821692]])

In [49]:
from sklearn.model_selection import train_test_split
import numpy as np

# Splitting the training and validation sets.
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size = 0.8, test_size = 0.2, random_state = 0)

In [50]:
ten_train_x = torch.tensor(x_train)
ten_train_y = torch.tensor(y_train)
ten_test_x = torch.tensor(x_test)
ten_test_y = torch.tensor(y_test)

In [51]:
def model(ten_train_x, w0, w1, w2, w3, b):
  # Use matrix multiplication for all data points in ten_train_x
  return ten_train_x @ torch.tensor([w0, w1, w2, w3], dtype=torch.double) + b # @ represents matrix multiplication

def lossf(pred, ten_train_y):
  squared_diff = (pred-ten_train_y)**2
  return squared_diff.mean()

In [52]:
def training_loop(n_epochs, optimizer, prm, x_train, x_test, y_train, y_test):
  for epoch in range(1, n_epochs + 1):

    train_pred = model(x_train, *prm)
    train_loss = lossf(train_pred, y_train)

    with torch.no_grad():
      test_pred = model(x_test, *prm)
      test_loss = lossf(test_pred, y_test)
      assert test_loss.requires_grad == False

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    if epoch % 500 == 0:
      print(f"Epoch {epoch}, Training Loss {train_loss.item():.4f}", f'Validation loss {test_loss.item():.4f}')
  return prm

In [59]:
prm = torch.tensor([1.0, 1.0, 1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-4
optimizer = torch.optim.SGD([prm], lr=learning_rate)


training_loop(
              n_epochs=5000,
              optimizer = optimizer,
              prm = prm,
              x_train = ten_train_x,
              x_test = ten_test_x,
              y_train = ten_train_y,
              y_test = ten_test_y
              )

Epoch 500, Training Loss 22339462622684.6289 Validation loss 21108220888092.0078
Epoch 1000, Training Loss 18950274218008.6445 Validation loss 17763910850411.4414
Epoch 1500, Training Loss 16175502079540.5098 Validation loss 15029745854728.2422
Epoch 2000, Training Loss 13903754448188.1660 Validation loss 12794740742793.6055
Epoch 2500, Training Loss 12043846656208.3281 Validation loss 10968078533937.3984
Epoch 3000, Training Loss 10521108011406.4453 Validation loss 9475421535321.2676
Epoch 3500, Training Loss 9274421073600.8203 Validation loss 8255953297175.1797
Epoch 4000, Training Loss 8253739808106.8379 Validation loss 7259900297080.8789
Epoch 4500, Training Loss 7418089122469.3486 Validation loss 6446534080498.3223
Epoch 5000, Training Loss 6733935513382.7197 Validation loss 5782543925285.3154


tensor([1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 3.0202e+06],
       requires_grad=True)