In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler

In [2]:
# Run on the GPU when PyTorch can see a CUDA device; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Single seed for the whole notebook. It is passed to train_test_split below
# and is also handed to PyTorch here, so that the model's initial weights are
# the same on every "Restart Kernel -> Run All".
RANDOM_SEED = 1
# The trailing ';' keeps the returned Generator object out of the cell output.
torch.manual_seed(RANDOM_SEED);

In [4]:
# Load the raw housing table; the path is resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer="Housing.csv")

In [5]:
# Peek at the first two rows to check the column layout.
df.head(n=2)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished


In [6]:
# One-hot encode the yes/no flags and the furnishing status. Columns that are
# not listed here are carried over unchanged.
encoded_data = pd.get_dummies(
    data=df,
    columns=[
        'mainroad',
        'guestroom',
        'basement',
        'hotwaterheating',
        'airconditioning',
        'prefarea',
        'furnishingstatus',
    ],
)

In [7]:
# Names of the indicator columns created by the one-hot encoding step,
# grouped by the original column they were derived from.
df_categorical = [
    # mainroad
    'mainroad_no', 'mainroad_yes',
    # guestroom
    'guestroom_no', 'guestroom_yes',
    # basement
    'basement_no', 'basement_yes',
    # hotwaterheating
    'hotwaterheating_no', 'hotwaterheating_yes',
    # airconditioning
    'airconditioning_no', 'airconditioning_yes',
    # prefarea
    'prefarea_no', 'prefarea_yes',
    # furnishingstatus
    'furnishingstatus_furnished',
    'furnishingstatus_semi-furnished',
    'furnishingstatus_unfurnished',
]

In [8]:
# Store the indicator columns as floats so the whole frame can later be turned
# into a single numeric array.
encoded_data[df_categorical] = encoded_data.loc[:, df_categorical].astype(dtype=float)

In [9]:
# Target is the price column; the features are every other column.
y = encoded_data['price']
X = encoded_data.drop(columns=['price'])

In [10]:
# Turn the pandas objects into float32 tensors. The target gets an extra
# trailing dimension so it is a column, like the model's single-unit output.
# NOTE(review): the values go in unscaled although StandardScaler is imported
# (and unused in the cells visible here) - consider standardising; confirm intent.
X = torch.from_numpy(X.to_numpy()).float()
y = torch.from_numpy(y.to_numpy()).float().unsqueeze(1)

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [12]:
# Sanity check: shapes of the train features/targets, then the test features/targets.
print(*(split.shape for split in (X_train, y_train, X_test, y_test)))

torch.Size([436, 20]) torch.Size([436, 1]) torch.Size([109, 20]) torch.Size([109, 1])


In [13]:
# Move all four splits onto the selected device in one pass.
X_train, X_test, y_train, y_test = (
    split.to(device) for split in (X_train, X_test, y_train, y_test)
)

In [14]:
class HousingPriceModel(nn.Module):
    """Fully connected regressor that maps a feature vector to one price estimate.

    The network is a stack of five ``nn.Linear`` layers
    (``in_features`` -> 64 -> 32 -> 16 -> 8 -> 1) with a ReLU after every
    layer except the last, so the output is an unbounded real number.

    Args:
        in_features: Width of the input feature vector. Defaults to 20, which
            matches the feature tensors printed earlier in this notebook, so
            ``HousingPriceModel()`` behaves exactly as before.
    """

    def __init__(self, in_features: int = 20):
        super().__init__()
        # The attribute name ``layer`` is kept so that state_dict keys
        # ("layer.0.weight", ...) stay the same.
        self.layer = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=16),
            nn.ReLU(),
            nn.Linear(in_features=16, out_features=8),
            nn.ReLU(),
            nn.Linear(in_features=8, out_features=1)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the layer stack on ``x``.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        return self.layer(x)


In [15]:
# Build the network, then place its parameters on the selected device.
model = HousingPriceModel()
model = model.to(device)

In [25]:
# Loss and optimiser setup.
# L1 loss reports the error in the same units as the target.
loss_fn = nn.L1Loss()
# Adam updates every parameter of the model with a step size of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [26]:
# Training
# Full-batch training: every epoch does one forward/backward pass over the
# whole training set, then measures the loss on the held-out set.
EPOCHS = 100000
LOG_EVERY = 1000  # record and print the losses once every this many epochs
epoch_count = []
loss_values = []
test_loss_values = []

for epoch in range(EPOCHS):
    model.train()

    # 1. Forward pass on the training data
    y_pred = model(X_train)

    # 2. Training loss
    loss = loss_fn(y_pred, y_train)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Perform backprop wrt parameters
    loss.backward()

    # 5. step optimizer ( perform gradient descent)
    optimizer.step()

    # Evaluate on the held-out split without tracking gradients.
    model.eval()
    with torch.inference_mode():
        test_pred = model(X_test)
        test_loss = loss_fn(test_pred, y_test)

    if epoch % LOG_EVERY == 0:
        epoch_count.append(epoch)
        # Keep detached CPU copies in the history. The live `loss` tensor
        # still requires grad (and may sit on the GPU), which keeps autograd
        # state alive and prevents converting the history to NumPy for plots.
        loss_values.append(loss.detach().cpu())
        # test_loss was produced under inference_mode, so it carries no graph;
        # only the device move is needed.
        test_loss_values.append(test_loss.cpu())
        print('Epoch', epoch, 'Training Loss:', loss.item(), 'Test loss:', test_loss.item())


Epoch 0 Training Loss: 718474.25 Test loss: 2296870.25
Epoch 1000 Training Loss: 708187.875 Test loss: 859731.0625
Epoch 2000 Training Loss: 705676.3125 Test loss: 860933.125
Epoch 3000 Training Loss: 700260.25 Test loss: 866629.375
Epoch 4000 Training Loss: 679058.0 Test loss: 866352.5625
Epoch 5000 Training Loss: 681010.0 Test loss: 884404.125
Epoch 6000 Training Loss: 658740.9375 Test loss: 880894.5
Epoch 7000 Training Loss: 646620.0 Test loss: 897254.25
Epoch 8000 Training Loss: 634154.9375 Test loss: 916042.0625
Epoch 9000 Training Loss: 639014.5625 Test loss: 946169.5625
Epoch 10000 Training Loss: 630299.125 Test loss: 951919.875
Epoch 11000 Training Loss: 621921.5625 Test loss: 891018.875
Epoch 12000 Training Loss: 612069.3125 Test loss: 913221.875
Epoch 13000 Training Loss: 637412.9375 Test loss: 901520.375
Epoch 14000 Training Loss: 605639.3125 Test loss: 925639.5
Epoch 15000 Training Loss: 581398.75 Test loss: 959353.8125
Epoch 16000 Training Loss: 573964.8125 Test loss: 9152

KeyboardInterrupt: 

In [18]:
# Column labels of the raw frame, i.e. before the one-hot encoding was applied.
df.columns

Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus'],
      dtype='object')

In [19]:
# Split the raw (un-encoded) frame with the same test_size and seed that were
# used for the tensor split.
# NOTE(review): presumably this is meant to reproduce the same row partition
# so predictions can be read next to the original rows - confirm.
# xtest still contains the 'price' column because the whole frame is passed in.
xtrain, xtest, ytrain, ytest = train_test_split(
    df,
    df['price'],
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [20]:
# Show the held-out rows of the raw frame (index labels are the original row numbers).
xtest

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
62,7070000,6240,4,2,2,yes,no,no,no,yes,1,no,furnished
247,4550000,8400,4,1,4,yes,no,no,no,no,3,no,unfurnished
142,5600000,10500,4,2,2,yes,no,no,no,no,1,no,semi-furnished
107,6125000,6420,3,1,3,yes,no,yes,no,no,0,yes,unfurnished
483,2940000,6615,3,1,2,yes,no,no,no,no,0,no,semi-furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,4900000,4095,3,1,2,no,yes,yes,no,yes,0,no,semi-furnished
92,6405000,4800,3,2,4,yes,yes,no,no,yes,0,no,furnished
66,6930000,13200,2,1,1,yes,no,yes,yes,no,1,no,furnished
284,4270000,7770,2,1,1,yes,no,no,no,no,1,no,furnished


In [21]:
# Show the true prices of the held-out rows.
ytest

62     7070000
247    4550000
142    5600000
107    6125000
483    2940000
        ...   
201    4900000
92     6405000
66     6930000
284    4270000
187    5110000
Name: price, Length: 109, dtype: int64

In [22]:
# Predict on the held-out features. Switch to eval mode and disable gradient
# tracking, matching the evaluation step inside the training loop, so that no
# autograd graph is built for a pure inference call.
model.eval()
with torch.inference_mode():
    test_pred = model(X_test)

In [23]:
# Show the model's predictions for the held-out rows (one value per row).
test_pred

tensor([[6236976.5000],
        [5953892.0000],
        [5802649.5000],
        [5734757.0000],
        [4229213.0000],
        [3151382.0000],
        [2856216.0000],
        [7359124.5000],
        [2787469.2500],
        [4770251.5000],
        [3547756.7500],
        [3866000.0000],
        [4888339.5000],
        [4765624.0000],
        [4571133.5000],
        [7837267.0000],
        [4307058.5000],
        [3402633.2500],
        [6759305.0000],
        [2785106.2500],
        [4135174.7500],
        [5078390.5000],
        [4471789.5000],
        [3632910.0000],
        [4712847.0000],
        [6837272.0000],
        [2891491.2500],
        [6886183.0000],
        [7008028.0000],
        [4100365.7500],
        [7298785.5000],
        [6462732.0000],
        [4057241.0000],
        [4487408.5000],
        [5726980.5000],
        [4346434.5000],
        [6164195.0000],
        [3254943.7500],
        [5300293.5000],
        [5847595.5000],
        [4404110.5000],
        [3613491

In [24]:
# Show the raw housing frame; pandas elides the middle rows in the rendered output.
df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished
