In [4]:
import torch
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
np.random.seed(1244)

# Preprocessing

In [2]:
# Load the raw training table and pull the regression target out of it.
data = pd.read_csv("train.csv")
label = data["SalePrice"].values

# Neither the row identifier nor the target may be used as a feature.
data = data.drop(["Id", "SalePrice"], axis=1)

# One-hot encode the categorical columns. dummy_na=True adds an explicit
# "missing" indicator per categorical column; drop_first=True drops one level
# per column so the indicators are not perfectly collinear.
data = pd.get_dummies(data, dummy_na=True, drop_first=True)

# Cast every column to float64 before imputing/scaling. MinMaxScaler works in
# floating point anyway, so the values are unchanged, but handing it integer
# (or, with newer pandas, boolean dummy) columns makes it convert them itself,
# which older scikit-learn versions report with a DataConversionWarning.
data = data.astype(np.float64)

# Remaining NaNs are in numeric columns: impute each with its column median.
data = data.fillna(data.median())

# NOTE(review): the medians and the scaler are fitted on *all* rows, including
# the ones that are later held out as the test set, so the reported test error
# is slightly optimistic. Fitting on the training split only would avoid this.
data = MinMaxScaler().fit_transform(data)
print(data.shape)

(1460, 288)


  return self.partial_fit(X, y)


# Splitting into train and test. Dividing Training into Batches

In [3]:
# Number of mini-batches the training set is cut into.
N_BATCHES = 15

# Hold out 25% of the rows for testing. The explicit random_state (same value
# as the NumPy seed set in the first cell) makes this cell give the same split
# every time it is re-run, instead of depending on how much of the global
# NumPy random stream has already been consumed.
x_train, x_test, y_train, y_test = train_test_split(
    data, label, test_size=0.25, random_state=1244
)

# np.array_split tolerates a training set whose length is not a multiple of
# N_BATCHES (np.split raises in that case); when it divides evenly the batches
# are identical to what np.split would produce.
x_batch = [
    torch.tensor(part, dtype=torch.float32)
    for part in np.array_split(x_train, N_BATCHES)
]
# Targets are reshaped to column vectors so they line up with the (rows, 1)
# output of the network.
y_batch = [
    torch.tensor(part, dtype=torch.float32).view(-1, 1)
    for part in np.array_split(y_train, N_BATCHES)
]

x_test = torch.tensor(x_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

print(x_batch[0].shape)
print(y_batch[0].shape)

torch.Size([73, 288])
torch.Size([73, 1])


# MLP Training Function

In [0]:
def learn(model, loss_fn, optimizer, X, Y, epoch, log_every=50):
    """Train ``model`` on pre-built mini-batches, comparing values in log space.

    For every batch both the target and the model output are passed through
    ``torch.log`` before ``loss_fn`` is applied. Targets and model outputs
    must therefore be strictly positive; a non-positive value turns the loss
    into ``nan``/``inf``.

    Args:
        model: Module to optimise; it is called as ``model(x)``.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        X: Indexable sequence of input batches.
        Y: Indexable sequence of target batches, aligned with ``X``.
        epoch: Number of full passes over all batches.
        log_every: Print the mean batch loss every ``log_every`` epochs.
            Defaults to 50; a value below 1 disables logging.

    Returns:
        The same ``model`` object, updated in place.
    """
    n_batches = len(X)
    for i in range(epoch):
        training_loss = 0.0
        model.train()
        for j in range(n_batches):
            x = X[j]
            y = torch.log(Y[j])
            y_pred = torch.log(model(x))
            loss = loss_fn(y_pred, y)

            # Standard step: clear stale gradients, backpropagate, update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            training_loss += loss.item()

        if log_every > 0 and (i + 1) % log_every == 0:
            # With no batches there is nothing to average; report nan rather
            # than dividing by zero.
            mean_loss = training_loss / n_batches if n_batches else float("nan")
            print(i+1, "th epoch Loss:", mean_loss)

    return model

# MLP Testing Function

In [0]:
def test(model, loss_fn, X, Y):
    y = torch.log(Y)  
    with torch.no_grad():
        model.eval()
        y_pred = torch.log(model(X))
        
    print("RMSE Error =",torch.sqrt(loss_fn(y_pred, y)).item())
        

# Model 1: Adam Optimizer with learning rate=1e-3

In [6]:
# Fix the PyTorch seed so the layer initialisation below is repeatable.
torch.manual_seed(3520)

# Four Linear+ReLU stages narrowing 288 -> 144 -> 72 -> 36 -> 10, followed by
# a plain linear head that emits one value per row. Layers are created in the
# same order as a hand-written listing, so seeded initialisation is unchanged.
model = torch.nn.Sequential(
    *[
        module
        for fan_in, fan_out in [(288, 144), (144, 72), (72, 36), (36, 10)]
        for module in (torch.nn.Linear(fan_in, fan_out), torch.nn.ReLU())
    ],
    torch.nn.Linear(10, 1),
)

# Mean squared error; learn() applies it to log-transformed values.
loss_fn = torch.nn.MSELoss(reduction='mean')

# Model 1: Adam, learning rate 1e-3, no weight decay.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

model = learn(model, loss_fn, optimizer, x_batch, y_batch, epoch=500)

50 th epoch Loss: 1.9384838819503785
100 th epoch Loss: 0.13554490407307943
150 th epoch Loss: 0.11949323614438374
200 th epoch Loss: 0.0996669165790081
250 th epoch Loss: 0.07505549142758051
300 th epoch Loss: 0.05257995935777823
350 th epoch Loss: 0.040292231862743694
400 th epoch Loss: 0.033244146530826885
450 th epoch Loss: 0.027676005909840266
500 th epoch Loss: 0.023435751721262933


# Model 1: Results

In [7]:
test(model, loss_fn, x_test, y_test)

RMSE Error = 0.17495395243167877


# Model 2: Adam Optimizer with learning rate=1e-3 and weight_decay=1e-4

In [8]:
# Fix the PyTorch seed so the layer initialisation below is repeatable.
torch.manual_seed(3520)

# Four Linear+ReLU stages narrowing 288 -> 144 -> 72 -> 36 -> 10, followed by
# a plain linear head that emits one value per row. Layers are created in the
# same order as a hand-written listing, so seeded initialisation is unchanged.
model = torch.nn.Sequential(
    *[
        module
        for fan_in, fan_out in [(288, 144), (144, 72), (72, 36), (36, 10)]
        for module in (torch.nn.Linear(fan_in, fan_out), torch.nn.ReLU())
    ],
    torch.nn.Linear(10, 1),
)

# Mean squared error; learn() applies it to log-transformed values.
loss_fn = torch.nn.MSELoss(reduction='mean')

# Model 2: Adam, learning rate 1e-3, with L2 weight decay of 1e-4.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

model = learn(model, loss_fn, optimizer, x_batch, y_batch, epoch=500)

50 th epoch Loss: 0.7605692585309346
100 th epoch Loss: 0.12800170481204987
150 th epoch Loss: 0.10976973474025727
200 th epoch Loss: 0.08520388851563136
250 th epoch Loss: 0.04556691224376361
300 th epoch Loss: 0.037467964986960096
350 th epoch Loss: 0.03240322880446911
400 th epoch Loss: 0.028466566652059554
450 th epoch Loss: 0.02525445744395256
500 th epoch Loss: 0.022612623622020087


# Model 2: Results

In [9]:
test(model, loss_fn, x_test, y_test)

RMSE Error = 0.1722087860107422


# Model 3: RMSProp Optimizer

In [10]:
# Fix the PyTorch seed so the layer initialisation below is repeatable.
torch.manual_seed(3520)

# Four Linear+ReLU stages narrowing 288 -> 144 -> 72 -> 36 -> 10, followed by
# a plain linear head that emits one value per row. Layers are created in the
# same order as a hand-written listing, so seeded initialisation is unchanged.
model = torch.nn.Sequential(
    *[
        module
        for fan_in, fan_out in [(288, 144), (144, 72), (72, 36), (36, 10)]
        for module in (torch.nn.Linear(fan_in, fan_out), torch.nn.ReLU())
    ],
    torch.nn.Linear(10, 1),
)

# Mean squared error; learn() applies it to log-transformed values.
loss_fn = torch.nn.MSELoss(reduction='mean')

# Model 3: RMSprop with weight decay 1e-5. No learning rate is passed, so the
# library default is used.
optimizer = torch.optim.RMSprop(params=model.parameters(), weight_decay=1e-5)

model = learn(model, loss_fn, optimizer, x_batch, y_batch, epoch=500)

50 th epoch Loss: 0.02331392802298069
100 th epoch Loss: 0.018487158790230752
150 th epoch Loss: 0.015262494857112566
200 th epoch Loss: 0.013620435229192178
250 th epoch Loss: 0.012325422745198012
300 th epoch Loss: 0.01304126080746452
350 th epoch Loss: 0.012300129979848862
400 th epoch Loss: 0.009383811770627895
450 th epoch Loss: 0.011749154732873043
500 th epoch Loss: 0.01110613097747167


# Model 3: Results

In [11]:
test(model, loss_fn, x_test, y_test)

RMSE Error = 0.1470322608947754


# Model 4: Adadelta Optimizer

In [12]:
# Fix the PyTorch seed so the layer initialisation below is repeatable.
torch.manual_seed(3520)

# Four Linear+ReLU stages narrowing 288 -> 144 -> 72 -> 36 -> 10, followed by
# a plain linear head that emits one value per row. Layers are created in the
# same order as a hand-written listing, so seeded initialisation is unchanged.
model = torch.nn.Sequential(
    *[
        module
        for fan_in, fan_out in [(288, 144), (144, 72), (72, 36), (36, 10)]
        for module in (torch.nn.Linear(fan_in, fan_out), torch.nn.ReLU())
    ],
    torch.nn.Linear(10, 1),
)

# Mean squared error; learn() applies it to log-transformed values.
loss_fn = torch.nn.MSELoss(reduction='mean')

# Model 4: Adadelta with weight decay 1e-5. No learning rate is passed, so the
# library default is used.
optimizer = torch.optim.Adadelta(params=model.parameters(), weight_decay=1e-5)

model = learn(model, loss_fn, optimizer, x_batch, y_batch, epoch=500)

50 th epoch Loss: 0.11055731723705928
100 th epoch Loss: 0.08080211902658145
150 th epoch Loss: 0.05945805658896764
200 th epoch Loss: 0.0486931266884009
250 th epoch Loss: 0.044367670888702075
300 th epoch Loss: 0.04118291462461154
350 th epoch Loss: 0.03834023351470629
400 th epoch Loss: 0.03580118442575137
450 th epoch Loss: 0.03355800087253253
500 th epoch Loss: 0.031590966135263444


# Model 4: Results

In [13]:
test(model, loss_fn, x_test, y_test)

RMSE Error = 0.19461393356323242


# Conclusion

The following architecture is used for all four models:

```python
model = torch.nn.Sequential(
    torch.nn.Linear(288, 144),
    torch.nn.ReLU(),
    torch.nn.Linear(144, 72),
    torch.nn.ReLU(),
    torch.nn.Linear(72, 36),
    torch.nn.ReLU(),
    torch.nn.Linear(36, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1)
)
```

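The lowest test error (RMSE between the logarithms of predicted and actual sale prices) is obtained with the RMSprop optimizer using a weight decay of 1e-5 and a learning rate of 1e-2 (the PyTorch default, since no learning rate is passed explicitly). It outperformed the Adam optimizer both with and without weight decay, as well as Adadelta. RMSprop is similar in spirit to gradient descent with momentum in that it keeps an exponential moving average, but of the squared gradients rather than of the gradients themselves; dividing each update by the root of that average shrinks the steps along directions with large gradients, which restricts the oscillations in the "vertical" (steep) direction. The choice of optimizer depends on the given data, and the results can vary with the seeds given to the random number generators.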


- Model 1 Error: 0.17495395243167877
- Model 2 Error: 0.1722087860107422
- Model 3 Error: 0.1470322608947754
- Model 4 Error: 0.19461393356323242

