In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Load the house-sales CSV; the relative path expects the Kaggle "../input/" layout.
data = pd.read_csv('../input/housesalesprediction/kc_house_data.csv')

In [None]:
# Preview the first rows of the raw frame.
data.head()

In [None]:
# Column dtypes and non-null counts, to spot missing values before preprocessing.
data.info()

# Preprocessing

In [None]:
def onehot_encode(df, column, prefix):
    """Return a new frame in which `column` is replaced by indicator columns.

    One indicator column is created per distinct value of `df[column]`, named
    with `prefix`; the indicators are appended on the right and the source
    column is dropped. The frame passed in is not modified.
    """
    indicator_cols = pd.get_dummies(df[column], prefix=prefix)
    # pd.concat builds a fresh frame, so the caller's `df` stays untouched.
    widened = pd.concat([df, indicator_cols], axis=1)
    return widened.drop(columns=column)

In [None]:
# Discard the `id` column.
data = data.drop(columns='id')

In [None]:
# Split `date` into numeric year/month features. The slices take characters
# 0-3 as the year and 4-5 as the month (assumes strings that start with
# 'YYYYMM' - TODO confirm against data.head()). The vectorised `.str` accessor
# replaces a per-row lambda, and the explicit int cast keeps the columns
# numeric instead of relying on later implicit string-to-float coercion.
data['year'] = data['date'].str[0:4].astype(int)
data['month'] = data['date'].str[4:6].astype(int)
data = data.drop('date', axis=1)

In [None]:
# Number of distinct zipcodes (missing values, if any, count as one value).
data['zipcode'].nunique(dropna=False)

In [None]:
# Replace `zipcode` with one indicator column per zipcode, prefixed "zip".
data = onehot_encode(data, column='zipcode', prefix='zip')

In [None]:
# Rows whose `yr_renovated` is non-zero.
data[data['yr_renovated'] != 0]

In [None]:
# Fraction of rows with a non-zero `yr_renovated`.
renovated_rows = data[data['yr_renovated'] != 0]
len(renovated_rows) / len(data)

Drop the `yr_renovated` column: only a small fraction of rows have a non-zero value, so for most entries it is effectively unset.

In [None]:
# Remove the sparsely populated `yr_renovated` column.
data = data.drop(columns='yr_renovated')

In [None]:
# Display the frame after preprocessing.
data

# Splitting and Scaling

In [None]:
# Features are every column except `price`; `price` is the regression target.
X = data.drop(columns='price').copy()
y = data['price'].copy()

In [None]:
# Split BEFORE scaling so the scaler never sees the test rows. Fitting it on
# the full dataset leaks test-set mean/variance into training and makes the
# reported test error optimistic.
tf_X_train, tf_X_test, tf_y_train, tf_y_test = train_test_split(X, y, train_size=0.7, random_state=1)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows. Both results are NumPy arrays.
scaler = StandardScaler()
tf_X_train = scaler.fit_transform(tf_X_train)
tf_X_test = scaler.transform(tf_X_test)

In [None]:
# (rows, features) of the training matrix; the feature count must match the network's input width.
tf_X_train.shape

# Training with PyTorch

In [None]:
class Net(nn.Module):
    """Fully connected regressor: two ReLU hidden layers and one linear output unit.

    Args:
        in_features: Width of each input vector. Defaults to 88, the value
            this notebook originally hard-coded, so `Net()` is unchanged.
        hidden_units: Width of both hidden layers. Defaults to 64.
    """

    def __init__(self, in_features=88, hidden_units=64):
        super().__init__()
        self.layer1 = nn.Linear(in_features, hidden_units)
        self.layer2 = nn.Linear(hidden_units, hidden_units)
        self.out = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Map inputs whose last dimension is `in_features` to outputs whose last dimension is 1."""
        hidden = F.relu(self.layer1(x))
        hidden = F.relu(self.layer2(hidden))
        # No activation on the output: this is an unbounded regression value.
        return self.out(hidden)
        

# Seed torch's RNG before building the model so the random weight
# initialisation is the same on every Restart & Run All.
torch.manual_seed(1)
net = Net()

In [None]:
# Display the model architecture. The bare `net.parameters` attribute only
# showed the bound method's repr wrapped around this same summary.
net

In [None]:
# Materialise and display every parameter tensor (weights and biases) of the network.
list(net.parameters())

In [None]:
# Print the shape of each parameter tensor. Iterating the generator directly
# avoids rebuilding the whole parameter list on every loop iteration.
for param in net.parameters():
    print(param.shape)

In [None]:
# Features: build float32 tensors directly instead of converting afterwards.
torch_X_train = torch.tensor(tf_X_train, dtype=torch.float32)
torch_X_test = torch.tensor(tf_X_test, dtype=torch.float32)

# Targets: reshape to a single column so each per-sample target has shape (1,),
# matching the network's per-sample output. With 0-d targets, nn.MSELoss
# broadcasts and warns about mismatched input/target sizes.
torch_y_train = torch.tensor(np.array(tf_y_train), dtype=torch.float32).reshape(-1, 1)
torch_y_test = torch.tensor(np.array(tf_y_test), dtype=torch.float32).reshape(-1, 1)

In [None]:
# Mean-squared-error objective, minimised with Adam at a learning rate of 0.01.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=0.01)

In [None]:
# One pass over the training set, taking an optimiser step after every sample.
for sample, price in zip(torch_X_train, torch_y_train):
    optimizer.zero_grad()          # clear gradients left over from the previous sample
    prediction = net(sample)
    sample_loss = criterion(prediction, price)
    sample_loss.backward()         # back-propagate this sample's error
    optimizer.step()               # apply the Adam update

In [None]:
# Evaluate on the held-out set with one batched forward pass. Gradients are
# disabled: the previous per-sample loop summed graph-attached losses, keeping
# an autograd graph alive for every test sample.
with torch.no_grad():
    test_predictions = net(torch_X_test).reshape(-1)
    # Flatten both sides so this works whether targets are shaped (N,) or (N, 1).
    # MSELoss averages the squared error over all test samples, which is the
    # same quantity as the mean of the per-sample losses.
    avg_loss = criterion(test_predictions, torch_y_test.reshape(-1))
    

In [None]:
# Root of the mean squared error, detached from autograd and converted to NumPy.
torch_rmse = avg_loss.sqrt().detach().numpy()

# Results

In [None]:
# Report the test-set RMSE (same units as `price`).
print(f'PyTorch RMSE: {torch_rmse}')