In [16]:
import numpy as np
import pandas as pd
import joblib

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

from sklearn.preprocessing import MinMaxScaler

In [3]:
X = pd.read_csv('X.csv')

with open('y.npy', 'rb') as f:
    y = np.load(f)
f.close()

In [4]:
X = X[['OverallQual', 
       'GrLivArea',
       'GarageCars',
       'GarageArea',
       'TotalBsmtSF',
       '1stFlrSF',
       'FullBath',
       'LotShape_rank']]
X

Unnamed: 0,OverallQual,GrLivArea,GarageCars,GarageArea,TotalBsmtSF,1stFlrSF,FullBath,LotShape_rank
0,7,1710,2,548,856,856,2,1
1,6,1262,2,460,1262,1262,2,1
2,7,1786,2,608,920,920,2,2
3,7,1717,3,642,756,961,1,2
4,8,2198,3,836,1145,1145,2,2
...,...,...,...,...,...,...,...,...
1455,6,1647,2,460,953,953,2,1
1456,6,2073,2,500,1542,2073,2,1
1457,7,2340,1,252,1152,1188,2,1
1458,5,1078,1,240,1078,1078,1,1


In [5]:
X.shape

(1460, 8)

In [6]:
x_min_max_scaler = MinMaxScaler()
x_min_max_scaler.fit(X)
scaled_X = x_min_max_scaler.transform(X)

y =  y.reshape(-1, 1)
y_min_max_scaler = MinMaxScaler()
y_min_max_scaler.fit(y)
scaled_y = y_min_max_scaler.transform(y)

In [7]:
scaled_X.shape, scaled_y.shape

((1460, 8), (1460, 1))

In [17]:
class TensorData(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)
        self.len = self.y.shape[0]
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
    def __len__(self):
        return self.len

In [19]:
trainset = TensorData(scaled_X, scaled_y)
trainloader = DataLoader(trainset, batch_size=2, shuffle=True)


In [20]:
# Training
# from gc import callbacks
# from pickletools import optimize

device = torch.device('mps')

linear1 = nn.Linear(scaled_X.shape[-1], 96 , bias=True)
linear2 = nn.Linear(96, 48, bias=True)
linear3 = nn.Linear(48, 1, bias=True)

relu = nn.ReLU()
nn.init.xavier_uniform_(linear1.weight)
nn.init.xavier_uniform_(linear2.weight)
nn.init.xavier_uniform_(linear3.weight)

model = nn.Sequential(
    linear1, relu,
    linear2, relu,
    linear3
)
criterion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters())

n = len(trainloader)

for epoch in range(150):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, values = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, values)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    

# model = keras.Sequential(
#     [
#         keras.Input(shape=scaled_X.shape[-1]),
#         layers.Dense(96, activation='relu'),
#         layers.Dense(48, activation='relu'),
#         layers.Dense(1)
#     ]
# )

# model.compile(loss='mse', optimizer='adam')

# early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)

# model.fit(scaled_X, scaled_y,
#           batch_size=2, epochs=150,
#           callbacks=[early_stopping_callback],
#           validation_split=0.05)

In [30]:
with torch.no_grad():
    model.eval()
    inputs, values = next(iter(trainloader))
    outputs = model(inputs)
    
    print(values[0])
    print(outputs[0])

tensor([0.2501])
tensor([0.2689])


In [31]:
# pred = model.predict(scaled_X[:5])
pred = y_min_max_scaler.inverse_transform(outputs)

In [32]:
pred

array([[228509.76462364],
       [202625.27777851]])

In [17]:
y[:5]

array([[208500],
       [181500],
       [223500],
       [140000],
       [250000]])

In [35]:
torch.save(model,'./tmp_pytorch/model.pt')

In [44]:
joblib.dump(x_min_max_scaler, './tmp/x_min_max_scaler.save')
joblib.dump(y_min_max_scaler, './tmp/y_min_max_scaler.save')

['./tmp/y_min_max_scaler.save']