In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
!cp /content/drive/MyDrive/2-folder/kaggle/df_utils.py /content/
!cp /content/drive/MyDrive/2-folder/kaggle/nn_utils.py /content/
import df_utils
import nn_utils

In [None]:
# Load the raw competition files.
df_train_loaded = pd.read_csv('/content/drive/MyDrive/2-folder/kaggle/housing-prices-competition/train.csv')
df_test_loaded = pd.read_csv('/content/drive/MyDrive/2-folder/kaggle/housing-prices-competition/test.csv')
print(f"df_train_loaded = {df_train_loaded.shape}, df_test_loaded = {df_test_loaded.shape}")

# Work on copies without the 'Id' column; the *_loaded frames stay untouched.
df_train = df_train_loaded.drop(['Id'], axis=1)
df_test = df_test_loaded.drop(['Id'], axis=1)

# Drop all columns with more than 20% of missing values.
# NOTE(review): the return value is ignored, so this relies on
# df_utils.drop_col_miss_val modifying both frames in place -- confirm.
df_utils.drop_col_miss_val(df_train, df_test, 20)

# Fix categorical values and missing values
df_train, df_test = df_utils.prepare_df(df_train, df_test, 5, ['SalePrice'])

# Decide the 60/20/20 train/validation/test row split BEFORE any statistic is
# computed, so feature selection and normalisation below only ever see training
# rows. Splitting row positions with the same seeds yields exactly the partition
# that splitting the feature matrix itself would.
row_idx = np.arange(len(df_train))
idx_trainval, idx_test = train_test_split(row_idx, test_size=0.2, random_state=42)
idx_train, idx_val = train_test_split(idx_trainval, test_size=0.25, random_state=42)

# Calculate the correlation of features with the target (training rows only)
correlation = df_train.iloc[idx_train].corr()
sorted_corr = correlation['SalePrice'].sort_values(ascending=False)
# Keep features whose correlation with SalePrice exceeds 0.2. A column that is
# constant on the training rows has NaN correlation and is therefore excluded,
# which also guarantees a non-zero std for every selected column.
columns = [name for name, value in sorted_corr.items()
           if value > 0.2 and name != 'SalePrice']

# Remove target label from training set
y = df_train['SalePrice'].values
df_train = df_train.drop(['SalePrice'], axis=1)

# Extract columns with high correlation
df_train = df_train[columns]
df_test = df_test[columns]

# Normalize with statistics fitted on the training rows only, then apply the
# same transform to every labelled row and to the competition test frame.
mean = df_train.iloc[idx_train].mean()
std = df_train.iloc[idx_train].std()
df_train = (df_train - mean) / std
df_test = (df_test - mean) / std

X = df_train.values

# Materialise the train, validation and test splits from the row positions.
X_train, X_val, X_test = X[idx_train], X[idx_val], X[idx_test]
y_train, y_val, y_test = y[idx_train], y[idx_val], y[idx_test]
print(f"X_train = {X_train.shape}, y_train = {y_train.shape}")
print(f"X_val = {X_val.shape}, y_val = {y_val.shape}")
print(f"X_test = {X_test.shape}, y_test = {y_test.shape}")

df_train_loaded = (1460, 81), df_test_loaded = (1459, 80)
X_train = (876, 36), y_train = (876,)
X_val = (292, 36), y_val = (292,)
X_test = (292, 36), y_test = (292,)


In [None]:
# Convert data to PyTorch tensors
# Convert every NumPy split to a float32 PyTorch tensor in a single pass.
# NOTE(review): the target tensors stay 1-D while the models in this notebook end
# in nn.Linear(..., 1); confirm nn_utils.training_loop aligns the shapes before
# computing the loss.
(X_train_t, y_train_t,
 X_val_t, y_val_t,
 X_test_t, y_test_t) = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_val, y_val, X_test, y_test)
)

# Define the model
class RegressionModel(nn.Module):
    """Fully connected regressor: input_dim -> 64 -> 128 -> 128 -> 1 with ReLU between layers."""

    def __init__(self, input_dim):
        """Create the four linear layers (fc1..fc4) and the shared ReLU.

        Args:
            input_dim: number of input features per sample.
        """
        super().__init__()
        wide = 32 * 4
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, wide)
        self.fc3 = nn.Linear(wide, wide)
        self.fc4 = nn.Linear(wide, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run x through the three ReLU-activated hidden layers, then the linear output layer."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = self.relu(hidden_layer(x))
        # The output layer has no activation.
        return self.fc4(x)

# Seed PyTorch so that weight initialisation (and any torch-driven randomness
# inside the training loop) is repeatable when this cell is re-run.
torch.manual_seed(42)

input_dim = X_train_t.shape[1]  # Number of features
model_0 = RegressionModel(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model_0.parameters(), lr=0.001)

nn_utils.training_loop(10000, 30, model_0, criterion, optimizer, X_train_t, y_train_t, X_val_t, y_val_t)

# Predict on the validation split without tracking gradients.
model_0.eval()
with torch.no_grad():
  # squeeze(-1) removes only the trailing output dimension, so the batch
  # dimension survives even if the split holds a single row.
  model_0_preds = model_0(X_val_t).squeeze(-1)
print(f"model_0_preds {type(model_0_preds)} {model_0_preds.shape}")

Epoch [200/10000], Train Loss: 9564741632.0000, Test Loss: 1788263040.0000
Epoch [400/10000], Train Loss: 434878880.0000, Test Loss: 1573952896.0000
Epoch [600/10000], Train Loss: 559769728.0000, Test Loss: 1365901056.0000
Epoch [800/10000], Train Loss: 164789184.0000, Test Loss: 1079301888.0000
Epoch [1000/10000], Train Loss: 234852624.0000, Test Loss: 1256705280.0000
Epoch [1200/10000], Train Loss: 176006592.0000, Test Loss: 1233879040.0000
Epoch [1400/10000], Train Loss: 94751456.0000, Test Loss: 1381570048.0000
Epoch [1600/10000], Train Loss: 86518432.0000, Test Loss: 1308227072.0000
Epoch [1800/10000], Train Loss: 241945712.0000, Test Loss: 1308553216.0000
Epoch [2000/10000], Train Loss: 184578048.0000, Test Loss: 1497323264.0000
Epoch [2200/10000], Train Loss: 211054208.0000, Test Loss: 1583749760.0000
Epoch [2400/10000], Train Loss: 361096736.0000, Test Loss: 1505456896.0000
Epoch [2600/10000], Train Loss: 385944320.0000, Test Loss: 1518869248.0000


KeyboardInterrupt: 

In [None]:
class HousePricePredictor(nn.Module):
    """Fully connected regressor: input_size -> 128 -> 64 -> 1 with ReLU on the hidden layers."""

    def __init__(self, input_size):
        """Create the three linear layers (fc1..fc3) and the shared ReLU.

        Args:
            input_size: number of input features per sample.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-activated) single-output prediction for each row of x."""
        first_hidden = self.relu(self.fc1(x))
        second_hidden = self.relu(self.fc2(first_hidden))
        # No activation for regression
        return self.fc3(second_hidden)

# Seed PyTorch so that weight initialisation (and any torch-driven randomness
# inside the training loop) is repeatable when this cell is re-run.
torch.manual_seed(42)

input_size = X_train_t.shape[1]  # Number of features
model_1 = HousePricePredictor(input_size)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model_1.parameters(), lr=0.01)

nn_utils.training_loop(800, 30, model_1, criterion, optimizer, X_train_t, y_train_t, X_val_t, y_val_t)

# Predict on the validation split without tracking gradients.
model_1.eval()
with torch.no_grad():
  # squeeze(-1) removes only the trailing output dimension, so the batch
  # dimension survives even if the split holds a single row.
  model_1_preds = model_1(X_val_t).squeeze(-1)
# Label fixed: this line reports model_1's predictions, not model_0's.
print(f"model_1_preds {type(model_1_preds)} {model_1_preds.shape}")

Epoch [200/800], Train Loss: 188671296.0000, Test Loss: 978553600.0000
Epoch [400/800], Train Loss: 74088672.0000, Test Loss: 1028306496.0000
Epoch [600/800], Train Loss: 115633952.0000, Test Loss: 1040427776.0000
Epoch [800/800], Train Loss: 181309328.0000, Test Loss: 1219121152.0000
model_0_preds <class 'torch.Tensor'> torch.Size([292])


In [None]:
# Meta-features for stacking: place the two base models' validation-split
# predictions side by side, one column per model (model_0 first, model_1 second).
X_train_meta = torch.stack([model_0_preds, model_1_preds], dim=-1)
print(X_train_meta.shape)

torch.Size([292, 2])


In [None]:
class RegressionNN(nn.Module):
    """Meta-model regressor: input_dim -> 64 -> 128 -> 128 -> 1 with ReLU between layers.

    Same layer layout as RegressionModel defined earlier in this notebook.
    """

    def __init__(self, input_dim):
        """Create the four linear layers (fc1..fc4) and the shared ReLU.

        Args:
            input_dim: number of input features per sample.
        """
        super().__init__()
        width = 32 * 4
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, width)
        self.fc3 = nn.Linear(width, width)
        self.fc4 = nn.Linear(width, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply fc1..fc3 with ReLU after each, then the un-activated output layer fc4."""
        act = self.relu
        hidden = act(self.fc3(act(self.fc2(act(self.fc1(x))))))
        return self.fc4(hidden)

# Seed PyTorch so that weight initialisation (and any torch-driven randomness
# inside the training loop) is repeatable when this cell is re-run.
torch.manual_seed(42)

input_size = X_train_meta.shape[1]  # Number of features
meta_model = RegressionNN(input_size)

# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(meta_model.parameters(), lr=0.0001)

# The meta-model is fitted on the base models' validation-split predictions, so
# build the matching meta-features for the untouched test split and pass them as
# the held-out set; otherwise only the training loss is ever reported.
model_0.eval()
model_1.eval()
with torch.no_grad():
  X_test_meta = torch.stack(
      (model_0(X_test_t).squeeze(-1), model_1(X_test_t).squeeze(-1)), -1)

# NOTE(review): assumes nn_utils.training_loop only reports the loss on the
# held-out pair (as its 9-argument calls earlier in this notebook suggest) and
# does not use it for model selection -- confirm.
nn_utils.training_loop(8000, 30, meta_model, criterion, optimizer,
                       X_train_meta, y_val_t, X_test_meta, y_test_t)

Epoch [200/8000], Train Loss: 257312736.0000
Epoch [400/8000], Train Loss: 3136257.2500
Epoch [600/8000], Train Loss: 169960736.0000
Epoch [800/8000], Train Loss: 79578680.0000
Epoch [1000/8000], Train Loss: 263503376.0000
Epoch [1200/8000], Train Loss: 122281704.0000
Epoch [1400/8000], Train Loss: 1559606144.0000
Epoch [1600/8000], Train Loss: 181266592.0000
Epoch [1800/8000], Train Loss: 1978906624.0000
Epoch [2000/8000], Train Loss: 2213876992.0000
Epoch [2200/8000], Train Loss: 154152624.0000
Epoch [2400/8000], Train Loss: 239357360.0000
Epoch [2600/8000], Train Loss: 448164928.0000


KeyboardInterrupt: 