In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.preprocessing import StandardScaler, OneHotEncoder


In [None]:
from google.colab import files
uploaded = files.upload()


TypeError: 'NoneType' object is not subscriptable

In [None]:
data = pd.read_csv("housePrice.csv")
data=data.drop(columns=['Price(USD)'])

data['Area']=data['Area'].str.replace(',', '').astype(float)
data['Room']=data['Room'].astype(int)



In [None]:

target_col='Price'
X = data.drop(columns=[target_col])
y = data[target_col]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

preprocessor = ColumnTransformer([
    ("num", StandardScaler(), make_column_selector(dtype_include="number")),
    ("cat", OneHotEncoder(handle_unknown='ignore'), make_column_selector(dtype_include=["object", "bool"]))
])

pipeline = Pipeline([
    ('preprocessor', preprocessor)
])

# Fit pipeline on train features only
pipeline.fit(X_train)

# Transform both train and test features with the same pipeline
X_train_processed = pipeline.transform(X_train)
X_test_processed = pipeline.transform(X_test)


from sklearn.preprocessing import StandardScaler

y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_test_scaled = y_scaler.transform(y_test.values.reshape(-1, 1))

In [None]:
X_train_tensor=torch.Tensor(X_train_processed.toarray())
X_test_tensor=torch.Tensor(X_test_processed.toarray())
from sklearn.preprocessing import StandardScaler

target_scaler = StandardScaler()
y_train_scaled = target_scaler.fit_transform(y_train.to_numpy().reshape(-1,1))
y_test_scaled = target_scaler.transform(y_test.to_numpy().reshape(-1,1))

y_train_tensor = torch.tensor(y_train_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_scaled, dtype=torch.float32)


train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)

In [None]:
class LinReg(nn.Module):
  def __init__(self, input_dim, output_dim):
    super().__init__()

    self.net = nn.Sequential(
      nn.Linear(input_dim, 128), nn.Sigmoid(),
      nn.Linear(128, 128), nn.Sigmoid(),
      nn.Linear(128, 128), nn.Sigmoid(),
      nn.Linear(128, output_dim)
    )

  def forward(self,x):
    x=self.net(x)
    return x

In [None]:
device="cuda"

lr = LinReg(input_dim=X_train_tensor.shape[1], output_dim=1)
lr.to(device)

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(lr.parameters(), lr=0.001)

n_epochs = 100
batch_size = 32

train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True, drop_last=True)


train_loss = 0.0
test_loss = 0.0
train_r2_score = torch.zeros(batch_size,batch_size).to(device)
test_r2_score = torch.zeros(batch_size,batch_size).to(device)
for epoch in range(n_epochs):
  lr.train()
  for X_batch, y_batch in train_dataloader:
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)

    y_pred = lr(X_batch)
    loss = loss_fn(y_pred, y_batch)
    r2 = 1-(y_pred- y_batch)**2/(y_pred**2-y_pred.mean())**2
    train_r2_score += r2

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_loss+=loss.item()

  lr.eval()
  with torch.no_grad():
    for X_batch, y_batch in test_dataloader:
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)

      y_pred = lr(X_batch)
      loss = loss_fn(y_pred, y_batch)

      r2 = 1-(y_pred- y_batch)**2/(y_pred**2-y_pred.mean())**2
      test_r2_score += r2

      test_loss+=loss.item()

  print(f"Epoch {epoch+1}: train loss {train_loss:.2f}, r2 {train_r2_score.mean().item():.2f}, test loss {test_loss:.2f}, r2 {test_r2_score.mean().item():.2f}")

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1: train loss 0.9908, r2 -32.4070, test loss 0.7180, r2 -32.0351
Epoch 2: train loss 1.0121, r2 -32.3137, test loss 0.7294, r2 -32.7547
Epoch 3: train loss 1.0171, r2 -32.3314, test loss 0.7049, r2 -32.4749
Epoch 4: train loss 1.0138, r2 -32.3040, test loss 0.7090, r2 -31.9160
Epoch 5: train loss 1.0125, r2 -31.9447, test loss 0.6767, r2 -31.7729
Epoch 6: train loss 1.0148, r2 -32.1416, test loss 0.7131, r2 -32.3747
Epoch 7: train loss 1.0111, r2 -32.0745, test loss 0.7292, r2 -33.1720
Epoch 8: train loss 0.9970, r2 -32.0585, test loss 0.7059, r2 -32.0047
Epoch 9: train loss 1.0017, r2 -32.3643, test loss 0.7331, r2 -33.2997
Epoch 10: train loss 1.0090, r2 -32.0169, test loss 0.7140, r2 -32.0792
Epoch 11: train loss 1.0100, r2 -32.2887, test loss 0.7053, r2 -33.5048
Epoch 12: train loss 1.0076, r2 -32.5036, test loss 0.6953, r2 -31.7061
Epoch 13: train loss 1.0086, r2 -32.1485, test loss 0.7063, r2 -32.1479
Epoch 14: train loss 1.0132, r2 -32.4009, test loss 0.7198, r2 -32.8214
E

KeyboardInterrupt: 