In [1]:
import pandas as pd
import numpy as np
from numpy.linalg import lstsq, solve
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [2]:
# Path to the input CSV, relative to the notebook's working directory.
file_path = 'data/socal.csv'

# Load the file and keep the four predictor columns plus the price target.
df = pd.read_csv(file_path)[['sqft', 'n_citi', 'bed', 'bath', 'price']]

# Preview the first few rows of the selected columns.
df.head()

Unnamed: 0,sqft,n_citi,bed,bath,price
0,1560,317,3,2.0,201900
1,713,48,3,2.0,228500
2,800,152,3,1.0,273950
3,1082,48,3,1.0,350000
4,2547,55,4,3.0,385100


In [3]:
def MSE(y, yhat):
    """Mean squared error between targets ``y`` and predictions ``yhat``.

    Parameters
    ----------
    y : array-like or scalar
        Observed values (list, NumPy array, pandas Series, ...).
    yhat : array-like or scalar
        Predicted values. A scalar is compared against every element of ``y``.
        Values are compared positionally.

    Returns
    -------
    numpy.float64
        Mean of the squared differences over all elements.

    Raises
    ------
    ValueError
        If the inputs cannot be broadcast together, or if broadcasting would
        produce an array larger than both inputs (e.g. shape ``(n,)`` against
        ``(n, 1)``). That case would otherwise average an n-by-n matrix of
        pairwise differences and return a plausible-looking but wrong number.
    """
    # Coerce to float arrays so plain lists work and integer inputs cannot
    # overflow when squared.
    y = np.asarray(y, dtype=float)
    yhat = np.asarray(yhat, dtype=float)

    # Legitimate broadcasts (scalar vs. vector, (n, k) vs. (k,)) leave the
    # result with the shape of one of the inputs; an "outer" broadcast does not.
    if np.broadcast(y, yhat).shape not in (y.shape, yhat.shape):
        raise ValueError(
            f"shape mismatch: y has shape {y.shape} but yhat has shape {yhat.shape}"
        )

    return np.mean((y - yhat) ** 2)

def RMSE(y,yhat):
    """Root mean squared error: the square root of ``MSE(y, yhat)``.

    The result is in the same units as ``y``.
    """
    mean_sq_err = MSE(y, yhat)
    return np.sqrt(mean_sq_err)

In [4]:
# Simple bias regressor: predict the mean price for every row.
# This constant-prediction baseline is the reference for the later models.
y = df.price
yhat = y.mean()

# One print call; each item lands on its own line.
print(
    f"Simple Bias Regressor Results",
    f"MSE: {MSE(y, yhat)}",
    f"RMSE: {RMSE(y, yhat)}",
    sep="\n",
)

Simple Bias Regressor Results
MSE: 142101837143.94485
RMSE: 376963.973270583


In [5]:
# Least squares: linear fit of price on sqft, bed, bath and n_citi plus an intercept.
# The intercept is modelled by a constant column of ones; note that this adds
# a 'bias' column to df in place.
df['bias'] = 1
X = df[['bias','sqft', 'bed','bath', 'n_citi']].values
y = df.price.values

# lstsq returns (solution, residuals, rank, singular values); only the
# weight vector is needed here.
w = lstsq(X, y, rcond=None)[0]
yhat = X @ w

# Compute each metric once and reuse it in the report below.
mse = MSE(y, yhat)
rmse = RMSE(y, yhat)

print("Least Squares Results")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")

Least Squares Results
MSE: 92149117766.18396
RMSE: 303560.7315944932


In [6]:
# Re-read the CSV and keep the four predictors plus price; the fresh frame
# does not carry the 'bias' column added for the least-squares fit.
df = pd.read_csv(file_path)[['sqft', 'n_citi', 'bed', 'bath', 'price']]

# Create PyTorch dataset
class CreateDataset(Dataset):
    """In-memory dataset that pairs each feature row with its target value.

    Parameters
    ----------
    features : array-like
        One entry (row) per sample; copied into a float32 tensor.
    targets : array-like
        One target per sample; copied into a float32 tensor.

    Raises
    ------
    ValueError
        If ``features`` and ``targets`` hold a different number of samples.
    """
    def __init__(self,features,targets):
        self.features = torch.tensor(features,dtype=torch.float32)
        self.targets  = torch.tensor(targets,dtype=torch.float32)

        # Fail at construction instead of with an IndexError in the middle of
        # an epoch (targets too short) or by silently ignoring trailing
        # targets (targets too long). Only the leading (sample) axis is compared.
        if self.features.shape[:1] != self.targets.shape[:1]:
            raise ValueError(
                "features and targets must have the same number of samples, got "
                f"shapes {tuple(self.features.shape)} and {tuple(self.targets.shape)}"
            )

    def __len__(self):
        # Number of samples = size of the leading axis of the feature tensor.
        return len(self.features)

    def __getitem__(self,idx):
        # Returns a (features, target) pair for the requested sample.
        return self.features[idx], self.targets[idx]
    

# Define the neural network model
class NeuralNetRegressor(nn.Module):
    """Feed-forward regressor with a single ReLU hidden layer.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> output_size).

    Parameters
    ----------
    input_size : int, default 4
        Number of input features per sample.
    hidden_size : int, default 64
        Width of the hidden layer.
    output_size : int, default 2
        Number of values predicted per sample.
    """
    def __init__(self, input_size=4, hidden_size=64, output_size=2):
        super().__init__()

        # Layers are created in the order they are applied in forward().
        self.fc1 = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.relu = nn.ReLU()                           # hidden non-linearity
        self.fc2 = nn.Linear(hidden_size, output_size)  # hidden -> output

    def forward(self, x):
        """Map a batch of inputs to predictions (last dimension becomes output_size)."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Predictors and target for the neural network.
features = df[['sqft', 'n_citi', 'bed', 'bath']].values
targets = df.price

# Wrap the full data set so the DataLoader can serve shuffled mini-batches.
X = features
Y = targets.to_numpy()
dataset = CreateDataset(X,Y)
data_loader = DataLoader(dataset,batch_size=200,shuffle=True)

# Create the model instance: 4 input features -> 32 hidden units -> 1 output.
model = NeuralNetRegressor(input_size=4,hidden_size=32,output_size=1)

# Define the loss function and the optimizer
cost_function = nn.MSELoss()  # Mean Squared Error Loss for regression
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# NOTE(review): no RNG seed is set in this cell, so the weight initialisation
# and the shuffle order (and therefore the printed losses) can differ per run.
num_epochs = 2400
for epoch in range(num_epochs):
    # The batch tensors get their own names so they do not overwrite the
    # full-data X / Y defined above.
    for X_batch, Y_batch in data_loader:
        # Zero the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass
        Yh_batch = model(X_batch)

        # Targets arrive with shape (batch,); add a trailing axis so they
        # match the (batch, 1) model output.
        loss = cost_function(Yh_batch, torch.unsqueeze(Y_batch,1))

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    # Progress report; `loss` is the loss of the last mini-batch of this
    # epoch only, not an average over the epoch.
    if epoch % 480 == 0:
        print('epoch',epoch,'loss',loss.detach().numpy())

# Neural network RMSE, evaluated on the same rows that were used for training.
X = torch.tensor(features,dtype=torch.float32)
with torch.no_grad():  # inference only: skip building the autograd graph
    Yh = model(X)
# Flatten (n, 1) -> (n,) so the predictions line up with Y element by element
# instead of broadcasting to an (n, n) array inside RMSE.
Yh = Yh.numpy().flatten()
Y = targets.to_numpy()
RMSE(Y,Yh)

epoch 0 loss 627152900000.0
epoch 480 loss 76056490000.0
epoch 960 loss 78432920000.0
epoch 1440 loss 99496890000.0
epoch 1920 loss 172232750000.0


312227.65581936616

In [7]:
# Recorded output of one run of the neural-network cell above (In [6]), kept for reference:
#epoch 0 loss 627152900000.0
#epoch 480 loss 76056490000.0
#epoch 960 loss 78432920000.0
#epoch 1440 loss 99496890000.0
#epoch 1920 loss 172232750000.0
#RMSE: 312227.65581936616