In [59]:
import pandas as pd
import numpy as np
from numpy.linalg import lstsq, solve
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [60]:
# Load the CSV and keep only the square-footage and price columns.
file_path = 'data/socal.csv'
df = pd.read_csv(file_path)[['sqft', 'price']]

# Preview the first five rows of the trimmed frame.
df.head()

Unnamed: 0,sqft,price
0,1560,201900
1,713,228500
2,800,273950
3,1082,350000
4,2547,385100


In [61]:
def MSE(y, yhat):
    """Return the mean squared error between `y` and `yhat`.

    The difference `y - yhat` is formed first, so the two arguments must be
    subtractable (e.g. arrays of the same shape, or an array and a scalar).
    """
    residual = y - yhat
    return np.mean(np.square(residual))

def RMSE(y,yhat):
    """Return the root mean squared error, i.e. the square root of `MSE(y, yhat)`."""
    mean_squared = MSE(y, yhat)
    return np.sqrt(mean_squared)

In [62]:
# Baseline "bias-only" regressor: predict the mean price for every row.
y = df["price"]
yhat = y.mean()

print("Simple Bias Regressor Results")
# Report both error metrics for the constant prediction.
for label, metric in (("MSE", MSE), ("RMSE", RMSE)):
    print(f"{label}: {metric(y, yhat)}")

Simple Bias Regressor Results
MSE: 142101837143.94485
RMSE: 376963.973270583


In [63]:
# Least squares: fit price as a linear function of sqft plus an intercept.
df['bias'] = 1  # constant column so the fitted weights include an intercept term
X = df[['bias', 'sqft']].to_numpy()
y = df['price'].to_numpy()

# lstsq returns a tuple; its first element is the weight vector.
fit = lstsq(X, y, rcond=None)
w = fit[0]
yhat = X @ w

# Compute each metric once and reuse it for the report below.
mse = MSE(y, yhat)
rmse = RMSE(y, yhat)

print("Least Squares Results")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")

Least Squares Results
MSE: 93727246681.71817
RMSE: 306149.05957999965


In [72]:
# Re-read the CSV so the frame holds only the sqft and price columns.
df = pd.read_csv(file_path)[['sqft', 'price']]

# Create PyTorch dataset
class CreateDataset(Dataset):
    """In-memory dataset that pairs each feature row with its target.

    Both inputs are copied into float32 tensors once, at construction time.

    Args:
        features: Array-like whose first axis indexes samples.
        targets: Array-like with one entry per sample along its first axis.

    Raises:
        ValueError: If `features` and `targets` have different lengths.
    """

    def __init__(self,features,targets):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32)

        # Fail fast: a mismatch would otherwise surface later as an IndexError
        # during iteration, or silently ignore the surplus targets.
        n_features = len(self.features)
        n_targets = len(self.targets)
        if n_features != n_targets:
            raise ValueError(
                f"features and targets must have the same length, "
                f"got {n_features} and {n_targets}"
            )

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        return len(self.features)

    def __getitem__(self,idx):
        """Return the `(features, target)` pair stored at position `idx`."""
        return self.features[idx], self.targets[idx]
    

# Define the neural network model
class NeuralNetRegressor(nn.Module):
    """Feed-forward network with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size=4, hidden_size=64, output_size=2):
        super().__init__()

        # Layers are created in input-to-output order.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch `x` through the hidden layer, the ReLU, and the output layer."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Single-column feature matrix of shape (N, 1) and the matching price targets.
features = df.sqft.values.reshape(-1, 1)
targets = df.price

# Seed torch's RNG so weight initialisation and batch shuffling repeat on re-run.
SEED = 42
torch.manual_seed(SEED)

# Create PyTorch dataset
X = features
Y = targets.to_numpy()
dataset = CreateDataset(X,Y)
data_loader = DataLoader(dataset,batch_size=500,shuffle=True)

# Create the model instance
model = NeuralNetRegressor(input_size=1,hidden_size=64,output_size=1)

# Define the loss function and the optimizer
cost_function = nn.MSELoss()  # Mean Squared Error Loss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 200
for epoch in range(num_epochs):
    # Batch variables get their own names so the full-data X / Y defined above
    # are not overwritten by the last mini-batch.
    for batch_X, batch_Y in data_loader:
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        batch_pred = model(batch_X)

        # Targets arrive as a 1-D batch; add a column axis so they line up
        # with the model's one-column output.
        loss = cost_function(batch_pred, torch.unsqueeze(batch_Y, 1))

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    if epoch % 20 == 0:
        # This is the loss of the last mini-batch in the epoch, not an epoch average.
        print('epoch',epoch,'loss',loss.detach().numpy())

# neural network RMSE
X = torch.tensor(features,dtype=torch.float32)
# Gradients are not needed for evaluation, so skip autograd bookkeeping.
with torch.no_grad():
    Yh = model(X)
# The model output has one column per sample; flatten it to 1-D so that
# subtracting it from the 1-D targets does not broadcast.
Yh = Yh.numpy().flatten()
Y = targets.to_numpy()
RMSE(Y,Yh)

epoch 0 loss 623788560000.0
epoch 20 loss 520532620000.0
epoch 40 loss 287985830000.0
epoch 60 loss 125900505000.0
epoch 80 loss 103899490000.0
epoch 100 loss 109490080000.0
epoch 120 loss 88466416000.0
epoch 140 loss 96987080000.0
epoch 160 loss 98583160000.0
epoch 180 loss 127836496000.0


322323.73058616964