In [1]:
#Import libraries
import wandb
import torch
import pandas as pd
import numpy as np
from torch import nn
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error


In [3]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
# (Set device = 'cpu' by hand to force CPU execution.)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Seed the NumPy and PyTorch global RNGs so that random operations drawing
# from them (shuffling, weight initialisation) repeat across kernel restarts.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Optional: start a Weights & Biases run (left disabled here).
# wandb.init(project="my-test-project")

# Training hyper-parameters.
batch_size = 1024
epochs = 1000
progres_print_rate = 10  # print progress / validate every N epochs
learning_rate = 0.01

# Hyper-parameters collected in one dict (e.g. to hand to an experiment tracker).
config = {
  "learning_rate": learning_rate,
  "epochs": epochs,
  "batch_size": batch_size
}

cuda


In [None]:
# Load the housing data set: a whitespace-separated file without a header row,
# so the column names are supplied explicitly.
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
boston_raw = pd.read_csv(
    '../data/housing.csv',
    names=column_names,
    header=None,
    sep=r"\s+",
)

# Work on a copy so the raw frame stays untouched.
data = boston_raw.copy()

In [None]:
# Hold out 20% of the rows, then split that hold-out in half into the test and
# validation sets. A fixed random_state keeps the partition identical between
# runs, so results can be compared across kernel restarts.
split_seed = 42
train, _test = train_test_split(data, test_size=0.2, shuffle=True, random_state=split_seed)
test, validate = train_test_split(_test, test_size=0.5, random_state=split_seed)

# Detach the target column from each partition; pop() removes it in place,
# leaving only the feature columns in train / test / validate.
train_targets = train.pop("MEDV")
test_targets = test.pop("MEDV")
validate_targets = validate.pop('MEDV')

# Summary statistics of the training features, transposed so that each row is
# a feature and 'mean' / 'std' are columns (read by norm()).
train_stats = train.describe().transpose()

In [None]:
def df_to_tensor(df):
    """Convert a pandas object to a float32 tensor placed on the global `device`."""
    as_array = df.values
    as_tensor = torch.from_numpy(as_array)
    return as_tensor.float().to(device)
def norm(x):
    """Standardise `x` with the mean and std stored in the global `train_stats`."""
    centred = x - train_stats['mean']
    return centred / train_stats['std']

In [None]:
# Convert the normalised features and their targets into tensors for PyTorch.
# to_frame() turns each target Series into a one-column frame, so the targets
# become (n_samples, 1) float tensors on the same device as the features.
train_dataset = TensorDataset(df_to_tensor(norm(train)), df_to_tensor(train_targets.to_frame()))
validate_dataset = TensorDataset(df_to_tensor(norm(validate)), df_to_tensor(validate_targets.to_frame()))

# The test set is kept as a feature tensor plus a plain list of target values.
x_test, y_test = df_to_tensor(norm(test)), test_targets.tolist()

# Reshuffle the training samples every epoch; validation order does not matter.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validate_dataloader = DataLoader(validate_dataset, batch_size=batch_size)



In [None]:
class NeuralNetwork(nn.Module):
    """Feed-forward network with three hidden ReLU layers and a linear output layer.

    Args:
        in_size: Number of input features.
        out_size: Number of output values per sample.
        hidden_size: Width of each hidden layer. Defaults to 5, the width that
            was previously hard-coded, so existing callers are unaffected.
    """

    def __init__(self, in_size, out_size, hidden_size=5):
        super().__init__()
        # The attribute name and layer order are unchanged, so state_dict keys
        # ("linear_relu_stack.0.weight", ...) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, out_size),
            )

    def forward(self, x):
        """Return the raw (un-activated) output of the layer stack for `x`."""
        return self.linear_relu_stack(x)

In [None]:
# Define the model. The input width is read from the training feature frame
# rather than hard-coded, so it follows the data if columns are added or removed.
model = NeuralNetwork(in_size=train.shape[1], out_size=1).to(device)

In [None]:
# Loss function: mean squared error, averaged over all elements of the batch
loss_fun = nn.MSELoss(reduction="mean")

In [None]:
# Adam optimizer over every parameter of the model, using the configured learning rate
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

In [None]:
# Utility function to train the model
def lrmodel(num_epochs, model, loss_fun, optimizer, train_dl):
    """Train `model` for `num_epochs` passes over `train_dl`.

    Every `progres_print_rate` epochs the loss of the last training batch is
    printed and `validate_model` is run on the global `validate_dataloader`.

    Args:
        num_epochs: Number of passes over the training loader.
        model: The torch module to optimise.
        loss_fun: Callable taking (predictions, targets) and returning a loss tensor.
        optimizer: Optimizer already bound to `model`'s parameters.
        train_dl: Iterable yielding (features, targets) batches.
    """
    # Only talk to Weights & Biases when a run is active; without one the
    # logging calls cannot succeed and would abort training.
    tracking = wandb.run is not None
    if tracking:
        # Register the model hooks once, not on every batch.
        wandb.watch(model)

    # Repeat for given number of epochs
    for epoch in range(num_epochs):
        # validate_model may have switched the model to eval mode, so make
        # sure every epoch trains in training mode.
        model.train()
        last_loss = None

        # Train with batches of data
        for xb, yb in train_dl:
            xb = xb.to(device)
            yb = yb.to(device)
            # Start from clean gradients so nothing stale is accumulated.
            optimizer.zero_grad()
            # 1. Generate predictions
            pred = model(xb)
            # 2. Calculate loss
            loss = loss_fun(pred, yb)
            # 3. Compute gradients
            loss.backward()
            # 4. Update parameters using gradients
            optimizer.step()

            # Keep a plain float: logging the tensor would hold on to the graph.
            last_loss = loss.item()
            if tracking:
                wandb.log({"loss": last_loss})

        # Print the progress
        if (epoch+1) % progres_print_rate == 0:
            # last_loss stays None when the loader produced no batches.
            if last_loss is not None:
                print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, last_loss))
            validate_model(loader=validate_dataloader, model=model, loss_fun=loss_fun)
            
def validate_model(loader, model, loss_fun):
    """Evaluate `model` on `loader`, print and return the mean validation loss.

    The per-batch losses are weighted by batch size, so the result is the
    per-sample mean when `loss_fun` averages within a batch (assumed here;
    confirm if a loss with a different reduction is passed in).

    Args:
        loader: Iterable yielding (features, targets) batches.
        model: The torch module to evaluate.
        loss_fun: Callable taking (predictions, targets) and returning a loss tensor.

    Returns:
        The mean validation loss as a float, or NaN if `loader` is empty.
    """
    was_training = model.training
    model.eval()
    weighted_loss_sum = 0.0
    sample_count = 0
    try:
        with torch.no_grad():
            for xb, yb in loader:
                xb = xb.to(device)
                yb = yb.to(device)
                outputs = model(xb)
                batch_size_seen = xb.size(0)
                weighted_loss_sum += loss_fun(outputs, yb).item() * batch_size_seen
                sample_count += batch_size_seen
    finally:
        # Put the model back in the mode the caller had it in, so training
        # that continues afterwards is not silently left in eval mode.
        model.train(was_training)

    mean_val_loss = weighted_loss_sum / sample_count if sample_count else float("nan")

    # Log a single value per validation pass, and only when a run is active.
    if wandb.run is not None:
        wandb.log({"val_loss": mean_val_loss})

    print('Validation Loss:', mean_val_loss)
    return mean_val_loss

In [None]:
# Train for the configured number of epochs (`epochs`)
lrmodel(
    num_epochs=epochs,
    model=model,
    loss_fun=loss_fun,
    optimizer=optimizer,
    train_dl=train_dataloader,
)


In [None]:
# Predict on the held-out test set. Inference runs in eval mode and without
# gradient tracking, so no autograd graph is built for the predictions.
model.eval()
with torch.no_grad():
    # Flatten the (n_samples, 1) output and convert it to a plain Python list.
    y_pred_test = model(x_test).flatten().cpu().tolist()

# Compare actual and predicted values side by side. Both are multiplied by
# 1000 (the final summary reports these figures as US dollars).
# NOTE: the 'Predicted Salary' label is kept as-is because the MAE cell reads
# the column by that name.
df = pd.DataFrame({
    'Actual Observation': np.multiply(y_test, 1000),
    'Predicted Salary': np.multiply(y_pred_test, 1000),
})
print(df)

In [None]:
# How far off are the predictions on average? Mean absolute error between the
# actual and the predicted test-set values.
mae = mean_absolute_error(
    y_true=df['Actual Observation'],
    y_pred=df['Predicted Salary'],
)

print(f"We are off on average by {round(mae, 2)} US dollars, while the mean price of a house in the dataset is {round(test_targets.mean()*1000, 2)}.")