# Homework_2
## Real Time Machine Learning
### Authors: Mark McAfoose
####

In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np 
import pandas as pd 
import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torchvision import datasets, transforms

import warnings
warnings.filterwarnings('ignore')
seed = np.random.seed(100)

# Question 1 Pre Code

In [2]:
#Define functions
def training_loop(n_epochs, optimizer, model, loss_fn, in_train, in_val, out_train, out_val):
    for epoch in range(1, n_epochs + 1):
        p_train = model(in_train) # <1>
        loss_train = loss_fn(p_train, out_train)

        p_val = model(in_val) # <1>
        loss_val = loss_fn(p_val, out_val)
        
        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % 10 == 0:
            print(datetime.datetime.now(), f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")
            
def binary_map(x):
    return x.map({'yes':1,"no":0})

In [3]:
# load the dataset
housing = pd.DataFrame(pd.read_csv("./Housing.csv")) 
num_vars = ['area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea', 'price']
varList=['mainroad','guestroom','basement','hotwaterheating','airconditioning', 'prefarea']
input_size = len(num_vars)-1

housing[varList] = housing[varList].apply(binary_map)

scaler = StandardScaler()
# scaler = MinMaxScaler()

#Split data into training and validation sets
df_train, df_test = train_test_split(housing, train_size=0.8, test_size=0.2, random_state=seed)

df_Newtrain = df_train[num_vars]
df_Newtest = df_test[num_vars]

# scaling the data
df_Newtrain[num_vars] = scaler.fit_transform(df_Newtrain[num_vars])
df_Newtest[num_vars] = scaler.fit_transform(df_Newtest[num_vars])

#Create input and output arrays for both training and validation
out_Newtrain = df_Newtrain.pop('price')
in_Newtrain = df_Newtrain
out_Newtest = df_Newtest.pop('price')
in_Newtest = df_Newtest

# convert the data to tensors
in_train = torch.tensor(in_Newtrain.values).float()
in_val = torch.tensor(in_Newtest.values).float()
out_train = torch.tensor(out_Newtrain.values).float().unsqueeze(-1)
out_val = torch.tensor(out_Newtest.values).float().unsqueeze(-1)

# Question 1 Part 1

In [4]:
seq_model = nn.Sequential(
            nn.Linear(input_size, 8), #hidden layer 1
            nn.Tanh(),
            nn.Linear(8, 1)) #output layer

optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = seq_model,
    loss_fn = nn.MSELoss(),
    in_train = in_train,
    in_val = in_val, 
    out_train = out_train,
    out_val = out_val)

2022-03-05 23:23:06.606076 Epoch 1, Training loss 1.0614, Validation loss 1.0826
2022-03-05 23:23:06.610077 Epoch 10, Training loss 1.0414, Validation loss 1.0613
2022-03-05 23:23:06.614077 Epoch 20, Training loss 1.0198, Validation loss 1.0385
2022-03-05 23:23:06.618078 Epoch 30, Training loss 0.9990, Validation loss 1.0166
2022-03-05 23:23:06.622079 Epoch 40, Training loss 0.9790, Validation loss 0.9953
2022-03-05 23:23:06.626080 Epoch 50, Training loss 0.9595, Validation loss 0.9748
2022-03-05 23:23:06.630081 Epoch 60, Training loss 0.9408, Validation loss 0.9549
2022-03-05 23:23:06.634082 Epoch 70, Training loss 0.9226, Validation loss 0.9357
2022-03-05 23:23:06.642084 Epoch 80, Training loss 0.9049, Validation loss 0.9171
2022-03-05 23:23:06.646085 Epoch 90, Training loss 0.8878, Validation loss 0.8990
2022-03-05 23:23:06.650085 Epoch 100, Training loss 0.8712, Validation loss 0.8815
2022-03-05 23:23:06.654086 Epoch 110, Training loss 0.8551, Validation loss 0.8645
2022-03-05 23:2

# Question 1 Part 2

In [5]:
seq_model_2 = nn.Sequential(
            nn.Linear(input_size, 8), #hidden layer 1
            nn.Tanh(),
            nn.Linear(8, 4), #hidden layer 2
            nn.Tanh(),
            nn.Linear(4, 2), #hidden layer 3
            nn.Tanh(),
            nn.Linear(2, 1)) #output layer


optimizer = optim.SGD(seq_model_2.parameters(), lr=1e-3)

training_loop(
    n_epochs = 200, 
    optimizer = optimizer,
    model = seq_model_2,
    loss_fn = nn.MSELoss(),
    in_train = in_train,
    in_val = in_val, 
    out_train = out_train,
    out_val = out_val)

2022-03-05 23:23:06.712100 Epoch 1, Training loss 1.5664, Validation loss 1.5722
2022-03-05 23:23:06.724103 Epoch 10, Training loss 1.5380, Validation loss 1.5439
2022-03-05 23:23:06.739106 Epoch 20, Training loss 1.5080, Validation loss 1.5140
2022-03-05 23:23:06.747108 Epoch 30, Training loss 1.4796, Validation loss 1.4858
2022-03-05 23:23:06.757110 Epoch 40, Training loss 1.4527, Validation loss 1.4591
2022-03-05 23:23:06.770114 Epoch 50, Training loss 1.4273, Validation loss 1.4338
2022-03-05 23:23:06.777114 Epoch 60, Training loss 1.4032, Validation loss 1.4098
2022-03-05 23:23:06.784117 Epoch 70, Training loss 1.3804, Validation loss 1.3871
2022-03-05 23:23:06.798119 Epoch 80, Training loss 1.3587, Validation loss 1.3656
2022-03-05 23:23:06.805121 Epoch 90, Training loss 1.3382, Validation loss 1.3452
2022-03-05 23:23:06.816123 Epoch 100, Training loss 1.3187, Validation loss 1.3259
2022-03-05 23:23:06.826126 Epoch 110, Training loss 1.3002, Validation loss 1.3075
2022-03-05 23:2

# Question 2 Pre Code

In [6]:
#Load and preprocess dataset
class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

data_path = '../data-unversioned/p1ch7/'
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

cifar10_train = [(img, label) for img, label in cifar10]
cifar10_test = [(img, label) for img, label in cifar10_val]

#Use GPU if available
device = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

Files already downloaded and verified
Files already downloaded and verified
cuda


# Question 2 Part 1

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10_train, batch_size=64, shuffle=True)

model_cifar = nn.Sequential(
            nn.Linear(3072, 512), #Hidden Layer 1
            nn.Tanh(),
            nn.Linear(512, 10)) #Output layer

model_cifar.to(device)

learning_rate = 1e-2

optimizer = optim.SGD(model_cifar.parameters(), lr=learning_rate)

loss_fn = nn.CrossEntropyLoss()

n_epochs = 301

for epoch in range(n_epochs):
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model_cifar(imgs.view(imgs.shape[0], -1))
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
      print('{} Epoch {}, Training loss {}'.format(datetime.datetime.now(), epoch, loss)) 

train_loader = torch.utils.data.DataLoader(cifar10_train, batch_size=64, shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model_cifar(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
        
print("Train Accuracy: %f" % (correct / total))

val_loader = torch.utils.data.DataLoader(cifar10_test, batch_size=64, shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model_cifar(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
        
print("Val Accuracy: %f" % (correct / total))

2022-03-05 23:23:23.085878 Epoch 0, Training loss 2.101949691772461
2022-03-05 23:23:43.512543 Epoch 10, Training loss 1.612116813659668
2022-03-05 23:23:53.408569 Epoch 20, Training loss 1.585538625717163
2022-03-05 23:24:03.936954 Epoch 30, Training loss 0.4230346977710724
2022-03-05 23:24:20.225934 Epoch 40, Training loss 0.5442031621932983
2022-03-05 23:24:44.915783 Epoch 50, Training loss 0.24527835845947266
2022-03-05 23:25:06.262392 Epoch 60, Training loss 0.376009464263916
2022-03-05 23:25:27.259402 Epoch 70, Training loss 0.07200481742620468
2022-03-05 23:25:50.407587 Epoch 80, Training loss 0.03949423134326935
2022-03-05 23:26:03.049776 Epoch 90, Training loss 0.034284189343452454
2022-03-05 23:26:17.049218 Epoch 100, Training loss 0.060630038380622864
2022-03-05 23:26:28.764108 Epoch 110, Training loss 0.03002581186592579
2022-03-05 23:26:48.933494 Epoch 120, Training loss 0.033986691385507584
2022-03-05 23:27:12.507886 Epoch 130, Training loss 0.019289325922727585
2022-03-0

# Could not figure out part 2 for the life of me
# If you could convincce the professor to go over this homework in great detail after Spring Break, I would greatly appreciate it