# Problem 2

In [196]:
## setup

# load libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from torch.utils.data import Dataset


# define custom data class
class WineData(Dataset):
    """Map-style dataset that exposes a wine-quality DataFrame as tensors.

    Every column except ``target_col`` becomes a predictor; ``target_col``
    becomes the regression target.

    Args:
        data: pandas DataFrame with numeric predictor columns and the target column.
        target_col: name of the column to predict. Defaults to ``"quality"``.

    Attributes:
        data: the DataFrame passed in (kept by reference, not copied).
        header: list of all column names in ``data``, target included.
        preds: tensor of shape ``(n_rows, n_columns - 1)`` with the predictors.
        targets: tensor of shape ``(n_rows, 1)`` with the target values.

    Raises:
        KeyError: if ``target_col`` is not a column of ``data``.
    """

    def __init__(self, data, target_col="quality"):
        if target_col not in data.columns:
            raise KeyError(f"target column {target_col!r} not found in data")

        # load dataset
        self.data = data
        self.header = self.data.columns.tolist()

        # torch.tensor with an explicit dtype replaces the legacy torch.Tensor(...)
        # constructor; the default dtype is what that constructor produced.
        dtype = torch.get_default_dtype()
        self.preds = torch.tensor(
            self.data.drop(columns=[target_col]).to_numpy(), dtype=dtype
        )
        # Double brackets keep the target 2-D, i.e. shape (n_rows, 1), so it
        # lines up with a model output of shape (batch, 1).
        self.targets = torch.tensor(self.data[[target_col]].to_numpy(), dtype=dtype)

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.preds)

    def __getitem__(self, rownum):
        """Return the ``(predictors, target)`` tensors for row ``rownum``."""
        preds = self.preds[rownum, :]
        target = self.targets[rownum, :]

        return preds, target
    

# load data
# Both CSV files are semicolon-delimited; every column is parsed as float.
qual_r_raw = pd.read_csv("winequality-red.csv", delimiter=";", dtype="float")
qual_w_raw = pd.read_csv("winequality-white.csv", delimiter=";", dtype="float")


def split_and_standardize(raw, target_col="quality", train_frac=0.64, val_frac=0.16, seed=0):
    """Shuffle ``raw``, split it into train/validation/test and z-score the predictors.

    The mean and standard deviation are computed on the training split only and
    then applied to all three splits, so no statistics leak from held-out rows.
    The target column is left on its original scale. ``raw`` is not modified.

    Args:
        raw: DataFrame with predictor columns plus ``target_col``.
        target_col: column excluded from standardization.
        train_frac: fraction of rows for the training split.
        val_frac: fraction of rows for the validation split; the remaining
            rows form the test split.
        seed: random seed for the shuffle, so the split is reproducible.

    Returns:
        Tuple ``(train, val, test)`` of new DataFrames.
    """
    ## shuffle data
    shuffled = raw.sample(frac=1.0, random_state=seed).reset_index(drop=True)

    ## pick indexes at 64/16/20
    # NOTE(review): the original TODO reads "training/test/validation" at
    # "64/16/20"; this assumes 16% validation and 20% test - swap if the other
    # reading was intended.
    n_train = int(train_frac * len(shuffled))
    n_val = int(val_frac * len(shuffled))
    parts = (
        shuffled.iloc[:n_train],
        shuffled.iloc[n_train:n_train + n_val],
        shuffled.iloc[n_train + n_val:],
    )

    # standardize dataset
    feature_cols = [col for col in shuffled.columns if col != target_col]
    mean = parts[0][feature_cols].mean()
    # A constant column has std 0 (NaN with fewer than two rows); dividing by 1
    # instead leaves such a column centred rather than turning it into NaN/inf.
    std = parts[0][feature_cols].std().replace(0, 1.0).fillna(1.0)

    scaled_parts = []
    for part in parts:
        scaled = part.copy()
        scaled[feature_cols] = (part[feature_cols] - mean) / std
        scaled_parts.append(scaled)
    return tuple(scaled_parts)


# split into training/test/validation data
qual_r_train, qual_r_val, qual_r_test = split_and_standardize(qual_r_raw)
qual_w_train, qual_w_val, qual_w_test = split_and_standardize(qual_w_raw)


In [200]:
# define model architecture

class WiNet(nn.Module):
    """Fully connected regression network.

    Layer widths are 11 -> 4096 -> 512 -> 2048 -> 1, with an in-place ReLU
    after every linear layer except the last. The stack is stored in
    ``self.regression`` as an ``nn.Sequential``.
    """

    def __init__(self) -> None:
        super().__init__()
        widths = (11, 4096, 512, 2048)

        # Each hidden stage is a Linear followed by an in-place ReLU.
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU(inplace=True))

        # Final projection to a single regression output, with no activation.
        stack.append(nn.Linear(widths[-1], 1))
        self.regression = nn.Sequential(*stack)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the linear/ReLU stack and return the result."""
        return self.regression(x)

In [273]:
# set up model training

def train(model, dataset, epoch, lr=0.005, batch_size=32, momentum=0, weight_decay=0):
    """Run one pass of mini-batch SGD over ``dataset`` and return per-step losses.

    A fresh ``SGD`` optimizer is built on every call, so optimizer state (for
    example momentum buffers) does not carry over between calls.

    Args:
        model: ``nn.Module`` to optimise; its parameters are updated in place.
        dataset: dataset yielding ``(input, target)`` pairs that a ``DataLoader``
            can batch.
        epoch: label stored alongside every recorded loss; it does not control
            how many passes are made.
        lr: SGD learning rate.
        batch_size: number of samples per mini-batch.
        momentum: SGD momentum factor.
        weight_decay: L2 penalty coefficient passed to SGD.

    Returns:
        List of ``[epoch, loss]`` pairs, one per optimisation step, where
        ``loss`` is the batch MSE as a Python float.
    """
    # per-step loss records for this pass
    losses = []
    # loss function, optimizer and shuffled mini-batch loader
    criterion = nn.MSELoss()
    optimizer = optim.SGD(
        model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay
    )
    train_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=batch_size
    )

    # Put the model in training mode so layers such as dropout or batch norm,
    # if present, behave correctly while optimising.
    model.train()

    # loop through batches
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)

        # calculate loss and backpropagate
        loss = criterion(output, target)
        loss.backward()

        # apply the SGD update
        optimizer.step()

        # save loss, by step
        losses.append([epoch, loss.item()])
    return losses

In [275]:
## debugging section!
# Stand-alone reproduction of the first six optimisation steps, used to inspect
# how the per-batch loss evolves.
# NOTE(review): the raw predictors are fed in unstandardized, which is a likely
# cause of the divergence in the output below - confirm by repeating this on
# standardized features.

# Define the inputs here so the cell runs on a fresh kernel instead of relying
# on names left over from another cell.
dataset = WineData(qual_r_raw)
epoch = 1

model = WiNet()
losses = []
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.0005,momentum=0, weight_decay=0)
train_loader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=32)

for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    losses.append([epoch, loss.item()])
    # stop after six batches (indices 0-5); enough to see the trend
    if batch_idx == 5:
        break
print(losses)  # <----- exploding losses!

[[1, 20.308671951293945], [1, 578.8284301757812], [1, 4474.51220703125], [1, 422421.25], [1, 14276022501376.0], [1, inf]]


In [None]:
# dataset = WineData(qual_r_raw)
# model =  WiNet()
# epoch = 1
# losses = train(model, dataset, epoch)