In [3]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import scipy.stats as stats

pd.set_option("mode.chained_assignment", None)
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline

backend_inline.set_matplotlib_formats("svg")

# Uciml repo
from ucimlrepo import fetch_ucirepo

# Pytorch device specific configuration
# Pytorch Gpu Configuration for cuda
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pytorch Gpu Configuration for dml devices
import torch_directml

device = torch_directml.device()

# Set default devices
torch.set_default_device(device)

In [None]:
# import the data
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
raw_data = pd.read_csv(url, sep=";")
raw_data = raw_data[raw_data["total sulfur dioxide"] < 200]  # drop a few outliers

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [None]:
# Data pre-processing
output_labels = raw_data["quality"].values[0:10]
input_labels = raw_data["quality"].values[10:]

cols2zscore = raw_data.keys()
cols2zscore = cols2zscore.drop("quality")


keys = raw_data.keys()
for key in keys:
    raw_data[key] = raw_data[key].fillna(raw_data[key].mean())

data = raw_data[cols2zscore].apply(stats.zscore)
output_data = data.values[0:10]
input_data = data.values[10:]

In [32]:
# convert data into tensors
train_data = torch.tensor(input_data).float()
train_labels = torch.tensor(input_labels).float()

test_data = torch.tensor(output_data).float()
test_labels = torch.tensor(output_labels).float()


# Pytorch datasets
train_dataT = TensorDataset(train_data, train_labels)
test_dataT = TensorDataset(test_data, test_labels)

# Lead data into data loaders
batchSize = 16
train_loader = DataLoader(
    train_dataT,
    batch_size=batchSize,
    shuffle=True,
    drop_last=True,
    generator=torch.Generator(device),
)
test_loader = DataLoader(
    test_dataT,
    batch_size=test_dataT.tensors[0].shape[0],
    generator=torch.Generator(device),
)

In [34]:
def createTheModel():
    class WineQualityModel(nn.Module):
        def __init__(self):
            super().__init__()
            # Input Layer
            self.input = nn.Linear(11, 64)
            # Hidden Layers
            self.hidden1 = nn.Linear(64, 128)
            self.hidden2 = nn.Linear(128, 128)
            self.hidden3 = nn.Linear(128, 16)
            # Output Layers
            self.output = nn.Linear(16, 1)

        def forward(self, x):
            x = F.relu(self.input(x))
            x = F.relu(self.hidden1(x))
            x = F.relu(self.hidden2(x))
            x = F.relu(self.hidden3(x))
            return self.output(x)

    net = WineQualityModel()

    # Loss Function
    lossFun = nn.MSELoss()

    # Optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

    return net, lossFun, optimizer

In [35]:
# Test the model
net, lossFun, optimizer = createTheModel()
tempData = torch.tensor(np.random.randn(100, 11)).float()
print(net(tempData))

tensor([[0.1555],
        [0.1611],
        [0.1619],
        [0.1566],
        [0.1611],
        [0.1609],
        [0.1732],
        [0.1617],
        [0.1638],
        [0.1717],
        [0.1711],
        [0.1667],
        [0.1635],
        [0.1510],
        [0.1717],
        [0.1555],
        [0.1624],
        [0.1634],
        [0.1599],
        [0.1598],
        [0.1525],
        [0.1687],
        [0.1649],
        [0.1755],
        [0.1635],
        [0.1680],
        [0.1689],
        [0.1635],
        [0.1636],
        [0.1797],
        [0.1597],
        [0.1700],
        [0.1707],
        [0.1652],
        [0.1587],
        [0.1618],
        [0.1638],
        [0.1529],
        [0.1640],
        [0.1590],
        [0.1584],
        [0.1726],
        [0.1621],
        [0.1720],
        [0.1673],
        [0.1618],
        [0.1664],
        [0.1690],
        [0.1588],
        [0.1581],
        [0.1688],
        [0.1537],
        [0.1569],
        [0.1670],
        [0.1674],
        [0

In [None]:
def trainTheModel(nEpochs):
    # Initialize variables
    trainAcc = np.zeros(nEpochs)
    losses = np.zeros(nEpochs)

    # The loop
    for epoch in range(nEpochs):
        # Model training mode
        net.train()

        # Batch loss and accuracy initialization
        batchLoss = []
        batchAcc = []

        # The batch loop
        for X, y in train_loader:

            # Forward pass
            yHat = net(X)
            loss = lossFun(yHat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Batch Loss and accuracy
            batchLoss.append(loss.cpu().item())
            batchAcc.append(
                (100 * torch.mean((torch.abs(yHat - y) < 0.5).float())).cpu()
            )

        # Batch loop end

        # Train Acc
        trainAcc[epoch] = np.mean(batchAcc)
        losses[epoch] = np.mean(batchLoss)

    # The Loop End

    return trainAcc, losses