In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, TensorDataset, random_split, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt


In [2]:
# Read the two semicolon-delimited wine-quality CSVs and stack them
# (white rows first, then red) into one frame with a fresh 0..n-1 index.
red = pd.read_csv('winequality-red.csv', sep=';')
white = pd.read_csv('winequality-white.csv', sep=';')
wine = pd.concat([white, red], sort=False, ignore_index=True)
wine.shape


(6497, 12)

In [27]:
# Target column for the regression.
output_cols = ['quality']
# Every column except the last one is used as a feature.
# NOTE(review): this assumes 'quality' is the final column of the CSVs -- confirm.
input_cols = wine.columns[:-1].tolist()

In [28]:
def dataframe_to_arrays(dataframe, in_cols=None, out_cols=None):
    """Split a dataframe into NumPy feature and target arrays.

    Args:
        dataframe: The pandas DataFrame to convert. It is not modified.
        in_cols: Column names to use as features. Defaults to the
            notebook-level ``input_cols`` list.
        out_cols: Column names to use as targets. Defaults to the
            notebook-level ``output_cols`` list.

    Returns:
        A tuple ``(inputs_array, targets_array)`` of 2-D NumPy arrays, one row
        per dataframe row.
    """
    if in_cols is None:
        in_cols = input_cols
    if out_cols is None:
        out_cols = output_cols
    # Work on a copy of the frame that was actually passed in (the previous
    # version ignored the argument and always read the global `wine`).
    df = dataframe.copy(deep=True)
    inputs_array = df[list(in_cols)].to_numpy()
    targets_array = df[list(out_cols)].to_numpy()
    return inputs_array, targets_array

# Convert the combined wine frame into NumPy feature / target arrays.
inputs_array, targets_array = dataframe_to_arrays(dataframe=wine)


In [30]:
# Wrap the NumPy arrays as float32 tensors (torch.float) and pair each
# feature row with its target in a single dataset.
inputs = torch.from_numpy(inputs_array).float()
targets = torch.from_numpy(targets_array).float()
dataset = TensorDataset(inputs, targets)

In [31]:
# Hold out a fixed fraction for validation. Sizes are derived from the dataset
# length so the split keeps working if the row count changes
# (6497 rows -> 5198 train / 1299 validation, the same sizes as before).
VAL_FRACTION = 0.2
SPLIT_SEED = 42
val_size = int(len(dataset) * VAL_FRACTION)
train_size = len(dataset) - val_size
# A seeded generator keeps the train/validation membership identical across
# re-runs, so validation rows never drift into the training set.
train_ds, val_ds = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
batch_size = 50
# Only the training loader is shuffled; validation order does not matter.
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
# Layer dimensions used when constructing the model.
input_size = len(input_cols)
output_size = len(output_cols)

`nn.Linear` applies a linear transformation to the input data using learnable weights and a bias. `forward` passes the input batch through this single linear layer and returns its output. The training and validation steps both use `F.l1_loss` as the loss function, which gives the mean absolute error (MAE) between the model output and the target output.

In [20]:
class WineQuality(nn.Module):
    """Single linear layer regressor trained and validated with L1 (MAE) loss.

    The layer dimensions come from the notebook-level ``input_size`` and
    ``output_size`` values.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Return the linear layer's prediction for the batch ``xb``."""
        return self.linear(xb)

    def training_step(self, batch):
        """Return the mean absolute error of the model on ``batch``."""
        features, labels = batch
        return F.l1_loss(self(features), labels)

    def validation_step(self, batch):
        """Return ``{'val_loss': ...}`` holding the detached MAE on ``batch``."""
        features, labels = batch
        mae = F.l1_loss(self(features), labels)
        return {'val_loss': mae.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses into one Python float."""
        stacked = torch.stack([entry['val_loss'] for entry in outputs])
        return {'val_loss': stacked.mean().item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss every 100th epoch and on the final epoch."""
        on_interval = (epoch + 1) % 100 == 0
        is_final = epoch == num_epochs - 1
        if not (on_interval or is_final):
            return
        print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))

In [21]:
# Fresh, untrained model instance that the training cell below optimises.
model = WineQuality()

def evaluate(model, val_loader):
    """Compute the model's aggregated validation result over ``val_loader``.

    Args:
        model: Object exposing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the collected
        per-batch results.
    """
    # Validation never back-propagates, so disable autograd to avoid building
    # a computation graph for every batch.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` passes and record validation results.

    Args:
        epochs: Number of full passes over ``train_loader``.
        lr: Learning rate handed to the optimizer.
        model: Module exposing ``training_step`` and ``epoch_end`` plus the
            validation hooks used by ``evaluate``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        opt_func: Optimizer constructor called as
            ``opt_func(model.parameters(), lr)``.

    Returns:
        A list containing one validation result per epoch, in order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch_idx in range(epochs):
        # One optimisation step per mini-batch; gradients are cleared right
        # after each step so they never accumulate across batches.
        for minibatch in train_loader:
            batch_loss = model.training_step(minibatch)
            batch_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Score the epoch on the validation data and let the model report it.
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch_idx, epoch_result, epochs)
        history.append(epoch_result)
    return history

In [22]:
# Hyper-parameters for the training run; fit() uses its default SGD optimizer.
# NOTE(review): 6497 equals the number of rows in the dataset -- confirm this
# epoch count is intentional rather than a copy of the row count.
lr = 1e-6
epochs = 6497
history5 = fit(
    epochs=epochs,
    lr=lr,
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
)

Epoch [100], val_loss: 2.9461
Epoch [200], val_loss: 2.3374
Epoch [300], val_loss: 1.8395
Epoch [400], val_loss: 1.4804
Epoch [500], val_loss: 1.2376
Epoch [600], val_loss: 1.0865
Epoch [700], val_loss: 0.9853
Epoch [800], val_loss: 0.9167
Epoch [900], val_loss: 0.8703
Epoch [1000], val_loss: 0.8416
Epoch [1100], val_loss: 0.8212
Epoch [1200], val_loss: 0.8052
Epoch [1300], val_loss: 0.7932
Epoch [1400], val_loss: 0.7837
Epoch [1500], val_loss: 0.7768
Epoch [1600], val_loss: 0.7700
Epoch [1700], val_loss: 0.7642
Epoch [1800], val_loss: 0.7594
Epoch [1900], val_loss: 0.7546
Epoch [2000], val_loss: 0.7506
Epoch [2100], val_loss: 0.7467
Epoch [2200], val_loss: 0.7430
Epoch [2300], val_loss: 0.7391
Epoch [2400], val_loss: 0.7358
Epoch [2500], val_loss: 0.7329
Epoch [2600], val_loss: 0.7296
Epoch [2700], val_loss: 0.7259
Epoch [2800], val_loss: 0.7231
Epoch [2900], val_loss: 0.7202
Epoch [3000], val_loss: 0.7176
Epoch [3100], val_loss: 0.7146
Epoch [3200], val_loss: 0.7121
Epoch [3300], val

In [23]:
# Signed prediction error (target - prediction) for every validation sample,
# both overall and grouped by the true quality score.
error = []
error_dict = {float(q): [] for q in range(1, 10)}
# Inference only: no gradients are needed.
with torch.no_grad():
    # Loop names are chosen so the notebook-level `inputs` tensor is not
    # overwritten by a single-sample tensor.
    for features, target in val_ds:
        # The model expects a batch dimension, so add one and strip it again.
        pred = model(features.unsqueeze(0))[0]
        sample_error = target - pred
        error.append(sample_error)
        quality = target.item()
        if quality in error_dict:
            # Store this sample's error (previously the whole running
            # `error` list was appended here by mistake).
            error_dict[quality].append(sample_error)
# Mean signed error; values near zero mean over- and under-predictions cancel out.
mean_error = torch.mean(torch.stack(error))
mean_error

tensor(-0.0285)