In [29]:
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
path = '../data/deep-learning-with-python-data/tabular-wine/winequality-white.csv'
df = pd.read_csv(path, sep=';')
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [3]:
y = df['quality']
x = df.loc[:, :'alcohol']

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2)

In [25]:
x_train_t = torch.from_numpy(x_train.to_numpy()).float()
x_test_t = torch.from_numpy(x_test.to_numpy()).float()
x_val_t = torch.from_numpy(x_val.to_numpy()).float()
y_train_t = torch.from_numpy(y_train.to_numpy()).float()
y_test_t = torch.from_numpy(y_test.to_numpy()).float()
y_val_t = torch.from_numpy(y_val.to_numpy()).float()
y_train_t = y_train_t.view(-1, 1).contiguous()
y_test_t = y_test_t.view(-1, 1).contiguous()
y_val_t = y_val_t.view(-1, 1).contiguous()

In [26]:
def training_loop(epochs, optimizer, model, loss_fn, x_train_t, x_val_t, y_train_t, y_val_t):
    for epoch in range(1, epochs + 1):
        pred_x_train = model(x_train_t)
        loss_train = loss_fn(pred_x_train, y_train_t)
        
        pred_x_val = model(x_val_t)
        loss_val = loss_fn(pred_x_val, y_val_t)
        
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
        
        if epoch == 1 or epoch % 1000 == 0:
            print(f'Epoch {epoch}, Training loss {loss_train.item():.4f},'
                  f' Validation loss {loss_val.item():.4f}')

In [23]:
seq_model = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(x_train_t.shape[1], 8)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(8, 1))
]))

seq_model

Sequential(
  (hidden_linear): Linear(in_features=11, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

In [85]:
training_loop(
    epochs=10000,
    optimizer=optim.Adam(seq_model.parameters(), lr=1e-3),
    model=seq_model,
    loss_fn=nn.MSELoss(),
    x_train_t=x_train_t,
    x_val_t=x_val_t,
    y_train_t=y_train_t,
    y_val_t=y_val_t
)

print('ouptut', seq_model(x_val_t))
print('answer', y_val_t)
print('hidden', seq_model.hidden_linear.weight.grad)

Epoch 1, Training loss 0.5229, Validation loss 0.5575
Epoch 1000, Training loss 0.5154, Validation loss 0.5500
Epoch 2000, Training loss 0.5130, Validation loss 0.5512
Epoch 3000, Training loss 0.5123, Validation loss 0.5516
Epoch 4000, Training loss 0.5119, Validation loss 0.5517
Epoch 5000, Training loss 0.5117, Validation loss 0.5515
Epoch 6000, Training loss 0.5114, Validation loss 0.5519
Epoch 7000, Training loss 0.5112, Validation loss 0.5520
Epoch 8000, Training loss 0.5111, Validation loss 0.5521
Epoch 9000, Training loss 0.5110, Validation loss 0.5522
Epoch 10000, Training loss 0.5109, Validation loss 0.5523
ouptut tensor([[5.6080],
        [5.4785],
        [5.8027],
        [6.3334],
        [4.7505],
        [5.4538],
        [6.1909],
        [6.3649],
        [6.3143],
        [5.5572],
        [5.6195],
        [6.5317],
        [6.5403],
        [6.4749],
        [5.7204],
        [5.3556],
        [5.3414],
        [5.6732],
        [5.6920],
        [5.7796],
        

In [86]:
def is_close(row):
    if abs(row['predicted'] - row['actual']) <= 0.5:
        row['accurate'] = True
    else:
        row['accurate'] = False
    return row

In [87]:
predicted = pd.DataFrame(seq_model(x_test_t).detach().numpy()).rename({0:'predicted'}, axis=1)
actual = pd.DataFrame(y_test_t.detach().numpy()).rename({0:'actual'}, axis=1)
predicted['actual'] = actual['actual']
predicted = predicted.apply(is_close, axis=1)
predicted

Unnamed: 0,predicted,actual,accurate
0,5.499261,5.0,1.0
1,6.116472,5.0,0.0
2,5.878850,5.0,0.0
3,6.259006,5.0,0.0
4,5.518592,5.0,0.0
...,...,...,...
975,4.436187,4.0,1.0
976,5.634325,6.0,1.0
977,6.386552,6.0,1.0
978,5.972862,6.0,1.0


In [88]:
predicted[predicted['accurate'] == 1.0]

Unnamed: 0,predicted,actual,accurate
0,5.499261,5.0,1.0
5,5.263652,5.0,1.0
7,5.881484,6.0,1.0
8,5.334069,5.0,1.0
9,5.425977,5.0,1.0
...,...,...,...
975,4.436187,4.0,1.0
976,5.634325,6.0,1.0
977,6.386552,6.0,1.0
978,5.972862,6.0,1.0


In [89]:
510/980

0.5204081632653061