In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader,TensorDataset, random_split

import torch.nn as nn
import torch.optim as optim

In [2]:
# Load the white-wine quality CSV; the file is read with ';' as the field delimiter.
dataframe_raw = pd.read_csv("./winequality-white.csv",delimiter=";")
# Bare expression so the notebook renders the frame as this cell's output.
dataframe_raw

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [3]:
def dataframe_to_arrays(dataframe):
    """Split a DataFrame into a float32 feature matrix and one-hot binary targets.

    Every column except the last is treated as an input feature; the last
    column is the target. Target values <= 5 are mapped to class 0 and values
    > 5 to class 1, and the class labels are then one-hot encoded.

    The passed-in ``dataframe`` is the only data source (no notebook globals
    are read) and it is not modified.

    Args:
        dataframe: pandas DataFrame whose feature columns are convertible to
            float32 and whose last column is convertible to int.

    Returns:
        A tuple ``(inputs_array, encoding)`` where ``inputs_array`` is a
        float32 array of shape (n_rows, n_columns - 1) and ``encoding`` is a
        one-hot array of shape (n_rows, 2) as produced by ``np.eye(2)``.
    """
    # Positional selection: everything but the last column is a feature.
    inputs_array = dataframe.iloc[:, :-1].to_numpy(dtype=np.float32)
    raw_targets = dataframe.iloc[:, -1].to_numpy(dtype=int)

    # A single comparison builds a fresh label array, so the caller's frame
    # is never written to and no defensive copy is needed.
    targets_array = (raw_targets > 5).astype(int)

    # Kept from the original cell: echo the class labels for a quick visual check.
    print(targets_array)

    # Row i of the identity matrix is the one-hot vector for class i.
    encoding = np.eye(2)[targets_array]

    return inputs_array, encoding
# Convert the raw frame into a feature array and a one-hot target array.
inputs_array, targets_array = dataframe_to_arrays(dataframe_raw)
# Bare tuple expression: shows both array shapes as the cell output.
inputs_array.shape, targets_array.shape

[1 1 1 ... 1 1 1]


((4898, 11), (4898, 2))

In [18]:
# Wrap the NumPy arrays as tensors and pair each feature row with its target row.
inputs = torch.from_numpy(inputs_array)
targets = torch.from_numpy(targets_array)
dataset = TensorDataset(inputs, targets)

# 80/20 train/validation split; the validation set takes the remainder so the
# two sizes always add up to the full dataset.
num_samples = inputs.shape[0]
num_train = int(num_samples * 0.8)
num_val = num_samples - num_train

# Seed the split explicitly so the same rows land in train/validation on every
# run; without a generator the split depends on the global RNG state.
split_generator = torch.Generator().manual_seed(42)
train_ds, val_ds = random_split(dataset, [num_train, num_val], generator=split_generator)

# Only the training loader shuffles; validation order does not matter.
train_loader = DataLoader(train_ds, batch_size=256, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=256)

In [23]:
def training_loop(model, optimizer, loss_fn, n_epochs, train_loader, val_loader, device=None):
    """Train ``model`` and periodically print validation accuracy.

    Runs ``n_epochs`` passes over ``train_loader``. On epochs 0, 1, 2 and on
    every 10th epoch it evaluates on ``val_loader`` and prints one progress
    line. The loss shown on that line is the loss of the last training batch
    of the epoch, not an epoch average.

    Args:
        model: torch module to train.
        optimizer: optimizer bound to ``model``'s parameters.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        n_epochs: number of training epochs.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        device: device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so a
            model placed on CUDA behaves as before and a CPU model also works.

    Returns:
        None. Progress is reported via ``print`` only.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(n_epochs):
        loss = None  # stays None when train_loader yields no batches
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch <= 2 or epoch % 10 == 0:
            correct, total = _count_correct(model, val_loader, device)
            # NaN instead of NameError / ZeroDivisionError for empty loaders.
            last_loss = loss.item() if loss is not None else float('nan')
            accuracy = 100.0 * correct / total if total else float('nan')
            print(f"Epoch: {epoch:4d} Loss: {last_loss:4f}  accuracy: {correct}/{total} = {accuracy:4.1f}%")


def _count_correct(model, loader, device):
    """Return ``(correct, total)`` prediction counts for ``model`` over ``loader``.

    Labels may be one-hot rows (2-D, as produced elsewhere in this notebook)
    or plain class indices (1-D). For one-hot rows the true class is taken as
    the position of the largest entry.
    """
    correct = 0
    total = 0
    was_training = model.training
    model.eval()  # evaluate in inference mode, then restore the caller's mode
    try:
        with torch.no_grad():
            for inputs, labels in loader:
                inputs, labels = inputs.to(device), labels.to(device)
                _, predicted = torch.max(model(inputs), dim=1)
                true_classes = labels.argmax(dim=1) if labels.dim() > 1 else labels
                # One vectorised comparison per batch instead of a per-sample
                # Python loop.
                correct += int((predicted == true_classes).sum())
                total += labels.shape[0]
    finally:
        model.train(was_training)
    return correct, total
        

In [25]:

# Seed the global RNG before the layers are created so weight initialisation
# (and the shuffled batch order drawn during training) repeats across runs.
torch.manual_seed(0)

# Small fully connected classifier: 11 input features -> two hidden layers of
# 64 units with ReLU -> 2 output logits (one per class).
model = nn.Sequential(
    nn.Linear(11, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 2)
).to('cuda')
optimizer = optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()
# Train for 500 epochs; progress lines are printed by training_loop.
training_loop(model, optimizer, loss_fn, 500, train_loader, val_loader)

Epoch:    0 Loss: 0.790757  accuracy: 464/980 = 47.3%
Epoch:    1 Loss: 0.773850  accuracy: 558/980 = 56.9%
Epoch:    2 Loss: 0.741404  accuracy: 453/980 = 46.2%
Epoch:   10 Loss: 0.607320  accuracy: 617/980 = 63.0%
Epoch:   20 Loss: 0.541687  accuracy: 614/980 = 62.7%
Epoch:   30 Loss: 0.560864  accuracy: 628/980 = 64.1%
Epoch:   40 Loss: 0.554002  accuracy: 625/980 = 63.8%
Epoch:   50 Loss: 0.562459  accuracy: 650/980 = 66.3%
Epoch:   60 Loss: 0.556414  accuracy: 642/980 = 65.5%
Epoch:   70 Loss: 0.628238  accuracy: 625/980 = 63.8%
Epoch:   80 Loss: 0.510828  accuracy: 639/980 = 65.2%
Epoch:   90 Loss: 0.466213  accuracy: 631/980 = 64.4%
Epoch:  100 Loss: 0.496366  accuracy: 632/980 = 64.5%
Epoch:  110 Loss: 0.456326  accuracy: 648/980 = 66.1%
Epoch:  120 Loss: 0.584667  accuracy: 648/980 = 66.1%
Epoch:  130 Loss: 0.469737  accuracy: 654/980 = 66.7%
Epoch:  140 Loss: 0.594329  accuracy: 665/980 = 67.9%
Epoch:  150 Loss: 0.495311  accuracy: 668/980 = 68.2%
Epoch:  160 Loss: 0.509183  

In [None]:
# https://pkm294.tistory.com/45?category=997016

In [7]:
# Continue training the existing `model` (it is not re-created in this cell)
# with plain SGD instead of the previous optimizer; `loss_fn` and both data
# loaders are reused as-is.
# NOTE(review): this cell's execution count (7) is lower than the cells above
# and its recorded output logs every 100 epochs, so the saved output appears
# to come from an earlier version of training_loop - re-run to confirm.
optimizer = optim.SGD(model.parameters(), lr = 1e-4)
training_loop(model, optimizer, loss_fn,500, train_loader, val_loader)

Epoch: 0 Loss: 0.393201  accuracy: 749/980 = 76.4%
Epoch: 1 Loss: 0.412156  accuracy: 761/980 = 77.7%
Epoch: 2 Loss: 0.319431  accuracy: 757/980 = 77.2%
Epoch: 100 Loss: 0.434837  accuracy: 756/980 = 77.1%
Epoch: 200 Loss: 0.503171  accuracy: 751/980 = 76.6%
Epoch: 300 Loss: 0.440710  accuracy: 756/980 = 77.1%
Epoch: 400 Loss: 0.448246  accuracy: 759/980 = 77.4%
