In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader,TensorDataset, random_split

import torch.nn as nn
import torch.optim as optim

In [2]:
# Load the semicolon-separated wine-quality table from the notebook's working directory.
dataframe_raw = pd.read_csv(
    "./winequality-white.csv",
    sep=";",
)

In [6]:
def dataframe_to_arrays(dataframe, threshold=5):
    """Split a frame into a float32 feature matrix and one-hot binary targets.

    Every column except the last is treated as an input feature; the last
    column is treated as the score to binarize.

    Args:
        dataframe: pandas DataFrame whose last column holds the integer score.
            The frame itself is never modified.
        threshold: scores ``<= threshold`` become class 0, scores
            ``> threshold`` become class 1. Defaults to 5.

    Returns:
        A tuple ``(inputs_array, encoding)`` where ``inputs_array`` is a
        ``float32`` array of shape ``(n_rows, n_columns - 1)`` and ``encoding``
        is a ``float64`` one-hot array of shape ``(n_rows, 2)``.
    """
    # Use the frame that was passed in rather than a notebook-level global, so
    # the function works on any frame and on a fresh kernel.
    inputs_array = dataframe.iloc[:, :-1].to_numpy(dtype=np.float32)
    scores = dataframe.iloc[:, -1].to_numpy(dtype=int)

    # Building a new array (instead of relabelling in place) means the source
    # frame can never be altered, so no defensive deep copy is needed.
    targets_array = (scores > threshold).astype(int)
    print(targets_array)

    # Row i of the 2x2 identity matrix is the one-hot vector for class i.
    encoding = np.eye(2)[targets_array]

    return inputs_array, encoding
# Convert the raw frame; the second return value is the one-hot target matrix.
inputs_array, targets_array = dataframe_to_arrays(dataframe=dataframe_raw)
# Show both shapes as the cell result.
(inputs_array.shape, targets_array.shape)

[1 1 1 ... 1 1 1]


((4898, 11), (4898, 2))

In [7]:
# Tunable settings for the split and the loaders.
TRAIN_FRACTION = 0.8  # share of samples used for training; the rest is validation
BATCH_SIZE = 64
SPLIT_SEED = 42

inputs = torch.from_numpy(inputs_array)
targets = torch.from_numpy(targets_array)

dataset = TensorDataset(inputs, targets)
num_samples = inputs.shape[0]
num_train = int(num_samples * TRAIN_FRACTION)
num_val = num_samples - num_train

# Seed the split with its own generator so the same rows land in the
# validation set on every run; otherwise accuracies from different runs are
# measured on different data and cannot be compared.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_ds, val_ds = random_split(dataset, [num_train, num_val], generator=split_generator)

# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

In [19]:
def _count_correct(model, val_loader, device=None):
    """Return ``(correct, total)`` predictions of ``model`` over ``val_loader``."""
    # Remember the current mode so the caller's train/eval state is restored.
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for batch_inputs, batch_labels in val_loader:
                if device is not None:
                    batch_inputs = batch_inputs.to(device)
                    batch_labels = batch_labels.to(device)
                predicted = model(batch_inputs).argmax(dim=1)
                # 1-D labels are taken as class indices; 2-D labels are taken
                # as one row per sample whose largest entry marks the class.
                if batch_labels.dim() == 1:
                    expected = batch_labels
                else:
                    expected = batch_labels.argmax(dim=1)
                correct += int((predicted == expected).sum())
                total += batch_labels.shape[0]
    finally:
        model.train(was_training)
    return correct, total


def training_loop(model, optimizer, loss_fn, n_epochs, train_loader, val_loader, device=None):
    """Train ``model`` and periodically print validation accuracy.

    Args:
        model: ``torch.nn.Module`` mapping a batch of inputs to per-class scores.
        optimizer: optimizer already bound to ``model``'s parameters.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        n_epochs: number of passes over ``train_loader``.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        device: optional device each batch is moved to before the forward
            pass; ``None`` (the default) leaves batches where they are.

    Returns:
        None. Progress is printed for epochs 0, 1, 2 and every 100th epoch.
        The reported loss is that of the last training batch of the epoch,
        not an epoch average.
    """
    for epoch in range(n_epochs):
        loss = None
        for batch_inputs, batch_labels in train_loader:
            if device is not None:
                batch_inputs = batch_inputs.to(device)
                batch_labels = batch_labels.to(device)
            outputs = model(batch_inputs)
            loss = loss_fn(outputs, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch <= 2 or epoch % 100 == 0:
            correct, total = _count_correct(model, val_loader, device)
            # Report nan instead of crashing when a loader yielded no batches.
            last_loss = float(loss) if loss is not None else float("nan")
            accuracy_pct = 100.0 * correct / total if total else float("nan")
            print(f"Epoch: {epoch} Loss: {last_loss:4f}  accuracy: {correct}/{total} = {accuracy_pct:4.1f}%")
        

In [20]:

# Fully connected classifier: 11 input features -> 64 -> 64 -> 2 class scores.
# The model is left on the default device.
model = nn.Sequential(
    nn.Linear(in_features=11, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=2),
)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
training_loop(
    model,
    optimizer,
    loss_fn,
    n_epochs=500,
    train_loader=train_loader,
    val_loader=val_loader,
)

Epoch: 0 Loss: 0.543963  accuracy: 652/980 = 66.5%
Epoch: 1 Loss: 0.156139  accuracy: 650/980 = 66.3%
Epoch: 2 Loss: 0.754717  accuracy: 646/980 = 65.9%
Epoch: 100 Loss: 0.509321  accuracy: 729/980 = 74.4%
Epoch: 200 Loss: 0.547864  accuracy: 737/980 = 75.2%
Epoch: 300 Loss: 0.610639  accuracy: 731/980 = 74.6%
Epoch: 400 Loss: 0.468597  accuracy: 742/980 = 75.7%


In [None]:
# https://pkm294.tistory.com/45?category=997016

In [21]:
# Keep training the same (already trained) model, this time with plain SGD.
optimizer = optim.SGD(model.parameters(), lr=1e-5)
training_loop(
    model,
    optimizer,
    loss_fn,
    n_epochs=500,
    train_loader=train_loader,
    val_loader=val_loader,
)

Epoch: 0 Loss: 0.281384  accuracy: 744/980 = 75.9%
Epoch: 1 Loss: 0.256302  accuracy: 751/980 = 76.6%
Epoch: 2 Loss: 0.367107  accuracy: 752/980 = 76.7%
Epoch: 100 Loss: 0.499860  accuracy: 753/980 = 76.8%
Epoch: 200 Loss: 0.738450  accuracy: 752/980 = 76.7%
Epoch: 300 Loss: 0.539132  accuracy: 752/980 = 76.7%
Epoch: 400 Loss: 0.557846  accuracy: 752/980 = 76.7%
