In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader,TensorDataset, random_split

import torch.nn as nn
import torch.optim as optim

In [2]:
# Load the semicolon-separated white-wine quality table from the working directory.
dataframe_raw = pd.read_csv(
    "./winequality-white.csv",
    sep=";",
)

In [57]:
def dataframe_to_arrays(dataframe, min_label=3, num_classes=7):
    """Split a frame into a float32 feature matrix and one-hot encoded targets.

    The last column of ``dataframe`` is treated as the integer class label and
    every other column as an input feature.

    Args:
        dataframe: pandas DataFrame whose final column holds integer labels.
        min_label: Label value mapped to class index 0. Defaults to 3, the
            offset that was previously hard-coded.
        num_classes: Width of the one-hot encoding. Defaults to 7, the size
            that was previously hard-coded.

    Returns:
        A tuple ``(inputs_array, encoding)`` where ``inputs_array`` is a
        float32 array of shape ``(n_rows, n_columns - 1)`` and ``encoding`` is
        a float64 one-hot array of shape ``(n_rows, num_classes)``.

    Raises:
        ValueError: If any label lies outside
            ``[min_label, min_label + num_classes - 1]``.
    """
    # Read from the argument itself. The earlier version copied the global
    # ``dataframe_raw`` here, so whatever frame was passed in was ignored.
    # ``copy=True`` keeps the result independent of the caller's frame.
    inputs_array = dataframe.iloc[:, :-1].to_numpy(dtype=np.float32, copy=True)
    class_indices = dataframe.iloc[:, -1].to_numpy(dtype=int) - min_label

    # Fancy indexing into np.eye wraps negative indices around, which would
    # silently assign the wrong class, so reject out-of-range labels up front.
    if class_indices.size and (
        class_indices.min() < 0 or class_indices.max() >= num_classes
    ):
        raise ValueError(
            f"labels must lie in [{min_label}, {min_label + num_classes - 1}]"
        )

    encoding = np.eye(num_classes)[class_indices]
    return inputs_array, encoding
# Convert the raw frame once; later cells reuse both arrays.
inputs_array, targets_array = dataframe_to_arrays(dataframe=dataframe_raw)
# Show both shapes as the cell output.
tuple(array.shape for array in (inputs_array, targets_array))

((4898, 11), (4898, 7))

In [58]:
# Tunable settings for the split and the loaders.
TRAIN_FRACTION = 0.8
BATCH_SIZE = 50
SPLIT_SEED = 42

# Wrap the NumPy arrays as tensors (from_numpy keeps each array's dtype).
inputs = torch.from_numpy(inputs_array)
targets = torch.from_numpy(targets_array)
dataset = TensorDataset(inputs, targets)

# Hold out the remainder after taking TRAIN_FRACTION of the rows for training.
num_samples = inputs.shape[0]
num_train = int(num_samples * TRAIN_FRACTION)
num_val = num_samples - num_train

# Seed the split so the same rows land in the validation set on every run;
# without a generator the split changes each time the cell is executed.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_ds, val_ds = random_split(dataset, [num_train, num_val], generator=split_generator)

# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

In [70]:
def _evaluate_accuracy(model, data_loader, device):
    """Return ``(correct, total)`` prediction counts for ``model`` over ``data_loader``."""
    was_training = model.training
    model.eval()  # disable train-only behaviour (dropout, batch-norm updates)
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for inputs, labels in data_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                _, predicted = torch.max(model(inputs), dim=1)
                # One-hot (2-D) labels are reduced to class indices; 1-D labels
                # are taken to be class indices already.
                true_classes = labels.argmax(dim=1) if labels.dim() > 1 else labels
                # One vectorised comparison per batch instead of a Python loop
                # that converts every element of a device tensor to int.
                correct += int((predicted == true_classes).sum())
                total += labels.shape[0]
    finally:
        model.train(was_training)  # hand the model back in its previous mode
    return correct, total


def training_loop(model, optimizer, loss_fn, n_epochs, train_loader, val_loader):
    """Train ``model`` and periodically print its validation accuracy.

    Batches are moved to the device the model's parameters live on, so the
    loop works for CPU and GPU models alike.

    Args:
        model: ``torch.nn.Module`` producing one score per class for each row.
        optimizer: Optimizer built over ``model.parameters()``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        n_epochs: Number of passes over ``train_loader``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches; labels
            may be one-hot rows or class indices.

    Returns:
        None. For epochs 0, 1, 2 and every 100th epoch a line is printed with
        the loss of the last training mini-batch of that epoch (not an epoch
        average) and the validation accuracy.
    """
    # Follow the model's own device instead of assuming a GPU is present.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(n_epochs):
        loss = float("nan")  # remains NaN only when train_loader yields no batches
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch <= 2 or epoch % 100 == 0:
            correct, total = _evaluate_accuracy(model, val_loader, device)
            # Guard against an empty validation loader.
            accuracy = 100.0 * correct / total if total else float("nan")
            print(f"Epoch: {epoch} Loss: {float(loss)}  accuracy: {correct}/{total} = {accuracy:4.1f}%")
        

In [72]:

# Training settings for this run.
LEARNING_RATE = 1e-4
N_EPOCHS = 2000

# Select the device at runtime rather than hard-coding 'cuda'.
# (`training_loop` moves every batch to a device itself, so it has to agree
# with the device chosen here.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fully connected classifier: 11 input features, three hidden layers of 100
# ReLU units, and 7 output scores (one per class).
model = nn.Sequential(
    nn.Linear(11, 100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 7),
).to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

# NOTE(review): in the recorded run below, validation accuracy stays around
# 46% through epoch 1300. Unscaled input features and the small learning rate
# are plausible causes -- TODO confirm before relying on these results.
training_loop(model, optimizer, loss_fn, N_EPOCHS, train_loader, val_loader)

Epoch: 0 Loss: 1.547162695063485  accuracy: 456/980 = 46.5%
Epoch: 1 Loss: 1.0841140117910173  accuracy: 440/980 = 44.9%
Epoch: 2 Loss: 1.1337741679615443  accuracy: 453/980 = 46.2%
Epoch: 100 Loss: 1.2473480701446533  accuracy: 443/980 = 45.2%
Epoch: 200 Loss: 1.4056227670775518  accuracy: 448/980 = 45.7%
Epoch: 300 Loss: 1.9363383187188041  accuracy: 459/980 = 46.8%
Epoch: 400 Loss: 1.0530235601796043  accuracy: 455/980 = 46.4%
Epoch: 500 Loss: 0.953823255168067  accuracy: 450/980 = 45.9%
Epoch: 600 Loss: 1.137210930387179  accuracy: 448/980 = 45.7%
Epoch: 700 Loss: 1.3084136810567644  accuracy: 456/980 = 46.5%
Epoch: 800 Loss: 1.1865714655982122  accuracy: 454/980 = 46.3%
Epoch: 900 Loss: 1.3573980463875663  accuracy: 458/980 = 46.7%
Epoch: 1000 Loss: 1.1618153088622623  accuracy: 453/980 = 46.2%
Epoch: 1100 Loss: 1.159296327167087  accuracy: 458/980 = 46.7%
Epoch: 1200 Loss: 1.4308652944034999  accuracy: 461/980 = 47.0%
Epoch: 1300 Loss: 1.1232680214775932  accuracy: 456/980 = 46.5

In [None]:
# https://pkm294.tistory.com/45?category=997016