In [None]:
import numpy as np
import pandas as pd

# Read the wine-quality table. Every column except the last is used as a
# feature; the last column is used as the class label.
data = pd.read_csv('/content/wine_quality.csv')
x = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

# Encode the string labels as integer class ids. The dict is indexed directly,
# so a label that is not one of these three keys raises KeyError.
labels = {'Medium': 0, 'Good': 1, 'Excellent': 2}
y = np.array(list(map(labels.__getitem__, y)))

# Show the feature-matrix and label-vector shapes, one per line.
print(x.shape, y.shape, sep='\n')

(4898, 11)
(4898,)


In [None]:
#Dataset

from torch.utils.data import Dataset, DataLoader
import torch

class WineDataset(Dataset):
  """Map-style dataset pairing feature rows with integer class labels.

  Args:
    x: array-like of features; indexed along its first axis per sample.
    y: array-like of class ids, aligned with ``x`` along the first axis.

  Attributes:
    x: features as a ``torch.float32`` tensor.
    y: labels as a ``torch.long`` tensor.
  """

  def __init__(self, x, y):
    # Convert once up front. Building the tensors inside __getitem__ would
    # re-convert the entire arrays for every single sample that is fetched.
    # torch.as_tensor avoids a copy when the input already has the requested
    # dtype, so the stored tensor may share memory with the input array.
    self.x = torch.as_tensor(x, dtype=torch.float32)
    self.y = torch.as_tensor(y, dtype=torch.long)

  def __len__(self):
    """Return the number of samples (size of the first axis of ``x``)."""
    return self.x.shape[0]

  def __getitem__(self, idx):
    """Return the ``(features, label)`` tensors stored at position ``idx``."""
    return self.x[idx], self.y[idx]

In [None]:
#Model

import torch.nn as nn
import torch.optim as optim

class MyModel(nn.Module):
  """Fully connected classifier: Linear -> ReLU per hidden layer, then Linear.

  Args:
    hid_szs: sequence of hidden-layer widths, in order. An empty sequence
      yields a single linear layer from inputs to logits.
    in_features: number of input features per sample (default 11).
    num_classes: number of output logits per sample (default 3).
  """

  def __init__(self, hid_szs, in_features=11, num_classes=3):
    super().__init__()
    self.hid_szs = hid_szs
    act_func = nn.ReLU()  # stateless, so one instance is shared by all layers
    widths = [in_features, *hid_szs]
    layers = nn.ModuleList()
    # One Linear + ReLU pair per consecutive pair of widths; the ordering is
    # Linear, ReLU, Linear, ReLU, ..., so parameter names are layers.0, layers.2, ...
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
      layers.append(nn.Linear(fan_in, fan_out))
      layers.append(act_func)
    # Final projection to class logits (no activation afterwards).
    layers.append(nn.Linear(widths[-1], num_classes))
    self.layers = layers

  def forward(self, x):
    """Apply every layer in order and return the resulting logits."""
    for layer in self.layers:
      x = layer(x)
    return x

  def train(self, dataset=True, b_sz=300, num_epoch=10):
    """Fit the model on ``dataset``, or toggle training mode.

    This method shadows ``nn.Module.train(mode)``, which ``nn.Module.eval()``
    invokes as ``self.train(False)``. To keep ``model.eval()`` and
    ``model.train()`` working, a boolean first argument is forwarded to the
    base-class implementation.

    Args:
      dataset: a dataset yielding ``(features, label)`` pairs to fit on, or a
        bool to set training mode the way ``nn.Module.train`` does.
      b_sz: mini-batch size.
      num_epoch: number of passes over ``dataset``.

    Returns:
      ``self`` when called with a bool (as ``nn.Module.train`` does);
      ``None`` after fitting.
    """
    if isinstance(dataset, bool):
      return super().train(dataset)

    opt = optim.Adam(self.parameters())
    loss_fn = nn.CrossEntropyLoss()
    loader = DataLoader(dataset, batch_size=b_sz, shuffle=True)

    for epoch in range(num_epoch):
      for i, (bx, by) in enumerate(loader):
        # Clear gradients before the backward pass so nothing accumulated
        # earlier leaks into this update.
        opt.zero_grad()
        loss = loss_fn(self(bx), by)
        loss.backward()
        # Log the first batch of every 10th epoch only.
        if epoch % 10 == 0 and i == 0:
          print('Epoch: %d, iter: %d, Loss: %f' % (epoch, i, loss.item()))
        opt.step()

  def evaluate(self, dataset):
    """Return the fraction of samples in ``dataset`` classified correctly.

    The result is also stored in ``self.accuracy_on_val``; note that the
    attribute is overwritten by every call, whichever split is passed in.

    Raises:
      ZeroDivisionError: if ``dataset`` is empty.
    """
    loader = DataLoader(dataset, batch_size=100, shuffle=False)
    correct = 0
    # Inference only: skip building autograd graphs.
    with torch.no_grad():
      for bx, by in loader:
        pred = torch.argmax(self(bx), dim=1)
        correct += (pred == by).sum().item()
    self.accuracy_on_val = correct / len(dataset)
    return self.accuracy_on_val

# Smoke test: a batch of 100 random 11-feature rows should come out as
# 100 rows of 3 logits.
tmp = torch.rand((100, 11))
model = MyModel(hid_szs=[32, 16, 8])
print(model(tmp).size())


torch.Size([100, 3])


In [None]:
# Split sizes: 60% train, 10% validation, remainder test.
m = x.shape[0]
m_train = int(m*0.6)
m_val = int(m*0.1)
m_test = m-m_train-m_val

def feature_scaling(x, ref=None):
  """Standardize each column of ``x`` to zero mean and unit variance.

  Args:
    x: 2-D array of samples (rows) by features (columns).
    ref: array whose per-column mean and standard deviation are used for the
      scaling. Defaults to ``x`` itself.

  Returns:
    Array shaped like ``x``, scaled column-wise with the statistics of ``ref``.
  """
  if ref is None:
    ref = x
  mean = ref.mean(axis=0)
  std = ref.std(axis=0)
  # A constant column has std == 0; adding the boolean mask turns those zeros
  # into ones so the column maps to 0 instead of NaN/inf.
  std = std + (std == 0)
  return (x - mean) / std

# Take the scaling statistics from the training rows only, so nothing about
# the validation/test rows leaks into preprocessing.
x_scaled = feature_scaling(x, ref=x[:m_train])

ds_train = WineDataset(x_scaled[:m_train], y[:m_train])
ds_val = WineDataset(x_scaled[m_train:m_train+m_val], y[m_train:m_train+m_val])
ds_test = WineDataset(x_scaled[m_train+m_val:], y[m_train+m_val:])

In [None]:
#Training

# Candidate depths and widths. NOTE(review): neither list is referenced by the
# rest of this cell -- confirm whether a later cell uses them.
num_hids = [2, 3, 4]
hid_szs = [25, 50, 100]

# Train a network with hidden widths 64-64-32-16 for 100 epochs, then report
# its accuracy on the validation split.
# NOTE(review): no random seed is set in this cell, so results presumably vary
# between runs -- confirm whether one is set elsewhere.
model = MyModel(hid_szs=[64, 64, 32, 16])
model.train(dataset=ds_train, num_epoch=100)
val_acc = model.evaluate(ds_val)
print('Accuracy on validation set: %f' % val_acc)

Epoch: 0, iter: 0, Loss: 1.080249
Epoch: 10, iter: 0, Loss: 0.863017
Epoch: 20, iter: 0, Loss: 0.799983
Epoch: 30, iter: 0, Loss: 0.725069
Epoch: 40, iter: 0, Loss: 0.695611
Epoch: 50, iter: 0, Loss: 0.650573
Epoch: 60, iter: 0, Loss: 0.664190
Epoch: 70, iter: 0, Loss: 0.584646
Epoch: 80, iter: 0, Loss: 0.593820
Epoch: 90, iter: 0, Loss: 0.610291
Accuracy on validation set: 0.529652


In [None]:
# Report accuracy on the held-out test split, then on the training split for
# comparison.
test_acc = model.evaluate(ds_test)
print('Accuracy on test set: %f' % test_acc)
train_acc = model.evaluate(ds_train)
print('Accuracy on training set: %f' % train_acc)

Accuracy on test set: 0.525493
Accuracy on training set: 0.796801
