In [None]:
import numpy as np
import pandas as pd

# Load the raw table; every column except the last is used as a feature and
# the last column holds the textual quality label.
data = pd.read_csv('/content/wine_quality.csv')
x = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

# Encode the three label strings as integer class ids. A label that is not
# listed here raises KeyError.
labels = dict(Medium=0, Good=1, Excellent=2)
y = np.array([labels[name] for name in y])

# Show the feature-matrix and label-vector shapes, one per line.
print(x.shape, y.shape, sep='\n')

(4898, 11)
(4898,)


In [None]:
#Dataset

from torch.utils.data import Dataset, DataLoader
import torch

class WineDataset(Dataset):
  """Map-style dataset pairing a feature matrix with integer class labels.

  The inputs are converted to tensors once at construction time, so fetching
  a sample is a cheap index operation instead of a full re-conversion of the
  whole array on every access.

  Args:
    x: array-like of features, indexed by sample along the first axis.
    y: array-like of integer labels with the same number of samples as ``x``.

  Raises:
    ValueError: if ``x`` and ``y`` do not contain the same number of samples.
  """

  def __init__(self, x, y):
    # Keep the raw inputs available under their original attribute names.
    self.x = x
    self.y = y
    # Convert once: features as float32, labels as int64 (the dtype
    # CrossEntropyLoss expects for class indices). as_tensor avoids a copy
    # when the input already has the requested dtype.
    self._tx = torch.as_tensor(x, dtype=torch.float32)
    self._ty = torch.as_tensor(y, dtype=torch.long)
    if self._tx.shape[0] != self._ty.shape[0]:
      raise ValueError(
          'x and y must have the same number of samples, got %d and %d'
          % (self._tx.shape[0], self._ty.shape[0]))

  def __len__(self):
    """Return the number of samples."""
    return self._tx.shape[0]

  def __getitem__(self, idx):
    """Return the ``(features, label)`` tensors stored at position ``idx``."""
    return self._tx[idx], self._ty[idx]

In [None]:
#Model

import torch.nn as nn
import torch.optim as optim

class MyModel(nn.Module):
  """Fully connected classifier: 11 input features -> 3 class logits.

  The network has ``num_hid`` hidden layers of width ``hid_sz`` with a
  Sigmoid after each one, followed by a linear output layer.
  Values of ``num_hid`` below 1 behave like 1.

  Args:
    num_hid: number of hidden layers.
    hid_sz: number of units in every hidden layer.

  Attributes:
    accuracy_on_val: accuracy returned by the most recent ``evaluate`` call
      (``-1`` until ``evaluate`` has been called).
  """

  def __init__(self, num_hid, hid_sz):
    # Initialise nn.Module first so attribute/module registration is set up
    # before anything is assigned on self.
    super().__init__()
    self.num_hid = num_hid
    self.hid_sz = hid_sz
    self.accuracy_on_val = -1
    layers = [nn.Linear(11, hid_sz)] #input layer
    for _ in range(num_hid-1):
      layers.append(nn.Sigmoid())
      layers.append(nn.Linear(hid_sz, hid_sz))
    layers.append(nn.Sigmoid())
    layers.append(nn.Linear(hid_sz, 3)) #output layer
    self.layers = nn.ModuleList(layers)

  def forward(self, x):
    """Apply every layer in order and return the raw class logits."""
    for layer in self.layers:
      x = layer(x)
    return x

  def train(self, dataset=True, b_sz=300, num_epoch=10):
    """Fit the model on ``dataset``, or switch training mode.

    This method shares its name with ``nn.Module.train(mode)``, which
    ``nn.Module.eval()`` calls internally as ``self.train(False)``. To keep
    ``model.eval()`` / ``model.train()`` working, a boolean first argument is
    forwarded to the stock implementation.

    Args:
      dataset: dataset yielding ``(features, label)`` pairs to fit on, or a
        bool to set the module's training mode.
      b_sz: mini-batch size.
      num_epoch: number of passes over ``dataset``.

    Returns:
      ``self`` when called with a bool (as ``nn.Module.train`` does),
      otherwise ``None``.
    """
    if isinstance(dataset, bool):
      return super().train(dataset)

    super().train(True)
    opt = optim.Adam(self.parameters())
    loss_fn = nn.CrossEntropyLoss()
    loader = DataLoader(dataset, batch_size=b_sz, shuffle=True)

    for epoch in range(num_epoch):
      for i, (bx, by) in enumerate(loader):
        output = self(bx)
        loss = loss_fn(output, by)
        loss.backward()
        # Log the first batch's loss on every 10th epoch.
        if epoch%10==0 and i==0:
          print('Epoch: %d, iter: %d, Loss: %f' % (epoch, i, loss.item()))

        opt.step()
        opt.zero_grad()

  def evaluate(self, dataset):
    """Return the fraction of samples in ``dataset`` classified correctly.

    The result is also stored in ``self.accuracy_on_val``, whichever dataset
    was passed, so each call overwrites the previous value.
    """
    loader = DataLoader(dataset, batch_size=100, shuffle=False)
    correct = 0
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
      for bx, by in loader:
        pred = torch.argmax(self(bx), dim=1)
        correct += (pred==by).sum().item()
    self.accuracy_on_val = correct/len(dataset)
    return self.accuracy_on_val

# Shape check: a random batch of 100 samples with 11 features each is pushed
# through a freshly built model and the output shape is printed.
tmp = torch.rand(100, 11)
model = MyModel(3, 50)
out = model(tmp)
print(out.shape)


torch.Size([100, 3])


In [None]:
# Split sizes: 60% training, 10% validation, the remainder for testing.
m = x.shape[0]
m_train, m_val = int(m*0.6), int(m*0.1)
m_test = m - (m_train + m_val)

# NOTE(review): rows are split in their existing order with no shuffling;
# confirm the CSV is not sorted by label or by any feature.
val_end = m_train + m_val
ds_train = WineDataset(x[:m_train], y[:m_train])
ds_val = WineDataset(x[m_train:val_end], y[m_train:val_end])
ds_test = WineDataset(x[val_end:], y[val_end:])

In [None]:
#Training

#Training

# Hyperparameter grid: number of hidden layers x hidden layer width.
num_hids = [2, 3, 4]
hid_szs = [25, 50, 100]

# Every (depth, width) combination, with depth varying slowest.
grid = [(depth, width) for depth in num_hids for width in hid_szs]

# Train one model per combination and keep them in grid order.
models = []
for num_hid, hid_sz in grid:
  print('\n\nTraining model with num_hid=%d, hid_sz=%d' % (num_hid, hid_sz))
  model = MyModel(num_hid, hid_sz)
  model.train(ds_train, num_epoch=50)
  models.append(model)

# Score each trained model on the validation split; evaluate() also stores
# the score on the model as accuracy_on_val.
for model in models:
  val_acc = model.evaluate(ds_val)
  print('Accuracy on validation set: %f' % val_acc)



Training model with num_hid=2, hid_sz=25
Epoch: 0, iter: 0, Loss: 1.205536
Epoch: 10, iter: 0, Loss: 1.046679
Epoch: 20, iter: 0, Loss: 1.027595
Epoch: 30, iter: 0, Loss: 1.010409
Epoch: 40, iter: 0, Loss: 0.993768


Training model with num_hid=2, hid_sz=50
Epoch: 0, iter: 0, Loss: 1.243775
Epoch: 10, iter: 0, Loss: 1.055571
Epoch: 20, iter: 0, Loss: 1.015911
Epoch: 30, iter: 0, Loss: 0.971202
Epoch: 40, iter: 0, Loss: 0.940706


Training model with num_hid=2, hid_sz=100
Epoch: 0, iter: 0, Loss: 1.079934
Epoch: 10, iter: 0, Loss: 1.017123
Epoch: 20, iter: 0, Loss: 1.002577
Epoch: 30, iter: 0, Loss: 0.958627
Epoch: 40, iter: 0, Loss: 0.939703


Training model with num_hid=3, hid_sz=25
Epoch: 0, iter: 0, Loss: 1.078858
Epoch: 10, iter: 0, Loss: 1.059983
Epoch: 20, iter: 0, Loss: 1.024975
Epoch: 30, iter: 0, Loss: 0.996827
Epoch: 40, iter: 0, Loss: 0.980152


Training model with num_hid=3, hid_sz=50
Epoch: 0, iter: 0, Loss: 1.121107
Epoch: 10, iter: 0, Loss: 1.078552
Epoch: 20, iter: 0,

In [None]:
# Report the accuracy stored on each model next to its hyperparameters.
for model in models:
  summary = (model.num_hid, model.hid_sz, model.accuracy_on_val)
  print('Accuracy of model (num_hid=%d, hid_sz=%d): %f' % summary)

Accuracy of model (num_hid=2, hid_sz=25): 0.429448
Accuracy of model (num_hid=2, hid_sz=50): 0.458078
Accuracy of model (num_hid=2, hid_sz=100): 0.466258
Accuracy of model (num_hid=3, hid_sz=25): 0.462168
Accuracy of model (num_hid=3, hid_sz=50): 0.478528
Accuracy of model (num_hid=3, hid_sz=100): 0.484663
Accuracy of model (num_hid=4, hid_sz=25): 0.478528
Accuracy of model (num_hid=4, hid_sz=50): 0.490798
Accuracy of model (num_hid=4, hid_sz=100): 0.494888


In [None]:
# Select the model with the highest validation accuracy instead of assuming
# the last one trained is the best. The score is recomputed on ds_val here
# because evaluate() overwrites accuracy_on_val on every call, so the stored
# value may no longer be a validation score if this cell is re-run.
# Ties go to the earliest model in the list.
best_model = max(models, key=lambda candidate: candidate.evaluate(ds_val))

# Note: these two calls leave best_model.accuracy_on_val holding the
# training-set accuracy.
print('Accuracy on test set: %f' % best_model.evaluate(ds_test))
print('Accuracy on training set: %f' % best_model.evaluate(ds_train))

Accuracy on test set: 0.498300
Accuracy on training set: 0.552417
