In [0]:
# Location of the red-wine quality CSV that is loaded into `df` below.
# NOTE(review): this looks like a Colab-mounted Google Drive path; it must exist
# in the runtime before the loading cell is executed.
wine_dataset = '/content/drive/My Drive/Colab Notebooks/Datasets/winequality-red.csv'

In [0]:
#imports 
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split

In [50]:
# The file is semicolon-separated, so the separator must be given explicitly.
df = pd.read_csv(wine_dataset, sep=';')
# Preview the first five rows as a sanity check on the parsed columns.
df.head(5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [51]:
# Select the target by the same column name that is dropped from the features,
# so the two can never disagree if the column order of the CSV changes.
target_column = 'quality'
features = df.drop(columns=[target_column])
targets = df[target_column]
# Show the distinct class labels present in the data.
targets.unique()

array([5, 6, 7, 4, 8, 3])

In [52]:
n_samples = len(df)

train_size = int(.8 * n_samples)  # 80% of dataset for training

val_size = math.ceil(.1 * n_samples)  # 10% for validation

# The test split takes whatever remains, so the three sizes always sum to
# len(df) exactly; random_split rejects lengths that do not add up.
test_size = n_samples - train_size - val_size
assert test_size >= 0, 'train/val sizes exceed the dataset length'

train_size, val_size, test_size

(1279, 160, 160)

In [0]:
# Quality scores start at 3 (see targets.unique()); shifting by this offset
# yields zero-based class indices as required by nn.CrossEntropyLoss.
label_offset = 3

# Convert through NumPy with explicit dtypes: this avoids treating the pandas
# Series as a generic Python sequence and avoids a float32 round-trip for the
# integer labels.
feature_tensor = torch.tensor(features.to_numpy(), dtype=torch.float32)
label_tensor = torch.tensor(targets.to_numpy(), dtype=torch.long) - label_offset
dataset = TensorDataset(feature_tensor, label_tensor)

# Randomly partition the samples into train / validation / test subsets.
train_ds, val_ds, test_ds = random_split(dataset, [train_size, val_size, test_size])


In [0]:
batch_size = 64

# One loader per split. Only the training loader shuffles its samples;
# validation and test batches are served in their stored order.
train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=batch_size)
test_loader = DataLoader(dataset=test_ds, batch_size=batch_size)

In [0]:
# Every column of df except the target is a model input.
input_size = df.shape[1] - 1
# One output logit per quality class.
output_size = 6
# NOTE(review): threshold is not referenced by any cell shown here; confirm it is still needed.
threshold = 0.5


In [0]:
class WineModel(nn.Module):
  """Linear classifier: a single nn.Linear layer from input_size features to output_size logits."""

  def __init__(self):
    super(WineModel, self).__init__()
    # Layer dimensions come from the module-level input_size / output_size.
    self.linear = nn.Linear(in_features=input_size, out_features=output_size)

  def forward(self, xb):
    """Return the raw (un-normalised) class logits for the input batch xb."""
    return self.linear(xb)

In [0]:
# Create the WineModel instance that the following cells train and query.
model = WineModel()

In [58]:
# Convert a budget of optimisation steps into a whole number of epochs.
n_iters = 8000
batches_per_epoch = len(train_ds) / batch_size  # true division, may be fractional
num_epochs = int(n_iters / batches_per_epoch)

# Dump the model's current parameter tensors (weight matrix, then bias).
for param in model.parameters():
  print(param.data)

tensor([[-0.1528, -0.2056,  0.0508, -0.1019,  0.0559, -0.2409,  0.0814, -0.1260,
          0.2536,  0.0832,  0.2067],
        [ 0.2674, -0.1047,  0.2256,  0.0791, -0.2002,  0.2370,  0.2685, -0.0535,
         -0.1985, -0.2632,  0.0884],
        [-0.2106,  0.2063, -0.2830,  0.1223,  0.0462, -0.0174, -0.2070,  0.0792,
         -0.1589,  0.1398,  0.2058],
        [ 0.2655,  0.1918, -0.1071,  0.1255,  0.1698, -0.2272,  0.0717,  0.1699,
          0.2990, -0.2717, -0.0149],
        [ 0.1049, -0.0683, -0.2175,  0.0332,  0.2611, -0.0806, -0.2597,  0.2054,
          0.2800, -0.2699, -0.2454],
        [-0.2448,  0.0965,  0.0180,  0.1299, -0.0208,  0.0658, -0.2169, -0.2523,
         -0.2942, -0.0825, -0.1590]])
tensor([ 0.0920, -0.1472, -0.1999, -0.0460,  0.0115, -0.1112])


In [0]:
# Cross-entropy over the raw logits; expects integer class indices as targets.
criterion = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-7)

In [0]:

log_every = 250  # report progress every `log_every` epochs

for epoch in range(num_epochs):
  # ---- training pass: the only place where parameters are updated ----
  model.train()
  train_loss_sum, train_batches = 0.0, 0
  for x, y in train_loader:
    optimizer.zero_grad()
    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    train_loss_sum += loss.item()
    train_batches += 1

  # ---- validation pass: evaluation only ----
  # No backward()/step() here; updating on validation batches would leak the
  # held-out data into training and make the validation loss meaningless.
  model.eval()
  val_loss_sum, val_batches = 0.0, 0
  with torch.no_grad():
    for r, w in val_loader:
      val_loss_sum += criterion(model(r), w).item()
      val_batches += 1

  # Parenthesised on purpose: `epoch+1 % 250` parses as `epoch + (1 % 250)`,
  # which is never 0, so the report would never be printed.
  if (epoch + 1) % log_every == 0:
    print('epoch {}/{}: train_loss={:.4f}, val_loss={:.4f}'.format(
        epoch + 1,
        num_epochs,
        train_loss_sum / max(train_batches, 1),
        val_loss_sum / max(val_batches, 1)))


In [0]:
def predict(features, model, verbose=True):
  """Predict class indices for one sample or for a batch of samples.

  Args:
    features: a 1-D tensor holding a single sample, or a 2-D tensor with one
      sample per row.
    model: callable that maps `features` to class logits.
    verbose: when True (the default, matching the previous behaviour), print
      the class probabilities and the predicted indices.

  Returns:
    A 0-d tensor with the predicted class index for a single sample, or a 1-D
    tensor with one index per row for a batch.
  """
  with torch.no_grad():
    logits = model(features)
    single_sample = logits.dim() == 1
    if single_sample:
      # Add a batch axis so both cases share the same code path.
      logits = logits.unsqueeze(0)
    # Normalise over the class axis (the last one), never over the batch axis.
    y_pred = F.softmax(logits, dim=-1)
    _, preds = torch.max(y_pred, -1)
    if verbose:
      print(y_pred)
      print(preds)
    return preds[0] if single_sample else preds

In [79]:
# Spot-check the model on one sample: compare its true label with the prediction.
x, y = dataset[3]
predicted = predict(x, model)
print('act_val: {}, pred: {}'.format(y, predicted))
# Also show the raw feature vector that was fed to the model.
print(x)

tensor([[0.0006, 0.0444, 0.3459, 0.4664, 0.1410, 0.0017]])
tensor([3])
act_val: 3, pred: 3
tensor([11.2000,  0.2800,  0.5600,  1.9000,  0.0750, 17.0000, 60.0000,  0.9980,
         3.1600,  0.5800,  9.8000])
