In [1]:
%matplotlib inline
import pandas as pd
import os
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [2]:
PATH = './data'

In [3]:
! ls {PATH}

winequality-red.csv  winequality-white.csv


In [4]:
os.listdir(PATH)

['winequality-red.csv', 'winequality-white.csv']

In [5]:
df = pd.read_csv(f'{PATH}/winequality-red.csv',sep = ';',quotechar = '"')

In [6]:
print(df.shape)

(1599, 12)


In [7]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [8]:
df.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [9]:
df['quality'].value_counts()

5    681
6    638
7    199
4     53
8     18
3     10
Name: quality, dtype: int64

#### High Quality Classification

In [10]:
def is_high_quality(quality):
    if  quality > 6:
        return 1 
    else :
        return 0

In [11]:
df['qualitybool'] = df['quality'].apply(is_high_quality)

In [12]:
X = df.drop(['qualitybool','quality'],axis = 1)
target = df['qualitybool']

In [13]:
print('XShape:{},targetShape{}'.format(X.shape,target.shape))

XShape:(1599, 11),targetShape(1599,)


In [14]:
X_train, X_test, target_train, target_test = train_test_split(X,target,test_size = 0.33,random_state=123)

The input has 11 columns. So the input layer should have 11 rows amd any no of c columns.Final output should have 1 column and (how many rows ?)

In [15]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(11,50)
        self.l2 = nn.Linear(50,2)
    
    def forward(self,x):
        x = F.relu(self.l1(x))
        return self.l2(x)
    

In [16]:
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.01,momentum = 0.5)

In [17]:
def train_data(epochs,bz):
    for epoch in range(epochs):
        train_loss = 0
        batch = 0
        # Need to convert the data from Numpy to Tensor
        for start, end in zip(range(0, len(X_train), bz), 
                              range(bz, len(X_train), bz)):
            inputs = torch.from_numpy((X_train[start:end]).as_matrix())
            inputs = inputs.float()
            #labels = torch.Tensor(target_train[start:end])
            labels = torch.from_numpy((target_train[start:end]).as_matrix())
            labels = labels.long()
            
            # wrap them in variable
            inputs = Variable(inputs)
            labels = Variable(labels)
            
            # zero the parameter gradients
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs,labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.data[0]
            batch += 1
            #if i % 1 == 0:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %(epoch + 1, batch + 1, train_loss / 100))
        train_loss = 0.0
    print('Training Completed !')
    

In [18]:
bz = 100
epochs = 10

In [19]:
train_data(20,100)

[1,     2] loss: 0.039
[1,     3] loss: 0.090
[1,     4] loss: 0.124
[1,     5] loss: 0.152
[1,     6] loss: 0.175
[1,     7] loss: 0.201
[1,     8] loss: 0.206
[1,     9] loss: 0.218
[1,    10] loss: 0.224
[1,    11] loss: 0.232
[2,     2] loss: 0.005
[2,     3] loss: 0.016
[2,     4] loss: 0.020
[2,     5] loss: 0.025
[2,     6] loss: 0.028
[2,     7] loss: 0.033
[2,     8] loss: 0.035
[2,     9] loss: 0.040
[2,    10] loss: 0.044
[2,    11] loss: 0.048
[3,     2] loss: 0.003
[3,     3] loss: 0.010
[3,     4] loss: 0.014
[3,     5] loss: 0.017
[3,     6] loss: 0.021
[3,     7] loss: 0.025
[3,     8] loss: 0.028
[3,     9] loss: 0.033
[3,    10] loss: 0.037
[3,    11] loss: 0.041
[4,     2] loss: 0.003
[4,     3] loss: 0.009
[4,     4] loss: 0.013
[4,     5] loss: 0.016
[4,     6] loss: 0.020
[4,     7] loss: 0.024
[4,     8] loss: 0.027
[4,     9] loss: 0.032
[4,    10] loss: 0.035
[4,    11] loss: 0.039
[5,     2] loss: 0.003
[5,     3] loss: 0.009
[5,     4] loss: 0.012
[5,     5] 

In [48]:
def test_data(bz):
    test_loss = 0
    batch = 0
    correct = 0
    # Need to convert the data from Numpy to Tensor
    for start, end in zip(range(0, len(X_test), bz), 
                          range(bz, len(X_test), bz)):
        inputs = torch.from_numpy((X_test[start:end]).as_matrix())
        inputs = inputs.float()
        #labels = torch.Tensor(target_train[start:end])
        labels = torch.from_numpy((target_test[start:end]).as_matrix())
        labels = labels.long()

        # wrap them in variable
        inputs = Variable(inputs)
        labels = Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs,labels)
        #loss.backward()
        #optimizer.step()
        _, preds = torch.max(outputs.data , 1)
        correct += preds.eq(labels.data.view_as(preds)).sum()
        test_loss += loss.data[0]
        batch += 1
        #if i % 1 == 0:    # print every 2000 mini-batches
        print('Accuracy:{}'.format(correct/bz))
        print('[%5d] loss: %.3f' %(batch, test_loss / 100))
    print('Test Completed !')
        

In [49]:
test_data(500)

Accuracy:0.872
[    1] loss: 0.004
Test Completed !


The Accuracy with this model is  87%

More info :https://github.com/PythonWorkshop/Intro-to-TensorFlow-and-PyTorch/blob/master/PyTorch%20Tutorial.ipynb