In [63]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.nn import functional as F

In [64]:
df = pd.read_csv("./data/wine_data.csv")

In [65]:
df.head()

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,0,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,0,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,0,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,0,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [66]:
df.shape

(178, 14)

In [67]:
df.describe()

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
count,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0
mean,0.938202,13.000618,2.336348,2.366517,19.494944,99.741573,2.295112,2.02927,0.361854,1.590899,5.05809,0.957449,2.611685,746.893258
std,0.775035,0.811827,1.117146,0.274344,3.339564,14.282484,0.625851,0.998859,0.124453,0.572359,2.318286,0.228572,0.70999,314.907474
min,0.0,11.03,0.74,1.36,10.6,70.0,0.98,0.34,0.13,0.41,1.28,0.48,1.27,278.0
25%,0.0,12.3625,1.6025,2.21,17.2,88.0,1.7425,1.205,0.27,1.25,3.22,0.7825,1.9375,500.5
50%,1.0,13.05,1.865,2.36,19.5,98.0,2.355,2.135,0.34,1.555,4.69,0.965,2.78,673.5
75%,2.0,13.6775,3.0825,2.5575,21.5,107.0,2.8,2.875,0.4375,1.95,6.2,1.12,3.17,985.0
max,2.0,14.83,5.8,3.23,30.0,162.0,3.88,5.08,0.66,3.58,13.0,1.71,4.0,1680.0


# DATA WRANGLING

In [68]:
df.isna().sum()

Class                           0
Alcohol                         0
Malic acid                      0
Ash                             0
Alcalinity of ash               0
Magnesium                       0
Total phenols                   0
Flavanoids                      0
Nonflavanoid phenols            0
Proanthocyanins                 0
Color intensity                 0
Hue                             0
OD280/OD315 of diluted wines    0
Proline                         0
dtype: int64

In [69]:
df.duplicated().sum()

0

# MACHINE LEARNING
### ML PREP

In [70]:
# Encode target labels with value between 0 and n_classes-1.

from sklearn.preprocessing import LabelEncoder


In [71]:
le = LabelEncoder()
df['Class'] = le.fit_transform(df['Class'])
df.sample(10)

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
52,0,13.82,1.75,2.42,14.0,111,3.88,3.74,0.32,1.87,7.05,1.01,3.26,1190
100,1,12.08,2.08,1.7,17.5,97,2.23,2.17,0.26,1.4,3.3,1.27,2.96,710
42,0,13.88,1.89,2.59,15.0,101,3.25,3.56,0.17,1.7,5.43,0.88,3.56,1095
154,2,12.58,1.29,2.1,20.0,103,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640
87,1,11.65,1.67,2.62,26.0,88,1.92,1.61,0.4,1.34,2.6,1.36,3.21,562
145,2,13.16,3.57,2.15,21.0,102,1.5,0.55,0.43,1.3,4.0,0.6,1.68,830
114,1,12.08,1.39,2.5,22.5,84,2.56,2.29,0.43,1.04,2.9,0.93,3.19,385
63,1,12.37,1.13,2.16,19.0,87,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420
38,0,13.07,1.5,2.1,15.5,98,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020
58,0,13.72,1.43,2.5,16.7,108,3.4,3.67,0.19,2.04,6.8,0.89,2.87,1285


In [72]:
df['Class'].unique()

array([0, 1, 2], dtype=int64)

### SEPARATE FETURES AND TARGET

In [73]:
df_features = df.drop('Class', axis=1)
df_features.head()

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [74]:
df_target = df[['Class']]
df_target.head()

Unnamed: 0,Class
0,0
1,0
2,0
3,0
4,0


In [75]:
#SPLIT DATA
from sklearn.model_selection import train_test_split

In [76]:
X_train, x_test, Y_train, y_test = train_test_split(df_features, 
                                                    df_target,
                                                    test_size=0.3,
                                                     random_state=42)

In [77]:
X_train.shape, x_test.shape,

((124, 13), (54, 13))

In [78]:
 Y_train.shape, y_test.shape

((124, 1), (54, 1))

### Convert data to Tensors for Pytorch

In [79]:
Xtrain = torch.from_numpy(X_train.values).float()
Xtest = torch.from_numpy(x_test.values).float()
print(Xtrain.shape, Xtest.shape)

torch.Size([124, 13]) torch.Size([54, 13])


In [80]:
print(Xtrain.dtype, Xtest.dtype)

torch.float32 torch.float32


We have successfully converted our  X_data into torch tensors of float32 datatype

In [81]:
Ytrain = torch.from_numpy(Y_train.values).view(1,-1)[0]
Ytest = torch.from_numpy(y_test.values).view(1, -1)[0]
print(Ytrain.shape, Ytest.shape)

torch.Size([124]) torch.Size([54])


We use the **view()** to reshape the tensor.<br>
The loss function doesn't support multi-target and therefore, we should use a 1D Tensor of 1 row containing the labels.<br>
We have successfully converted our y_data

In [82]:
print(Ytrain.dtype, Ytest.dtype)

torch.int64 torch.int64


## PyTorch
### We create a classifier and define our neural network for our model

### Hyperparameterss

In [83]:
input_size = 13
output_size = 3
hidden_size = 100

### Define the neural network


class Net(nn.Module):
# define the linear model with input, hidden aand output sizes
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(input_size, hidden_size)
        self.fc3 = nn.Linear(input_size, output_size)

# define the forward network with sigmoid activation
    def forward(self, X):
        X = torch.sigmoid((self.fc1(X)))
        X = torch.sigmoid((self.fc2(X)))
        X = self.fc3(X)

        return F.log_softmax(X, dim=-1)

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        X = torch.sigmoid((self.fc1(X)))
        X = torch.sigmoid(self.fc2(X))
        X = self.fc3(X)

        return F.log_softmax(X, dim=-1)

In [84]:
# instantiate our model
model = Net()

In [85]:
# preview our model
model

Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)

### Define Optimizer and Loss Function

In [86]:
import torch.optim as optim

In [87]:
optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.NLLLoss()

In [88]:
epochs = 1000

for epoch in range(epochs):
    optimizer.zero_grad()
    Ypred = model(Xtrain)

    loss = loss_fn(Ypred, Ytrain)
    loss.backward()

    optimizer.step()

    print('Epoch: ', epoch, "Loss: ", loss.item())

Epoch:  0 Loss:  1.133582353591919
Epoch:  1 Loss:  1.1829636096954346
Epoch:  2 Loss:  1.1547188758850098
Epoch:  3 Loss:  1.0816279649734497
Epoch:  4 Loss:  1.0735265016555786
Epoch:  5 Loss:  1.093727946281433
Epoch:  6 Loss:  1.0803120136260986
Epoch:  7 Loss:  1.0541654825210571
Epoch:  8 Loss:  1.0367985963821411
Epoch:  9 Loss:  1.0291202068328857
Epoch:  10 Loss:  1.0195050239562988
Epoch:  11 Loss:  1.006834626197815
Epoch:  12 Loss:  0.9895039200782776
Epoch:  13 Loss:  0.9652305841445923
Epoch:  14 Loss:  0.9357722997665405
Epoch:  15 Loss:  0.9143531322479248
Epoch:  16 Loss:  0.8909392356872559
Epoch:  17 Loss:  0.877334475517273
Epoch:  18 Loss:  0.8502752184867859
Epoch:  19 Loss:  0.8255081176757812
Epoch:  20 Loss:  0.8000901937484741
Epoch:  21 Loss:  0.7743162512779236
Epoch:  22 Loss:  0.7550342679023743
Epoch:  23 Loss:  0.7381184697151184
Epoch:  24 Loss:  0.7155164480209351
Epoch:  25 Loss:  0.698341429233551
Epoch:  26 Loss:  0.6860606074333191
Epoch:  27 Loss:

### Save model to disk

In [109]:
torch.save(model.state_dict(), "./models/home_state_dict.pt")

### Load model from Disk

In [110]:
new_model = Net()

In [117]:
new_model.load_state_dict(torch.load("./models/home_state_dict.pt"))

<All keys matched successfully>

In [118]:
new_model.eval()

Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)

In [119]:
predict = new_model(Xtest)
_, predict_y = torch.max(predict, 1)

In [137]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

In [123]:
print("accuracy_score",accuracy_score(Ytest, predict_y))
print("precision_score",precision_score(Ytest, predict_y, average='weighted'))
print("recall_score",recall_score(Ytest, predict_y, average="weighted"))


accuracy_score 0.9444444444444444
precision_score 0.9453703703703703
recall_score 0.9444444444444444


In [138]:
print(classification_report(Ytest,predict_y, ))

              precision    recall  f1-score   support

           0       0.90      0.95      0.92        19
           1       0.95      0.90      0.93        21
           2       1.00      1.00      1.00        14

    accuracy                           0.94        54
   macro avg       0.95      0.95      0.95        54
weighted avg       0.95      0.94      0.94        54



In [122]:
confusion_matrix(Ytest, predict_y)

array([[18,  1,  0],
       [ 2, 19,  0],
       [ 0,  0, 14]], dtype=int64)