# Introduction

This notebook trains a neural network with the PyTorch framework to predict
the `beer_style` of a review from the brewery name and the review scores.

In [1]:
# Enable IPython's autoreload extension in mode 2, so edited modules (such as
# the project's `src` package) are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

In [10]:
import pandas as pd
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from category_encoders.binary import BinaryEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from joblib import dump, load

from src.data.sets import save_sets, load_sets
from src.visualization.visualize import classification_reports
from src.models.pytorch import PytorchClassification
from src.models.pytorch import get_device
from src.models.pytorch import train_classification
from src.models.pytorch import test_classification
from src.models.pytorch import PytorchDataset
from src.models.pipes import create_preprocessing_pipe
from src.visualization.visualize import plot_nn_model

# Set up directories

In [3]:
# The project root is taken to be the parent of the current working directory
# (the folder the notebook is run from).
project_dir = Path.cwd().parent

# Data folders, one per processing stage.
data_dir = project_dir.joinpath('data')
raw_data_dir, interim_data_dir, processed_data_dir = (
    data_dir.joinpath(stage) for stage in ('raw', 'interim', 'processed')
)

# Output folders for reports and serialized models.
reports_dir = project_dir.joinpath('reports')
models_dir = project_dir.joinpath('models')

# Load data

In [4]:
# Load the train/test feature frames and target series through the project
# helper (its default arguments decide where the sets are read from).
X_train, X_test, y_train, y_test = load_sets()

In [5]:
# Preview the training features: one `brewery_name` column plus four review
# score columns.
X_train

Unnamed: 0,brewery_name,review_aroma,review_appearance,review_palate,review_taste
0,Weyerbacher Brewing Co.,4.0,4.0,4.0,4.5
1,Ballast Point Brewing Company,4.0,4.0,4.0,4.5
2,10 Barrel Brewing Co.,3.5,3.5,3.5,3.5
3,Brouwerij Slaghmuylder,4.0,4.0,3.5,4.0
4,Dogfish Head Brewery,5.0,4.5,4.0,4.5
...,...,...,...,...,...
1269286,Camo Brewing Company,2.0,2.0,3.0,2.5
1269287,Shipyard Brewing Co.,3.5,4.0,4.0,4.0
1269288,Port Brewing Company / Pizza Port,4.0,4.0,4.0,3.5
1269289,Ithaca Beer Company,3.5,4.0,4.0,4.0


Check that no class appears in only one of the two sets; both set differences below should be empty.

In [6]:
# Classes present in the training target but absent from the test target.
set(y_train.unique()) - set(y_test.unique())

set()

In [7]:
# Classes present in the test target but absent from the training target.
set(y_test.unique()) - set(y_train.unique())

set()

# Preprocess data

1. The `brewery_name` feature has a very high cardinality (~5,700 distinct values). One-hot encoding is not feasible because it would introduce ~5,700 very sparse columns. Binary encoding is used instead, which results in only 14 new columns.
2. Standard scaling is used to ensure that the binary columns ([0, 1]) and the review columns ([1, 5]) are on the same scale.

In [8]:
# Preprocessing pipeline: binary-encode `brewery_name`, then standardise every
# column that comes out of the encoder (encoded bits and review scores alike).
pipe = Pipeline([
    ('bin_encoder', BinaryEncoder(cols=['brewery_name'])),
    ('scaler', StandardScaler())
])

In [54]:
# Fit the pipeline on the training features only; the test features are
# transformed with the statistics and encoding learned from the training set.
X_train_trans = pipe.fit_transform(X_train)
X_test_trans = pipe.transform(X_test)

In [55]:
# (rows, columns) of the transformed training features.
X_train_trans.shape

(1269291, 18)

In [56]:
# Number of input features = column count of the transformed training matrix;
# used below as the model's input width.
n_features = X_train_trans.shape[1]
n_features

18

In [57]:
# Number of distinct target classes in the training labels; used below as the
# model's output width.
n_classes = y_train.nunique()
n_classes

104

## Encoding

PyTorch accepts only numerical labels.

In [15]:
# Map each `beer_style` label to an integer class index.
# LabelEncoder expects a 1-d array-like, so the Series are passed directly.
# Wrapping them with `.to_frame()` produced a column vector and raised a
# DataConversionWarning; the resulting encoding is the same either way.
le = LabelEncoder()
y_train_trans = le.fit_transform(y_train)
# Reuse the mapping learned from the training labels for the test labels.
y_test_trans = le.transform(y_test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [34]:
# Inspect the encoded test labels (integer class indices).
y_test_trans

array([ 4, 35, 47, ..., 17, 12, 84])

## Convert to PyTorch tensors

In [35]:
# Ask the project helper which torch device to use and display the result.
device = get_device()
device

device(type='cuda', index=0)

In [36]:
# Wrap the transformed features and the encoded labels in the project's
# dataset class, one instance per split.
train_dataset = PytorchDataset(X=X_train_trans, y=y_train_trans)
test_dataset = PytorchDataset(X=X_test_trans, y=y_test_trans)

# Classification model

In [15]:
# Seed PyTorch's random number generators before the network is built so that
# weight initialisation (and later draws from torch's global RNG) is
# repeatable from one "Restart & Run All" to the next.
SEED = 42
torch.manual_seed(SEED)

# Input width = number of preprocessed features, output width = number of
# beer styles.
model = PytorchClassification(n_features=n_features, n_classes=n_classes)

In [16]:
# Move the model to the selected device; `.to()` returns the module, so its
# layer summary is shown as the cell output.
model.to(device)

PytorchClassification(
  (layer_1): Linear(in_features=18, out_features=512, bias=True)
  (layer_2): Linear(in_features=512, out_features=128, bias=True)
  (layer_3): Linear(in_features=128, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=104, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (batchnorm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [17]:
# Cross-entropy loss for the multi-class `beer_style` target.
criterion = nn.CrossEntropyLoss()

In [18]:
# Adam over all model parameters with an initial learning rate of 0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model

In [19]:
# Training hyper-parameters: number of passes over the data and mini-batch size.
N_EPOCHS, BATCH_SIZE = 10, 512

# StepLR with a step size of 1 multiplies the learning rate by 0.9 on every
# scheduler step. How often the scheduler is stepped is decided inside
# `train_classification`, which is not visible here.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

In [23]:
# Arguments shared by the training and the evaluation helper.
shared_kwargs = dict(model=model,
                     criterion=criterion,
                     batch_size=BATCH_SIZE,
                     device=device)

for epoch in range(N_EPOCHS):
    # One pass over the training set; optimizer and scheduler are handed to
    # the project helper.
    train_loss, train_acc = train_classification(train_dataset,
                                                 optimizer=optimizer,
                                                 scheduler=scheduler,
                                                 **shared_kwargs)
    # Evaluate on the held-out set with the same model, loss and batch size.
    valid_loss, valid_acc = test_classification(test_dataset, **shared_kwargs)

    # Per-epoch report of loss and accuracy for both splits.
    print(f'Epoch: {epoch}')
    print(f'\t(train)\tLoss: {train_loss:.4f}\t|\tAcc: {train_acc * 100:.1f}%')
    print(f'\t(valid)\tLoss: {valid_loss:.4f}\t|\tAcc: {valid_acc * 100:.1f}%')


Epoch: 0
	(train)	Loss: 0.0055	|	Acc: 25.0%
	(valid)	Loss: 0.0051	|	Acc: 28.0%
Epoch: 1
	(train)	Loss: 0.0054	|	Acc: 25.4%
	(valid)	Loss: 0.0050	|	Acc: 28.1%
Epoch: 2
	(train)	Loss: 0.0054	|	Acc: 25.7%
	(valid)	Loss: 0.0050	|	Acc: 28.5%
Epoch: 3
	(train)	Loss: 0.0053	|	Acc: 26.0%
	(valid)	Loss: 0.0050	|	Acc: 28.7%
Epoch: 4
	(train)	Loss: 0.0053	|	Acc: 26.2%
	(valid)	Loss: 0.0049	|	Acc: 28.9%
Epoch: 5
	(train)	Loss: 0.0053	|	Acc: 26.3%
	(valid)	Loss: 0.0049	|	Acc: 28.8%
Epoch: 6
	(train)	Loss: 0.0052	|	Acc: 26.4%
	(valid)	Loss: 0.0049	|	Acc: 29.1%
Epoch: 7
	(train)	Loss: 0.0052	|	Acc: 26.6%
	(valid)	Loss: 0.0049	|	Acc: 29.1%
Epoch: 8
	(train)	Loss: 0.0052	|	Acc: 26.6%
	(valid)	Loss: 0.0049	|	Acc: 29.3%
Epoch: 9
	(train)	Loss: 0.0052	|	Acc: 26.7%
	(valid)	Loss: 0.0049	|	Acc: 29.3%


# Prediction

In [40]:
# The network contains Dropout and BatchNorm layers, which behave differently
# in training mode. Switch to evaluation mode so predictions are deterministic
# and use the running batch-norm statistics.
model.eval()

# No gradients are needed for inference; disabling autograd avoids building a
# graph over the whole test set.
with torch.no_grad():
    # Predicted class index = position of the largest logit in each row.
    preds = model(test_dataset.X_tensor.to(device)).argmax(1)
preds

tensor([12, 60, 47,  ..., 11, 60, 47], device='cuda:0')

# Plot model

In [44]:
# TODO
# Render the model with the project's plotting helper.
# NOTE(review): the recorded run of this cell failed with a PermissionError on
# the `reports` directory — confirm where the helper writes and that the
# location is writable.
plot_nn_model(model=model,
              preds=preds)

PermissionError: [Errno 13] Permission denied: 'D:\\git\\assignment_2\\reports'

In [45]:
from torchviz import make_dot

# `preds` is the result of `argmax`, which carries no autograd history, so
# tracing it yields a graph without any of the network's operations. Trace the
# logits of a small batch instead.
# Evaluation mode keeps this forward pass from updating the batch-norm running
# statistics of the trained model.
model.eval()
sample_logits = model(test_dataset.X_tensor[:2].to(device))

# Rendering the returned graph requires the Graphviz executables on PATH.
make_dot(sample_logits, params=dict(model.named_parameters()))

ExecutableNotFound: failed to execute ['dot', '-Kdot', '-Tsvg'], make sure the Graphviz executables are on your systems' PATH

<graphviz.dot.Digraph at 0x23e8a3757c8>

# Save objects for production

## Save model

In [81]:
# Serialize the trained model to `<models_dir>/2_pytorch.torch`.
# NOTE(review): passing the module itself (rather than `model.state_dict()`)
# pickles the whole object, so loading it later requires the
# `PytorchClassification` class to be importable under the same module path.
# Confirm the consumer of this file before changing the format.
path = models_dir / '2_pytorch'
torch.save(model, path.with_suffix('.torch'))

## Create pipe object

This is for transforming the input prior to prediction.

In [33]:
# Build the production preprocessing pipeline from the TRAINING features only.
# The model above was trained on features produced by a pipeline fitted on
# `X_train`. Fitting the production pipeline on train + test would give it
# different scaling statistics (and potentially a different brewery encoding)
# from what the model saw, i.e. train/serve skew.
# NOTE(review): presumably `create_preprocessing_pipe` builds and fits the same
# steps as `pipe` — confirm against src.models.pipes.
prod_pipe = create_preprocessing_pipe(X_train)

# Persist the pipeline next to the model as `<models_dir>/pipe.sav`.
path = models_dir / 'pipe'
dump(prod_pipe, path.with_suffix('.sav'))

['D:\\git\\assignment_2\\models\\pipe.sav']

## Save `LabelEncoder`

This is required to map the predicted class index back to the name of the `beer_style`.

In [16]:
# Persist the fitted LabelEncoder as `<models_dir>/label_encoder.sav` so that
# predicted class indices can be decoded outside this notebook.
path = models_dir / 'label_encoder'
dump(le, path.with_suffix('.sav'))

['D:\\git\\assignment_2\\models\\label_encoder.sav']

PytorchClassification(
  (layer_1): Linear(in_features=18, out_features=512, bias=True)
  (layer_2): Linear(in_features=512, out_features=128, bias=True)
  (layer_3): Linear(in_features=128, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=104, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (batchnorm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)