## Checkpoint PyTorch Models
Store model and optimizer state_dict and epochs, score data to resume training at later time

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [2]:
wine = pd.read_csv('wine_data.csv')
wine.head(5)

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,0,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,0,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,0,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,0,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
x = wine.drop('Class', axis=1)
y = wine['Class']

X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.25)

x_train = torch.tensor(X_train.values, device=device, dtype=torch.float)
x_test = torch.tensor(X_test.values, device=device, dtype=torch.float)
y_train = torch.from_numpy(Y_train.values).view(-1).long()
y_test = torch.from_numpy(Y_test.values).view(-1).long()

y_train = y_train.to(device)
y_test = y_test.to(device)

In [5]:
input_size = len(x.columns)
hidden_size = 100
output_size = y.nunique()

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        
    def forward(self, X):
        X = torch.sigmoid(self.fc1(X))
        X = torch.sigmoid(self.fc2(X))
        X = self.fc3(X)
        
        return F.log_softmax(X, dim=-1)

In [7]:
model = Net()
model.to(device)

Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)

In [8]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.NLLLoss()

In [9]:
epochs = 200

for epoch in range(epochs+1):
    optimizer.zero_grad()
    y_pred = model(x_train)
    loss = loss_fn(y_pred, y_train)
    loss.backward()
    optimizer.step()
    
    if epoch%50 == 0:
        print(f'| Epoch: {epoch:02} | Loss: {loss.item():.5f} |')

| Epoch: 00 | Loss: 1.10900 |
| Epoch: 50 | Loss: 0.51261 |
| Epoch: 100 | Loss: 0.25942 |
| Epoch: 150 | Loss: 0.07352 |
| Epoch: 200 | Loss: 0.05376 |


In [10]:
# save all the required info
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}, 'models/clf_checkpoint.pt')

<hr style="border:2px solid gray"> </hr>

In [11]:
new_model = Net()
new_model.to(device)

Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)

In [12]:
checkpoint = torch.load('models/clf_checkpoint.pt')

In [13]:
new_epoch = checkpoint['epoch']
new_loss = checkpoint['loss']

In [14]:
new_optimizer = torch.optim.Adam(new_model.parameters())
new_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [15]:
new_model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [16]:
# Resume training
epochs = 1000

for epoch in range(new_epoch, epochs+1):
    new_optimizer.zero_grad()
    y_pred = new_model(x_train)
    new_loss = loss_fn(y_pred, y_train)
    new_loss.backward()
    new_optimizer.step()
    
    if epoch%50 == 0:
        print(f'| Epoch: {epoch:02} | Loss: {new_loss.item():.5f} |')

| Epoch: 200 | Loss: 0.05353 |
| Epoch: 250 | Loss: 0.04294 |
| Epoch: 300 | Loss: 0.36711 |
| Epoch: 350 | Loss: 0.16712 |
| Epoch: 400 | Loss: 0.13141 |
| Epoch: 450 | Loss: 0.11019 |
| Epoch: 500 | Loss: 0.09839 |
| Epoch: 550 | Loss: 0.09446 |
| Epoch: 600 | Loss: 0.08129 |
| Epoch: 650 | Loss: 0.06579 |
| Epoch: 700 | Loss: 0.05366 |
| Epoch: 750 | Loss: 0.19186 |
| Epoch: 800 | Loss: 0.05429 |
| Epoch: 850 | Loss: 0.04485 |
| Epoch: 900 | Loss: 0.03788 |
| Epoch: 950 | Loss: 0.03303 |
| Epoch: 1000 | Loss: 0.02954 |


In [17]:
new_model.eval()

Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)

In [18]:
prediction = new_model(x_test)

_, pred = torch.max(prediction, 1)

pred

tensor([2, 1, 0, 0, 2, 1, 2, 0, 2, 0, 2, 2, 2, 1, 1, 1, 2, 1, 0, 1, 2, 1, 1, 1,
        0, 2, 0, 1, 0, 0, 2, 1, 2, 0, 0, 0, 1, 1, 2, 0, 1, 2, 1, 2, 1],
       device='cuda:0')

In [19]:
actual = y_test.cpu().detach().numpy()
predicted = pred.cpu().detach().numpy()

In [20]:
print(f'Accuracy: {accuracy_score(actual, predicted):.3f}')
print(f'Precision: {precision_score(actual, predicted, average="weighted"):.3f}')
print(f'Recall: {recall_score(actual, predicted, average="weighted"):.3f}')

Accuracy: 0.867
Precision: 0.898
Recall: 0.867
