# Saving Out PyTorch Models
## Using `state_dict`

In [1]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

In [2]:
# Read the wine table from the CSV next to the notebook and preview its
# first five rows (five is the default for head()).
wine = pd.read_csv('wine_data.csv')
wine.head()

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,0,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,0,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,0,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,0,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [4]:
# Separate the feature columns from the target label.
x = wine.drop('Class', axis=1)
y = wine['Class']

# Fix the seed of the shuffle so the train/test partition -- and every metric
# computed from it further down -- is identical on each Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

# Build all four tensors directly on the target device: features as float32,
# labels as int64 class indices (the target dtype used with nn.NLLLoss).
x_train = torch.tensor(X_train.values, device=device, dtype=torch.float)
x_test = torch.tensor(X_test.values, device=device, dtype=torch.float)
y_train = torch.tensor(Y_train.values, device=device, dtype=torch.long)
y_test = torch.tensor(Y_test.values, device=device, dtype=torch.long)

In [5]:
# Layer dimensions: one input unit per feature column, one output unit per
# distinct class label, and a fixed width for the hidden layers.
input_size = x.shape[1]
output_size = y.nunique()
hidden_size = 100

In [6]:
class Net(nn.Module):
    """Fully connected classifier with two sigmoid-activated hidden layers.

    The forward pass returns log-probabilities (``log_softmax`` over the last
    dimension), i.e. the input format expected by ``nn.NLLLoss``.

    Args:
        n_inputs: Number of input features. When omitted, the notebook-level
            ``input_size`` is used.
        n_hidden: Width of both hidden layers. When omitted, the
            notebook-level ``hidden_size`` is used.
        n_outputs: Number of output classes. When omitted, the notebook-level
            ``output_size`` is used.
    """

    def __init__(self, n_inputs=None, n_hidden=None, n_outputs=None):
        super().__init__()

        # Fall back to the notebook globals so existing ``Net()`` calls keep
        # working, while still allowing the architecture to be rebuilt from
        # explicit sizes (e.g. when reloading a saved state_dict elsewhere).
        if n_inputs is None:
            n_inputs = input_size
        if n_hidden is None:
            n_hidden = hidden_size
        if n_outputs is None:
            n_outputs = output_size

        self.fc1 = nn.Linear(n_inputs, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.fc3 = nn.Linear(n_hidden, n_outputs)

    def forward(self, X):
        """Map a batch of feature rows to per-class log-probabilities."""
        X = torch.sigmoid(self.fc1(X))
        X = torch.sigmoid(self.fc2(X))
        X = self.fc3(X)

        # Normalise the raw scores over the class (last) dimension.
        return F.log_softmax(X, dim=-1)

In [7]:
# Build the network and place its parameters on the selected device;
# the bare name on the last line displays the layer summary.
model = Net().to(device)
model

Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)

In [8]:
# Negative log-likelihood loss, applied to the log-probabilities that the
# model's forward pass returns.
loss_fn = nn.NLLLoss()
# Adam over all parameters of the model with a learning rate of 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)

In [9]:
epochs = 1000

# Full-batch training: every iteration feeds the whole training set.
# range(0, epochs + 1) performs epochs + 1 optimizer steps, which makes the
# last progress line land exactly on `epochs`.
for epoch in range(0, epochs + 1):
    # Forward pass and loss on the training data.
    y_pred = model(x_train)
    loss = loss_fn(y_pred, y_train)

    # Clear the gradients of the previous step, back-propagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 200 iterations.
    if not epoch % 200:
        print(f'| Epoch: {epoch:02} | Loss: {loss.item()} |')

| Epoch: 00 | Loss: 1.120957851409912 |
| Epoch: 200 | Loss: 0.08118202537298203 |
| Epoch: 400 | Loss: 0.07488358020782471 |
| Epoch: 600 | Loss: 0.06929833441972733 |
| Epoch: 800 | Loss: 0.06742268800735474 |
| Epoch: 1000 | Loss: 0.06634220480918884 |


In [10]:
# Inspect the learned parameters: an ordered mapping from parameter name
# (e.g. 'fc1.weight', 'fc1.bias') to the tensor holding its values.
model.state_dict()

OrderedDict([('fc1.weight',
              tensor([[ 0.3174, -0.7476, -0.5077,  ...,  1.7408,  0.9695, -0.0112],
                      [ 0.0984, -0.1170,  0.0505,  ..., -0.0309,  0.0694, -0.1982],
                      [ 0.0289, -0.1681,  0.0736,  ...,  0.1236,  0.0858, -0.2163],
                      ...,
                      [-0.2032, -0.1376,  0.1451,  ..., -0.0629, -0.1519, -0.0685],
                      [ 0.0701, -0.1985, -0.0361,  ..., -0.0561,  0.1514,  0.1154],
                      [ 0.1549,  0.0719,  0.2771,  ...,  0.0184, -0.1196, -0.2086]],
                     device='cuda:0')),
             ('fc1.bias',
              tensor([ 0.4417,  0.2118, -0.1233,  0.0740, -0.1571,  0.0940,  0.2314, -0.0332,
                       0.1918, -0.2418, -0.0343, -0.1892,  0.3358,  0.1843, -0.0535,  0.1941,
                       0.1401, -0.0870, -0.1239, -0.1529, -0.0864, -0.0067,  0.1287, -0.1820,
                      -0.2435, -0.0351,  0.1811, -0.2138, -0.0472,  0.0312, -0.1684,  0.2421

- For prediction (inference) only, it is enough to save and load just the model parameters (the model's `state_dict`).
- To resume training from a saved model (checkpointing), the model's `state_dict` alone is not enough.
**You also need to save the optimizer state, the current epoch, the latest loss/score, etc.**

In [11]:
# Inspect the optimizer's own state: per-parameter entries holding the step
# count and Adam's running averages ('exp_avg', 'exp_avg_sq').
optimizer.state_dict()

{'state': {2341974338280: {'step': 1001,
   'exp_avg': tensor([[-3.0795e-03, -2.2138e-04,  5.4252e-04,  ..., -3.5876e-04,
             2.9269e-04,  1.0649e-02],
           [ 1.9973e-29,  2.1143e-30,  3.3752e-30,  ...,  1.5037e-30,
             3.7130e-30,  3.4874e-27],
           [-2.5035e-32, -2.1109e-33, -4.1899e-33,  ..., -2.8523e-33,
            -6.5197e-33, -5.8051e-31],
           ...,
           [-6.9455e-18, -7.5638e-19, -1.1791e-18,  ..., -7.6257e-19,
            -1.8111e-18, -1.6229e-16],
           [-5.6052e-45, -5.6052e-45, -5.6052e-45,  ..., -5.6052e-45,
            -5.6052e-45, -5.6052e-45],
           [-1.2676e-31, -1.1918e-32, -2.1327e-32,  ..., -1.4270e-32,
            -3.3079e-32, -2.9428e-30]], device='cuda:0'),
   'exp_avg_sq': tensor([[4.2967e-02, 1.8723e-03, 1.5117e-03,  ..., 2.2175e-04, 1.2669e-03,
            1.0323e+02],
           [7.3790e-16, 8.3132e-18, 2.1071e-17,  ..., 4.1849e-18, 2.5544e-17,
            2.2336e-11],
           [0.0000e+00, 0.0000e+00, 0.0

In [12]:
# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing the parameters-only checkpoint.
os.makedirs('models', exist_ok=True)
torch.save(model.state_dict(), 'models/clf_st_dict')

<hr style="border:2px solid gray">

In [13]:
# Fresh, untrained instance of the same architecture on the selected device;
# the saved parameters are loaded into it in the next step.
new_model = Net().to(device)
new_model

Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)

In [14]:
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
new_model.load_state_dict(torch.load('models/clf_st_dict', map_location=device))

<All keys matched successfully>

In [15]:
# Switch the module to evaluation mode before inference. Net contains only
# Linear layers, so this does not alter its outputs here, but it is the
# standard step before predicting with a loaded model.
new_model.eval()

Net(
  (fc1): Linear(in_features=13, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=3, bias=True)
)

In [16]:
# Inference only: disable autograd so no computation graph is recorded for
# the test batch.
with torch.no_grad():
    prediction = new_model(x_test)

# The predicted class is the index of the largest log-probability in each row.
# Using argmax also avoids assigning to `_`, which IPython reserves for the
# previous cell's output.
pred = prediction.argmax(dim=1)

pred

tensor([2, 2, 0, 0, 2, 2, 1, 0, 0, 1, 1, 1, 2, 2, 2, 1, 0, 2, 1, 0, 2, 0, 1, 2,
        1, 1, 0, 2, 1, 2, 1, 0, 1, 1, 1, 0, 1, 2, 2, 0, 2, 1, 1, 1, 1],
       device='cuda:0')

In [17]:
# scikit-learn metrics work on host-side arrays: detach each tensor from
# autograd, copy it to the CPU and convert it to NumPy.
actual = y_test.detach().cpu().numpy()
predicted = pred.detach().cpu().numpy()

In [18]:
# Score the reloaded model on the held-out set. Precision and recall are
# averaged over the classes, weighted by each class's support.
accuracy = accuracy_score(actual, predicted)
precision = precision_score(actual, predicted, average="weighted")
recall = recall_score(actual, predicted, average="weighted")

print(f'Accuracy: {accuracy:.3f}')
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')

Accuracy: 0.956
Precision: 0.957
Recall: 0.956
