# 1. Define Model

In [1]:
import torch 
import torch.nn as nn

class Model(nn.Module):
    """Single linear layer whose output is passed through a sigmoid.

    Takes ``n_input_features`` values per sample and produces one value per sample.
    """

    def __init__(self, n_input_features):
        """Build the linear layer mapping ``n_input_features`` inputs to 1 output."""
        super().__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Instantiate the model for 6 input features; the cells below save and reload
# this `model` object.
model = Model(n_input_features=6)

# train your model...


# 2. torch.save, torch.load

In [None]:
import torch
import torch.nn as nn

# Generic call signatures only: `arg` and `PATH` are placeholders (neither is
# defined earlier in this notebook), so running this cell as-is on a fresh
# kernel raises NameError.
# torch.save serialises `arg` to the file at PATH; torch.load reads it back.
torch.save(arg, PATH)
torch.load(PATH)

# 3. Save Model

## VERSION 1 : SAVE COMPLETE MODEL (lazy version)

In [None]:
#### VERSION 1 : SAVE COMPLETE MODEL (lazy version) ####
FILE = "model.pth" # pth = short for pytorch

# save: pickles the whole nn.Module object, not just its parameters
torch.save(model, FILE)

# load: a fully pickled module can only be restored with weights_only=False.
# PyTorch >= 2.6 defaults to weights_only=True, which refuses to unpickle
# arbitrary classes such as Model and raises an UnpicklingError.
# WARNING: unpickling can execute arbitrary code - only load files you created
# yourself or otherwise trust.
model = torch.load(FILE, weights_only=False)
model.eval()  # switch to evaluation mode before inference

# Show the restored parameters
for param in model.parameters():
    print(param)

## VERSION 2 : SAVE MODEL STATE_DICT (recommended)

In [None]:
#### VERSION 2 : SAVE MODEL STATE_DICT (recommended) ####
FILE = "model.pth"

# Persist only the parameters (the state_dict), then show what was written
torch.save(model.state_dict(), FILE)
print(model.state_dict())

# Rebuild the architecture first; the parameters are filled in afterwards
loaded_model = Model(n_input_features=6)

# Parameters of the fresh instance, before the saved values are loaded
print(*loaded_model.parameters(), sep="\n")

# Copy the saved values into the fresh instance and switch to evaluation mode
loaded_model.load_state_dict(torch.load(FILE))
loaded_model.eval()

# Parameters after loading
print(*loaded_model.parameters(), sep="\n")


# 4. State Dict 

In [2]:
# state_dict() maps each parameter name (e.g. 'linear.weight') to its tensor
print(model.state_dict())

OrderedDict([('linear.weight', tensor([[-0.1179, -0.1340, -0.3754,  0.2554,  0.0309,  0.1495]])), ('linear.bias', tensor([0.2086]))])


In [6]:
# We can save any dictionary here, e.g. an optimizer's state_dict, which holds
# its hyper-parameters ('param_groups') and its per-parameter 'state'.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0, 1]}]}


# 5. Save the whole checkpoint information

In [7]:
# Bundle the epoch counter, the model parameters and the optimizer state into
# one dictionary so they can be saved and restored together.
checkpoint = {
    "epoch" : 90,  # hard-coded here; in a real run this would be the current epoch
    "model_state": model.state_dict(),
    "optim_state": optimizer.state_dict()
}

# Write the whole dictionary to a single file
torch.save(checkpoint, "checkpoint.pth")

In [8]:
# load checkpoint
loaded_checkpoint = torch.load("checkpoint.pth")

# Epoch counter that was stored next to the weights
epoch = loaded_checkpoint["epoch"]

# Fresh model and optimizer. lr=0 is deliberately different from the saved
# learning rate so the effect of load_state_dict is visible afterwards.
model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)

# State before restoring: newly initialised weights and lr=0
print(model.state_dict())
print(optimizer.state_dict())

# Overwrite both with the values stored in the checkpoint
model.load_state_dict(loaded_checkpoint['model_state'])
optimizer.load_state_dict(loaded_checkpoint['optim_state'])

OrderedDict([('linear.weight', tensor([[-0.3552,  0.2848,  0.2458, -0.1207,  0.1241, -0.3931]])), ('linear.bias', tensor([0.2503]))])
{'state': {}, 'param_groups': [{'lr': 0, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0, 1]}]}


In [9]:
# After load_state_dict: the weights and the learning rate now hold the values
# stored in the checkpoint instead of the fresh initialisation and lr=0.
print(model.state_dict())
print(optimizer.state_dict())

OrderedDict([('linear.weight', tensor([[-0.1179, -0.1340, -0.3754,  0.2554,  0.0309,  0.1495]])), ('linear.bias', tensor([0.2086]))])
{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'params': [0, 1]}]}


# 6. Save & Load on GPU/CPU

## Save on GPU, Load on CPU

In [None]:
import torch 
import torch.nn as nn

PATH = 'model.pth'

# save: move the model to the GPU first, so the saved state_dict holds CUDA tensors
# (requires a CUDA-capable device)
device = torch.device('cuda')
model.to(device)
torch.save(model.state_dict(), PATH)

# load: map_location remaps the stored CUDA tensors onto the CPU while loading
device = torch.device('cpu')
# Re-create the same architecture that was saved; Model needs n_input_features
model = Model(n_input_features=6)
model.load_state_dict(torch.load(PATH, map_location=device))

## Save on GPU, Load on GPU

In [None]:
# save: parameters are on the GPU when they are written out
# (requires a CUDA-capable device)
device = torch.device('cuda')
model.to(device)
torch.save(model.state_dict(), PATH)

# load: re-create the same architecture that was saved, then fill in the values
model = Model(n_input_features=6)
model.load_state_dict(torch.load(PATH))
# The freshly constructed module lives on the CPU; move it to the GPU explicitly
model.to(device)

## Save on CPU, Load on GPU

In [None]:

# save: make sure the parameters really live on the CPU when they are written
# out (when the notebook is run in order, `model` is still on the GPU from the
# previous cell)
model.to(torch.device('cpu'))
torch.save(model.state_dict(), PATH)

# load: re-create the same architecture that was saved, then fill in the values
device = torch.device('cuda')
model = Model(n_input_features=6)
model.load_state_dict(torch.load(PATH, map_location="cuda:0")) # Choose whatever GPU device number
# load_state_dict copies values into the existing parameters, so the module
# itself still has to be moved to the GPU
model.to(device)