# File I/O

What if we want to save models to disk and load them back later? Furthermore, a training process may take days, so it is worth checkpointing it at regular intervals.

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

### For individual tensors, we can save and load these using the built-in `torch.save()` and `torch.load()` functions

In [11]:
# Create the integer tensor [0, 1, 2, 3] and serialize it to the file 'x-file'.
x = torch.arange(4)
torch.save(x, 'x-file')

In [5]:
# Read the object stored in 'x-file' back into memory under a new name.
x2 = torch.load('x-file')

In [6]:
# Display the reloaded tensor so it can be compared with what was saved.
x2

tensor([0, 1, 2, 3])

###  We can also save a list of tensors and load them back into memory

In [12]:
# Create a second tensor of four zeros and save both tensors together as a list in 'x-files'.
y = torch.zeros(4)
torch.save([x, y], 'x-files')

In [13]:
# Load the two-element list from 'x-files' and unpack it into separate names.
x2, y2 = torch.load('x-files')

In [14]:
# Display both reloaded tensors as a tuple.
(x2, y2)

(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))

### We can even read and write dictionaries which map from strings to tensors

In [16]:
# Map string keys to tensors. A dict literal needs `key: value` pairs; writing
# {'x', x, 'y', y} with commas only would build a set of four unrelated items.
mydict = {'x': x, 'y': y}
torch.save(mydict, 'mydict')
# Reload the dictionary from disk and display it to confirm the round trip.
mydict2 = torch.load('mydict')
mydict2

{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}

## Loading and Saving Model Parameters

Of course, while loading and saving individual tensors is very useful, loading and saving entire models this way would be much more tedious. PyTorch offers built-in functionality to save and load entire networks. Note that this saves only the _parameters_ of these networks, not the entire model. Models may contain arbitrary code, so we have to write separate code to specify the model architecture and then load the parameters in from disk.

In [17]:
class MLP(nn.Module):
    """Multilayer perceptron with one ReLU hidden layer.

    The hidden layer has 256 units and the output layer has 10. Both are
    lazy linear layers, so no input width is given at construction time.
    """

    def __init__(self):
        super().__init__()
        # The attribute names become the prefixes of the state_dict keys
        # (e.g. 'hidden.weight'), so they must stay stable across save/load.
        self.hidden = nn.LazyLinear(256)
        self.output = nn.LazyLinear(10)

    def forward(self, x):
        """Apply the hidden layer, ReLU, then the output layer to ``x``."""
        hidden_activations = F.relu(self.hidden(x))
        return self.output(hidden_activations)


# Build the network and push a random batch (2 examples, 20 features) through it.
net = MLP()
X = torch.randn(2, 20)
Y = net(X)



We can access the current state of our MLP with `state_dict()`.

In [18]:
# Display the ordered mapping from parameter names (e.g. 'hidden.weight') to their tensors.
net.state_dict()

OrderedDict([('hidden.weight',
              tensor([[-0.0749,  0.1870,  0.2081,  ..., -0.1018,  0.0288, -0.1384],
                      [ 0.0284, -0.0653,  0.0959,  ..., -0.0086,  0.2058, -0.1934],
                      [ 0.1021,  0.1825,  0.0777,  ...,  0.1903,  0.0897, -0.0154],
                      ...,
                      [-0.1280, -0.2099, -0.0647,  ..., -0.1688,  0.1736,  0.2047],
                      [-0.0763,  0.0649, -0.1289,  ..., -0.1129,  0.0253,  0.2184],
                      [-0.0741, -0.1477, -0.1806,  ..., -0.1244, -0.0063, -0.1170]])),
             ('hidden.bias',
              tensor([ 0.0843, -0.1556, -0.1968, -0.0914,  0.0962,  0.0352, -0.0317, -0.1255,
                      -0.1528, -0.1287, -0.1293, -0.1812,  0.0312,  0.0436,  0.0065,  0.1241,
                       0.0789,  0.0033,  0.0422,  0.0144,  0.2108,  0.1877,  0.0428, -0.1436,
                      -0.1105, -0.0628,  0.1512, -0.1089, -0.1569, -0.0309,  0.1342, -0.1205,
                       0.1990,

In [19]:
# We can write this state dict to disk with torch.save
# (the OrderedDict returned by net.state_dict() is serialized to the file 'mlp.params').

torch.save(net.state_dict(), 'mlp.params')

To recover the model using this state dict, we instantiate a clone of the model, but rather than initializing the parameters randomly, we read in the parameters stored in the file.

In [20]:
# Instantiate a fresh MLP, then overwrite its parameters with those stored in 'mlp.params'.
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))
# Switch the clone to evaluation mode; the module it returns is what the notebook displays.
clone.eval()

MLP(
  (hidden): LazyLinear(in_features=0, out_features=256, bias=True)
  (output): LazyLinear(in_features=0, out_features=10, bias=True)
)

In [21]:
# To verify that we have, in fact, loaded the correct model...
# Feed the same input X to the clone and compare its output elementwise with the original Y.

Y_clone = clone(X)
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])