In [1]:
import os
import time

import torch
import torch.nn as nn
from torch.optim import SGD
from torch.utils.data import DataLoader, Dataset
from torchsummary import summary

In [2]:
# Toy regression data: every input row is a pair of consecutive integers and
# the matching target is the sum of that pair.
x = [[first, first + 1] for first in (1, 3, 5, 7)]
y = [[sum(pair)] for pair in x]

In [3]:
# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable from one "Restart & Run All" to the next.
SEED = 0
torch.manual_seed(SEED)

# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing on the first `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
class MyDataset(Dataset):
    """Map-style dataset that keeps paired inputs and targets as float tensors on `device`."""

    def __init__(self, x, y):
        """Convert `x` and `y` to float tensors and move both to the global `device`."""
        features = torch.tensor(x).float()
        targets = torch.tensor(y).float()
        self.x = features.to(device)
        self.y = targets.to(device)

    def __len__(self):
        """Return the number of stored samples (the length of the input tensor)."""
        n_samples = len(self.x)
        return n_samples

    def __getitem__(self, ix):
        """Return the `(input, target)` pair located at index `ix`."""
        sample, target = self.x[ix], self.y[ix]
        return sample, target

In [5]:
# Wrap the toy data in the dataset and serve it in shuffled mini-batches of two samples.
ds = MyDataset(x=x, y=y)
dl = DataLoader(dataset=ds, shuffle=True, batch_size=2)

In [6]:
# Small fully connected network: 2 inputs -> 8 hidden units (ReLU) -> 1 output.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.ReLU(),
    nn.Linear(in_features=8, out_features=1),
)
# Module.to() returns the module itself, so rebinding keeps the same object.
model = model.to(device)

In [7]:
# Train the model with plain SGD on the mean-squared-error loss and time the run.
loss_func = nn.MSELoss()
opt = SGD(model.parameters(), lr=0.001)
loss_history = []  # one Python float per optimisation step

start = time.time()
for epoch in range(50):
    for ix, iy in dl:
        opt.zero_grad()
        loss_value = loss_func(model(ix), iy)
        loss_value.backward()
        opt.step()
        # Store a plain float: appending the tensor itself would keep every
        # step's autograd graph (and its device memory) alive in the list.
        loss_history.append(loss_value.item())
# Read the clock once, after the last step, so `end` is always defined and
# is not reassigned on every batch.
end = time.time()
print(end - start)

1.6520848274230957


# Saving

In [8]:
# Display the trained parameters: state_dict() maps each parameter name
# (e.g. '0.weight') to its tensor, which is what gets saved below.
model.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.1863, -0.1660],
                      [-0.5849,  0.1869],
                      [ 0.6496,  0.5547],
                      [-0.3653, -0.5604],
                      [ 0.5869, -0.5110],
                      [-0.1316,  0.0269],
                      [ 0.4387, -0.2034],
                      [ 0.4441,  0.9580]], device='cuda:0')),
             ('0.bias',
              tensor([ 0.4427,  0.0088, -0.5212,  0.1724, -0.1634,  0.0592, -0.3583,  0.1285],
                     device='cuda:0')),
             ('2.weight',
              tensor([[ 0.2911, -0.1515,  0.5969, -0.2930, -0.1938,  0.2839,  0.0915,  0.8830]],
                     device='cuda:0')),
             ('2.bias', tensor([0.1798], device='cuda:0'))])

In [11]:
# Persist the trained weights twice: once as they are on `device`, once as CPU tensors.
# torch.save does not create missing directories, so make sure the target exists.
os.makedirs("checkpoints", exist_ok=True)
torch.save(model.state_dict(), "checkpoints/mymodel_cuda.pth")

# Copy each tensor to the CPU instead of calling model.to('cpu'): Module.to()
# moves the module in place, which would silently leave `model` on the CPU
# for every cell that runs after this one.
cpu_state_dict = {name: tensor.cpu() for name, tensor in model.state_dict().items()}
torch.save(cpu_state_dict, "checkpoints/mymodel_cpu.pth")

In [12]:
# Save a TorchScript (torch.jit.script) version of the model.
# Module.to() works in place, so move the model onto `device` again before
# scripting in case an earlier cell left it on the CPU; otherwise the saved
# artifact would not match its file name.
torch.jit.save(torch.jit.script(model.to(device)), "checkpoints/mymodel_cuda_jit.pth")

# Loading

In [13]:
# Rebuild the same architecture with freshly initialised weights; the saved
# parameters are loaded into it in the next cell.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.ReLU(),
    nn.Linear(in_features=8, out_features=1),
)
# Module.to() returns the module itself, so rebinding keeps the same object.
model = model.to(device)

In [14]:
# Restore the saved weights into the freshly built network.
# map_location remaps the stored tensors onto `device`, so the checkpoint that
# was written from a CUDA model also loads on a machine without a GPU.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load("checkpoints/mymodel_cuda.pth", map_location=device)
model.load_state_dict(state_dict)
# Inference follows, so switch to evaluation mode. eval() returns the module,
# which keeps the model summary as this cell's displayed output.
model.to(device).eval()

Sequential(
  (0): Linear(in_features=2, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

# Eval

In [15]:
# Inputs that are not part of the training data, used to sanity-check the reloaded model.
val = [
    [8, 9],
    [10, 11],
    [1.5, 2.5],
]

In [16]:
# Run the forward pass without recording an autograd graph: these predictions
# are only displayed, never back-propagated through.
with torch.no_grad():
    preds = model(torch.tensor(val).float().to(device))
preds

tensor([[16.9362],
        [20.8770],
        [ 4.0947]], device='cuda:0', grad_fn=<AddmmBackward0>)