In [2]:
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn

In [3]:
x = [[1,2],[3,4],[5,6],[7,8]]
y = [[3],[7],[11],[15]]

In [4]:
X = torch.tensor(x).float()
Y = torch.tensor(y).float()

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
X = X.to(device)
Y = Y.to(device)

In [7]:
class MyDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x).float()
        self.y = torch.tensor(y).float()
        
    def __getitem__(self, ix):
        return self.x[ix], self.y[ix]
    
    def __len__(self):
        return len(self.x)

In [8]:
ds = MyDataset(X, Y)

  self.x = torch.tensor(x).float()
  self.y = torch.tensor(y).float()


In [9]:
dl = DataLoader(ds, batch_size = 2, shuffle=True)

In [10]:
class MyNeuralNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_to_hidden_layer = nn.Linear(2,8)
        self.hidden_layer_activation = nn.ReLU()
        self.hidden_to_output_layer = nn.Linear(8,1)
    def forward(self, x):
        x = self.input_to_hidden_layer(x)
        x = self.hidden_layer_activation(x)
        x = self.hidden_to_output_layer(x)
        return x

In [11]:
mynet = MyNeuralNet().to(device)
loss_func = nn.MSELoss()
from torch.optim import SGD
opt = SGD(mynet.parameters(), lr=0.001)

In [12]:
import time
loss_history = []
start = time.time()
for _ in range(50):
    for data in dl:
        x, y = data
        opt.zero_grad()
        loss_value = loss_func(mynet(x), y)
        loss_value.backward()
        opt.step()
        loss_history.append(loss_value)
end = time.time()
print(end - start)

0.47753357887268066


In [13]:
val_x = [[10, 11]]

In [14]:
val_x = torch.tensor(val_x).float().to(device)

In [15]:
mynet(val_x)

tensor([[20.3552]], device='cuda:0', grad_fn=<AddmmBackward>)

### Implementing a custom loss function

In [16]:
def mean_squared_error(y_, y):
    loss = (y_ - y)**2
    loss = loss.mean()
    return loss

### Fetching intermediate layer values

In [17]:
input_to_hidden = mynet.input_to_hidden_layer(X)
hidden_activation = mynet.hidden_layer_activation(input_to_hidden)
print(hidden_activation)

tensor([[0.0000, 1.1995, 1.0141, 0.0000, 0.0000, 1.9552, 1.2923, 0.0000],
        [0.0000, 1.7831, 2.4878, 0.0000, 0.0000, 3.8180, 3.4059, 0.0000],
        [0.0000, 2.3666, 3.9615, 0.0000, 0.0000, 5.6808, 5.5196, 0.0000],
        [0.0000, 2.9502, 5.4352, 0.0000, 0.0000, 7.5436, 7.6332, 0.0000]],
       device='cuda:0', grad_fn=<ReluBackward0>)


In [18]:
class neuralnet(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_to_hidden_layer = nn.Linear(2,8)
        self.hidden_layer_activation = nn.ReLU()
        self.hidden_to_output_layer = nn.Linear(8,1)
    def forward(self, x):
        hidden1 = self.input_to_hidden_layer(x)
        hidden2 = self.hidden_layer_activation(hidden1)
        output = self.hidden_to_output_layer(hidden2)
        return output, hidden2

### Sequential methods to build a neural network

In [19]:
model = nn.Sequential(
    nn.Linear(2, 8),
    nn.ReLU(),
    nn.Linear(8, 1)
).to(device)

In [20]:
from torchsummary import summary

In [21]:
summary(model, torch.zeros(1,2))

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [22]:
loss_func = nn.MSELoss()
from torch.optim import SGD
opt = SGD(model.parameters(), lr = 0.001)
import time
loss_history = []
start = time.time()
for _ in range(50):
    for ix, iy in dl:
        opt.zero_grad()
        loss_value = loss_func(model(ix),iy)
        loss_value.backward()
        opt.step()
        loss_history.append(loss_value)
end = time.time()
print(end - start)

0.1034541130065918


In [23]:
val = [[8,9],[10,11],[1.5,2.5]]

In [24]:
model(torch.tensor(val).float().to(device))

tensor([[17.1109],
        [21.1839],
        [ 3.8762]], device='cuda:0', grad_fn=<AddmmBackward>)

### Saving and loading a kernel

In [25]:
model.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.9452,  0.9224],
                      [-0.3630,  0.3519],
                      [-0.2643, -0.0199],
                      [-0.0549, -0.5817],
                      [ 0.2828, -0.3016],
                      [ 0.1808,  0.0842],
                      [ 0.2217, -0.1718],
                      [ 0.4671,  0.0411]], device='cuda:0')),
             ('0.bias',
              tensor([-0.2878, -0.3410,  0.0140,  0.2367, -0.4587, -0.5723, -0.5047, -0.0751],
                     device='cuda:0')),
             ('2.weight',
              tensor([[ 1.0109, -0.1794,  0.1976,  0.2850,  0.0663,  0.0969, -0.2824,  0.2433]],
                     device='cuda:0')),
             ('2.bias', tensor([0.2265], device='cuda:0'))])

In [26]:
torch.save(model.cpu().state_dict(), 'mymodel.pth')

In [27]:
model = nn.Sequential(
            nn.Linear(2, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        ).to(device)

In [28]:
state_dict = torch.load('mymodel.pth')

In [29]:
model.load_state_dict(state_dict)

<All keys matched successfully>

In [30]:
model = model.to(device)

In [31]:
model(torch.tensor(val).float().to(device))

tensor([[17.1109],
        [21.1839],
        [ 3.8762]], device='cuda:0', grad_fn=<AddmmBackward>)