### Move data, model(it has parameters) to GPU - and then everything is in GPU

In [1]:
import torch

import numpy as np
import matplotlib.pyplot as plt

In [2]:
torch.manual_seed(0) #torch seed for random

<torch._C.Generator at 0x7f61f1de7d90>

#### data(blob)

In [3]:
# data

from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

data, labels = make_blobs(n_samples=1000, centers=4, n_features=2, random_state=0)

XX_train, XX_val, Y_train, Y_val = train_test_split(data, labels, 
                                                  stratify=labels, random_state=0)
print(XX_train.shape, XX_val.shape, labels.shape)

(750, 2) (250, 2) (1000,)


#### data as torch tensors

In [4]:
XX_train, Y_train, XX_val, Y_val = map(torch.tensor, (XX_train, Y_train, XX_val, Y_val)) 

# map the function to all and return all. - instead of calling on each one by one.

print(XX_train.shape, Y_train.shape)

torch.Size([750, 2]) torch.Size([750])


In [5]:
XX_train = XX_train.float()
Y_train = Y_train.long()  # as its an index.

### Fit

In [6]:
def fit(XX, YY, model_obj, opt, loss_fn, epochs=1000):
    
    for epoch in range(epochs):
        
        # forward and loss
        loss = loss_fn( model_obj(XX), YY )
        
        # computing gradient
        loss.backward()
        
        # updating parameters
        opt.step()
        opt.zero_grad()

    return loss.item() # return loss after all epochs

#### model

In [7]:
import torch.nn as nn

In [8]:
class ModelClass(nn.Module):
    def __init__(self):
        
        super().__init__()

        self.net = nn.Sequential(
            nn.Linear(2, 2), 
            nn.Sigmoid(), 
            nn.Linear(2, 4), 
            nn.Softmax(1)
        )

    def forward(self, XX):
        return self.net(XX)

In [9]:
model_obj = ModelClass()

#### loss function, optimizer

In [10]:
from torch import optim

import torch.nn.functional as F

In [11]:
# defining loss function
loss_fn = F.cross_entropy

# defining optimizer
opt = optim.SGD(model_obj.parameters(), lr=1)

### Running on GPU

In [12]:
dev =  torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [13]:
dev

device(type='cuda', index=0)

### move data and model to the device. 
thats all !!

In [14]:
# data to gpu
XX_train=XX_train.to(dev)
Y_train=Y_train.to(dev)

In [15]:
model_obj = ModelClass()

# model to gpu
model_obj.to(dev)

ModelClass(
  (net): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=2, out_features=4, bias=True)
    (3): Softmax(dim=1)
  )
)

**model has parameters - moving model to gpu => moves parameters**   

**only that much(above) and everything is in gpu now.**

In [16]:
import time

In [17]:
tic = time.time()
print('Final loss', fit(XX_train, Y_train, model_obj, opt, loss_fn))
toc = time.time()
print('Time taken', toc - tic)

Final loss 1.3824316263198853
Time taken 0.6604752540588379


In [18]:
# a larger model.
# more neurons(parameters)

class LargeModelClass(nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(0)
        self.net = nn.Sequential(
            nn.Linear(2, 1024 * 4), 
            nn.Sigmoid(), 
            nn.Linear(1024 * 4, 4), 
            nn.Softmax(1)
        )

    def forward(self, XX):
        return self.net(XX)

### in CPU

In [19]:
dev = torch.device("cpu")

### move data and model to GPU

In [20]:
XX_train=XX_train.to(dev)
Y_train=Y_train.to(dev)

large_model_obj = LargeModelClass()
large_model_obj.to(dev)

LargeModelClass(
  (net): Sequential(
    (0): Linear(in_features=2, out_features=4096, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=4096, out_features=4, bias=True)
    (3): Softmax(dim=1)
  )
)

In [21]:
tic = time.time()
print('Final loss', fit(XX_train, Y_train, large_model_obj, opt, loss_fn))
toc = time.time()
print('Time taken', toc - tic)

Final loss 1.389022946357727
Time taken 14.862571716308594


### in GPU

In [22]:
dev = torch.device("cuda")

In [23]:
# move data and model to GPU

XX_train=XX_train.to(dev)
Y_train=Y_train.to(dev)

large_model_obj = LargeModelClass()
large_model_obj.to(dev)

LargeModelClass(
  (net): Sequential(
    (0): Linear(in_features=2, out_features=4096, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=4096, out_features=4, bias=True)
    (3): Softmax(dim=1)
  )
)

In [24]:
tic = time.time()
print('Final loss', fit(XX_train, Y_train, large_model_obj, opt, loss_fn))
toc = time.time()
print('Time taken', toc - tic)

Final loss 1.3890228271484375
Time taken 0.8753559589385986


cuda could do it much faster.

powers of 2 - GPU architecture is orgaised that way. better performance if all work distributions are in powers of 2. also matrices allign with the memory. 

**read docs of stuff.**