In [1]:
import torch

In [2]:
print(torch.__version__) # to check the pytorch version

2.2.2


In [3]:
print(torch.backends.mps.is_available()) # to check if the mps is available

True


In [4]:
# creating pytorch tensors

# 0-D tensor (a scalar) built from a plain Python int
tensor0d = torch.tensor(1)
print(tensor0d)

# 1-D tensor (a vector) built from a flat list
tensor1d = torch.tensor([1,2,3])
print(tensor1d)

# 2-D tensor (a matrix) built from a list of lists: 2 rows x 3 columns
tensor2d = torch.tensor([[1,2,3],[4,5,6]])
print(tensor2d)

# NOTE(review): despite its name, this nested list has only two levels, so the
# result is a 2-D 3x3 matrix, not a 3-D tensor. A real 3-D tensor needs a third
# level of nesting, e.g. torch.tensor([[[1,2],[3,4]],[[5,6],[7,8]]]).
tensor3d = torch.tensor([[1,2,3],[4,5,6],[7,8,9]])
print(tensor3d)

tensor(1)
tensor([1, 2, 3])
tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])


In [5]:
# to check the datatype of the tensor
tensor0d.dtype

torch.int64

In [6]:
float_tensor = torch.tensor([1.0,2.0])

In [7]:
# when the values are Python ints the default dtype is torch.int64; when they are Python floats the default is torch.float32
print(float_tensor.dtype)

torch.float32


In [8]:
# to change the datatype we use .to function
tensor0d_float = tensor0d.to(torch.float32)
print(tensor0d_float.dtype)

torch.float32


In [9]:
# to get the shape we use .shape
tensor2d.shape

torch.Size([2, 3])

In [10]:
# to change the shape of the tensor we use .reshape; we can also use .view
tensor2d_reshaped = tensor2d.reshape([3,2])

In [11]:
tensor2d_reshaped.shape

torch.Size([3, 2])

In [12]:
# A tensor's .T attribute gives the transpose of a matrix (rows and columns swapped)
m1 = torch.tensor([[1, 2], [3, 4]])
m2 = m1.T

# Print each heading and its matrix on separate lines
print("Orginal matrix", m1, sep="\n")
print("transposed matrix", m2, sep="\n")

Orginal matrix
tensor([[1, 2],
        [3, 4]])
transposed matrix
tensor([[1, 3],
        [2, 4]])


In [13]:
# to do matrix multiplication we can use .matmul and also @
m3 = m2.matmul(m1)
m4 = m2@m1
print(m3, "matrix multiplication using matmul")
print(m4, "matrix multiplication using @")

tensor([[10, 14],
        [14, 20]]) matrix multiplication using matmul
tensor([[10, 14],
        [14, 20]]) matrix multiplication using @


#### Computing Gradients in Torch

In [14]:
import torch.nn.functional as F
from torch.autograd import grad

# One training example: input feature x1 and its true label y
x1 = torch.tensor([1.1])
y = torch.tensor([1.0])

# Trainable parameters; requires_grad=True makes autograd track them
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

# Forward pass: net input -> sigmoid activation -> binary cross-entropy loss
z = x1 * w1 + b
a = z.sigmoid()
loss = F.binary_cross_entropy(input=a, target=y)

# Ask autograd for each gradient explicitly. retain_graph=True keeps the
# computation graph alive so that grad() can be called on it again.
gradient_loss_w1 = grad(outputs=loss, inputs=w1, retain_graph=True)
gradient_loss_b = grad(outputs=loss, inputs=b, retain_graph=True)

In [15]:
print(gradient_loss_w1, " This is the gradient of loss W.R.T w1")
print(gradient_loss_b, " This is the gradient of loss W.R.T b")

(tensor([-0.0898]),)  This is the gradient of loss W.R.T w1
(tensor([-0.0817]),)  This is the gradient of loss W.R.T b


#### In the cell above we requested each gradient manually with `grad`, which is useful for debugging, but PyTorch offers a simpler way: calling `.backward()` on the loss

In [16]:
import torch.nn.functional as F
from torch.autograd import grad

# One training example: input feature x1 and its true label y
x1 = torch.tensor([1.1])
y = torch.tensor([1.0])

# Trainable parameters; requires_grad=True makes autograd track them
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

# Forward pass: net input -> sigmoid activation -> binary cross-entropy loss
z = x1 * w1 + b
a = z.sigmoid()
loss = F.binary_cross_entropy(input=a, target=y)

# backward() computes the gradient of the loss for every tensor created with
# requires_grad=True and stores it in that tensor's .grad attribute
loss.backward()
print(w1.grad, b.grad, sep="\n")

tensor([-0.0898])
tensor([-0.0817])


### Code implementing a classic multilayer perceptron with two hidden layers.

1) We use the torch.nn.Module to build our own architecture.
2) We use the init constructor to define the network layers and forward method to see how the inputs pass and interact.
3) We use the .backward method inside the training loop to calculate the gradients.

In [17]:
class NeuralNetwork(torch.nn.Module):
    """Multilayer perceptron with two hidden layers of 30 and 20 units.

    Args:
        num_inputs: number of input features per example.
        num_outputs: number of output units produced by the last layer.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        nn = torch.nn

        # The modules are created in this order, so they land at indices 0..4
        # of the Sequential container (e.g. self.layers[0] is the first Linear).
        stack = [
            nn.Linear(num_inputs, 30),   # first hidden layer
            nn.ReLU(),
            nn.Linear(30, 20),           # second hidden layer
            nn.ReLU(),
            nn.Linear(20, num_outputs),  # output layer; no activation follows it
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the raw outputs (logits)."""
        return self.layers(x)

In [18]:
model = NeuralNetwork(50,3)

In [19]:
print(model) # this is used to see the summary of the model

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [20]:
# To check the number of trainable parameters

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(num_params)

2213


In [22]:
# NeuralNetwork(
#   (layers): Sequential(
#     (0): Linear(in_features=50, out_features=30, bias=True)
#     (1): ReLU()
#     (2): Linear(in_features=30, out_features=20, bias=True)
#     (3): ReLU()
#     (4): Linear(in_features=20, out_features=3, bias=True)
#   )
# )

# To access the parameters of any layer of the above network
print(model.layers[0].weight)

Parameter containing:
tensor([[ 0.0058,  0.1188,  0.1084,  ..., -0.0280, -0.1022,  0.0612],
        [ 0.0535,  0.0830, -0.0996,  ...,  0.1353, -0.0698,  0.1284],
        [ 0.1357,  0.0864,  0.0097,  ..., -0.1301, -0.0637,  0.0754],
        ...,
        [-0.0945,  0.0780,  0.1143,  ...,  0.0452, -0.1234, -0.1341],
        [ 0.0315, -0.1160, -0.0495,  ...,  0.0477,  0.0805,  0.0192],
        [ 0.0871, -0.0545,  0.1396,  ..., -0.0317, -0.1230,  0.0414]],
       requires_grad=True)


In [23]:
print(model.layers[0].weight.shape)

torch.Size([30, 50])


In [24]:
# For reproducibility purposes we can use manual_seed

torch.manual_seed(123)
model = NeuralNetwork(50,3)
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=True)


In [25]:
x = torch.rand((1,50))
out = model(x)
print(out)

tensor([[-0.1670,  0.1001, -0.1219]], grad_fn=<AddmmBackward0>)


In [27]:
# We wrap inference in torch.no_grad() because tracking gradients slows the process down. It tells PyTorch not to keep track of the
# gradients, which makes inference faster.

with torch.no_grad():
    out = model(x)
print(out)

tensor([[-0.1670,  0.1001, -0.1219]])


In [28]:
# we use softmax to turn the logits into class-membership probabilities

with torch.no_grad():
    out = torch.softmax(model(x), dim=1)
print(out)

tensor([[0.2983, 0.3896, 0.3121]])


## Setting up Efficient Data Loaders
1) The custom dataset class is used to instantiate objects that define how each data record is loaded
2) The dataloader class is used to assemble and shuffle the data into batches etc.

In [29]:
# Creating a toy dataset
# Training features: 5 examples (rows), 2 feature values each
x_train = torch.tensor([
    [-1.2,3.1],
    [-0.9,2.9],
    [-0.5,2.6],
    [2.3,-1.1],
    [2.7,-1.5]
])

# Training labels: one integer class id (0 or 1) per row of x_train
y_train = torch.tensor([0,0,0,1,1])

# Test features: 2 examples, same 2-feature layout as x_train
x_test = torch.tensor([
    [-0.8,2.8],
    [2.6,-1.6]
])

# Test labels: one integer class id per row of x_test
y_test = torch.tensor([0,1])

In [32]:
# Custom dataset class
from torch.utils.data import Dataset

class ToyDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        x: tensor of features, indexed by example along the first dimension.
        y: tensor of labels, one entry per example in ``x``.
    """

    def __init__(self,x,y):
        self.features = x
        self.labels = y

    def __getitem__(self,index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        one_x = self.features[index]
        # Fix: the label must come from self.labels; the previous code read
        # self.features again, so every sample's target was its own features.
        one_y = self.labels[index]
        return one_x, one_y

    def __len__(self):
        """Return the number of examples, taken from the label tensor."""
        return self.labels.shape[0]

train_ds = ToyDataset(x_train, y_train)
test_ds = ToyDataset(x_test, y_test)