In [None]:
# Notebook introduces the classes and function that is used in the models

In [6]:
import torch.nn.functional as F
import torch
import torch.nn as nn

In [10]:
te1 = torch.tensor([1, 2, 3,], dtype=torch.float32)
te1

tensor([1., 2., 3.])

In [8]:
lin1 = nn.Linear(3, 1, bias=False)
lin2 = nn.Linear(1, 1, bias=False)  # these create linear functions

In [12]:
out1 = lin1(te1)
out1

tensor([-0.7392], grad_fn=<SqueezeBackward4>)

In [13]:
out2 = lin2(out1)
out2

tensor([0.0703], grad_fn=<SqueezeBackward4>)

In [17]:
for x in lin1.parameters():
    print(x)

Parameter containing:
tensor([[-0.2989, -0.5447,  0.2164]], requires_grad=True)


In [18]:
s_out = F.softmax(te1)
s_out

  s_out = F.softmax(te1)


tensor([0.0900, 0.2447, 0.6652])

In [19]:
# softmax sums to 1.0
s_out.sum()

tensor(1.)

In [20]:
# Here is how embedding works. Small scale intro
vocab_size = 80
emb_dim = 6

r_in = nn.Embedding(num_embeddings=vocab_size,
                    embedding_dim=emb_dim)
new_te1 = torch.LongTensor([12, 8, 5, 0])
eout = r_in(new_te1)
eout.shape

torch.Size([4, 6])

In [21]:
print(eout)

tensor([[ 0.8331,  0.4172,  1.4355,  0.0130,  2.0639,  0.6350],
        [-1.0837, -1.9262, -1.4662,  0.1357,  0.9624,  0.7248],
        [-0.8413,  0.6144,  1.0747, -0.8504, -0.4385, -0.8482],
        [-1.1507, -0.0981, -0.5113,  0.7938,  0.9766, -1.2716]],
       grad_fn=<EmbeddingBackward0>)


In [22]:
# detour to matrix multiplication

a = torch.tensor([[1, 2], [2, 3], [3, 4], [4, 5]])
b = torch.tensor([[7, 6, 2], [6, 3, 4]])

print(a @ b)  # @ is short for matmul

tensor([[19, 12, 10],
        [32, 21, 16],
        [45, 30, 22],
        [58, 39, 28]])


In [23]:
# detour to playing with shapes with vieow

input = torch.rand((2, 3, 8))

n, r, c = input.shape
print(n, r, c)
output = input.view(n * r, c) 
print(output.shape)

2 3 8
torch.Size([6, 8])


In [38]:
# Continue further experiment with view

b = torch.randint(1, 75, size=(2, 8))
d = torch.randint(1, 75, size=(2, 8))

In [33]:
# playing with type casting, following will throw error 

b = torch.tensor(torch.randint(1, 75, size=(2, 8)), dtype=torch.float32)
d = torch.tensor(torch.randint(1, 75, size=(2, 8)), dtype=torch.float32) 

  b = torch.tensor(torch.randint(1, 75, size=(2, 8)), dtype=torch.float32)
  d = torch.tensor(torch.randint(1, 75, size=(2, 8)), dtype=torch.float32)


In [50]:
# tensor type casting
bf = b.float() 
bf.dtype
df = d.float()

In [52]:
df.view(2 * 8)

tensor([55.,  2., 58., 70., 28., 10., 58., 56., 40., 55., 44., 18., 37., 48.,
        63., 27.])

In [53]:
# cross-entropy 
ce = F.cross_entropy(bf.view(16), df.view(16))
ce

tensor(20662.5156)

In [56]:
from sklearn import datasets

wineds = datasets.load_wine()
data = wineds['data']
data.shape
tgt = wineds['target']
tgt.shape
# take a look at the data, and try to access parts of it using indexing

(178,)

In [57]:
# Before diving into creating Torch datasets, dive into Numpy to Tensor conversion
import numpy as np

x_data = np.linspace(50, 686, 250)
x_data.shape

(250,)

In [62]:
# Moving from numpy to tensor using from_numpy

x_tensor = torch.from_numpy(x_data.astype(np.float32))
x_tensor.shape
x_tensor.dtype

torch.float32

In [63]:
# creating a loader class by inheriting the Dataset
from torch.utils.data import Dataset, DataLoader

class WineDataset(Dataset):
    def __init__(self, x_data, y_data):
        self.x = torch.from_numpy(x_data.astype(np.float32))
        self.y = torch.from_numpy(y_data.astype(np.float32))
        self.n_samples = x_data.shape[0]
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.n_samples

In [64]:
wine_ds = WineDataset(x_data=data,
                      y_data=tgt)

In [66]:
wine_ds[2]

(tensor([1.3160e+01, 2.3600e+00, 2.6700e+00, 1.8600e+01, 1.0100e+02, 2.8000e+00,
         3.2400e+00, 3.0000e-01, 2.8100e+00, 5.6800e+00, 1.0300e+00, 3.1700e+00,
         1.1850e+03]),
 tensor(0.))

In [67]:
# Dataloader can accept the Dataset object

wine_dl = DataLoader(wine_ds, shuffle=True, batch_size=3)
dloader_iter = iter(wine_dl)

In [68]:
next(dloader_iter)

[tensor([[1.3900e+01, 1.6800e+00, 2.1200e+00, 1.6000e+01, 1.0100e+02, 3.1000e+00,
          3.3900e+00, 2.1000e-01, 2.1400e+00, 6.1000e+00, 9.1000e-01, 3.3300e+00,
          9.8500e+02],
         [1.3030e+01, 9.0000e-01, 1.7100e+00, 1.6000e+01, 8.6000e+01, 1.9500e+00,
          2.0300e+00, 2.4000e-01, 1.4600e+00, 4.6000e+00, 1.1900e+00, 2.4800e+00,
          3.9200e+02],
         [1.3450e+01, 3.7000e+00, 2.6000e+00, 2.3000e+01, 1.1100e+02, 1.7000e+00,
          9.2000e-01, 4.3000e-01, 1.4600e+00, 1.0680e+01, 8.5000e-01, 1.5600e+00,
          6.9500e+02]]),
 tensor([0., 1., 2.])]

In [None]:
# Need to learn the Pytorch Semantics, (not syntax)

# Each tensor has at least one dimension.
# When iterating over the dimension sizes, start at 
# the trailing dimension, the dimension sizes must 
# either be equal, one of them is 1, or one of them
# does not exist.

x = torch.empty(5, 7, 8)
y = torch.empty(5, 7, 8)
# tensor of same shapes are broadcastable

x = torch.empty((0, ))
x = torch.empty(2, 2)
# cannot broadcast as one of the dimension is not 1

a = torch.rand((3, 3, 1))
b = torch.rand((3, 1))

# 1st trailing dimension: both have size 1
# 2nd trailing dimension: a size == b size
# 3rd trailing dimension: b dimension doesn't exist 



In [None]:
# When it comes to Matrix Multiplication, there are following pairs one has to identify
# vector * vector, matrix * vector, batched mat * vector(bc) 
# batched matrix * batched matrix, batched matrix * broadcasted matrix
# A vector is of shape 1 row n col

In [69]:
# vector X vector is acceptable, and return dot product
tensor2 = torch.rand(3)  # Size([3]) 

tensor1 = torch.tensor([5, 6, 8], dtype=torch.float32)  # Size([3])

tensor1 @ tensor2  # returns dot-pdt

tensor(13.4056)

In [71]:
matensor = torch.tensor([[1, 3, 4,],
                         [5, 7, 6,],
                         [9, 2, 8]],
                         dtype=torch.float32)

tensor2 @ matensor  # 1D to 2D is processed

tensor([10.0336,  6.4768, 12.4093])

In [72]:
tensor5 = torch.rand(3, 3, 4)
tensor6 = torch.rand(4)

tensor5 @ tensor6 # batched tensor broadcasts over vectors

tensor([[0.8100, 0.9030, 0.4469],
        [1.0553, 1.0003, 1.0467],
        [1.1010, 0.9913, 1.0810]])

In [76]:
tensor7 = torch.rand(4, 3, 3)

tensor8 = torch.rand(4, 3, 3)

(tensor7 @ tensor8).shape

torch.Size([4, 3, 3])

In [80]:
# returning to work on the Network Layers

flaten = nn.Flatten(start_dim=1, end_dim=-1)
print(tensor8.shape)
x = flaten(tensor8)
x.shape

torch.Size([4, 3, 3])


torch.Size([4, 9])

In [81]:
# returning to work on the Network Layers

flaten = nn.Flatten(start_dim=0, end_dim=-1)
print(tensor8.shape)
x = flaten(tensor8)
x.shape

torch.Size([4, 3, 3])


torch.Size([36])

In [86]:
# returning to work on the Network Layers

flaten = nn.Flatten(start_dim=0, end_dim=-2)
print(tensor8.shape)
x = flaten(tensor8)
x.shape

# the pattern tested with various dimensions of tensors
# the intuition point is the start-dim that tells from which 
# dimension tensor has to be flattened.

torch.Size([4, 3, 3])


torch.Size([12, 3])

In [97]:
flaten = nn.Flatten(start_dim=1, end_dim=-1)
# -1 means the final value is calculated by torch
test_data2 = torch.rand(5, 28, 28)
test_out = flaten(test_data2)  # Size([5, 784])

print(test_out.shape)  # Size([5, 784])

torch.Size([5, 784])


In [98]:
l1 = nn.Linear(784, 2)

In [100]:
l1(test_out)

tensor([[ 0.1134, -0.7086],
        [-0.2925, -0.5162],
        [-0.3797, -0.6359],
        [-0.3133, -0.6307],
        [-0.0352, -0.6771]], grad_fn=<AddmmBackward0>)

In [104]:
r1 = nn.ReLU()
hi2 = r1(l1(test_out))
hi2

tensor([[0.1134, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000]], grad_fn=<ReluBackward0>)

In [105]:
# Using Sequential

seq2 = nn.Sequential(
    flaten,
    nn.Linear(784, 512),
    r1,
    nn.Linear(512, 512),
    r1,
    nn.Linear(512, 10)
)

In [107]:
print(seq2(test_out).shape)
seq2(test_out)

torch.Size([5, 10])


tensor([[ 0.0083, -0.0336,  0.0222,  0.0419, -0.0087, -0.0420,  0.0052,  0.0352,
         -0.0825,  0.0571],
        [-0.0334, -0.0558,  0.0160,  0.0594, -0.0261, -0.0602, -0.0141,  0.0507,
         -0.0760,  0.0447],
        [-0.0319, -0.0464,  0.0377,  0.0456, -0.0057, -0.0249, -0.0117,  0.0662,
         -0.0425,  0.0201],
        [ 0.0109, -0.0131,  0.0319, -0.0034, -0.0013, -0.0178, -0.0502,  0.0193,
         -0.0444,  0.0038],
        [-0.0253, -0.0313, -0.0276,  0.0212, -0.0137, -0.0233,  0.0532,  0.0746,
         -0.1118,  0.0811]], grad_fn=<AddmmBackward0>)

In [108]:
for name, params in seq2.named_parameters():
    print(f'Layer: {name} | size: {params.size()} | vals : {params[:2]}')

Layer: 1.weight | size: torch.Size([512, 784]) | vals : tensor([[ 0.0016, -0.0325,  0.0260,  ...,  0.0055,  0.0083, -0.0276],
        [-0.0063,  0.0069,  0.0278,  ..., -0.0162, -0.0172,  0.0195]],
       grad_fn=<SliceBackward0>)
Layer: 1.bias | size: torch.Size([512]) | vals : tensor([-0.0241, -0.0257], grad_fn=<SliceBackward0>)
Layer: 3.weight | size: torch.Size([512, 512]) | vals : tensor([[-0.0212, -0.0266,  0.0416,  ..., -0.0034, -0.0057,  0.0126],
        [-0.0159,  0.0261, -0.0367,  ..., -0.0029, -0.0320, -0.0329]],
       grad_fn=<SliceBackward0>)
Layer: 3.bias | size: torch.Size([512]) | vals : tensor([0.0085, 0.0295], grad_fn=<SliceBackward0>)
Layer: 5.weight | size: torch.Size([10, 512]) | vals : tensor([[ 0.0103, -0.0404, -0.0303,  ..., -0.0007,  0.0362, -0.0076],
        [-0.0039, -0.0085,  0.0257,  ..., -0.0183, -0.0011,  0.0262]],
       grad_fn=<SliceBackward0>)
Layer: 5.bias | size: torch.Size([10]) | vals : tensor([ 0.0062, -0.0389], grad_fn=<SliceBackward0>)


In [109]:
# Basic model (need to practice)
x = torch.ones(5)
y = torch.zeros(3)

w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

z = torch.matmul(x, w) + b

loss = F.binary_cross_entropy_with_logits(z, y)
print(loss)

print(f"The z grad fn: {z.grad_fn}")
print(f"The loss grad fn: {loss.grad_fn}")

with torch.no_grad():
    a = torch.matmul(x, w) + b
print(z.requires_grad)  # returns False

f = torch.matmul(x, w) + b
f_det = f.detach()
print(f_det.requires_grad)  # returns false


tensor(0.3232, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
The z grad fn: <AddBackward0 object at 0x0000027BB93AF670>
The loss grad fn: <BinaryCrossEntropyWithLogitsBackward0 object at 0x0000027BB93AF5B0>
True
False
