# **PyTorch Refersher**
---

In [12]:
%pip install torch torchvision

Note: you may need to restart the kernel to use updated packages.


In [13]:
import torch
import numpy as np

In [14]:
tensor = torch.tensor([1,2,3], dtype=torch.float32)
print(tensor)

tensor([1., 2., 3.])


In [15]:
tensor = torch.tensor([[1,0],[0,1]])
print(tensor)

tensor([[1, 0],
        [0, 1]])


In [16]:
np_array = np.array([1,2,3])
print(np_array)
np_array = np.array([1,2,3], dtype=np.float32)
print(np_array)
np_array = np.array([[1,2,3],[4,5,6]])
print(np_array)


[1 2 3]
[1. 2. 3.]
[[1 2 3]
 [4 5 6]]


In [17]:
tensor= torch.from_numpy(np_array)
print(tensor)

tensor([[1, 2, 3],
        [4, 5, 6]])


In [18]:
torch.randint(1,3,[1,2])

tensor([[1, 2]])

## Summary

- ones, zeroes, rand, randn, randint are tensor generation operations.
- randn generates from a Z-distribution.
- randint requires a high, low, then an array shape in a list form.
- rand generates from a uniform distribution.
---

In [19]:
a = torch.ones(1,3, dtype=torch.float32)
b = torch.tensor([1,2,3], dtype=torch.float32)

In [20]:
print(a+b)
print(a*b)
print(a/b)
print(a%b)
print(a**b)
print(a.add(b))
print(a.mul(b))
print(a.div(b))

tensor([[2., 3., 4.]])
tensor([[1., 2., 3.]])
tensor([[1.0000, 0.5000, 0.3333]])
tensor([[0., 1., 1.]])
tensor([[1., 1., 1.]])
tensor([[2., 3., 4.]])
tensor([[1., 2., 3.]])
tensor([[1.0000, 0.5000, 0.3333]])


In [21]:
# print(torch.matmul(a,b))
# print(torch.mm(a,b.transpose(0,1)))


tensor([6.])


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

- operator between two operands seem to behave as element-wise operations for each tensor position.
- $a*b$ is equivalent to $a.mul(b)$
---

## Autograd
---

In [22]:
x = torch.tensor([4.0], requires_grad=True)

y=x**2
z=y*2 + 5
z.backward()
print(x.grad)

tensor([16.])


## Neural Network Implementation
---

In [23]:
import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.layer1 = nn.Linear(4,3)
        self.layer2 = nn.Linear(3,5)
        self.layer3 = nn.Linear(5,1)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = self.layer3(x)
        return x

In [24]:
model = NeuralNetwork()
print(model)

NeuralNetwork(
  (layer1): Linear(in_features=4, out_features=3, bias=True)
  (layer2): Linear(in_features=3, out_features=5, bias=True)
  (layer3): Linear(in_features=5, out_features=1, bias=True)
)


In [25]:
x = torch.randn(2,4)
print(x)
print(model(x))

tensor([[-0.2436, -0.1036, -1.8888, -0.0354],
        [-0.8344, -1.0803, -1.8193,  0.0855]])
tensor([[-0.3976],
        [-0.2277]], grad_fn=<AddmmBackward0>)


## Training Loop

---

In [26]:
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [27]:
from sklearn.model_selection import train_test_split

x = torch.randn(500,4)
y = torch.randn(500,1)

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42)

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

torch.Size([400, 4]) torch.Size([100, 4]) torch.Size([400, 1]) torch.Size([100, 1])


In [28]:


criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr= 0.001)

for epoch in range(10000):
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")




Epoch 0, Loss: 1.0180182456970215
Epoch 10, Loss: 1.0133273601531982
Epoch 20, Loss: 1.0089972019195557
Epoch 30, Loss: 1.004998803138733
Epoch 40, Loss: 1.0013065338134766
Epoch 50, Loss: 0.9978926777839661
Epoch 60, Loss: 0.9947355389595032
Epoch 70, Loss: 0.9918051362037659
Epoch 80, Loss: 0.989088237285614
Epoch 90, Loss: 0.986575186252594
Epoch 100, Loss: 0.9842469692230225
Epoch 110, Loss: 0.9820797443389893
Epoch 120, Loss: 0.9800668358802795
Epoch 130, Loss: 0.9781973361968994
Epoch 140, Loss: 0.976460874080658
Epoch 150, Loss: 0.9748502969741821
Epoch 160, Loss: 0.9733507037162781
Epoch 170, Loss: 0.9719477891921997
Epoch 180, Loss: 0.9706411957740784
Epoch 190, Loss: 0.9694241285324097
Epoch 200, Loss: 0.9682917594909668
Epoch 210, Loss: 0.9672366380691528
Epoch 220, Loss: 0.9662511944770813
Epoch 230, Loss: 0.9653306603431702
Epoch 240, Loss: 0.9644708037376404
Epoch 250, Loss: 0.9636675119400024
Epoch 260, Loss: 0.9629162549972534
Epoch 270, Loss: 0.9622129797935486
Epoch 2

In [29]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


## Dataset and Dataloader
---

In [30]:
import torch
import torch.nn.functional as F
import torch.optim as optimizer
from torch.utils.data import Dataset, DataLoader
import numpy

In [31]:
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [32]:
X = torch.rand(1000,5)
y = torch.rand(1000,1)

dataset = CustomDataset(X, y)

In [33]:
for it in iter(dataset):
    print(it)

(tensor([0.9352, 0.8058, 0.6995, 0.6886, 0.1498]), tensor([0.7472]))
(tensor([0.0278, 0.9181, 0.4171, 0.7009, 0.9337]), tensor([0.1932]))
(tensor([0.5800, 0.6215, 0.7900, 0.2085, 0.6403]), tensor([0.1213]))
(tensor([0.9951, 0.1208, 0.9619, 0.8091, 0.5377]), tensor([0.3665]))
(tensor([0.9961, 0.9775, 0.3476, 0.7347, 0.4454]), tensor([0.7858]))
(tensor([0.4094, 0.0169, 0.8631, 0.6576, 0.4155]), tensor([0.9302]))
(tensor([0.9747, 0.8060, 0.1444, 0.2591, 0.6852]), tensor([0.1800]))
(tensor([0.8186, 0.1517, 0.6050, 0.3102, 0.3166]), tensor([0.4961]))
(tensor([0.0658, 0.1215, 0.2336, 0.9514, 0.2732]), tensor([0.5088]))
(tensor([0.1657, 0.7812, 0.8246, 0.2526, 0.8791]), tensor([0.3968]))
(tensor([0.0301, 0.1723, 0.5915, 0.0761, 0.8964]), tensor([0.1282]))
(tensor([0.3539, 0.0018, 0.7173, 0.6396, 0.7519]), tensor([0.5899]))
(tensor([0.8841, 0.4965, 0.2675, 0.9013, 0.2321]), tensor([0.4604]))
(tensor([0.0635, 0.1213, 0.0100, 0.1265, 0.9803]), tensor([0.4180]))
(tensor([0.3877, 0.4248, 0.7723, 0

In [34]:
from torchvision import datasets, transforms

In [35]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])

mnist_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform
)

In [36]:
print(mnist_dataset)

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )


In [37]:
BATCH_SIZE = 32
train_loader = DataLoader(
    dataset=mnist_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    drop_last=False
)

In [38]:
for batch_id, (X, y) in enumerate(train_loader):
    print(f"Batch: {batch_id}")
    print(f"Input size: {X.shape}")
    print(f"Target: {y.shape}")

Batch: 0
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 1
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 2
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 3
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 4
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 5
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 6
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 7
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 8
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 9
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 10
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 11
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 12
Input size: torch.Size([32, 1, 28, 28])
Target: torch.Size([32])
Batch: 13
Input size: torch.Size([3

In [39]:
from torch.utils.data import random_split

total_size = len(mnist_dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

print(f"Total Size: {total_size}, Train Size: {train_size}, Test Size: {test_size}")

Total Size: 60000, Train Size: 48000, Test Size: 12000


In [40]:
train_dataset, test_dataset = random_split(mnist_dataset, [train_size, test_size])

train_loader = DataLoader(
    train_dataset, 
    batch_size=BATCH_SIZE,
    shuffle=True
    )

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False
)

In [41]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [42]:
class NN_Custom(nn.Module):
    def __init__(self):
        super(NN_Custom, self).__init__()
        self.layer1 = nn.Conv2d(1, 16, (3, 3))  # 1 input channel, 16 output channels
        self.pool = nn.AdaptiveMaxPool2d((1,1))
        self.flatten = nn.Flatten()

        x = torch.randn(1,1,28,28)
        x = self.pool(self.layer1(x))
        x = self.flatten(x)
        print(x.shape[1])
        self.layer2 = nn.Linear(x.shape[1], 10)


        # self.layer2 = nn.Linear(16 * 26 * 26, 10)  # Adjusted for output size after conv layer

    def forward(self, x):
        x = self.layer1(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.layer2(x)
        return x


In [43]:
model = NN_Custom()

16


In [44]:
model.forward(torch.randn(1, 1,28,28))

tensor([[-0.3764, -0.2476, -1.5294, -0.9001,  1.5609,  0.4619, -2.3081,  0.5959,
          1.0880,  0.4522]], grad_fn=<AddmmBackward0>)

In [51]:
optimizer = optim.SGD(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [57]:
NUM_EPOCHS = 30

for epoch in range(NUM_EPOCHS):
    model.train()
    print(f"Epoch {epoch} in progress...")
    train_loss = 0
    for batch_id, (X, y) in enumerate(train_loader):
        # print(X.shape, y.shape)
        y_pred = model.forward(X)
        loss = criterion(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    print(f"Train Loss is {train_loss/len(train_loader)}")

    model.eval()

    test_loss = 0
    predictions = []
    for (X,y) in test_loader:
        y_t = model.forward(X)
        predictions.append(y_t)
        loss = criterion(y_t, y)
        
        test_loss += loss.item()

    print(f"Test Loss is {test_loss/len(test_loader)}\n")







Epoch 0 in progress...
Train Loss is 1.3865432147979737
Test Loss is 1.3987116117477416

Epoch 1 in progress...
Train Loss is 1.3823703635931015
Test Loss is 1.394538322766622

Epoch 2 in progress...
Train Loss is 1.3784174979130428
Test Loss is 1.3889789582888286

Epoch 3 in progress...
Train Loss is 1.3744904791514079
Test Loss is 1.385541622797648

Epoch 4 in progress...
Train Loss is 1.370820561726888
Test Loss is 1.3839726723035177

Epoch 5 in progress...
Train Loss is 1.3670745839277902
Test Loss is 1.3796058448155721

Epoch 6 in progress...
Train Loss is 1.3635107872883478
Test Loss is 1.375654894987742

Epoch 7 in progress...
Train Loss is 1.3604207207759222
Test Loss is 1.3726336534818013

Epoch 8 in progress...
Train Loss is 1.356920174519221
Test Loss is 1.368545304775238

Epoch 9 in progress...
Train Loss is 1.3536842449903488
Test Loss is 1.3649768125216166

Epoch 10 in progress...
Train Loss is 1.3502974259058635
Test Loss is 1.3642506078084309

Epoch 11 in progress...
Tr

This covers the fundamental concepts of PyTorch. 

Some next topics:
- Dataset and DataLoader
- More complex neural network architectures
- Transfer learning
- Custom loss functions
- Model saving and loading
- Batch processing
- Regularization techniques
