<a href="https://colab.research.google.com/github/lucib3196/llm-sandbox/blob/main/PyTorch_Intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

## Tensor Creation

In [8]:
import torch
# Build tensors of rank 0 through 3 from (nested) Python integer literals.
tensor0d = torch.tensor(1)  # scalar
tensor1d = torch.tensor([1, 2, 3])  # vector
tensor2d = torch.tensor(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)  # matrix
tensor3d = torch.tensor(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[7, 8, 9], [10, 11, 12]],
    ]
)  # two stacked matrices

# Show which dtype PyTorch picked for the integer input.
print(tensor3d.dtype)

torch.int64


## Logistic Regression with PyTorch

In [16]:
import torch.nn.functional as F
from torch.autograd import grad

# One training example for a single-feature unit.
y = torch.tensor([1.0])   # true label
x1 = torch.tensor([1.1])  # input feature

# Trainable parameters; requires_grad=True makes autograd track them.
w1 = torch.tensor([2.2], requires_grad=True)  # weight
b = torch.tensor([0.0], requires_grad=True)   # bias

# Forward pass: net input -> sigmoid activation (the output) -> binary cross-entropy.
z = w1 * x1 + b
a = z.sigmoid()
loss = F.binary_cross_entropy(input=a, target=y)
print(loss)

# Differentiate the loss with respect to each parameter separately.
# retain_graph=True keeps the graph alive so grad() can be called again.
grad_L_w1 = grad(loss, w1, retain_graph=True)
grad_L_b = grad(loss, b, retain_graph=True)
print(grad_L_w1, grad_L_b, sep="\n")

tensor(0.0852, grad_fn=<BinaryCrossEntropyBackward0>)
(tensor([-0.0898]),)
(tensor([-0.0817]),)


## Custom Neural Network

In [33]:
class NeuralNetwork(torch.nn.Module):
  """Fully connected feed-forward network with ReLU after every hidden layer.

  The last linear layer has no activation, so ``forward`` returns raw logits.

  Args:
    num_inputs: Size of each input feature vector.
    num_outputs: Number of logits produced per example.
    hidden_sizes: Widths of the hidden layers, in order. The default
      ``(30, 20)`` reproduces the two hidden layers this class used to
      hard-code. An empty sequence yields a single linear layer.
  """

  def __init__(self, num_inputs, num_outputs, hidden_sizes=(30, 20)):
    super().__init__()

    # Chain Linear -> ReLU pairs, feeding each layer's width into the next.
    stack = []
    in_features = num_inputs
    for width in hidden_sizes:
      stack.append(torch.nn.Linear(in_features, width))
      stack.append(torch.nn.ReLU())
      in_features = width

    # Output layer: no activation, callers receive logits.
    stack.append(torch.nn.Linear(in_features, num_outputs))

    self.layers = torch.nn.Sequential(*stack)

  def forward(self, x):
    """Run ``x`` through the layer stack and return the logits."""
    logits = self.layers(x)
    return logits

# Seed the RNG so the randomly initialised weights are reproducible.
torch.manual_seed(69)
model = NeuralNetwork(num_inputs=50, num_outputs=3)
print(model)

# Count every scalar in the parameter tensors that autograd will train.
num_params = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
print(num_params)

# Inspect the first linear layer's weight matrix and its shape.
print(model.layers[0].weight, model.layers[0].weight.shape, sep="\n")

# Forward pass on one random example; no_grad() skips building the autograd graph.
X = torch.randn(1, 50)
print(X)
with torch.no_grad():
  logits = model(X)
print(logits)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)
2213
Parameter containing:
tensor([[ 0.0961,  0.0860, -0.1071,  ...,  0.0042,  0.0751,  0.0123],
        [-0.0347, -0.0550,  0.0501,  ...,  0.0003,  0.0734,  0.0525],
        [-0.0395, -0.0379,  0.1234,  ..., -0.1394, -0.0156,  0.0926],
        ...,
        [ 0.0267, -0.0669, -0.1213,  ..., -0.0425, -0.1011, -0.1052],
        [ 0.1216,  0.0402, -0.0705,  ...,  0.0799,  0.0231, -0.0458],
        [ 0.0467,  0.0275, -0.1199,  ..., -0.0855, -0.0845, -0.1152]],
       requires_grad=True)
torch.Size([30, 50])
tensor([[ 0.1270, -0.4677, -0.8362, -0.7942, -0.0465, -1.4539, -0.4948, -0.4045,
         -0.4724, -1.1489,  1.0102, -1.1374,  1.2405, -0.1574, -0.8747, -1.4901,
         -0.2081,  0.4571, -1.4474, -0.6524,  0.0815,  0.4358, -0.0553,  

## Setting up Efficient Data Loaders

In [34]:
# Toy binary-classification data: two features per row, labels 0 or 1.

# Training split: five examples.
X_train = torch.tensor(
    [
        [-1.2, 3.1],
        [-0.9, 2.9],
        [-0.5, 2.6],
        [2.3, -1.1],
        [2.7, -1.5],
    ]
)
y_train = torch.tensor([0, 0, 0, 1, 1])

# Test split: two examples.
X_test = torch.tensor(
    [
        [-0.8, 2.8],
        [2.6, -1.6],
    ]
)
y_test = torch.tensor([0, 1])


from torch.utils.data import Dataset

class ToyDataset(Dataset):
  """Map-style dataset that pairs each feature row with its label.

  Args:
    X: Indexable collection of feature rows (for example a 2-D tensor).
    y: Labels aligned with ``X``; must expose ``shape[0]``.

  Raises:
    ValueError: If ``X`` and ``y`` hold a different number of examples.
  """

  def __init__(self, X, y):
    # Fail early: a mismatch would otherwise surface later as an IndexError
    # during loading, or silently leave some feature rows unused.
    if len(X) != len(y):
      raise ValueError(
          f"X and y must have the same number of examples, got {len(X)} and {len(y)}"
      )
    self.features = X
    self.labels = y

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    one_x = self.features[index]
    one_y = self.labels[index]
    return one_x, one_y

  def __len__(self):
    """Return the number of examples, taken from the labels."""
    return self.labels.shape[0]

# Wrap each split in a ToyDataset so it can be handed to a DataLoader.
train_ds = ToyDataset(X=X_train, y=y_train)
test_ds = ToyDataset(X=X_test, y=y_test)

In [35]:
from torch.utils.data import DataLoader

# Fix the seed so the shuffled batch order printed below is reproducible.
torch.manual_seed(69)

# Settings shared by both loaders.
loader_kwargs = {"batch_size": 2, "num_workers": 0}

# Only the training loader shuffles; the test loader keeps dataset order.
train_loaded = DataLoader(train_ds, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_ds, shuffle=False, **loader_kwargs)

# Walk through one pass over the training loader and show every batch.
for idx, (x, y) in enumerate(train_loaded):
  print(f"Batch {idx+1}:", x, y)

Batch 1: tensor([[-0.9000,  2.9000],
        [-1.2000,  3.1000]]) tensor([0, 0])
Batch 2: tensor([[ 2.3000, -1.1000],
        [-0.5000,  2.6000]]) tensor([1, 0])
Batch 3: tensor([[ 2.7000, -1.5000]]) tensor([1])


## Training the Neural Network

In [42]:
import torch.nn.functional as F

# Seed the RNG so weight initialisation and batch shuffling are reproducible.
torch.manual_seed(45)
model = NeuralNetwork(2,2)
optimizer = torch.optim.SGD(model.parameters(),lr=0.5)
num_epochs = 3

# Configure tensor printing once, before anything is printed, so every epoch's
# output uses the same (non-scientific) number format.
torch.set_printoptions(sci_mode=False)

num_batches = len(train_loaded)
for epoch in range(num_epochs):
  model.train()
  for batch_idx, (features, labels) in enumerate(train_loaded):
    logits = model(features)

    # cross_entropy takes raw logits and integer class labels.
    loss = F.cross_entropy(logits,labels)
    optimizer.zero_grad() # Reset the gradients left over from the previous step
    loss.backward() # Compute the gradients of the loss w.r.t. the parameters
    optimizer.step() # Update the parameters using those gradients

    # Epoch and batch counters are both shown 1-based.
    print(f"Epoch {epoch+1:03d}/{num_epochs:03d}\n Batch {batch_idx+1}/{num_batches}\n Train Loss {loss.item():.2f}")

  # After each epoch, evaluate on the full training set without tracking gradients.
  model.eval()
  with torch.no_grad():
    outputs = model(X_train)
  print(outputs)
  # Turn logits into class probabilities, then pick the most likely class per row.
  probas = F.softmax(outputs,dim=1)
  print(probas)
  predictions = torch.argmax(probas,dim=1)
  print(predictions)


Epoch 001/003
 Batch 0/3
 Train Loss 0.73
Epoch 001/003
 Batch 1/3
 Train Loss 0.31
Epoch 001/003
 Batch 2/3
 Train Loss 0.06
tensor([[ 2.5766, -2.1943],
        [ 2.3748, -2.0220],
        [ 2.0880, -1.7799],
        [-0.4528,  1.0581],
        [-0.5114,  1.2596]])
tensor([[0.9916, 0.0084],
        [0.9878, 0.0122],
        [0.9795, 0.0205],
        [0.1808, 0.8192],
        [0.1454, 0.8546]])
tensor([0, 0, 0, 1, 1])
Epoch 002/003
 Batch 0/3
 Train Loss 0.01
Epoch 002/003
 Batch 1/3
 Train Loss 0.11
Epoch 002/003
 Batch 2/3
 Train Loss 0.05
tensor([[ 2.8130, -2.4231],
        [ 2.5870, -2.2275],
        [ 2.2664, -1.9539],
        [-1.3864,  2.1732],
        [-1.6052,  2.5702]])
tensor([[0.9947, 0.0053],
        [0.9920, 0.0080],
        [0.9855, 0.0145],
        [0.0277, 0.9723],
        [0.0151, 0.9849]])
tensor([0, 0, 0, 1, 1])
Epoch 003/003
 Batch 0/3
 Train Loss 0.02
Epoch 003/003
 Batch 1/3
 Train Loss 0.01
Epoch 003/003
 Batch 2/3
 Train Loss 0.00
tensor([[ 3.1600, -2.7700],
  