# Chapter 8 - Introduction to PyTorch

## Installing PyTorch

In [1]:
# Install PyTorch with the %pip magic so the package lands in the environment of the running kernel.
# NOTE(review): the version is not pinned; consider pinning (e.g. torch==2.4.1, the version
# displayed by the next cell) for reproducibility — confirm wheel availability before changing.
%pip install torch

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Confirm the installation: import PyTorch and display its version string as the cell output.
import torch
torch.__version__

'2.4.1+cpu'

## Comparing Tensors with NumPy Arrays

In [3]:
import torch
import numpy as np

# The same 2x2 integer matrix, built once with NumPy and once with PyTorch
np_array = np.array([[1, 2],
                     [3, 4]])
tensor = torch.tensor([[1, 2],
                       [3, 4]])

# Round trip between the two libraries.
# NOTE: torch.from_numpy and Tensor.numpy() are documented to share memory with
# their source instead of copying it, so edits to one side show up on the other.
tensor_from_numpy = torch.from_numpy(np_array)  # NumPy -> PyTorch
numpy_from_tensor = tensor.numpy()              # PyTorch -> NumPy


## Broadcasting

In [4]:
import torch

# Broadcasting: add a length-2 vector to a 2x2 matrix
tensor_a = torch.tensor([[1, 2],
                         [3, 4]])      # shape (2, 2)
tensor_b = torch.tensor([10, 20])      # shape (2,)

# tensor_b is stretched across both rows of tensor_a, so every row gets [10, 20] added
result = tensor_a + tensor_b
print(result)


tensor([[11, 22],
        [13, 24]])


## Slicing and Indexing

In [5]:
# A 3x3 integer matrix to slice and index into
tensor = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Row 0, all columns: slices out the first row
print(tensor[0, :])

# Indexed assignment writes into the tensor in place: overwrite row 0, column 0
tensor[0, 0] = 99
print(tensor)


tensor([1, 2, 3])
tensor([[99,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]])


## GPU Acceleration with CUDA

In [6]:
import torch
import time

# Perform matrix multiplication on CPU
tensor_cpu = torch.randn(10000, 10000)
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
tensor_cpu.matmul(tensor_cpu)
print(f"CPU Time: {time.perf_counter() - start_time} seconds")

# Perform matrix multiplication on GPU (if available)
if torch.cuda.is_available():
    tensor_gpu = tensor_cpu.cuda()

    # Warm-up run: the first CUDA matmul also pays one-time initialization
    # costs that should not be counted as compute time.
    tensor_gpu.matmul(tensor_gpu)

    # CUDA kernels are launched asynchronously: matmul() returns as soon as the
    # work is queued. Synchronize before starting and before stopping the clock
    # so the measurement covers the actual computation, not just the launch.
    torch.cuda.synchronize()
    start_time = time.perf_counter()
    tensor_gpu.matmul(tensor_gpu)
    torch.cuda.synchronize()
    print(f"GPU Time: {time.perf_counter() - start_time} seconds")


CPU Time: 13.120132207870483 seconds


## Multi-GPU Setups

In [7]:
import torch
import torch.nn as nn

# Define a simple neural network model
class SimpleNet(nn.Module):
    """Two-layer fully connected network: 10 inputs -> 50 hidden units -> 2 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=10, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=2)

    def forward(self, x):
        """Apply fc1, a ReLU activation, then fc2, and return the result."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# Build the network
model = SimpleNet()

# Target CUDA when it is available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# With more than one visible GPU, wrap the model in DataParallel
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

# Place the model's parameters on the selected device
model = model.to(device)

# Dummy batch for testing: 64 samples with 10 features each, on the same device
input_data = torch.randn(64, 10).to(device)

# Forward pass (distributed across GPUs if available)
output = model(input_data)
print(output)

tensor([[ 4.8184e-02,  7.7248e-02],
        [ 2.1718e-01,  6.0830e-03],
        [ 2.7460e-01,  1.2461e-01],
        [ 3.7005e-01,  2.7445e-01],
        [ 8.0467e-02,  2.9555e-02],
        [-7.0795e-02, -1.7445e-01],
        [ 3.1169e-02, -9.6348e-02],
        [ 5.9631e-02,  2.7559e-02],
        [ 1.0945e-01,  7.8810e-02],
        [-1.2917e-03,  8.9544e-02],
        [ 1.6125e-01,  5.5902e-05],
        [ 4.8137e-01,  2.7564e-01],
        [ 8.3060e-02,  1.4095e-02],
        [ 1.8755e-01,  4.8861e-01],
        [ 3.0322e-01, -2.5338e-02],
        [-8.9329e-04,  1.5688e-02],
        [-4.6770e-02, -2.8562e-01],
        [ 1.6438e-01,  2.9836e-02],
        [ 2.2067e-01,  8.5723e-02],
        [ 9.8859e-02,  6.8470e-02],
        [ 1.2108e-01, -2.8894e-01],
        [ 2.6080e-01,  5.5530e-02],
        [ 2.6808e-01,  2.9986e-01],
        [ 4.0351e-01,  1.5557e-01],
        [-5.2741e-03, -1.0963e-01],
        [ 2.5599e-01,  2.3856e-01],
        [ 1.7228e-02, -1.3740e-01],
        [ 1.7132e-01,  8.849

# Automatic Differentiation and Gradients
## Using Autograd to Compute Gradients

In [8]:
# y = x^2, so dy/dx = 2x; evaluated at x = 2 the gradient is 4
x = torch.tensor([2.0], requires_grad=True)  # requires_grad=True makes autograd track x
y = x.pow(2)
y.backward()   # backpropagate from y; the result is accumulated into x.grad
print(x.grad)


tensor([4.])


In [9]:
# y is not a scalar, so backward() needs an upstream gradient with y's shape.
# Each element of x.grad is weight * dy/dx = weight * 3x^2:
#   [1.0 * 3, 0.5 * 12, 0.25 * 27] = [3.0, 6.0, 6.75]
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x.pow(3)
y.backward(gradient=torch.tensor([1.0, 0.5, 0.25]))
print(x.grad)

tensor([3.0000, 6.0000, 6.7500])


In [10]:
# NOTE(review): this cell repeats the computation from the previous cell — confirm
# whether a different example was intended here.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = torch.pow(x, 3)  # element-wise cube

# Weight each element's gradient (3x^2) by the matching upstream value
y.backward(torch.tensor([1.0, 0.5, 0.25]))
print(x.grad)


tensor([3.0000, 6.0000, 6.7500])


# Building Neural Networks in PyTorch

## Creating a Simple Neural Network

In [11]:
import torch.nn as nn

class SimpleNet(nn.Module):
    """Two-layer classifier: 4 input features -> 16 hidden neurons -> 3 classes.

    NOTE: this redefines the SimpleNet class from the multi-GPU section
    (10 -> 50 -> 2); from this cell on, the name refers to this version.
    """

    def __init__(self):
        super().__init__()
        # Input layer (4 features) to hidden layer (16 neurons)
        self.fc1 = nn.Linear(in_features=4, out_features=16)
        # Hidden layer to output layer (3 classes)
        self.fc2 = nn.Linear(in_features=16, out_features=3)

    def forward(self, x):
        """Apply fc1, a ReLU activation, then fc2, and return the result."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)


## Simple CNN

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Small CNN: 3x3 convolution -> ReLU -> 2x2 max pooling -> 10-way linear layer.

    The linear layer's input size depends on the spatial size of the images,
    so it is declared with nn.LazyLinear, which infers in_features from the
    first batch it receives. Declaring it in __init__ (instead of creating it
    inside forward) registers fc1 as a submodule from the start, so it is
    included in parameters() and state_dict() and is moved by .to(device)
    even before the first forward pass.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)  # Input: 1 channel, output: 32 channels
        self.pool = nn.MaxPool2d(2, 2)  # Max pooling with a 2x2 window
        self.fc1 = nn.LazyLinear(10)  # in_features is inferred on the first forward pass

    def forward(self, x):
        """Return a (batch, 10) tensor of outputs for a (batch, 1, H, W) input."""
        x = self.pool(F.relu(self.conv1(x)))  # Apply conv, ReLU, and pooling
        x = x.view(x.size(0), -1)  # Flatten everything except the batch dimension
        return self.fc1(x)  # Pass through fully connected layer

# Build the CNN
model = SimpleCNN()

# Random stand-in for a batch of images (batch size: 8, channels: 1, height: 28, width: 28)
input_data = torch.randn((8, 1, 28, 28))

# Run the batch through the network
output = model(input_data)
print(output.shape)  # Expected: (8, 10), where 10 is the number of output classes

torch.Size([8, 10])
