# 🚀 PyTorch Intro

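This notebook starts with tensor basics (creating tensors, indexing, and simple operations) and then dives into broadcasting. Broadcasting is one of PyTorch's most powerful features - it lets you do operations between tensors of different shapes automatically. Let's explore this step by step!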

In [None]:
import torch

In [None]:
# A 27x27 grid of 32-bit integer zeros.
N = torch.zeros(27, 27, dtype=torch.int32)

# Summarise N: one "label: value" line per property.
print("Our tensor N:")
print("\n".join(
    f"{label}: {value}"
    for label, value in (
        ("Shape", N.shape),              # size along each dimension
        ("Data type", N.dtype),          # element type stored in the tensor
        ("Device", N.device),            # where the data lives (CPU vs GPU)
        ("Total elements", N.numel()),   # element count across all dimensions
    )
))

# Preview only the top-left corner (first 5 rows and columns).
print("\nFirst 5x5 portion of N:", N[:5, :5], sep="\n")

In [None]:
# A tour of the most common tensor constructors.
print("=== Creating Different Tensors ===")

# 1. Build one tensor per constructor
zeros = torch.zeros((3, 4))                     # all zeros, 3 rows x 4 columns
ones = torch.ones((2, 3))                       # all ones, 2 rows x 3 columns
random = torch.randn((2, 2))                    # 2x2 draws from a standard normal
from_list = torch.tensor([1, 2, 3, 4])          # 1-D tensor from a flat Python list
from_2dlist = torch.tensor([[1, 2], [1, 2]])    # 2-D tensor from a nested Python list

# 2. Show each tensor under its heading (heading and value on separate lines)
print("Zeros (3x4):", zeros, sep="\n")
print("\nOnes (2x3):", ones, sep="\n")
print("\nRandom (2x2):", random, sep="\n")
print("\nFrom list:", from_list, sep="\n")
print("\nFrom 2D list:", from_2dlist, sep="\n")
print("Shape:", from_2dlist.shape)

In [None]:
# Indexing and slicing follow the same conventions as numpy arrays.
print("=== Indexing and Slicing ===")

# A 3x4 grid holding 0..11, so every cell has a recognisable value.
small = torch.arange(0, 12).view(3, 4)
print("Our test tensor:", small, sep="\n")

# Reading: one element, one row, one column, then a rectangular block.
print(f"\nElement at [1,2]: {small[1][2]}")
print(f"First row: {small[0]}")
print(f"Second column: {small[..., 1]}")
print(f"Top-left 2x2: \n{small[0:2, 0:2]}")

# Writing: indexed assignment updates the tensor in place.
small[0, 0] = 999
print("\nAfter changing [0,0] to 999:", small, sep="\n")

In [None]:
# Arithmetic, matrix products and reductions on small tensors.
print("=== Basic Tensor Operations ===")

# Two equal-length integer vectors to combine.
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
print("a =", a)
print("b =", b)

# Element-wise arithmetic, spelled with the functional equivalents of +, * and **.
print(
    f"\na + b = {torch.add(a, b)}",      # addition
    f"a * b = {torch.mul(a, b)}",        # element-wise multiplication
    f"a ** 2 = {torch.pow(a, 2)}",       # squaring
    sep="\n",
)

# 2x2 float matrices for a true matrix product.
matrix1 = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
matrix2 = torch.tensor([[5, 6], [7, 8]], dtype=torch.float32)

print("\nMatrix multiplication:")
print(f"matrix1 @ matrix2 = \n{torch.matmul(matrix1, matrix2)}")

# Reductions and metadata.
print("\nUseful operations:")
print(f"Sum: {torch.sum(a)}")
print(f"Mean of matrix1: {torch.mean(matrix1)}")
print(f"Max of a: {torch.max(a)}")
print(f"Shape of matrix1: {matrix1.size()}")

In [None]:
# Now let's understand what your N tensor is for!
print("=== Understanding the N tensor in context ===")

# N is a 27x27 matrix. Why 27?
# Presumably one row/column per symbol: the 26 letters + 1 special character ('.').

alphabet = 'abcdefghijklmnopqrstuvwxyz'
print(f"Alphabet has {len(alphabet)} letters")
# Spell out the arithmetic ("26 + 1 = 27") instead of printing the tautology "27 = 27".
print(f"With the '.' character, that's {len(alphabet)} + 1 = {len(alphabet) + 1} total")

# The full character set: '.' first (index 0), then 'a'..'z' (indices 1..26).
chars = ['.'] + list(alphabet)
print(f"Our character set: {chars}")
print(f"Total characters: {len(chars)}")

# Two-way lookup between characters and their integer indices.
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = dict(enumerate(chars))

print("\nChar to index examples:")
print(f"'.' -> {char_to_idx['.']}")
print(f"'a' -> {char_to_idx['a']}")
print(f"'z' -> {char_to_idx['z']}")

print("\nSo N[i,j] will count how often character i is followed by character j!")

## 🎮 Your Turn to Experiment!

Try running each cell above to see how tensors work. Here are some fun experiments you can try:

1. **Modify the tensors**: Change the numbers in the tensor creation cells
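2. **Try different operations**: Add `.T`, `.transpose(0, 1)`, `.reshape(...)`, or `.view(...)` to existing tensors (note that `.transpose` needs the two dimensions to swap, and `.reshape`/`.view` need the new shape)  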
3. **Experiment with indexing**: Try `N[0:5, 10:15]` to see different parts of your N matrix
4. **Check data types**: Compare `torch.zeros((3,3), dtype=torch.float64)` with `torch.zeros((3,3), dtype=torch.int8)`

The next cell is a playground for you!

In [None]:
# 🧪 PLAYGROUND - Experiment here!
# Anything goes: this cell is a scratch space for trying out tensors.

# Starter examples:
# 1. A small 2x3 tensor, written one row per line
my_tensor = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
])
print("My tensor:", my_tensor)

# 2. Inspect it: its size, and its transpose (rows and columns swapped)
print("Shape:", my_tensor.size())
print("Transposed:", my_tensor.t())

# Your experiments below:

# 🚀 PyTorch Broadcasting Deep Dive

Broadcasting is one of PyTorch's most powerful features - it lets you do operations between tensors of different shapes automatically. Let's explore this step by step!

In [None]:
# The two simplest broadcasting cases: identical shapes, and a scalar operand.
print("=== Broadcasting Rule 1: Same-size tensors ===")

# Identical shapes: elements pair up one-to-one.
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
print(
    f"a: {a} (shape: {a.shape})",
    f"b: {b} (shape: {b.shape})",
    f"a + b = {a + b}",
    "",  # blank separator line before the next section
    sep="\n",
)

print("=== Broadcasting Rule 2: Scalar with tensor ===")
# A plain Python number combines with a tensor of any shape.
scalar = 10
tensor = torch.tensor([[1, 2], [3, 4]])
print(
    f"scalar: {scalar}",
    f"tensor: \n{tensor} (shape: {tensor.shape})",
    f"scalar + tensor = \n{scalar + tensor}",
    "The scalar 10 gets added to EVERY element!",
    sep="\n",
)

In [None]:
# Operands with different, but compatible, shapes.
print("=== Broadcasting Rule 3: Compatible different shapes ===")

# One 2x3 matrix, combined below with a length-3 row and a 2x1 column.
matrix = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
])                                                    # shape (2, 3)
row_vector = torch.tensor([10, 20, 30])               # shape (3,)
col_vector = torch.tensor([100, 200]).reshape(2, 1)   # shape (2, 1)

print(
    f"Matrix:\n{matrix}",
    f"Matrix shape: {matrix.shape}",
    "",
    sep="\n",
)

# (2, 3) + (3,): the row is reused for each of the two matrix rows.
print(
    f"Row vector: {row_vector}",
    f"Row vector shape: {row_vector.shape}",
    f"Matrix + row vector:\n{matrix + row_vector}",
    "Row vector gets 'copied' to each row of the matrix!",
    "",
    sep="\n",
)

# (2, 3) + (2, 1): the column is reused for each of the three matrix columns.
print(
    f"Column vector:\n{col_vector}",
    f"Column vector shape: {col_vector.shape}",
    f"Matrix + column vector:\n{matrix + col_vector}",
    "Column vector gets 'copied' to each column of the matrix!",
    sep="\n",
)

In [None]:
# Let's understand the broadcasting rules step by step
print("=== The Broadcasting Algorithm ===")

def check_broadcast_compatibility(shape1, shape2):
    """Print a step-by-step trace of whether two shapes can broadcast.

    The shorter shape is left-padded with 1s so both shapes have the same
    number of dimensions; this lines the shapes up on their trailing
    (right-most) dimension. Each aligned pair of sizes is compatible when the
    sizes are equal or when either one is 1. The trace stops at the first
    incompatible pair.

    Args:
        shape1: Sequence of dimension sizes for the first operand.
        shape2: Sequence of dimension sizes for the second operand.

    Returns:
        True if every aligned pair of dimensions is compatible, else False.
    """
    print(f"Shape 1: {shape1}")
    print(f"Shape 2: {shape2}")

    # Shapes are aligned on their trailing dimension: pad the shorter one on
    # the LEFT with 1s so both lists have the same length.
    max_dims = max(len(shape1), len(shape2))
    shape1_padded = [1] * (max_dims - len(shape1)) + list(shape1)
    shape2_padded = [1] * (max_dims - len(shape2)) + list(shape2)

    print(f"After padding: {shape1_padded} vs {shape2_padded}")

    result_shape = []
    compatible = True

    # After padding, position i in one shape lines up with position i in the
    # other, so the pairs can be checked in any order; we go left to right.
    for i, (dim1, dim2) in enumerate(zip(shape1_padded, shape2_padded)):
        print(f"Dimension {i}: {dim1} vs {dim2}", end=" -> ")

        if dim1 == dim2:
            result_shape.append(dim1)
            print(f"Same size: {dim1}")
        elif dim1 == 1:
            # A size-1 dimension stretches to match the other operand.
            result_shape.append(dim2)
            print(f"Broadcast dim1: {dim2}")
        elif dim2 == 1:
            result_shape.append(dim1)
            print(f"Broadcast dim2: {dim1}")
        else:
            # Different sizes and neither is 1: no broadcast is possible.
            print("INCOMPATIBLE!")
            compatible = False
            break

    if compatible:
        print(f"✅ Result shape: {result_shape}")
    else:
        print("❌ Cannot broadcast!")
    print()
    return compatible

# Test some examples
check_broadcast_compatibility((2, 3), (3,))      # Works
check_broadcast_compatibility((2, 3), (2, 1))    # Works  
check_broadcast_compatibility((2, 3), (2, 4))    # Fails

In [None]:
# Three worked examples of broadcasting on small tensors.
print("=== Hands-on Broadcasting Examples ===")

# Example 1: a length-4 bias combined with every row of a 3x4 matrix
matrix = torch.randn((3, 4))
row_bias = torch.arange(1, 5)                          # tensor([1, 2, 3, 4]), shape (4,)

print("Matrix (3x4):", matrix, sep="\n")
print(f"\nRow bias (4,): {row_bias}")
print("\nMatrix + row_bias (broadcasts to 3x4):", matrix + row_bias, sep="\n")
print("The bias gets added to EVERY row!\n")

# Example 2: a 3x1 bias combined with every column of the same matrix
col_bias = torch.tensor([10, 20, 30]).unsqueeze(1)     # shape (3, 1)
print(f"Column bias (3x1):\n{col_bias}")
print("\nMatrix + col_bias (broadcasts to 3x4):", matrix + col_bias, sep="\n")
print("The bias gets added to EVERY column!\n")

# Example 3: element-wise product of a 2x2 matrix and a length-2 vector
A = torch.tensor([[1, 2], [3, 4]])                     # shape (2, 2)
B = torch.tensor([10, 100])                            # shape (2,)
print(
    f"A (2x2):\n{A}",
    f"B (2,): {B}",
    f"A * B (element-wise, broadcasts):\n{A * B}",
    sep="\n",
)

In [None]:
# Common broadcasting surprises and how to debug them
print("=== Common Broadcasting Pitfalls ===")

# Surprise 1: shapes that LOOK incompatible but broadcast anyway.
# Every aligned pair of dimensions contains a 1, so both operands stretch.
try:
    a = torch.tensor([[1, 2, 3]])        # (1, 3)
    b = torch.tensor([[1], [2]])         # (2, 1) 
    print(f"a.shape: {a.shape}, b.shape: {b.shape}")
    result = a + b  # This actually works! (1,3) + (2,1) -> (2,3)
    print(f"Success! Result shape: {result.shape}")
    print(f"Result:\n{result}")
except RuntimeError as e:
    print(f"Error: {e}")
print()

# Mistake 2: Really incompatible shapes.
# The trailing dimensions are 3 and 2: they differ and neither is 1.
try:
    a = torch.tensor([[1, 2, 3], [4, 5, 6]])  # (2, 3)
    b = torch.tensor([[1, 2], [3, 4]])         # (2, 2)
    print(f"a.shape: {a.shape}, b.shape: {b.shape}")
    result = a + b  # This will fail!
except RuntimeError as e:
    # The failure is the demonstration: show PyTorch's message instead of crashing.
    print(f"❌ Error: {e}")
print()

# How to debug: Use unsqueeze to add dimensions
print("=== Debugging with .unsqueeze() ===")
vec = torch.tensor([1, 2, 3])  # Shape: (3,)
print(f"Original vector shape: {vec.shape}")
print(f"After unsqueeze(0): {vec.unsqueeze(0).shape}")  # (1, 3)
print(f"After unsqueeze(1): {vec.unsqueeze(1).shape}")  # (3, 1)
print("Use unsqueeze to add dimensions of size 1 for broadcasting!")

In [None]:
# Broadcasting with three-dimensional tensors.
print("=== Advanced Broadcasting: 3D+ Tensors ===")

# Stand-in for a batch of images laid out as (batch, height, width): 2 images, 3x4 each
batch_of_images = torch.randn((2, 3, 4))
print(f"Batch of images shape: {batch_of_images.shape}")

# One bias value per image: shape (2, 1, 1) stretches over height and width
batch_bias = torch.tensor([1, 2]).reshape(2, 1, 1)
print(f"Batch bias shape: {batch_bias.shape}")
result = batch_of_images + batch_bias
print(f"Result shape: {result.shape}")
print("Each image gets a different bias value!\n")

# One bias value per row: a (3,) vector turned into a (3, 1) column first
row_bias = torch.tensor([10, 20, 30])
print(f"Row bias shape: {row_bias.shape}")
result = batch_of_images + row_bias[:, None]
print(f"After unsqueeze(1): {row_bias[:, None].shape}")
print(f"Result shape: {result.shape}")
print("Every row in every image gets different bias!\n")

# The power of broadcasting: Complex operations in one line
print("=== Broadcasting enables complex operations ===")
# Subtract each image's own mean; keepdim=True leaves the means as (2, 1, 1)
means = torch.mean(batch_of_images, dim=(1, 2), keepdim=True)
normalized = batch_of_images - means
print(
    f"Mean shape: {means.shape}",
    f"Normalized shape: {normalized.shape}",
    "Each image normalized by its own mean - all in one operation!",
    sep="\n",
)

## 🧪 Broadcasting Playground

Now it's your turn! Try these experiments to master broadcasting:

1. **Shape Detective**: Create tensors with shapes `(3, 1)` and `(1, 4)`. What happens when you add them?

2. **Bias Challenge**: Create a `(5, 3)` matrix and add a different bias to each column

3. **Batch Normalization**: Create a `(10, 20, 30)` tensor and subtract the mean of each `(20, 30)` slice

4. **Error Explorer**: Try operations that will fail and see what PyTorch tells you

5. **Dimension Engineering**: Use `unsqueeze()`, `squeeze()`, and `reshape()` to make incompatible shapes work together

In [None]:
# 🎯 YOUR BROADCASTING EXPERIMENTS GO HERE!

print("=== Experiment 1: Shape Detective ===")
# Question: what does (3, 1) + (1, 4) produce?
a = torch.tensor([1, 2, 3]).reshape(3, 1)           # column, shape (3, 1)
b = torch.tensor([10, 20, 30, 40]).reshape(1, 4)    # row, shape (1, 4)
# Your code here:


print("\n=== Experiment 2: Column Bias Challenge ===") 
# Starting point: a (5, 3) matrix; add a different bias to each column
matrix = torch.randn((5, 3))
# Your code here:


print("\n=== Experiment 3: Batch Normalization ===")
# Starting point: a (10, 20, 30) tensor; subtract the mean of each (20, 30) slice
data = torch.randn((10, 20, 30))
# Your code here:


print("\n=== Experiment 4: Your Creative Broadcasting ===")
# Free-form: design a broadcasting experiment of your own
# Your code here: