# Introduction to Neural Networks

## Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

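## Convention

`nn.Linear(d0, d1)` stores its weight as a `[d1, d0]` matrix (one row per output neuron) and its bias as a `[d1]` vector.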

In [2]:
# Layer sizes: d0 input features feed d1 output neurons.
d0, d1 = 3, 2
activation = nn.ReLU()
model = nn.Linear(in_features=d0, out_features=d1)
model

Linear(in_features=3, out_features=2, bias=True)

In [3]:
dict(model.named_parameters())

{'weight': Parameter containing:
 tensor([[ 0.3177,  0.3732,  0.2700],
         [ 0.1860, -0.0274,  0.4721]], requires_grad=True),
 'bias': Parameter containing:
 tensor([ 0.0197, -0.5723], requires_grad=True)}

In [4]:
model.weight.shape

torch.Size([2, 3])

In [5]:
model.bias.shape

torch.Size([2])

In [6]:
# Walk the d0 incoming weights of the first output neuron (row 0 of the weight matrix).
for col in range(d0):
    print(model.weight[0][col])

tensor(0.3177, grad_fn=<SelectBackward0>)
tensor(0.3732, grad_fn=<SelectBackward0>)
tensor(0.2700, grad_fn=<SelectBackward0>)


## Vectorization with `torch.vmap`

In [7]:
from jaxtyping import Float
from torch import Tensor
from beartype import beartype

### Quick intro to jaxtyping

#### Scalars

In [8]:
scalar_type = Float[Tensor, ""]

In [9]:
scalar = torch.tensor(1.0)
scalar.shape

torch.Size([])

In [10]:
isinstance(scalar, scalar_type)

True

In [11]:
non_scalar = torch.tensor([1.0])
non_scalar.shape

torch.Size([1])

In [12]:
isinstance(non_scalar, scalar_type)

False

#### Vectors

In [13]:
vector_type = Float[Tensor, "n"]

In [14]:
isinstance(non_scalar, vector_type)

True

In [15]:
vector = torch.tensor([1.0, 2.0])
vector.shape

torch.Size([2])

In [16]:
isinstance(vector, vector_type)

True

#### Matrices

In [17]:
matrix_type = Float[Tensor, "n m"]

In [18]:
isinstance(vector, matrix_type)

False

In [19]:
matrix = torch.tensor([[1.0, 2.0]])
matrix.shape

torch.Size([1, 2])

In [20]:
isinstance(matrix, matrix_type)

True

In [21]:
another_matrix = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
another_matrix.shape

torch.Size([2, 3])

In [22]:
isinstance(another_matrix, matrix_type)

True

#### Tensors

In [23]:
tensor_type = Float[Tensor, "b0 b1 b2"]

In [24]:
tensor = torch.tensor([[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])
tensor.shape

torch.Size([2, 2, 2])

In [25]:
isinstance(tensor, tensor_type)

True

### Quick intro to beartype

In [26]:
def call_my_name(name):
    """Return a greeting for ``name``.

    No type checking is performed: any object is formatted into the
    greeting using its string representation.
    """
    greeting = f"Hello, {name}!"
    return greeting

In [27]:
call_my_name("John")

'Hello, John!'

In [28]:
call_my_name(123)

'Hello, 123!'

In [29]:
@beartype
def secured_call_my_name(name: str) -> str:
    """Return a greeting for ``name``, with runtime type checking.

    The ``@beartype`` decorator enforces the annotations at call time, so
    passing a non-``str`` argument raises a beartype violation instead of
    being silently formatted.
    """
    greeting = f"Hello, {name}!"
    return greeting

In [30]:
secured_call_my_name("John")

'Hello, John!'

In [31]:
secured_call_my_name(123)

BeartypeCallHintParamViolation: Function __main__.secured_call_my_name() parameter name=123 violates type hint <class 'str'>, as int 123 not instance of str.

### Vectorization

Over which dimensions should we vectorize?
- The number of neurons in the next layer (`d1`)
- The number of examples (`n`)

In [32]:
n = 50  # number of examples in the batch
a = torch.rand((n, d0))  # one row per example, one column per input feature
a.shape

torch.Size([50, 3])

In [33]:
@beartype
def forward(
    a: Float[Tensor, "d0"],
    w: Float[Tensor, "d0"],
    b: Float[Tensor, ""],
) -> Float[Tensor, ""]:
    """Compute the activation of a single neuron for a single example.

    Args:
        a: Input vector with ``d0`` features.
        w: Weight vector of the neuron, one weight per input feature.
        b: Scalar bias of the neuron.

    Returns:
        A scalar tensor: the module-level ``activation`` applied to the
        weighted sum of the inputs plus the bias.
    """
    pre_activation = torch.sum(a * w) + b  # (d0) -> (), then () + () -> ()
    return activation(pre_activation)  # () -> ()

In [34]:
# Random stand-ins with the exact shapes `forward` expects: (d0), (d0) and ().
dummy_a = torch.rand(d0)
dummy_w = torch.rand(d0)
dummy_b = torch.rand(())
print(*(t.shape for t in (dummy_a, dummy_w, dummy_b)))

torch.Size([3]) torch.Size([3]) torch.Size([])


In [35]:
forward(dummy_a, dummy_w, dummy_b).shape

torch.Size([])

In [36]:
forward(a[0], model.weight[0], model.bias[0])

tensor(0.4475, grad_fn=<ReluBackward0>)

#### Vectorization over the next layer's neurons

| input | shape in `forward` | shape in `v1_forward` |
| --- | --- | --- |
| a | [d0=3] | [d0=3] |
| w | [d0=3] | [d1=2, d0=3] |
| b | [] | [d1=2] |
| output | [] | [d1=2] |

In [37]:
# Map `forward` over the output neurons: the input `a` is shared (None), while
# each neuron gets its own row of `w` and its own entry of `b` (dim 0).
v1_forward = torch.vmap(
    forward,
    in_dims=(None, 0, 0),
    out_dims=0,
)

In [38]:
out = v1_forward(a[0], model.weight, model.bias)
out.shape

torch.Size([2])

#### Vectorization over number of examples

| input | shape in `v1_forward` | shape in `v2_forward` |
| --- | --- | --- |
| a | [d0] | [n, d0] |
| w | [d1, d0] | [d1, d0] |
| b | [d1] | [d1] |
| output | [d1] | [n, d1] |

In [39]:
# Map `v1_forward` over the examples: each row of `a` is one example (dim 0),
# while the weights and biases are shared across the whole batch (None).
v2_forward = torch.vmap(
    v1_forward,
    in_dims=(0, None, None),
    out_dims=0,
)

In [40]:
final_out = v2_forward(a, model.weight, model.bias)
final_out.shape

torch.Size([50, 2])

In [41]:
# Reference result: PyTorch's own linear layer followed by ReLU, to compare
# against the doubly vmapped `forward`.
model_out = F.relu(model(a))
torch.allclose(final_out, model_out)

True

## XOR example

### Define inputs, outputs, weights and biases

In [64]:
# The four possible XOR input pairs, one per row.
xor_x = torch.tensor(
    [
        [0.0, 0.0],
        [0.0, 1.0],
        [1.0, 0.0],
        [1.0, 1.0],
    ]
)
print(xor_x, xor_x.shape, sep="\n")

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])
torch.Size([4, 2])


In [65]:
# XOR targets as a column vector, aligned row by row with the inputs.
xor_y = torch.tensor(
    [
        [0.0],
        [1.0],
        [1.0],
        [0.0],
    ]
)
print(xor_y, xor_y.shape, sep="\n")

tensor([[0.],
        [1.],
        [1.],
        [0.]])
torch.Size([4, 1])


In [66]:
# Hidden layer: two neurons, each reading both inputs.
W_1 = torch.tensor(
    [
        [1.0, 1.0],
        [1.0, 1.0],
    ]
)
b_1 = torch.tensor([0.0, -1.0])
print(W_1, b_1, sep="\n")
print(W_1.shape, b_1.shape)

tensor([[1., 1.],
        [1., 1.]])
tensor([ 0., -1.])
torch.Size([2, 2]) torch.Size([2])


In [67]:
# Output layer: a single neuron combining the two hidden activations.
W_2 = torch.tensor([[1.0, -2.0]])
b_2 = torch.tensor([0.0])
print(W_2, b_2, sep="\n")
print(W_2.shape, b_2.shape)

tensor([[ 1., -2.]])
tensor([0.])
torch.Size([1, 2]) torch.Size([1])


### Forward pass

In [70]:
# Hidden-layer activations: one row per XOR input, one column per hidden neuron.
a1 = v2_forward(xor_x, W_1, b_1)
a1

tensor([[0., 0.],
        [1., 0.],
        [1., 0.],
        [2., 1.]])

In [71]:
# Output-layer activations: one row per XOR input, a single output column.
a2 = v2_forward(a1, W_2, b_2)
# Sanity check: the hand-picked weights must reproduce the XOR truth table,
# otherwise a change to the weights or biases above would go unnoticed.
assert torch.allclose(a2, xor_y), "network output does not match the XOR targets"
a2

tensor([[0.],
        [1.],
        [1.],
        [0.]])