In [1]:
import torch

# Tensors are PyTorch's multi-dimensional arrays.
x = torch.tensor([1.0, 2.0, 3.0])  # rank 1: a vector with 3 elements
y = torch.tensor([[1.0, 2.0], [3.0, 4.0]])  # rank 2: a 2 x 2 matrix

# .shape reports the size along every dimension; this prints
# torch.Size([3]) and then torch.Size([2, 2]), one per line.
print(x.shape, y.shape, sep="\n")

torch.Size([3])
torch.Size([2, 2])


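Then $y = xW^T + b$ has shape $(32, 20)$: one row for each of the 32 input rows in $x$, and one column for each of the 20 rows of $W$ (the output features).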


In [3]:
import torch.nn as nn

# A fully connected (affine) layer, the simplest neural-network building block.
# It maps every 2-feature input row to 3 output features via y = x @ W.T + b,
# where W (linear.weight) has shape (3, 2) = (out_features, in_features)
# and b (linear.bias) has shape (3,).
linear = nn.Linear(2, 3)

# A batch of 4 data points with 2 features each -> shape (4, 2)
input_data = torch.tensor(
    [
        [1.0, 2.0],
        [3.0, 4.0],
        [3.0, 4.0],
        [3.0, 4.0],
    ]
)
output = linear(input_data)  # one 3-feature row per data point
print(output.shape)

torch.Size([4, 3])


In [4]:
# ReLU (Rectified Linear Unit), f(x) = max(0, x), is applied element-wise and
# is what makes the layer's response non-linear.
activation = nn.ReLU()
output = linear(input_data)  # pre-activation values
output = activation(output)  # negative entries are clamped to 0

In [5]:
# Method 1: nn.Sequential feeds each module's output into the next one,
# in the order the modules are listed.
model = nn.Sequential(
    nn.Linear(in_features=2, out_features=10),  # input (2 features) -> 10 hidden neurons
    nn.ReLU(),  # non-linearity between the two linear layers
    nn.Linear(in_features=10, out_features=1),  # 10 hidden neurons -> 1 output value
)


# Method 2: subclass nn.Module (more flexible than nn.Sequential, because
# forward() can contain arbitrary Python)
class SimpleNetwork(nn.Module):
    """Two-layer fully connected network: Linear(2, 10) -> ReLU -> Linear(10, 1)."""

    def __init__(self):
        """Create the two linear layers and the activation used between them."""
        super().__init__()
        self.layer1 = nn.Linear(2, 10)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(10, 1)

    def forward(self, x):
        """Compute the network output for ``x``.

        Args:
            x: Tensor whose last dimension has size 2 (the input features
                expected by ``layer1``).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)


model2 = SimpleNetwork()

In [6]:
# Manual initialization (what you're doing in your code)
def init_weights(m, bound=0.001):
    """Initialize an ``nn.Linear`` module with small uniform random values.

    Intended to be passed to ``nn.Module.apply``, which calls it once for
    every submodule. Modules that are not ``nn.Linear`` are left untouched.

    Args:
        m: The module to initialize.
        bound: Weights (and bias, if present) are drawn from the uniform
            distribution over ``[-bound, bound)``. Defaults to 0.001.

    Returns:
        None. The module's parameters are modified in place.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.uniform_(m.weight, -bound, bound)
    # A layer built with bias=False stores ``bias`` as None; there is nothing
    # to initialize in that case, and uniform_ would fail on None.
    if m.bias is not None:
        nn.init.uniform_(m.bias, -bound, bound)


model.apply(init_weights)  # Calls init_weights on every submodule (and on model itself), then returns model

Sequential(
  (0): Linear(in_features=2, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=1, bias=True)
)

In [None]:
loss_fn = nn.MSELoss()  # Mean Squared Error: mean((prediction - target) ** 2)

# One target value per data point. The target must have the same shape as the
# prediction: `model` maps the 4 rows of `input_data` to one value each, so
# both are (4, 1). The last three inputs are identical and share one target.
target = torch.tensor([[0.5], [1.0], [1.0], [1.0]])

# Predict with `model` (the network whose parameters the optimizer updates),
# not with the stand-alone `linear` layer, so that loss.backward() produces
# gradients for model.parameters().
prediction = model(input_data)
loss = loss_fn(prediction, target)  # Scalar tensor measuring prediction error

In [None]:
# Stochastic Gradient Descent over all of the model's learnable parameters;
# each step moves a parameter by -lr * gradient.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [None]:
# Basic training step: one gradient-descent update. The order matters.
# NOTE(review): this only changes the weights if `loss` was computed from the
# output of the same model whose parameters were given to `optimizer`.
optimizer.zero_grad()      # Clear gradients left from a previous step (PyTorch accumulates them)
loss.backward()            # Backpropagate: compute d(loss)/d(parameter) into each parameter's .grad
optimizer.step()           # Update the optimizer's parameters using those gradients