# Tutorial 1: Introduction to Deep Learning

In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np

## Part 1: PyTorch Introduction

Creating tensors in PyTorch

In [None]:
# Build a 1-D tensor from a flat list and a 2-D tensor from a nested list.
tensor_a = torch.tensor([1, 2, 3])
tensor_b = torch.tensor([[1, 2, 3], [4, 5, 6]])

# Show each tensor followed by its shape; `.size()` returns the same
# torch.Size value as the `.shape` attribute.
print("Tensor a:", tensor_a)
print("Tensor a shape:", tensor_a.size())
print('\n')
print("Tensor b:", tensor_b)
print("Tensor b shape:", tensor_b.size())

Basic operations

In [None]:
# Element-wise sum of tensor_a and the first row of tensor_b, written with
# the functional form of the `+` operator.
sum_tensors = torch.add(tensor_a, tensor_b[0])
# Multiplying by a Python scalar scales every element of the tensor.
product_tensors = torch.mul(tensor_a, 2)

print("Sum of tensors:", sum_tensors)
print("Tensor multiplied by 2:", product_tensors)

Reshaping tensors

In [None]:
# View tensor_b's six elements as 3 rows x 2 columns without copying the data.
# For a contiguous tensor such as tensor_b, tensor_b.reshape(3, 2) gives the
# same result.
reshaped_tensor = tensor_b.view((3, 2))
print("Reshaped tensor b:", reshaped_tensor)
print("Reshaped tensor b shape:", reshaped_tensor.size())

Concatenate tensors

In [None]:
# Concatenating tensors with a different number of dimensions.
# tensor_a is 1-D and tensor_b is 2-D, so torch.cat raises; the error is the
# point of this demonstration, so it is caught and its message printed.
print("Tensor a shape:", tensor_a.size())
print("Tensor b shape:", tensor_b.size())

print('-' * 50)

try:
    torch.cat((tensor_a, tensor_b))
except Exception as err:
    print(err)

print('-' * 50)



In [None]:
print("Tensor A:", tensor_a)
# Adding a leading axis turns the 1-D tensor_a into a single-row 2-D tensor,
# which can then be stacked on top of tensor_b along dimension 0.
print(torch.unsqueeze(tensor_a, 0))
tensor_cat = torch.cat((torch.unsqueeze(tensor_a, 0), tensor_b), dim=0)
print("Tensor cat shape:", tensor_cat.size())

### Automatic differentiation

In [None]:
def y_func(x):
    """Evaluate x^2 + 3x + 1 + sin(x^2) using differentiable torch operations."""
    return torch.pow(x, 2) + 3 * x + 1 + torch.sin(torch.pow(x, 2))


# Scalar leaf tensor; requires_grad=True makes autograd record the operations
# applied to it.
x = torch.tensor(2.0, requires_grad=True)
y = y_func(x)

# No backward pass has run yet, so x.grad has not been populated.
print('Gradient before backward call:', x.grad)
print('-' * 50)

# Backpropagate from the scalar y; this fills in x.grad.
y.backward()

# x.grad now holds the derivative of y with respect to x.
print("dy/dx at x = 2:", x.grad)


### Exercise:
Compute the gradient of the function $f(x, y) = (x+2y)^2$ at $(x,y) = (1,2)$ using PyTorch.

Computing the derivative of a function

In [None]:
def y_func(x):
    """Evaluate x^2 + 3x + 1 + sin(x^2) element-wise with differentiable torch ops."""
    return x**2 + 3*x + 1 + torch.sin(x**2)

# 100 evenly spaced sample points in [-3, 3]; autograd records operations on them.
x = torch.linspace(-3, 3, steps=100, requires_grad=True)

# Evaluate the function at every sample point.
y = y_func(x)

# y[i] depends only on x[i], so d(sum_j y[j]) / dx[i] equals dy[i]/dx[i].
# One backward pass over the sum therefore yields every element-wise
# derivative at once, instead of one backward call (with retain_graph=True)
# per sample point.
y.sum().backward()

# Copy the result out of x.grad so that any later backward call, which would
# accumulate into x.grad, cannot change the values kept in `grads`.
grads = x.grad.detach().clone()

# Plot the function values and the derivative on shared axes.
# Tensors that are part of the autograd graph are detached before the
# conversion to NumPy arrays.
plt.plot(x.detach().numpy(), y.detach().numpy(), label='f(x)')
plt.plot(x.detach().numpy(), grads.numpy(), label='df/dx')
plt.title('Function and its Gradient')
plt.xlabel('x')
plt.ylabel('y / df/dx')
plt.grid()
plt.legend()
plt.show()

### Training a very simple neural network

We train a neural network with one linear layer: $x\mapsto xW + b$ using gradient descent.

We want to solve the following optimization problem:
$$\min_{W,b} L([W,b]) = \min_{W,b}\sum_{i=1}^N (x_iW+b - y_i)^2$$

In [None]:
x = torch.tensor([0.2,0.8,0.9,0.3,0.2])                     # input tensor
y = torch.tensor([0,1,1,0,0]).to(dtype=torch.float32)       # expected output

# ReLU activation applied to the output of the linear layer. It must be
# created before the training loop: using `relu` without defining it raises
# NameError when the notebook is run on a fresh kernel.
relu = torch.nn.ReLU()

# Randomly initialised parameters of the layer x -> relu(x W + b).
w = torch.randn(5, 5, requires_grad=True)
b = torch.randn(5, requires_grad=True)
learning_rate = 0.1

for _ in range(100):

  # Forward pass, then the loss as the Euclidean norm of the residual y - z.
  z = relu(torch.matmul(x, w)+b)
  loss = torch.norm(y-z)
  loss.backward()

  # Plain gradient-descent step; no_grad keeps the in-place parameter
  # updates out of the autograd graph.
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    # Zero gradients after updating, otherwise the next backward() call
    # would add to the gradients of this step.
    w.grad.zero_()
    b.grad.zero_()

# Show the output of the trained network, i.e. with the same ReLU that was
# used when computing the loss, not the raw pre-activation.
with torch.no_grad():
  print(relu(torch.matmul(x, w) + b))
# Loss measured in the last iteration, before that iteration's update.
print("Training error:", loss.item())

### Exercise:
Modify the code above to add a ReLU activation function after the linear layer. You can use the `torch.nn.ReLU` class. The ReLU function is defined as:
$$\text{ReLU}(x) = \max(0,x)$$
The network should now be:
$$x\mapsto \text{ReLU}(xW + b)$$
You first need to define the ReLU function as `relu = torch.nn.ReLU()`, and then apply it to the output of the linear layer.

## Part 2: Building a neural network
#### Source: https://medium.com/@mn05052002/building-a-simple-mlp-from-scratch-using-pytorch-7d50ca66512b

#### Importing libraries

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#### Generate the dataset

In [None]:
# Synthetic dataset generated with scikit-learn's make_moons (fixed seed).
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)

# Keep 80% of the samples for training and hold out 20% for testing;
# the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: the scaler is fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Convert to float32 PyTorch tensors; the labels are turned into column
# vectors with one row per sample.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

#### Visualize the dataset

In [None]:
# Scatter plot of the whole dataset: the two columns of X are the
# coordinates and the label array y selects the colour of each point.
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Dataset (colour = class label)')
# plt.show() renders the figure and keeps the artist repr out of the cell output.
plt.show()

#### Create the model
The model should be a simple feedforward neural network with one hidden layer, that computes
$$
y = \sigma(\sigma(x W_1 + b_1)W_2 + b_2)
$$
where $\sigma$ is the sigmoid activation function, $W_1$ and $W_2$ are the weights of the first and second layer respectively, and $b_1$ and $b_2$ are the biases of the first and second layer respectively.

In [None]:
class SimpleMLP(torch.nn.Module):
    """Feedforward network with one hidden layer and sigmoid activations.

    Computes ``sigmoid(sigmoid(X @ W1 + b1) @ W2 + b2)``. The weights and
    biases are registered as parameters and initialised from a standard
    normal distribution.

    Args:
        input_size: number of columns of the input ``X``.
        hidden_size: number of units in the hidden layer.
        output_size: number of columns of the output.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # First layer: input -> hidden.
        self.W1 = torch.nn.Parameter(torch.randn(input_size, hidden_size))
        self.b1 = torch.nn.Parameter(torch.randn(1, hidden_size))
        # Second layer: hidden -> output.
        self.W2 = torch.nn.Parameter(torch.randn(hidden_size, output_size))
        self.b2 = torch.nn.Parameter(torch.randn(1, output_size))

    def forward(self, X):
        """Run the forward pass and return the output-layer activation.

        The intermediate results (``z1``, ``a1``, ``z2``, ``z``) are stored
        on the instance and overwritten by every call.
        """
        # Hidden layer: affine map followed by a sigmoid.
        self.z1 = X @ self.W1 + self.b1
        self.a1 = self.z1.sigmoid()
        # Output layer: affine map followed by a sigmoid.
        self.z2 = self.a1 @ self.W2 + self.b2
        self.z = self.z2.sigmoid()
        return self.z

In [None]:
def train(model, X, y, epochs=1000, lr=0.01):
    """Train ``model`` on the full batch ``(X, y)`` with SGD and an MSE loss.

    Args:
        model: callable module whose ``parameters()`` are optimised.
        X: input tensor passed to ``model``.
        y: target tensor compared against ``model(X)``.
        epochs: number of optimisation steps.
        lr: learning rate handed to ``torch.optim.SGD``.

    Returns:
        list of float: the loss value recorded at every epoch.
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    losses = []

    for epoch in range(epochs):
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, then the mean squared error over all elements.
        output = model(X)
        loss = (output - y).pow(2).mean()

        # Backward pass followed by the parameter update.
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

        # Report progress every 100 epochs.
        if (epoch + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

    return losses

#### Train the model

In [None]:
# Network dimensions: input features, hidden units, outputs.
input_size, hidden_size, output_size = 2, 4, 1

# Create an instance of the SimpleMLP class with these dimensions.
model = SimpleMLP(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

# Train the model and keep the loss value of every epoch.
losses = train(model, X_train, y_train, epochs=1000, lr=0.1)

#### Evaluate the model

In [None]:
# Inference only: no_grad avoids recording an autograd graph.
with torch.no_grad():
    # Call the module itself rather than .forward(): calling the module also
    # runs any registered hooks, which a direct .forward() call skips.
    test_probs = model(X_test)
    # Threshold the model outputs at 0.5 to obtain hard 0/1 predictions.
    test_output = (test_probs > 0.5).float()
# Fraction of test samples whose prediction equals the label.
accuracy = torch.mean((test_output == y_test).float())
print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")

### Exercise:
- Vary the learning rate between 0.01 and 0.9 and see how it affects the training process.
- Change the optimization algorithm to Adam: `torch.optim.Adam(model.parameters(), lr=0.001)`.
- Change the number of hidden units in the neural network.