# 1. Make classification data and get it ready

In [None]:
from sklearn.datasets import make_circles

# Number of points to generate
n_samples = 1000

# Build the two-circle toy dataset: a small noise value jitters the points
# and a fixed random_state makes every run produce the same values
X, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)

In [None]:
# Peek at the first five feature rows and their labels
print(
    f'First 5 X features:\n{X[:5]}',
    f'First 5 y labels:\n{y[:5]}',
    sep='\n',
)

In [None]:
# Collect both feature columns and the labels in a DataFrame for inspection
import pandas as pd

circles = pd.DataFrame({"X1": X[:, 0], "X2": X[:, 1], "label": y})
circles.head()

In [None]:
# Count how many samples carry each label
circles["label"].value_counts()

In [None]:
# Scatter the first feature against the second, coloured by label
import matplotlib.pyplot as plt

plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap=plt.cm.PiYG)

## 1.1 Input and output shapes

In [None]:
# Check the shapes of our features (X) and labels (y)
X.shape, y.shape

In [None]:
# Take the first feature row and its label, then show their values and shapes
X_sample, y_sample = X[0], y[0]
print(f'Values for one sample of X: {X_sample} and the same for y: {y_sample}')
print(f'Shape for one sample of X: {X_sample.shape} and the same for y: {y_sample.shape}')

## 1.2 Turn data into tensors and create train and test splits

In [None]:
# Convert the NumPy arrays into float32 tensors
# (leaving them as-is causes issues with computations later on)
import torch

X = torch.from_numpy(X).to(torch.float32)
y = torch.from_numpy(y).to(torch.float32)

# View the first five samples
X[:5], y[:5]

In [None]:
# Split data into train and test sets
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

len(X_train), len(X_test), len(y_train), len(y_test)

# 2. Building a model

In [None]:
# Standard PyTorch imports
import torch
from torch import nn

# Pick the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Model definition: a subclass of nn.Module with two stacked linear layers
class CircleModelV0(nn.Module):
    """Two-layer linear classifier: 2 input features -> 5 hidden units -> 1 output."""

    def __init__(self):
        super().__init__()
        # Layer sizes follow the data: X has 2 features, y is a single value
        self.layer_1 = nn.Linear(in_features=2, out_features=5)
        self.layer_2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Feed x through layer_1, then layer_2, and return the single-feature output."""
        hidden = self.layer_1(x)
        return self.layer_2(hidden)


# 4. Create an instance of the model and send it to the target device
#    (the bare name on the last line displays the model in the notebook)
model_0 = CircleModelV0().to(device)
model_0

In [None]:
# nn.Sequential version of CircleModelV0: the layers run in the order listed
model_0_seq = nn.Sequential(
    nn.Linear(2, 5),
    nn.Linear(5, 1),
)
model_0_seq = model_0_seq.to(device)

model_0_seq

In [None]:
# Make predictions with the model before any training has happened
# (the test features are moved to the same device as the model first)
untrained_preds = model_0(X_test.to(device))

# Compare the length/shape and first values of the predictions with the test labels
print(f"Length of predictions: {len(untrained_preds)}, Shape: {untrained_preds.shape}")
print(f"Length of test samples: {len(y_test)}, Shape: {y_test.shape}")
print(f"\nFirst 10 predictions:\n{untrained_preds[:10]}")
print(f"\nFirst 10 test labels:\n{y_test[:10]}")

## 2.1 Setup loss function and optimizer

In [None]:
# Loss: binary cross entropy with the sigmoid built in, so it takes raw logits
# (nn.BCELoss is the variant without a built-in sigmoid)
loss_fn = nn.BCEWithLogitsLoss()

# Optimizer: stochastic gradient descent over model_0's parameters
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

In [None]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    """Return the percentage of predictions that equal the true labels.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels, compared element-wise with y_true.

    Returns:
        float: Accuracy as a percentage (correct / len(y_pred) * 100).
        An empty y_pred yields 0.0 instead of raising ZeroDivisionError.
    """
    total = len(y_pred)
    if total == 0:
        # Nothing to score; avoid dividing by zero
        return 0.0
    correct = torch.eq(y_true, y_pred).sum().item()  # torch.eq() marks where the two tensors are equal
    return (correct / total) * 100

# 3. Train model

## 3.1 Going from raw model outputs to predicted labels

In [None]:
# View the first 5 outputs of the forward pass on the test data
# (these are the model's raw outputs; no sigmoid has been applied yet)
y_logits = model_0(X_test.to(device))
y_logits[:5]

In [None]:
# Use sigmoid on the model logits to turn them into prediction probabilities
y_pred_probs = torch.sigmoid(y_logits)
y_pred_probs[:5]

In [None]:
# Predicted labels: round each prediction probability to 0 or 1
y_preds = y_pred_probs.round()

# The same thing in one go: logits -> sigmoid -> round
y_pred_labels = model_0(X_test.to(device)).sigmoid().round()

# Both routes should agree
print((y_preds.squeeze() == y_pred_labels.squeeze())[:5])

# Drop the extra dimension before viewing the first five labels
y_preds.squeeze()[:5]

In [None]:
# True labels of the first five test samples, for comparison with the predicted labels
y_test[:5]

## 3.2 Building a training and testing loop

In [None]:
torch.manual_seed(42)

# Set the number of epochs
epochs = 100

# Put data on the target device (model and data must be on the same device)
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Training and evaluation loop
for epoch in range(epochs):
    ### Training
    model_0.train()

    # 1. Forward pass (model outputs raw logits); squeeze removes the extra '1' dimension
    train_logits = model_0(X_train).squeeze()
    train_preds = torch.round(torch.sigmoid(train_logits))  # logits -> pred probs -> pred labels

    # 2. Calculate loss/accuracy (nn.BCEWithLogitsLoss works with raw logits)
    train_loss = loss_fn(train_logits, y_train)
    train_acc = accuracy_fn(y_train, train_preds)

    # 3. Zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    train_loss.backward()

    # 5. Step optimizer
    optimizer.step()

    ### Testing (evaluated and reported every 10 epochs)
    if epoch % 10 == 0:
        model_0.eval()

        with torch.inference_mode():
            # 1. Forward pass
            test_logits = model_0(X_test).squeeze()
            test_preds = torch.round(torch.sigmoid(test_logits))

            # 2. Calculate loss/accuracy
            test_loss = loss_fn(test_logits, y_test)
            test_acc = accuracy_fn(y_test, test_preds)

        # Print out what's happening
        print(f"Epoch: {epoch} | Loss: {train_loss:.5f}, Accuracy: {train_acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

# 4. Make predictions and evaluate the model

In [None]:
import requests
from pathlib import Path

# Download helper functions from Learn PyTorch repo (if not already downloaded)
if Path("helper_functions.py").is_file():
    print("helper_functions.py already exists, skipping download")
else:
    print("Downloading helper_functions.py")
    request = requests.get(
        "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
        timeout=30,  # don't hang forever if the server never answers
    )
    # Fail loudly on an HTTP error instead of saving an error page as a Python module
    request.raise_for_status()
    with open("helper_functions.py", "wb") as f:
        f.write(request.content)

from helper_functions import plot_predictions, plot_decision_boundary

In [None]:
# Plot model_0's decision boundary for the training set (left) and test set (right)
plt.figure(figsize =(8, 4))
plt.subplot(1,2,1)
plt.title('Train')
plot_decision_boundary(model_0, X_train, y_train)
plt.subplot(1,2,2)
plt.title('Test')
plot_decision_boundary(model_0, X_test, y_test)

# 5. Improving a model (from a model perspective)

In [None]:
class CircleModelV1(nn.Module):
    """Deeper linear classifier: 2 input features -> 10 -> 10 hidden units -> 1 output.

    No activation function sits between the layers, so the output is a plain
    composition of the three linear layers.
    """

    def __init__(self):
        super().__init__()

        # Three linear layers: 2 inputs, two hidden layers of 10 units, 1 output
        self.fc1 = nn.Linear(2, 10)
        self.fc2 = nn.Linear(10, 10)
        self.fc3 = nn.Linear(10, 1)

    def forward(self, x):
        """Run x through fc1, fc2 and fc3 in order and return the result."""
        # Same computation as applying the layers one statement at a time:
        #   x = self.fc1(x); x = self.fc2(x); x = self.fc3(x)
        return self.fc3(self.fc2(self.fc1(x)))

# Create an instance of CircleModelV1 on the target device and display it
model_1 = CircleModelV1().to(device)
model_1

In [None]:
# Loss: BCE with logits, which does not require a sigmoid on its input
# (nn.BCELoss is the variant that requires sigmoid on its input)
loss_fn = nn.BCEWithLogitsLoss()

# Optimizer: SGD over model_1's parameters with a learning rate of 0.1
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.1)

In [None]:
torch.manual_seed(42)

epochs = 1000  # Train for longer

# Put data on the target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    ### Training
    # Switch back to training mode every epoch: the evaluation step below
    # calls eval() and nothing else would undo it
    model_1.train()

    # 1. Forward pass
    train_logits = model_1(X_train).squeeze()
    train_pred = torch.round(torch.sigmoid(train_logits))  # logits -> prediction probabilities -> prediction labels

    # 2. Calculate loss/accuracy
    train_loss = loss_fn(train_logits, y_train)
    train_acc = accuracy_fn(y_train, train_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    train_loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing (evaluated and reported every 100 epochs)
    if epoch % 100 == 0:
        model_1.eval()
        with torch.inference_mode():
            # 1. Forward pass
            test_logits = model_1(X_test).squeeze()
            test_pred = torch.round(torch.sigmoid(test_logits))

            # 2. Calculate loss/accuracy
            test_loss = loss_fn(test_logits, y_test)
            test_acc = accuracy_fn(y_test, test_pred)

        # Print out what's happening
        print(f"Epoch: {epoch} | Train Loss: {train_loss:.5f}, Train Accuracy: {train_acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

In [None]:
# Plot model_1's decision boundary for the training set (left) and test set (right)
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model_1, X_train, y_train)

plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model_1, X_test, y_test)

## 5.1 Preparing data to see if our model can model a straight line

In [None]:
# Known parameters of the straight line and the sampling range (same as notebook 01)
weight, bias = 0.7, 0.3
start, end, step = 0, 1, 0.01

# Features are evenly spaced values shaped as a single column;
# labels follow the linear regression formula y = weight * X + bias
X_regression = torch.arange(start, end, step).unsqueeze(dim=1)
y_regression = weight * X_regression + bias

# Check the data
print(len(X_regression))
X_regression[:5], y_regression[:5]

In [None]:
# Use the first 80% of the samples for training and the remainder for testing
train_split = int(0.8 * len(X_regression))
X_train_regression, X_test_regression = X_regression[:train_split], X_regression[train_split:]
y_train_regression, y_test_regression = y_regression[:train_split], y_regression[train_split:]

# Check the lengths of each split
print(
    len(X_train_regression),
    len(y_train_regression),
    len(X_test_regression),
    len(y_test_regression),
)

In [None]:
# Plot the regression training and test data (no predictions are passed in yet)
plot_predictions(train_data=X_train_regression,
    train_labels=y_train_regression,
    test_data=X_test_regression,
    test_labels=y_test_regression
)

## 5.2 Adjusting `model_1` to fit a straight line

In [None]:
# Three stacked linear layers like model_1, built with nn.Sequential and
# taking a single input feature for the straight-line data
model_2 = nn.Sequential(
    nn.Linear(in_features=1, out_features=10),
    nn.Linear(in_features=10, out_features=10),
    nn.Linear(in_features=10, out_features=1),
)
model_2 = model_2.to(device)

model_2

In [None]:
# L1 loss for the regression problem, optimized with SGD at a learning rate of 0.1
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(params=model_2.parameters(), lr=0.1)

In [None]:
# Train the model

# Set the number of epochs
epochs = 1000

# Put data on the target device
X_train_regression, y_train_regression = X_train_regression.to(device), y_train_regression.to(device)
X_test_regression, y_test_regression = X_test_regression.to(device), y_test_regression.to(device)

for epoch in range(epochs):
    ### Training
    # Switch back to training mode every epoch: the evaluation step below
    # calls eval() and nothing else would undo it
    model_2.train()

    # 1. Forward pass
    train_pred = model_2(X_train_regression)

    # 2. Calculate loss (no accuracy, since this is a regression problem)
    train_loss = loss_fn(train_pred, y_train_regression)

    # 3. Optim zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    train_loss.backward()

    # 5. Optim step
    optimizer.step()

    ### Testing (evaluated and reported every 100 epochs)
    if epoch % 100 == 0:
        model_2.eval()
        with torch.inference_mode():
            # 1. Forward pass
            test_pred = model_2(X_test_regression)

            # 2. Calculate loss
            test_loss = loss_fn(test_pred, y_test_regression)

        # Print out what's happening
        print(f"Epoch: {epoch} | Train Loss: {train_loss:.5f} | Test loss: {test_loss:.5f}")

In [None]:
# Plot data and predictions with data on the CPU (matplotlib can't handle data on the GPU)
# (try removing .cpu() from one of the below and see what happens)
# NOTE: test_pred holds the predictions from the most recent evaluation step of
# the training loop above; the trailing semicolon suppresses the cell's output value
plot_predictions(train_data=X_train_regression.cpu(),
                 train_labels=y_train_regression.cpu(),
                 test_data=X_test_regression.cpu(),
                 test_labels=y_test_regression.cpu(),
                 predictions=test_pred.cpu());

# 6. The missing piece: non-linearity

## 6.1 Recreating non-linear data (red and blue circles)

In [None]:
# Make and plot data
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles

# Regenerate the noisy two-circle dataset with a fixed seed
n_samples = 1000

X, y = make_circles(n_samples, noise=0.03, random_state=42)

# Scatter the two features, coloured by label
plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap=plt.cm.RdBu)

In [None]:
# Convert to tensors and split into train and test sets
import torch
from sklearn.model_selection import train_test_split

# Convert the NumPy arrays into float32 tensors
X = torch.from_numpy(X).to(torch.float32)
y = torch.from_numpy(y).to(torch.float32)

# Hold out 20% of the samples for testing, with a fixed seed for the split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train[:5], y_train[:5]

## 6.2 Building a model with non-linearity

In [None]:
# Build model with non-linear activation function
from torch import nn

class CircleModelV2(nn.Module):
    """Circle classifier with ReLU between its linear layers: 2 -> 10 -> 10 -> 1."""

    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(2, 10)
        self.layer_2 = nn.Linear(10, 10)
        self.layer_3 = nn.Linear(10, 1)
        self.relu = nn.ReLU()  # non-linear activation, reused after both hidden layers

        # A sigmoid could also live in the model (e.g. self.sigmoid = nn.Sigmoid()),
        # which would mean it no longer has to be applied to the predictions

    def forward(self, x):
        """Apply ReLU after layer_1 and after layer_2, then return layer_3's output."""
        hidden_1 = self.relu(self.layer_1(x))
        hidden_2 = self.relu(self.layer_2(hidden_1))
        return self.layer_3(hidden_2)

# Create an instance of CircleModelV2 on the target device and print its layers
model_3 = CircleModelV2().to(device)
print(model_3)

In [None]:
# BCE-with-logits loss plus SGD over model_3's parameters (learning rate 0.1)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model_3.parameters(), lr=0.1)

In [None]:
# Fit the model
torch.manual_seed(42)
epochs = 1000

# Send data to the same device as the model
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    ### Training
    # Switch back to training mode: the evaluation step at the end of every
    # epoch calls eval() and nothing else would undo it
    model_3.train()

    # 1. Forward pass
    train_logits = model_3(X_train).squeeze()
    train_preds = torch.round(torch.sigmoid(train_logits))  # logits -> prediction probabilities -> prediction labels

    # 2. Calculate loss and acc (BCEWithLogitsLoss calculates loss using logits)
    train_loss = loss_fn(train_logits, y_train)
    train_acc = accuracy_fn(y_train, train_preds)

    # 3. Optim zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    train_loss.backward()

    # 5. Optim step
    optimizer.step()

    ### Testing (every epoch)
    model_3.eval()
    with torch.inference_mode():
        # 1. Forward pass
        test_logits = model_3(X_test).squeeze()
        test_preds = torch.round(torch.sigmoid(test_logits))  # logits -> prediction probabilities -> prediction labels

        # 2. Calculate loss and acc
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_test, test_preds)

    # Print out what's happening every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Train Loss: {train_loss:.5f}, Train Accuracy: {train_acc:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")





In [None]:
# Compare decision boundaries on the test set: linear model (left) vs non-linear model (right).
# Each panel's title names its model so the two plots can be told apart.
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.title("Test (model_1: linear)")
plot_decision_boundary(model_1, X_test, y_test)  # model_1 = no non-linearity
plt.subplot(1, 2, 2)
plt.title("Test (model_3: non-linear)")
plot_decision_boundary(model_3, X_test, y_test)  # model_3 = has non-linearity

# 7. Replicating non-linear activation functions

In [None]:
# Toy tensor of whole numbers from -10 up to (not including) 10, stored as float32
# (similar to the data going into our model(s))
A = torch.arange(start=-10, end=10, step=1, dtype=torch.float32)
A

In [None]:
# Visualize the toy tensor (values plotted against their index)
plt.plot(A)

In [None]:
# Hand-written ReLU
def relu(x):
    """Return the element-wise maximum of 0 and the tensor x."""
    zero = torch.tensor(0)  # torch.maximum needs both inputs to be tensors
    return torch.maximum(zero, x)

# Pass the toy tensor through the hand-written ReLU function
relu(A)

In [None]:
# Plot the ReLU-activated toy tensor
plt.plot(relu(A))

In [None]:
# Hand-written sigmoid
def sigmoid(x):
    """Return 1 / (1 + exp(-x)), computed element-wise on the tensor x."""
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

# Try the custom sigmoid on the toy tensor
sigmoid(A)

In [None]:
# Plot the sigmoid-activated toy tensor
plt.plot(sigmoid(A))

# 8. Putting things together by building a multi-class PyTorch model

## 8.1 Creating multi-class classification data

In [None]:
# Import dependencies
import torch
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

In [None]:
# Hyperparameters for data creation
NUM_CLASSES = 4    # number of blob centers, i.e. distinct y labels
NUM_FEATURES = 2   # number of X features
RANDOM_SEED = 42

# 1. Create multi-class data
#    (cluster_std=1.5 gives the clusters a little shake up; the default is 1.0)
X_blob, y_blob = make_blobs(
    n_samples=1000,
    n_features=NUM_FEATURES,
    centers=NUM_CLASSES,
    cluster_std=1.5,
    random_state=RANDOM_SEED,
)

# 2. Turn data into tensors: float32 features and int64 (long) labels
X_blob = torch.from_numpy(X_blob).to(torch.float32)
y_blob = torch.from_numpy(y_blob).to(torch.long)
print(X_blob[:5], y_blob[:5])

# 3. Split into train and test sets (20% held out for testing)
X_blob_train, X_blob_test, y_blob_train, y_blob_test = train_test_split(
    X_blob, y_blob, test_size=0.2, random_state=RANDOM_SEED
)

# 4. Plot data, coloured by class label
plt.figure(figsize=(8, 4))
plt.scatter(x=X_blob[:, 0], y=X_blob[:, 1], c=y_blob, cmap=plt.cm.RdYlBu)

## 8.2 Building a multi-class classification model in PyTorch

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
from torch import nn

# Build model
class BlobModel(nn.Module):
    """Multi-class classifier: three linear layers with a ReLU after the first two."""

    def __init__(self, input_features, output_features, hidden_features=8) -> None:
        """Create the layers of the network.

        Args:
            input_features (int): Number of input features to the model.
            output_features (int): Number of output features of the model (how many classes there are).
            hidden_features (int): Number of hidden units between layers, default: 8
        """
        super().__init__()
        self.linear_layer_1 = nn.Linear(in_features=input_features, out_features=hidden_features)
        self.linear_layer_2 = nn.Linear(in_features=hidden_features, out_features=hidden_features)
        self.linear_layer_3 = nn.Linear(in_features=hidden_features, out_features=output_features)
        self.relu = nn.ReLU()  # open question from the notebook: does our dataset require non-linear layers?

    def forward(self, x):
        """Return the output of linear_layer_3 (output_features values per input row)."""
        hidden = self.relu(self.linear_layer_1(x))
        hidden = self.relu(self.linear_layer_2(hidden))
        return self.linear_layer_3(hidden)

# Instantiate BlobModel with sizes taken from the data hyperparameters,
# then move it to the target device
model_4 = BlobModel(
    input_features=NUM_FEATURES,
    output_features=NUM_CLASSES,
    hidden_features=8,
).to(device)

model_4

## 8.3 Creating a loss function and optimizer for a multi-class PyTorch model

In [None]:
# Cross-entropy loss for the multi-class problem, optimized with SGD
# (experiment with different values for lr)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_4.parameters(), lr=0.1)

## 8.4 Getting prediction probabilities for a multi-class PyTorch model

In [None]:
# Perform a single forward pass on the training data and view the first 5 outputs
# (the data is moved to the target device so it is on the same device as the model)
model_4(X_blob_train.to(device))[:5]

In [None]:
# How many elements are in a single prediction sample? (shown next to NUM_CLASSES for comparison)
model_4(X_blob_train.to(device))[0].shape, NUM_CLASSES

In [None]:
# Raw model outputs (logits) for the test features
y_logits = model_4(X_blob_test.to(device))
print(y_logits[:5])

# Softmax across dimension 1 turns each row of logits into prediction probabilities
y_pred_probs = y_logits.softmax(dim=1)
print(y_pred_probs[:5])

In [None]:
# Sum the first sample's softmax output (the probabilities of one sample should add up to 1)
torch.sum(y_pred_probs[0])

In [None]:
# Which class does the model think is *most* likely at the index 0 sample?
# argmax returns the index of the largest probability
print(y_pred_probs[0])
print(torch.argmax(y_pred_probs[0]))

## 8.5 Creating a training and testing loop for a multi-class PyTorch model

In [None]:
# Fit the model
torch.manual_seed(42)

# Set number of epochs
epochs = 100

# Put the data on the target device
X_blob_train, X_blob_test = X_blob_train.to(device), X_blob_test.to(device)
y_blob_train, y_blob_test = y_blob_train.to(device), y_blob_test.to(device)

for epoch in range(epochs):
    ### Training
    # Switch back to training mode every epoch: the evaluation step below
    # calls eval() and nothing else would undo it
    model_4.train()

    # 1. Forward pass
    train_logits = model_4(X_blob_train)
    train_preds = torch.softmax(train_logits, dim=1).argmax(dim=1)  # logits -> probabilities -> class labels

    # 2. Calculate loss and accuracy (the loss takes the raw logits)
    train_loss = loss_fn(train_logits, y_blob_train)
    train_acc = accuracy_fn(y_blob_train, train_preds)

    # 3. Optim zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    train_loss.backward()

    # 5. Optim step
    optimizer.step()

    ### Testing (evaluated and reported every 10 epochs)
    if epoch % 10 == 0:
        model_4.eval()
        with torch.inference_mode():
            # 1. Forward pass
            test_logits = model_4(X_blob_test)
            test_preds = torch.softmax(test_logits, dim=1).argmax(dim=1)

            # 2. Calculate loss and accuracy
            test_loss = loss_fn(test_logits, y_blob_test)
            test_acc = accuracy_fn(y_blob_test, test_preds)

        # Print out what's happening
        print(f"Epoch: {epoch} | Train Loss: {train_loss:.5f}, Train Acc: {train_acc:.2f}% | Test Loss: {test_loss:.5f}, Test Acc: {test_acc:.2f}%")

## 8.6 Making and evaluating predictions with a PyTorch multi-class model

In [None]:
# Make predictions on the test features in eval mode, with inference mode enabled
model_4.eval()
with torch.inference_mode():
    pred_logits = model_4(X_blob_test)

# View the first 10 predictions (raw logits)
pred_logits[:10]

In [None]:
# Turn predicted logits into prediction probabilities
pred_probs = pred_logits.softmax(dim=1)

# Turn prediction probabilities into prediction labels (index of the largest value per row).
# This can be simplified by taking the argmax of pred_logits directly, skipping the probabilities.
pred_labels = torch.argmax(pred_probs, dim=1)

# Compare first 10 model preds and test labels
print(f"Predictions: {pred_labels[:10]}\nLabels: {y_blob_test[:10]}")
print(f"Test accuracy: {accuracy_fn(y_true=y_blob_test, y_pred=pred_labels)}%")

In [None]:
# Plot model_4's decision boundaries for the blob training set (left) and test set (right)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model_4, X_blob_train, y_blob_train)
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model_4, X_blob_test, y_blob_test)

# 9. More classification evaluation metrics

In [None]:
from torchmetrics import Accuracy

# Setup metric and make sure it's on the target device
# (num_classes comes from NUM_CLASSES so it stays in sync with the blob data)
torchmetrics_accuracy = Accuracy(task='multiclass', num_classes=NUM_CLASSES).to(device)

# Calculate accuracy of the predicted labels against the test labels
torchmetrics_accuracy(pred_labels, y_blob_test)