<a href="https://colab.research.google.com/github/tendai-codes/DeepLearning/blob/main/Multi_class_Classification_workflow_Template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Make Classification data

In [None]:
import sklearn

In [None]:
# Fetch the RCV1 dataset through scikit-learn's dataset loader.
# NOTE(review): within this notebook `rcv1` is only used by the DataFrame preview
# in the next cell; the modelling cells further down do not read it.
from sklearn.datasets import fetch_rcv1
rcv1 = fetch_rcv1()

# Display the returned object to inspect what it contains.
rcv1

In [None]:
import pandas as pd

# scikit-learn documents `rcv1.data` as a SciPy sparse matrix. Passing a sparse matrix
# straight to pd.DataFrame() does not yield one column per feature, so build a
# sparse-backed DataFrame with the dedicated constructor instead.
data = pd.DataFrame.sparse.from_spmatrix(rcv1.data)

# Preview only the first rows; the full frame is far too large to display.
data.head()

### Import Dependencies


In [None]:
# Import Libraries
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import torch
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# Set hyperparameters for the data and for training
NUM_CLASSES = 5       # number of classes requested when generating the data
NUM_FEATURES = 30     # number of features per generated sample
LEARNING_RATE = 0.01  # learning rate handed to the optimiser
NUM_EPOCHS = 500      # number of passes of the training loop
RANDOM_SEED = 42      # seed used for data generation and the train/test split


### Turn data into tensors

In [None]:
# Create multi-class data: NUM_CLASSES blobs of points in NUM_FEATURES dimensions.
# make_blobs returns the features as a float array and the labels as an integer array.
X, y = make_blobs(n_samples=1000,
                  n_features=NUM_FEATURES,
                  centers=NUM_CLASSES,  # scikit-learn spells this keyword "centers"
                  cluster_std=1.5,
                  random_state=RANDOM_SEED)

# Convert data to tensors: float features for the linear layers,
# long (int64) class indices as required by nn.CrossEntropyLoss targets
X = torch.from_numpy(X).type(torch.float)
y = torch.from_numpy(y).type(torch.LongTensor)

# Preview the first five samples and labels
X[:5], y[:5]

## Check input and output shapes

In [None]:
X.shape , y.shape # These shapes tell you the input and output feature sizes to use when building the model

## Split Training and test data

In [None]:
# Hold out part of the data for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)
# test_size=0.2 -> 20% of the data is used for testing, the remaining 80% for training

In [None]:
# Sanity check: number of samples in each of the four splits
len(X_train), len(X_test), len(y_train), len(y_test)

### Create Device agnostic code

In [None]:
# Set up device-agnostic code
from torch import nn

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using Device: {device}")

Using Device: cpu


### Build Model

In [None]:
# Multi-class classification model > nn.Sequential is enough for a simple stack of layers
class ClassificationModel(nn.Module):
  """Feed-forward classifier: three linear layers with a ReLU after the first two."""

  def __init__(self, input_features, output_features, hidden_units=8):
    """Initializes the multi-class classification model.

    Args:
      input_features (int): Number of input features to the model.
      output_features (int): Number of output features of the model.
      hidden_units (int): Number of hidden units between layers, default 8.
    """
    super().__init__()

    # List the layers in the order data flows through them, then wrap them in
    # nn.Sequential so a single call runs the whole stack
    layer_stack = [
        nn.Linear(input_features, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, output_features),
    ]
    self.linear_layers = nn.Sequential(*layer_stack)

  def forward(self, x):
    """Defines the computation: passes `x` through the layer stack and returns the result."""
    outputs = self.linear_layers(x)
    return outputs

# Set manual seed so the initial weights are reproducible
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED) # if using GPU

# Size the model from the data hyperparameters: one input per feature and one
# output (logit) per class, so the layers match the shapes of X and y
model_0 = ClassificationModel(input_features=NUM_FEATURES,
                              output_features=NUM_CLASSES,
                              hidden_units=8).to(device)
model_0

### Check model device vs target device




In [None]:
# Check model device: look at the device of the first parameter tensor
next(model_0.parameters()).device

device(type='cpu')

In [None]:
# Set model to target device
model_0.to(device)
# Confirm which device the first parameter tensor is on after the move
next(model_0.parameters()).device

device(type='cpu')

### Train Model

#### Set up Loss Function & Optimiser

In [None]:
# Loss function

loss_fn = nn.CrossEntropyLoss() # takes raw logits; applies log-softmax internally (not sigmoid), so do not apply softmax before the loss

# Optimiser (stochastic gradient descent - SGD)
optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr=LEARNING_RATE) # lr = learning rate (hyperparameter) > the smaller the learning rate, the smaller the change in the parameters per step

#### Evaluate Model accuracy

In [None]:
def accuracy_fn(y_true, y_pred):
  """Returns the percentage (0-100) of predictions that equal their true label.

  Args:
    y_true: Tensor of true labels.
    y_pred: Tensor of predicted labels to compare element-wise with `y_true`.

  Returns:
    Accuracy as a percentage of `len(y_pred)`.
  """
  matches = torch.eq(y_true, y_pred)  # element-wise True where prediction == label
  num_correct = matches.sum().item()
  return (num_correct / len(y_pred)) * 100

In [None]:
# View outputs of forward pass
model_0.eval()  # switch the model to evaluation mode
with torch.inference_mode():
  y_logits = model_0(X_test.to(device))

  # Convert logits > predicted probabilities (predicted labels are taken in the next cell);
  # softmax over dim=1 normalises across the class dimension of each sample
  y_pred_probs = torch.softmax(y_logits, dim=1)

In [None]:
# Predicted label = index of the highest predicted probability for each sample
y_pred = torch.argmax(y_pred_probs, dim=1)
y_pred

### Build a training loop and a testing loop


In [None]:
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED) # if using GPU

# Set number of epochs
epochs = NUM_EPOCHS

# Make sure data is on correct target device
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

# Track different values - use these to track model performance & compare with other models
epoch_count = []
loss_values = []
test_loss_values = []

### Training
# 0. Loop through the data
for epoch in range(epochs):

  # Set model to training mode
  model_0.train()

  # 1. Forward pass
  # Logits are (n_samples, n_classes); no squeeze is needed, and squeezing would
  # drop the batch dimension for a single-sample batch and break dim=1 below.
  y_logits = model_0(X_train)
  y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1) # logits > probabilities > labels

  # 2. Calculate the loss/accuracy
  # nn.CrossEntropyLoss expects the raw logits, not probabilities
  loss = loss_fn(y_logits, y_train)
  acc = accuracy_fn(y_true=y_train,
                    y_pred=y_pred)

  # 3. Optimiser zero grad (gradients accumulate by default, so reset them each epoch)
  optimizer.zero_grad()

  # 4. Loss backward (Backpropagation)
  loss.backward()

  # 5. Optimiser step (Gradient descent)
  optimizer.step()

  ### Testing
  model_0.eval()
  with torch.inference_mode(): # turns off gradient tracking

    # 1. Forward pass
    test_logits = model_0(X_test)
    test_pred = torch.softmax(test_logits, dim=1).argmax(dim=1)

    # 2. Calculate the loss/accuracy
    test_loss = loss_fn(test_logits, y_test)
    test_acc = accuracy_fn(y_true=y_test,
                           y_pred=test_pred)

  # Record and print out what's happening every 50 epochs
  if epoch % 50 == 0:
    # Store plain Python numbers so the history can be plotted or compared later
    epoch_count.append(epoch)
    loss_values.append(loss.item())
    test_loss_values.append(test_loss.item())

    print(f"Epoch: {epoch} | Loss:{loss:.4f} | Acc: {acc:.2f}% | Test loss:{test_loss:.4f} | Test Acc: {test_acc:.2f}%")


## Improve a model (from a model's perspective)

* Add more layers - gives the model more chances to learn about patterns in the data
* Add more hidden units - gives each layer more capacity to represent patterns in the data
* Fit for longer - increase epochs
* Change the activation function
* Change the learning rate
* Change the loss function


### Evaluate Model

In [None]:
# Inspect the model's parameters (the weights and biases of each linear layer)
model_0.state_dict()

In [None]:
# Turn model into evaluation mode
model_0.eval()

# Make predictions on test data
with torch.inference_mode():
  # Logits are (n_samples, n_classes); squeezing is unnecessary and would drop the
  # batch dimension for a single-sample test set, breaking dim=1 below.
  y_logits = model_0(X_test)
  y_pred_probs = torch.softmax(y_logits, dim=1)

  # Go from predicted probabilities to predicted labels
  y_pred = torch.argmax(y_pred_probs, dim=1)

# Compare the first ten predicted labels with the first ten true labels
y_pred[:10], y_test[:10]

### Classification Metrics

* Accuracy
* Precision
* F1 score
* Confusion Matrix
* Classification report

In [None]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->torchmetrics)
  D

In [None]:
from torchmetrics import Accuracy

# Set up metric: current torchmetrics requires the task type, and the multiclass
# task additionally needs the number of classes
torchmetric_accuracy = Accuracy(task="multiclass", num_classes=NUM_CLASSES).to(device)

# Calculate accuracy (predictions first, then targets); the result is a fraction in [0, 1]
torchmetric_accuracy(y_pred, y_test)