In [None]:
// Keep-alive for a Colab session. This is browser JavaScript, not Python: right click the page,
// choose 'Inspect', open the 'Console' tab, clear the console and paste the code below.

/**
 * Log a message and click Colab's connect button.
 * If the button (or its shadow root) cannot be found, log that and do nothing.
 */
function ConnectButton(){
    console.log("Connect pushed");
    var toolbarButton = document.querySelector("#top-toolbar > colab-connect-button");
    var connect = toolbarButton && toolbarButton.shadowRoot && toolbarButton.shadowRoot.querySelector("#connect");
    if (!connect) {
        console.log("Connect button not found; nothing clicked");
        return;
    }
    connect.click();
}

// Click the connect button every 600000 ms (10 minutes).
var colab = setInterval(ConnectButton, 600000);

// clearInterval(colab)     // run this to stop the keep-alive interval

### Mounting Drive and Installing Libraries

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive') 

#install libraries 
!pip install -q -r '/content/drive/MyDrive/Colab Notebooks/requirements.txt' 


### Import Libraries 

In [None]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

#deep learning libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchmetrics
from torchsummary import summary

### Get the Data Ready

In [None]:
from PIL import Image 
import torchvision 
from torch.utils.data import DataLoader, Dataset, TensorDataset, random_split
from torchvision import datasets 
from torchvision.datasets import DatasetFolder
import torchvision.transforms as transforms 

>> Load the Data (generic)

In [None]:
from torch.utils.data import TensorDataset

# Load the CSV in one go. Passing the path (instead of an open() handle) lets numpy
# open and close the file itself, so no file descriptor is left dangling.
data_mnist = np.loadtxt("/content/sample_data/mnist_train_small.csv", delimiter=',')
print("shape: ", data_mnist.shape)

# Column 0 is used as the label, every remaining column as an input feature.
inputs = data_mnist[:, 1:]
labels = data_mnist[:, 0]

# train, test = torch.utils.data.random_split(train_dataset, [800, 200]) #train/test split
# Split BEFORE scaling so that the scaler never sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(inputs, labels, random_state=23, train_size=0.9)

# Fit the scaler on the training rows only, then apply the same mapping to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# inputs: float32 features, int64 class labels
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

#dataset
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)



>> Load the Data (torchvision)

In [None]:
# Define the transformation to apply to the images.
# The pipeline is shared by both loading recipes below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to a fixed size
    transforms.ToTensor(),          # Convert images to tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize with mean 0.5 / std 0.5 on each of three channels
]) 

#1. Using ImageFolder
# Load the dataset from the image folders under 'Dataset/'
dataset = torchvision.datasets.ImageFolder(root='Dataset/', transform=transform)
class_labels = dataset.classes # Get the class labels
print(class_labels) # Print the class labels


#2. Using a DatasetFolder
# NOTE: recipes #1 and #2 are alternatives. Both assign to `dataset`, so when the
# whole cell is run the DatasetFolder below replaces the ImageFolder created above.
# Create an instance of the DatasetFolder
dataset = torchvision.datasets.DatasetFolder(
                            root='Dataset/',
                            loader=torchvision.datasets.folder.default_loader,  # Use the default image loader
                            extensions=".jpg",  # Only files with this extension are picked up
                            transform=transform  # Apply the defined transformation pipeline
                            )


#3. Using Custom Dataset
class CustomImageDataset(Dataset):
    """Image dataset driven by a CSV annotation file.

    Column 0 of the CSV holds the image file name (joined onto ``root_dir``),
    column 1 holds the label that is returned alongside the image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data = pd.read_csv(csv_file)  # Read the CSV file
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the annotation CSV.

        The image is opened as RGB; ``transform`` is applied when one was given.
        """
        # Samplers may hand over a 0-dim tensor instead of a plain int.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.data.iloc[idx, 0]  # Get the image file name from the CSV
        img_path = os.path.join(self.root_dir, img_name)

        # The context manager closes the underlying file as soon as the pixels
        # have been copied into the converted RGB image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)  # Apply transformations if provided

        label = self.data.iloc[idx, 1]  # Get the corresponding label from the CSV

        return image, label

# Example instantiation. No transform is passed, so items come back as
# (RGB PIL image, label) pairs rather than tensors.
face_dataset = CustomImageDataset(csv_file='data/faces/face_landmarks.csv',
                                    root_dir='data/faces/')

>> Data Split

In [None]:
#When using ImageFolder, DatasetFolder, or a custom dataset to load your data, you can split it into training and 
# testing sets using the random_split function from PyTorch


# Split the dataset into training and testing sets
train_size = int(0.8 * len(dataset))  # 80% for training, adjust as desired
test_size = len(dataset) - train_size   # the remainder, so the two sizes always sum to len(dataset)

# A seeded generator makes the split identical on every Restart & Run All
# (23 matches the random_state used for the generic train_test_split).
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(23),
)

>> Data Augmentation (only to train dataset)

In [None]:
# Noise Data Augmentation 

import torchvision.transforms.functional as TF

def add_guassian_noise_to_dataset(dataset, mean=0, std=0.1):
    """
    Adds random Gaussian noise to every image of an image dataset.

    Args:
        dataset (torch.utils.data.Dataset): Iterable of ``(image, label)`` pairs.
            Images may be PIL images / ndarrays or float tensors.
        mean (float): Mean of the Gaussian noise (default: 0).
        std (float): Standard deviation of the Gaussian noise (default: 0.1).

    Returns:
        list: One ``(noisy_image, label)`` pair per input sample, in input order.
            A list of pairs can be used wherever a map-style dataset is expected.
            Tensor inputs yield noisy tensors; other inputs yield PIL images.
    """
    noisy_dataset = []

    for image, label in dataset:
        # Tensors are used as they are; anything else is converted first
        # (TF.to_tensor rejects inputs that are already tensors).
        already_tensor = isinstance(image, torch.Tensor)
        tensor = image if already_tensor else TF.to_tensor(image)

        # Generate random noise with the same shape as the image tensor and add it.
        # This builds a new tensor, so the caller's data is left untouched.
        noisy_tensor = tensor + torch.randn_like(tensor) * std + mean

        if already_tensor:
            noisy_image = noisy_tensor
        else:
            # Clamp to [0, 1] before the conversion back to an image so that
            # out-of-range values cannot wrap around when cast to bytes.
            noisy_image = TF.to_pil_image(noisy_tensor.clamp(0.0, 1.0))

        noisy_dataset.append((noisy_image, label))

    return noisy_dataset




In [None]:
#To Augment samples of the loaded dataset and then concatenate it with the original dataset (remember to only use 
# transforms.ToTensor() when loading the original datasets)

from torch.utils.data import Dataset, ConcatDataset

class AugmentedDataset(Dataset):
    """Original dataset followed by augmented copies of randomly chosen samples.

    Indices ``0 .. len(original_dataset) - 1`` return the untouched original
    samples. The following ``int(len(original_dataset) * augmentation_ratio)``
    indices each return an augmented version of one source sample together
    with that source sample's own label.

    Args:
        original_dataset: Map-style dataset yielding ``(image, label)`` pairs.
        augmentation_transforms (callable): Applied to the image of every
            augmented sample each time it is fetched.
        augmentation_ratio (float): Number of augmented samples as a fraction
            of the original dataset size; must not be negative.
        seed (int, optional): Seeds the choice of source samples. When omitted
            the global ``random`` state is used.

    Raises:
        ValueError: If ``augmentation_ratio`` is negative.
    """

    def __init__(self, original_dataset, augmentation_transforms, augmentation_ratio, seed=None):
        if augmentation_ratio < 0:
            raise ValueError("augmentation_ratio must be >= 0, got {}".format(augmentation_ratio))

        self.original_dataset = original_dataset
        self.augmentation_transforms = augmentation_transforms
        self.augmentation_ratio = augmentation_ratio

        num_original = len(original_dataset)
        num_augmented = int(num_original * augmentation_ratio)

        # Choose the source samples once, so that a given index always refers
        # to the same source sample and len() stays constant.
        rng = random if seed is None else random.Random(seed)
        if num_augmented <= num_original:
            # Without replacement: no source sample is augmented twice.
            self.augmented_indices = rng.sample(range(num_original), num_augmented)
        else:
            # More augmented samples than originals: repeats are unavoidable.
            self.augmented_indices = rng.choices(range(num_original), k=num_augmented)

    def __getitem__(self, index):
        """Return ``(image, label)``: original sample first, augmented ones after."""
        total = len(self)
        if index < 0:
            index += total
        if not 0 <= index < total:
            raise IndexError("index {} out of range for dataset of size {}".format(index, total))

        num_original = len(self.original_dataset)
        if index < num_original:
            return self.original_dataset[index]

        # Augmented region: look up the source sample and keep ITS label.
        source_index = self.augmented_indices[index - num_original]
        source_image, source_label = self.original_dataset[source_index]
        return self.augmentation_transforms(source_image), source_label

    def __len__(self):
        """Return the number of original plus augmented samples (always an int)."""
        return len(self.original_dataset) + len(self.augmented_indices)

# Define the transformations for augmentation

def _ensure_tensor(image):
    """Return `image` unchanged when it is already a tensor, otherwise convert it with to_tensor."""
    if isinstance(image, torch.Tensor):
        return image
    return transforms.functional.to_tensor(image)


def _add_gaussian_noise(image, mean=0.0, std=0.1):
    """Return a copy of the image tensor with Gaussian noise (mean, std) added."""
    return image + torch.randn_like(image) * std + mean


# Named helpers are used instead of lambdas so every step of the pipeline has a docstring.
augmentation_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.Lambda(_ensure_tensor),        # the noise step below needs a tensor
    transforms.Lambda(_add_gaussian_noise),   # noise data augmentation (Gaussian), per image
])


In [None]:
# Augment the train dataset only; the test dataset is left as loaded.

# Set the augmentation ratio (e.g., 50%)
augmentation_ratio = 0.5

# Wrap the training split together with the augmentation pipeline and the ratio
augmented_dataset = AugmentedDataset(train_dataset, augmentation_transforms, augmentation_ratio)

# Now, you can use augmented_dataset for training or further processing

>> Data Balancing

In [None]:
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler

# Assuming you already have the augmented_dataset

# Collect every label in ONE pass over the dataset. int() accepts plain ints as
# well as 0-dim tensors, and no `.classes` attribute is required on the dataset.
sample_labels = torch.as_tensor(
    [int(label) for _, label in augmented_dataset], dtype=torch.long
)

# Calculate the class frequencies in the augmented dataset
# (class_counts[c] is the number of samples whose label is c).
class_counts = torch.bincount(sample_labels)

# Calculate the weights for each sample based on class frequencies:
# every sample gets 1 / (size of its class), so rare classes are drawn more often.
weights = (1.0 / class_counts[sample_labels].double()).tolist()

# Create a sampler with weighted sampling; one epoch draws as many samples as the dataset holds.
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)


>> Data Loader

In [None]:
#dataloader
batch_size = 32
shuffle = True      # only honoured when no sampler is used (see below)
drop_last = True

# DataLoader rejects shuffle=True together with a sampler, because the sampler
# already decides the order. Shuffling is therefore switched off whenever a sampler is set.
train_dataloader = DataLoader(augmented_dataset, batch_size=batch_size,
                              shuffle=shuffle if sampler is None else False,
                              drop_last=drop_last, sampler=sampler)

# The whole test set is served as a single batch. len() works for TensorDataset
# and for the Subset objects returned by random_split alike.
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=len(test_dataset))

### Build the Model

>> Build the Model from Scratch

In [None]:
# define the model 

#1

class iris_model(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    The default sizes (4 inputs, 64 hidden units, 3 outputs) reproduce the
    originally hard-coded architecture. ``forward`` returns the raw output of
    the last linear layer; no softmax / log-softmax is applied.

    Args:
        weight_init (str): ``'default'`` keeps PyTorch's own initialisation,
            ``'xavier_uniform'`` or ``'kaiming_normal'`` re-initialise every
            linear layer's weights and zero its bias.
        in_features (int): Number of input features.
        hidden_units (int): Width of both hidden layers.
        num_classes (int): Number of output units.

    Raises:
        ValueError: If ``weight_init`` is not one of the supported names.
    """

    _WEIGHT_INITS = ('default', 'xavier_uniform', 'kaiming_normal')

    def __init__(self, weight_init='default', in_features=4, hidden_units=64, num_classes=3):
        super().__init__()

        # Fail loudly on typos instead of silently falling back to the default.
        if weight_init not in self._WEIGHT_INITS:
            raise ValueError(
                "weight_init must be one of {}, got {!r}".format(self._WEIGHT_INITS, weight_init)
            )

        self.fc1 = nn.Linear(in_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, num_classes)

        if weight_init == 'xavier_uniform':
            self._init_weights_xavier_uniform()
        elif weight_init == 'kaiming_normal':
            self._init_weights_kaiming_normal()
        # 'default': keep the initialisation nn.Linear performed itself.

    def forward(self, x):
        """Map a batch of feature vectors to one raw score per class."""
        out = F.relu(self.fc1(x))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

    def _init_linear_layers(self, init_weight):
        """Apply ``init_weight`` to each linear layer's weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                init_weight(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def _init_weights_xavier_uniform(self):
        """Xavier-uniform weights, zero biases."""
        self._init_linear_layers(init.xavier_uniform_)

    def _init_weights_kaiming_normal(self):
        """Kaiming-normal weights (fan_in, ReLU gain), zero biases."""
        self._init_linear_layers(
            lambda weight: init.kaiming_normal_(weight, mode='fan_in', nonlinearity='relu')
        )

model = iris_model(weight_init='default')    #initializing the model  #model = iris_model(weight_init='xavier_uniform') 


#2

# def create_model(nUnits, nLayers, weight_init):
#     class iris_model(nn.Module):
#         def __init__(self):
#             super().__init__()

#             # Create dictionary to store the layers
#             self.layers = nn.ModuleDict()
#             self.nLayers = nLayers 
#             self.weight_init = weight_init

#             ### Input layer
#             self.layers['input'] = nn.Linear(4, nUnits)
#             self.layers['input_bn'] = nn.BatchNorm1d(nUnits)
#             self.layers['input_dropout'] = nn.Dropout(0.2)
            
#             ### Hidden layers
#             for i in range(nLayers):
#                 self.layers[f'hidden{i}'] = nn.Linear(nUnits, nUnits)
#                 self.layers[f'hidden{i}_bn'] = nn.BatchNorm1d(nUnits)
#                 self.layers[f'hidden{i}_dropout'] = nn.Dropout(0.2)

#             ### Output layer
#             self.layers['output'] = nn.Linear(nUnits, 3)
        
#             # Initialize weights
#             self._initialize_weights()
            
        
#         # Forward pass
#         def forward(self, x):
#             # Input layer
#             x = self.layers['input'](x)
#             x = self.layers['input_bn'](x)
#             x = F.relu(x)
#             x = self.layers['input_dropout'](x)

#             # Hidden layers
#             for i in range(self.nLayers):
#                 x = self.layers[f'hidden{i}'](x)
#                 x = self.layers[f'hidden{i}_bn'](x)
#                 x = F.relu(x)
#                 x = self.layers[f'hidden{i}_dropout'](x)
                
#             # Output layer
#             x = self.layers['output'](x)    #or x = F.sigmoid(self.layers['output](x)) for Binary classification 
            
#             return x 
        
#         def _initialize_weights(self):
#             for name, module in self.layers.items():
#                 if isinstance(module, nn.Linear):
#                     weight_init = self.weight_init.get(name, 'default') #works well with sigmoid (uniform distribution)
#                     if weight_init == 'xavier_uniform':     #works well with sigmoid
#                         init.xavier_uniform_(module.weight)
#                     elif weight_init == 'kaiming_normal':   #works well with ReLU activation 
#                         init.kaiming_normal_(module.weight)
#                     if module.bias is not None:
#                         init.constant_(module.bias, 0)
    
#     return iris_model()

# nUnits = 64
# nLayers = 5
# weight_init = {
#     'input': 'default',
#     'hidden0': 'kaiming_normal',
#     'hidden1': 'kaiming_normal',
#     'hidden2': 'kaiming_normal',
#     # 'hidden3': 'kaiming_normal',
#     'output': 'default'
# }

# model = create_model(nUnits, nLayers, weight_init)    #initializing the model


>> Use a Pretrained Model

### Training the Model

In [None]:
#model training
import copy
from torch.optim.lr_scheduler import StepLR

## metric = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes)    (.Precision(), .Recall(), .F1Score(), .ConfusionMatrix())
                #see doc. https://torchmetrics.readthedocs.io/en/stable/classification/accuracy.html#functional-interface 

num_epochs = 50
learning_rate = 0.0032734813343726263
losses = torch.zeros(num_epochs)      # mean training loss per epoch
ongoing_accuracy = []                 # mean training accuracy (%) per epoch
ongoing_accuracy_test = []            # test accuracy (%) per epoch
num_classes = 10                      # NOTE(review): must equal the size of the model's output layer

# Define the loss function and optimizer.
# The model's forward() returns raw scores (no log-softmax), so CrossEntropyLoss,
# which applies log-softmax itself, is the matching criterion. NLLLoss would
# expect log-probabilities as input.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0008018107002058151)

# Define the learning rate scheduler: multiply the learning rate by 0.1 every 10 epochs
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Move the model to the appropriate device (e.g., GPU if available);
# the batches are moved inside the loops below.
model.to(device)

# Variables to track the best model and accuracy.
# Start from a snapshot of the current weights so there is always something to
# restore, even if no epoch ever beats the initial best_accuracy.
best_accuracy = 0.0
best_model_state = copy.deepcopy(model.state_dict())

# Training loop
for epoch in range(num_epochs):
    model.train()
    batchAcc = []
    batchLoss = []

    # Iterate over the training dataloader.
    # Batch-specific names keep the `inputs` / `labels` arrays of the data-loading cell intact.
    for batch_inputs, batch_labels in train_dataloader:
        batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        batchLoss.append(loss.item())

        # Compute accuracy on the training batch
        predictions = torch.argmax(outputs, axis=1)
        accuracy = torchmetrics.functional.classification.accuracy(predictions, batch_labels, task='multiclass',
                                                                    num_classes=num_classes) * 100
        # accuracy = torchmetrics.functional.classification.accuracy(predictions, labels, task='multiclass', num_classes=num_classes) 
        #                                                     (or metric(predictions, labels))
        # accuracy = torchmetrics.functional.classification.binary_accuracy (predicted, labels, threshold = 0.5)   #for binary classification
        # r2score = torchmetrics.functional.r2_score(preds, target) 
        batchAcc.append(accuracy.item())

    # Update the learning rate
    scheduler.step()

    # batchAcc / batchLoss are plain lists of Python floats, so np.mean applies directly.
    ongoing_accuracy.append(np.mean(batchAcc))
    losses[epoch] = np.mean(batchLoss)

    # Print loss and accuracy for the epoch
    if epoch % 10 == 0:
        print(f"Epoch {epoch}/{num_epochs}: Loss = {np.mean(batchLoss):.4f}, Accuracy = {np.mean(batchAcc):.2f}%")

    model.eval()  # Set the model to evaluation mode
    weighted_accuracy_sum = 0.0
    samples_seen = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in test_dataloader:
            batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)

            # Forward pass
            outputs = model(batch_inputs)

            # Calculate predictions
            predicted = torch.argmax(outputs, dim=1)

            accuracy = torchmetrics.functional.classification.accuracy(predicted, batch_labels,
                                                                       task='multiclass',
                                                                       num_classes=num_classes) * 100
            # Weight each batch by its size so the result stays exact
            # even when the test loader yields batches of different sizes.
            weighted_accuracy_sum += accuracy.item() * batch_labels.size(0)
            samples_seen += batch_labels.size(0)

    test_accuracy = weighted_accuracy_sum / max(samples_seen, 1)
    ongoing_accuracy_test.append(test_accuracy)

    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        # deepcopy: state_dict().copy() would only copy the dict, while its tensors
        # keep being updated in place by the following epochs.
        best_model_state = copy.deepcopy(model.state_dict())

    if epoch % 10 == 0:
        print(f"Accuracy on test set: {test_accuracy:.2f}%")

print('Finished Training')
print(' ')

# Load the best model state
model.load_state_dict(best_model_state)

# Report accuracy
print('Final accuracy (eval): {:.2f}%'.format(ongoing_accuracy_test[-1]))
print('Best accuracy (eval): {:.2f}%'.format(best_accuracy)) 

fig, ax = plt.subplots(1, 2, figsize=(13, 4))

ax[0].plot(losses.detach())
ax[0].set_ylabel('Loss')
ax[0].set_xlabel('Epoch')
ax[0].set_title('Losses')

ax[1].plot(ongoing_accuracy, label='Training Accuracy')
ax[1].plot(ongoing_accuracy_test, label='Evaluation Accuracy')
ax[1].set_ylabel('Accuracy')
ax[1].set_xlabel('Epoch')
ax[1].set_title('Accuracy')
ax[1].legend()

plt.show()

# run training again to see whether this performance is consistent 

### Model Evaluation

>> Confusion Matrix, Precision, Recall, Accuracy, F1-Score, AUC ROC Curve

In [None]:
from sklearn.metrics import (roc_auc_score,roc_curve,precision_recall_curve, auc,
                             classification_report, confusion_matrix, average_precision_score,
                             accuracy_score,silhouette_score,mean_squared_error)
from inspect import signature


#confusion matrix
# accuracy = accuracy_score(y_test, y_pred) 
# NOTE(review): `digits` is not defined anywhere in the visible notebook - presumably a
# dataset object exposing `target_names`; define it (or set class_names directly) before running.
class_names = digits.target_names

def plot_confusion_matrix(y_true, y_pred, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """
    Plot the confusion matrix of y_true vs. y_pred on the current matplotlib figure.

    Parameters:
    -----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    classes : sequence
        Tick labels, one per row/column of the matrix.
    normalize : bool
        When True every row is divided by its sum, so cells show fractions of
        the true class. Rows without any true sample stay at 0.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the cells.

    Returns:
    --------
    None
    """
    cm = confusion_matrix(y_true, y_pred)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class that is predicted but never true has an all-zero row; dividing
        # it by its sum would give NaN cells and a NaN colour threshold below.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so the number stays readable.
    thresh = cm.max() / 2.

    for i, j in np.ndindex(cm.shape):
        plt.text(j, i, format(cm[i, j], fmt),
                horizontalalignment="center",
                color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

# NOTE(review): `y_pred` is not computed anywhere in the visible notebook, and `accuracy`
# is whatever an earlier cell left behind - confirm both before relying on this plot.
plot_confusion_matrix(y_test, y_pred, classes=class_names,
                    title='Confusion matrix, Accuracy = {:.2f}'.format(accuracy))

In [None]:
#AUC ROC curve (use this for binary classification)

def plot_roc(y_actual, y_pred):
    """
    Plot the ROC curve with its AUC and report the threshold maximising Youden's J.

    Parameters:
    -----------
    y_actual : array-like
        True binary labels.
    y_pred : array-like
        Predicted scores / probabilities for the positive class.

    Returns:
    --------
    float
        The threshold with the largest ``tpr - fpr`` (it is also printed).
    """
    fpr, tpr, thresholds = roc_curve(y_actual, y_pred)
    # Area under the curve points computed above; no second pass over the labels needed.
    roc_auc = auc(fpr, tpr)

    plt.plot(
        fpr,
        tpr,
        color="b",
        label=r"Model (AUC = %0.2f)" % roc_auc,
        lw=2,
        alpha=0.8,
    )
    # Diagonal reference line: a classifier that guesses at random.
    plt.plot(
        [0, 1],
        [0, 1],
        linestyle="--",
        lw=2,
        color="r",
        label="Luck (AUC = 0.5)",
        alpha=0.8,
    )
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Receiver operating characteristic example")
    plt.legend(loc="lower right")
    plt.show()

    #To choose the threshold value that maximizes the Youden's J statistic
    # calculate Youden's J statistic for each threshold value
    J = tpr - fpr
    best_threshold = thresholds[np.argmax(J)]
    print('Best threshold:', best_threshold)

    # Returned as well as printed, so callers can apply it to their predictions.
    return best_threshold
    
# NOTE(review): `y_actual` and `y_pred` are not defined in the visible notebook - supply the
# true binary labels and the predicted scores before running.
plot_roc(y_actual, y_pred) 




#OR
from yellowbrick.classifier import ROCAUC #yellow brick can be used for multiclass classification

# NOTE(review): the class names below are placeholders, and `model` is a torch module in this
# notebook - presumably yellowbrick needs a scikit-learn style estimator here; confirm.
visualizer = ROCAUC(model, classes=["win", "loss", "draw"])
visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
visualizer.score(X_test, y_test)        # Evaluate the model on the test data
visualizer.show()                       # Finalize and render the figure

# roc_auc(model, X_train, y_train, X_test=X_test, y_test=y_test, classes=['not_defaulted', 'defaulted']) #quick_method

In [None]:
#Classification Report (Precision, Recall, F1-Score)
from sklearn.metrics import classification_report
# Imported under an alias: yellowbrick's function has the same name and would
# otherwise replace sklearn's, breaking the print() call below.
from yellowbrick.classifier import classification_report as yb_classification_report


# Text report from scikit-learn: takes true labels and predicted labels.
print(classification_report(y_test, y_pred))

# precision measures how many of the positive predictions made by the model are actually correct. 
# A high precision score indicates that the model is making very few false positive predictions.

# recall measures how many of the actual positive instances in the dataset are correctly predicted as positive 
# by the model. A high recall score indicates that the model is correctly identifying a large proportion of the 
# positive instances in the dataset.

#F1- Score is used to compare precision/recall numbers


#OR
# Instantiate the visualizer (yellowbrick quick method: takes the model and the data splits).
# `class_names` is the list of class labels defined in the confusion-matrix cell.
visualizer = yb_classification_report(
    model, X_train, y_train, X_test, y_test, classes=class_names, support=True
)


In [None]:
#Precision - Recall

def plot_precisionrecall(y_actual, y_pred):
    """
    Plot the precision-recall curve, with the average precision in the title.

    Parameters:
    -----------
    y_actual : array-like
        True binary labels.
    y_pred : array-like
        Predicted scores / probabilities for the positive class.

    Returns:
    --------
    None
    """
    average_precision = average_precision_score(y_actual, y_pred)
    precision, recall, _ = precision_recall_curve(y_actual, y_pred)

    plt.figure(figsize=(9, 6))
    # Step outline plus a filled area under it; both use the same "post" stepping
    # so the outline and the fill coincide.
    plt.step(recall, precision, color="b", alpha=0.2, where="post")
    plt.fill_between(recall, precision, alpha=0.2, color="b", step="post")

    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title("Precision-Recall curve: AP={0:0.2f}".format(average_precision))

# NOTE(review): `y_actual` and `y_pred` are not defined in the visible notebook - supply the
# true binary labels and the predicted scores before running.
plot_precisionrecall(y_actual, y_pred)

#from the plot, we can pick a trade-off threshold where both precision and recall are high



#OR
from yellowbrick.classifier import PrecisionRecallCurve
# Create the visualizer, fit, score, and show it
# NOTE(review): `model` is a torch module in this notebook - presumably yellowbrick needs a
# scikit-learn style estimator here; confirm.
viz = PrecisionRecallCurve(model, per_class=True,
                            cmap="Set1", iso_f1_curves=True, 
                            micro=False)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.show()

>> Analyze Error Distribution

In [None]:
# if the errors are normally distributed around zero, it may indicate that the model is making unbiased predictions. 
# If there is a pattern or trend in the errors, it may suggest that the model has systematic biases or is making 
# consistent errors in certain regions of the input space



def analyze_error_distribution(y_true, y_pred):
    """
    Function to analyze the error distribution by plotting a histogram and two scatter plots.

    Parameters:
    -----------
    y_true : array-like
        Array of true labels or ground truth.
    y_pred : array-like
        Array of predicted values.

    Returns:
    --------
    None
    """
    # Convert first so that plain lists (and other array-likes) can be subtracted element-wise.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Calculate errors
    errors = y_true - y_pred

    # Plot histogram of errors
    plt.figure(figsize=(8, 6))
    plt.hist(errors, bins=20, alpha=0.75)
    plt.xlabel('Error')
    plt.ylabel('Frequency')
    plt.title('Error Distribution (Histogram)')
    plt.grid(True)
    plt.show()

    # Scatter plots of the errors: first against the true labels, then against the predictions.
    for x_values, x_label in ((y_true, 'True Labels'), (y_pred, 'Predicted Values')):
        plt.figure(figsize=(8, 6))
        plt.scatter(x_values, errors, alpha=0.75)
        plt.xlabel(x_label)
        plt.ylabel('Error')
        plt.title('Error Distribution (Scatter Plot)')
        plt.grid(True)
        plt.show()


# NOTE(review): `y_val` and `y_pred` are not defined in the visible notebook - supply the
# true and predicted values before running.
analyze_error_distribution(y_val, y_pred)

>> Error Analysis - Diagnostics

In [None]:
#Error analysis is the process of analyzing the errors made by a machine learning model and identifying the patterns 
# or trends that may be causing the errors. The goal of error analysis is to gain insight into the behavior of the 
# model and identify areas for improvement.

# The steps involved in error analysis:
    # Collect error data
    # Categorize errors
    # Identify patterns
    # Analyze causes
    # Prioritize fixes
    
# Based on the insights gained from the error analysis, you can perform the following.
# False negatives:
# False negatives occur when the model predicts that a customer will not churn when they actually do churn. 
# To fix this issue, you may consider the following:
#     Increase the weight of the features that are more indicative of churn for low-usage customers, 
#         such as frequency of usage or specific product usage. (adjust the model parameters)
#     Add new features that may be predictive of churn, such as customer sentiment or customer service interactions.
#     Use a different model architecture that is better suited for handling imbalanced data, such as a decision tree 
#         or ensemble model.
# False positives:
# False positives occur when the model predicts that a customer will churn when they actually do not churn. 
# To fix this issue, you may consider the following:
#     Decrease the weight of features that are causing false positives, such as age or income, if they are not as 
#         indicative of churn for low-usage customers. (adjust the model parameters)
#     Remove features that are causing false positives altogether, if they are not providing significant value to the 
#         model.
#     Increase the size of the training dataset to capture a more representative sample of customers who do not churn, 
#         which may help the model learn more accurately which customers are likely to churn.


In [None]:
#Plot confusion matrix to visualize false positives and false negatives
    #By default, scikit-learn will assume that the "positive" class is the last label (or highest label value) 
    # in the list of labels. [0, 1] where 1 is Positive and is the class_of_interest.


class_names = [0, 1] # example value, edit as needed (e.g. a dataset's target names); [0, 1] is for binary classification
class_of_interest = 1 # the label treated as "positive"; false_positives / false_negatives below read this global
                        # (the note above recommends the highest label value, to match scikit-learn)

def false_positives(X_test, y_true, y_pred, classes, positive_class=None):
    """
    Select the false positives of a classification problem.

    A false positive is a sample that was predicted as the positive class
    although its true label is a different class.

    Parameters:
    -----------
    X_test : indexable
        Input samples; must support indexing with an integer index array.
    y_true : array-like
        True labels.
    y_pred : array-like
        Predicted labels.
    classes : sequence
        Not used by this function; kept so existing calls keep working.
    positive_class : optional
        Label treated as positive. Defaults to the module-level
        ``class_of_interest``.

    Returns:
    --------
    tuple
        ``(fp_features, fp_labels)``: the false-positive rows of ``X_test`` and
        their predicted labels. The count is also printed.
    """
    if positive_class is None:
        positive_class = class_of_interest

    # Element-wise comparisons below need arrays; plain lists would compare as a whole.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    fp_indices = np.where((y_true != positive_class) & (y_pred == positive_class))[0]
    fp_features = X_test[fp_indices]
    # fp_features = X_test.iloc[fp_indices]
    fp_labels = y_pred[fp_indices]
    # fp_labels = pd.Series(y_pred).iloc[fp_indices]

    print("False positives: ", len(fp_indices))
    return fp_features, fp_labels


#false negatives 
def false_negatives(X_test, y_true, y_pred, classes, positive_class=None):
    """
    Select the false negatives of a classification problem.

    A false negative is a sample whose true label is the positive class
    although a different class was predicted.

    Parameters:
    -----------
    X_test : indexable
        Input samples; must support indexing with an integer index array.
    y_true : array-like
        True labels.
    y_pred : array-like
        Predicted labels.
    classes : sequence
        Not used by this function; kept so existing calls keep working.
    positive_class : optional
        Label treated as positive. Defaults to the module-level
        ``class_of_interest``.

    Returns:
    --------
    tuple
        ``(fn_features, fn_labels)``: the false-negative rows of ``X_test`` and
        their predicted labels. The count is also printed.
    """
    if positive_class is None:
        positive_class = class_of_interest

    # Element-wise comparisons below need arrays; plain lists would compare as a whole.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    fn_indices = np.where((y_true == positive_class) & (y_pred != positive_class))[0]
    fn_features = X_test[fn_indices]
    # fn_features = X_test.iloc[fn_indices]
    fn_labels = y_pred[fn_indices]
    # fn_labels = pd.Series(y_pred).iloc[fn_indices]

    print("False negatives: ", len(fn_indices))
    return fn_features, fn_labels


# Plot the confusion matrix to evaluate the performance of the model.
# `class_names` (defined at the top of this cell) supplies the tick labels.
plot_confusion_matrix(y_test, y_pred, classes=class_names,
                    title='Confusion matrix, Accuracy = {:.2f}'.format(accuracy))

# Identify the false positives (features and predicted labels)
X_fp, y_fp = false_positives(X_test, y_test, y_pred, class_names)

# Identify the false negatives (features and predicted labels)
X_fn, y_fn = false_negatives(X_test, y_test, y_pred, class_names)

### Model Optimization

>> Auto Tune using Optuna

In [None]:
import optuna
from sklearn.metrics import accuracy_score


# Define your objective function
def objective(trial):
    """
    Optuna objective: train a model with the trial's hyperparameters and score it.

    Parameters:
    -----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns:
    --------
    float
        Accuracy (in percent) over the complete test dataloader.
    """
    # Define your hyperparameters to be tuned
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    # 4, 12, ..., 124: integer range with step 8 (suggest_categorical takes a list of choices, not a range)
    nUnits = trial.suggest_int('nUnits', 4, 124, step=8)
    nLayers = trial.suggest_int('nLayers', 1, 6, step=1)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    weight_inits = trial.suggest_categorical('weight_init', ['default', 'kaiming_normal', 'xavier_uniform'])
    # num_epochs = trial.suggest_int('num_epochs', 30, 300)  
    # optimizer = trial.suggest_categorical('optimizer', ['adam', 'sgd'])
    # activation = trial.suggest_categorical('activation', ['relu', 'sigmoid', 'tanh'])
    # patience = trial.suggest_int('patience', 5, 20)

    # Define your model architecture with the hyperparameters
    # NOTE(review): `mnist_model` is not defined in the visible notebook; its signature and
    # the meaning of `weight_inits` for it are assumed from this call - confirm.
    model = mnist_model(nUnits, nLayers, weight_inits, dropout_rate)

    num_epochs = 50
    num_classes = 10

    # Define the loss function and optimizer
    # NOTE(review): NLLLoss expects log-probabilities; confirm that mnist_model ends in
    # log_softmax, otherwise nn.CrossEntropyLoss is the matching criterion.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Define the learning rate scheduler
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Move the model to the appropriate device (e.g., GPU if available)
    model.to(device)

    # Rebuild the training loader so the sampled batch_size is really used;
    # dataset, sampler and drop_last are taken over from the global loader.
    trial_train_dataloader = DataLoader(
        train_dataloader.dataset,
        batch_size=batch_size,
        sampler=train_dataloader.sampler,
        drop_last=train_dataloader.drop_last,
    )

    # Loop over the dataset for multiple epochs
    for epoch in range(num_epochs):
        model.train()

        # Iterate over the training dataloader
        for inputs, labels in trial_train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

        # Update the learning rate
        scheduler.step()

    #evaluation
    model.eval()  # Set the model to evaluation mode

    # Count correct predictions over ALL test batches, not only the last one.
    total_correct = 0
    total_samples = 0

    # Disable gradient computation for evaluation
    with torch.inference_mode():        #or torch.no_grad()
        for inputs, labels in test_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass and predictions
            predicted = torch.argmax(model(inputs), dim=1)

            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    # Calculate accuracy as a plain Python float, which is what Optuna records.
    if total_samples == 0:
        return 0.0
    return 100.0 * total_correct / total_samples

# Define the study; 'maximize' because objective() returns an accuracy
study = optuna.create_study(direction='maximize')

# Run the optimization: objective() is called once per trial, 100 trials in total
study.optimize(objective, n_trials=100)


In [None]:
# Get the best hyperparameters: objective value of the best trial, then each sampled parameter
print(" Value: ", study.best_trial.value)
print(" Params: ")
for key, value in study.best_trial.params.items():
    print(f"    {key}: {value}")

### Save the Model

In [None]:
import torch

# Save the model checkpoint
checkpoint = {
    'epoch': num_epochs,                                  # epochs configured in the training cell
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss.item(),                                  # plain float instead of a device-bound tensor
    'learning_rate': optimizer.param_groups[0]['lr'],     # current (scheduled) learning rate
    'hyperparameters': {
                'hidden_units': 64,
                'batch_size': batch_size                  # value set in the dataloader cell
                        },
    # 'other_info': 'Additional information about the checkpoint'
}

torch.save(checkpoint, 'model_checkpoint.pth')
