### Model Definition

In [None]:
# Define your neural network architecture by creating a custom model class that inherits from torch.nn.Module.

# In the __init__ method, define the layers of your model using PyTorch's nn module. 
# This includes defining linear layers, activation functions, pooling layers, etc.

# Implement the forward method to define the forward pass of your model. This method describes how the input flows 
# through the layers to produce an output.

>>Sequential

In [1]:
#using Sequential
#Sequential API allows you to create a model by stacking layers on top of each other in a sequential manner

import torch
import torch.nn as nn

#A simple feed-forward neural network with two hidden layers

# Define the model architecture 
# Layers are applied in the order listed: 784 inputs -> 64 -> 32 -> 10 outputs,
# with a ReLU non-linearity after each of the two hidden layers.
model = nn.Sequential(
    # First hidden layer + activation
    nn.Linear(784, 64),
    nn.ReLU(),
    # Second hidden layer + activation
    nn.Linear(64, 32),
    nn.ReLU(),
    # Output layer (no activation: produces raw scores)
    nn.Linear(32, 10),
)


>> Custom function (Module)

In [None]:
class paul_model(nn.Module):
    """Fully connected network: Linear -> ReLU -> Linear -> Softmax -> Linear.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width of both hidden layers.
        output_dim: number of values produced per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in the order they are applied in forward().
        self.fct1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fct2 = nn.Linear(hidden_dim, hidden_dim)
        # NOTE(review): softmax (over dim 1) is used here as a hidden activation,
        # which is unusual -- confirm this is intentional.
        self.soft1 = nn.Softmax(dim=1)
        self.fct3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run x through the three linear layers and return the final layer's output."""
        hidden = self.relu1(self.fct1(x))
        hidden = self.soft1(self.fct2(hidden))
        return self.fct3(hidden)

# Create an instance of the custom model.
# The sizes below are example values (the same 784 -> 64 -> 10 sizes used in the
# Sequential example); replace them with the dimensions of your own data.
input_dim = 784   # number of input features per sample
hidden_dim = 64   # number of neurons in each hidden layer
output_dim = 10   # number of values the model outputs per sample
model = paul_model(input_dim, hidden_dim, output_dim)

# Get the output for a batch of inputs.
# A random batch of 32 samples stands in for real training data so this cell runs
# on its own; replace it with your actual (batch, input_dim) float tensor.
X_train = torch.randn(32, input_dim)
y_pred = model(X_train)


In [None]:
#a custom function to define the model

import torch
import torch.nn as nn

# Define a custom model class
# Define your neural network architecture by creating a custom model class that inherits from torch.nn.Module.
class MyModel(nn.Module):
    """Feed-forward network with two hidden layers: Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width of both hidden layers.
        output_dim: number of values produced per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        # __init__ is where the layers of the model are declared (linear layers,
        # activation functions, pooling layers, etc.).
        super().__init__()
        # First hidden layer and its activation
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        # Second hidden layer and its activation
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        # Output layer
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Forward pass: describe how the input flows through the layers to the output."""
        first_hidden = self.relu1(self.fc1(x))
        second_hidden = self.relu2(self.fc2(first_hidden))
        return self.fc3(second_hidden)

# Create an instance of the custom model.
# The sizes below are example values; replace them with the dimensions of your own data.
input_dim = 784   # number of input features per sample
hidden_dim = 64   # number of neurons in each hidden layer
output_dim = 10   # number of values the model outputs per sample
model = MyModel(input_dim, hidden_dim, output_dim)

# Get the output for a batch of inputs.
# A random batch of 32 samples stands in for real training data so this cell runs
# on its own; replace it with your actual (batch, input_dim) float tensor.
X_train = torch.randn(32, input_dim)
y_pred = model(X_train)


# input_dim: the number of features (variables) in each sample of your input data.
# hidden_dim: the number of neurons in each hidden layer of your neural network.
# output_dim: the number of neurons in the output layer of your neural network.
    # For a multi-class classification problem with 10 classes, the output dimension would be 10.
    # For a regression problem, the output dimension is typically 1, because regression tasks
        # involve predicting a single continuous numerical value as the output.

In [None]:
# Besides nn.Linear and nn.ReLU, torch.nn provides other nn.Module subclasses that can be used to define
# different layers and operations in your neural network.

# Convolutional Layers:
#   nn.Conv1d: 1D convolutional layer for processing sequential data.
#   nn.Conv2d: 2D convolutional layer for processing images or spatial data.
#   nn.Conv3d: 3D convolutional layer for processing volumetric data.

# Pooling Layers:
#   nn.MaxPool1d: 1D max pooling layer.
#   nn.MaxPool2d: 2D max pooling layer.
#   nn.MaxPool3d: 3D max pooling layer.
#   nn.AvgPool1d: 1D average pooling layer.
#   nn.AvgPool2d: 2D average pooling layer.
#   nn.AvgPool3d: 3D average pooling layer.

# Recurrent Layers:
#   nn.RNN: Basic RNN layer.
#   nn.LSTM: LSTM layer.
#   nn.GRU: GRU layer.

# Normalization Layers:
#   nn.BatchNorm1d: Batch normalization layer for 1D inputs.
#   nn.BatchNorm2d: Batch normalization layer for 2D inputs.
#   nn.BatchNorm3d: Batch normalization layer for 3D inputs.

# Dropout and Regularization:
#   nn.Dropout: Dropout layer for regularization.
#   nn.Dropout2d: 2D dropout layer.
#   nn.Dropout3d: 3D dropout layer.

# Activation Functions:
#   nn.Sigmoid: Sigmoid activation function.
#   nn.Tanh: Hyperbolic tangent activation function.
#   nn.Softmax: Softmax activation function.
#   nn.LeakyReLU: Leaky ReLU activation function.



>> Example

In [2]:
#an example 

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset
iris = load_iris()
X = iris.data      # raw (unscaled) feature matrix
y = iris.target    # integer class labels

# Split the data into training and testing sets BEFORE scaling, so that no
# statistics of the test samples leak into the preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data: fit the scaler on the training split only, then apply the
# same (training) mean/std to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert the training data to PyTorch tensors.
# X_test / y_test stay NumPy arrays here; convert them when you evaluate the model.
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)

# Define the custom model
class CustomModel(nn.Module):
    """Single-hidden-layer classifier: Linear -> ReLU -> Linear.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width of the hidden layer.
        output_dim: number of output scores per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the output of the final linear layer (no activation applied to it)."""
        return self.fc2(self.relu(self.fc1(x)))

# Layer sizes: the input width is read from the training tensor's second
# dimension; the hidden and output widths are fixed.
input_dim = X_train.size(1)
hidden_dim, output_dim = 16, 3

# Build the model from those sizes
model = CustomModel(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

# # Define the loss function and optimizer
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.01)

# # Training loop
# num_epochs = 100

# for epoch in range(num_epochs):
#     # Forward pass
#     outputs = model(X_train)
#     loss = criterion(outputs, y_train)
    
#     # Backward pass and optimization
#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()
    
#     # Print the loss for every 10 epochs
#     if (epoch+1) % 10 == 0:
#         print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")

# # Test the model
# X_test = torch.Tensor(X_test)
# y_test = torch.LongTensor(y_test)
# with torch.no_grad():
#     outputs = model(X_test)
#     _, predicted = torch.max(outputs.data, 1)
#     accuracy = (predicted == y_test).sum().item() / len(y_test)
#     print(f"Test Accuracy: {accuracy}")


>> Activation Functions

>> Hyperparameters

In [None]:
# Some common hyperparameters in deep learning include:

# Learning rate: 
    # Determines the step size during gradient descent optimization and affects the convergence speed 
    # and accuracy of the model.

# Number of hidden layers: 
    # Determines the depth of the neural network architecture and influences the model's capacity to learn complex 
    # patterns.

# Number of neurons per layer: 
    # Defines the width of the neural network architecture and affects the model's representational capacity and 
    # computational efficiency.

# Activation functions: 
    # Determines the non-linear transformation applied to the output of each neuron, introducing non-linearity into 
    # the model.

# Dropout rate: 
    # Controls the regularization technique of randomly dropping out a fraction of neurons during training, which
    # helps prevent overfitting. 

# Batch size: 
    # Specifies the number of training samples propagated through the network before updating the model's weights.

# Number of epochs: 
    # Specifies the number of times the entire training dataset is passed through the model during training.

# Regularization techniques: 
    # Include methods like L1 and L2 regularization, which help prevent overfitting by adding penalties to the 
    # loss function.

# Optimizer: 
    # Specifies the optimization algorithm used to update the model's weights during training, such as 
    # Stochastic Gradient Descent (SGD), Adam, or RMSprop.

# Loss function: 
    # Defines the objective function used to measure the discrepancy between the predicted output and the 
    # true output during training.

### Data Preprocessing

In [None]:
import torch
from torchvision import datasets, transforms

# Define transformations: resize, take a 224x224 center crop, convert to a tensor,
# then normalize each of the three channels (the mean/std values below are the
# commonly used ImageNet channel statistics).
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load datasets and apply transformations.
# `datasets` must be imported explicitly: `from torchvision import transforms` alone
# does not bring the name `torchvision` into scope.
train_dataset = datasets.ImageFolder(root='train_data/', transform=transform)
test_dataset = datasets.ImageFolder(root='test_data/', transform=transform)

# Create data loaders (shuffle only the training data)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)


>> Custom Dataset and DataLoader

In [24]:
import pandas as pd
import torch

# Load your dataset into a DataFrame
df = pd.read_csv('your_dataset.csv')

# Names of the columns used as model inputs -- list every feature column of your
# dataset here (placeholder names shown).
feature_columns = ['feature1', 'feature2']

# Extract the input features and target labels from the DataFrame
inputs = df[feature_columns].values
targets = df['target'].values

# Convert the data to PyTorch tensors
# (float32 features; int64 targets -- assumes the target column holds integer class ids)
inputs = torch.tensor(inputs, dtype=torch.float32)
targets = torch.tensor(targets, dtype=torch.long)

# Device to run on; move the model and each batch to it with `.to(device)`
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Data loaders in deep learning are utility classes or functions that help in efficiently loading and
# processing training, validation, and testing data. They are an essential component of training deep learning models
# and provide several benefits such as Data Batching, Data Shuffling, Data Augmentation, Data Transformation,
# Efficient Memory Management, Parallel Data Loading.
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# MNIST images have a single (grayscale) channel, so Normalize takes one mean and one
# std value; a 3-channel Normalize would fail on them. 0.1307 / 0.3081 are the commonly
# used MNIST mean / std.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Load the training data (downloaded to ./data on first use)
train_dataset = datasets.MNIST(root='./data', train=True, transform=mnist_transform, download=True)

# The data loader batches and shuffles the dataset. It does NOT move data to a device:
# move each batch yourself inside the training loop, e.g. inputs.to(device), labels.to(device).
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)





In [None]:
#Image Folder Dataset
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Per-image preprocessing, applied in order:
#   1. resize to a fixed 224x224 size
#   2. convert the image to a tensor
#   3. normalize each of the three channels with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Build the dataset from the image folders under 'Dataset/'
dataset = datasets.ImageFolder('Dataset/', transform=transform)

# Use the dataset in your model or create a data loader



#CSV Dataset
import pandas as pd
import torch
import torchvision.datasets as datasets
from torch.utils.data import TensorDataset

# torchvision does not provide a CSV dataset class, so the file is read with pandas
# and the resulting tensors are wrapped in a TensorDataset.
csv_frame = pd.read_csv(
    'Dataset/data.csv',  # the CSV file to load
    delimiter=',',       # the delimiter used in the CSV file
    header=0,            # the first row is a header row
)

continuous_columns = [0, 1, 2]   # positions of the continuous columns
categorical_columns = [3, 4, 5]  # positions of the categorical columns (if any)
target_column = 'label'          # name of the target column

# Continuous features as float32 values
continuous = torch.tensor(csv_frame.iloc[:, continuous_columns].to_numpy(dtype='float32'))

# Categorical features as integer codes (one code per distinct value, per column)
categorical_codes = csv_frame.iloc[:, categorical_columns].apply(
    lambda column: column.astype('category').cat.codes
)
categorical = torch.tensor(categorical_codes.to_numpy(dtype='int64'))

# Targets as int64 -- assumes the label column already holds integer class ids
labels = torch.tensor(csv_frame[target_column].to_numpy(), dtype=torch.long)

# Each sample is a (continuous, categorical, label) tuple
dataset = TensorDataset(continuous, categorical, labels)

# Use the dataset in your model or create a data loader


#Custom Dataset
import torch
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Skeleton for a user-defined dataset.

    Fill in the data-loading logic in __init__ and implement __len__ and
    __getitem__. Until they are implemented, both methods raise
    NotImplementedError so that a forgotten implementation fails with a clear
    message instead of silently returning None.

    Args:
        root_dir: location of the data, stored as ``self.root_dir``.
        transform: optional callable to apply to each sample, stored as
            ``self.transform``.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Additional initialization logic goes here
        # (e.g. load or index the samples into self.data / self.labels)

    def __len__(self):
        """Return the total number of samples in the dataset."""
        # e.g., return len(self.data)
        raise NotImplementedError("CustomDataset.__len__ must return the number of samples")

    def __getitem__(self, index):
        """Retrieve and preprocess the single sample at position ``index``."""
        # e.g., return self.transform(self.data[index]), self.labels[index]
        raise NotImplementedError("CustomDataset.__getitem__ must return the sample at `index`")

# Create an instance of your custom dataset
dataset = CustomDataset(root_dir='Dataset/', transform=None)

# Use the dataset in your model or create a data loader










### Model Training

In [None]:
# Loss Function: 
    # Choose an appropriate loss function based on the problem you are solving. Common loss functions include 
        # mean squared error (MSE), binary cross-entropy, or categorical cross-entropy, depending on the task.
    # Create an instance of the chosen loss function from torch.nn module.

# Optimizer:
    # Select an optimizer that will update the model's parameters during training. Popular choices include 
        # Stochastic Gradient Descent (SGD), Adam, or RMSprop.
    # Initialize the optimizer by passing the model parameters and setting the learning rate and other hyperparameters.
    
# Training Loop:
    # Iterate over the training dataset in batches.
    # Zero the gradients of the model parameters to avoid accumulation.
    # Pass the input batch through the model to obtain predictions.
    # Calculate the loss between the predictions and the target values.
    # Backpropagate the gradients by calling backward() on the loss tensor.
    # Update the model parameters using the optimizer's step() function.
    # Optionally, track and record metrics like accuracy or loss during training.



>> Loss function

In [None]:
# Define the loss function

# The assignments below are a catalog of common loss functions. Each one REPLACES
# the previous value of `loss_function`; the loss actually used is selected
# explicitly after the catalog.

#Regression
loss_function = nn.MSELoss()  # Mean Squared Error loss: It is widely used in regression problems
loss_function = nn.L1Loss() #Mean Absolute Error (MAE): It is often used in regression problems
loss_function = nn.SmoothL1Loss() #Huber-style loss: A robust loss function for regression problems that combines
                                    #properties of both MSE and MAE.

#Classification
loss_function = nn.CrossEntropyLoss()  #Cross-Entropy Loss: It is commonly used in multi-class classification problems
loss_function = nn.BCELoss() #Binary Cross-Entropy Loss: used in binary classification tasks, where the model's output
                                #consists of probabilities instead of logits.
loss_function = nn.BCEWithLogitsLoss() #Binary Cross-Entropy (BCE) Loss: It is commonly used in binary classification
                                            #problems, where the model's output consists of logits
                                            # (unbounded real numbers) rather than probabilities.

#Generative models
loss_function = nn.KLDivLoss()  #Kullback-Leibler Divergence (KLD): Measuring the difference between two probability
                                    #distributions, commonly used in generative models.

# Select the loss to use. Cross-entropy matches the example below
# (raw logits + integer class labels).
loss_function = nn.CrossEntropyLoss()

# Example batch so the cell runs on its own: logits for 2 samples x 3 classes and
# the true class index of each sample. Replace with your model outputs and targets.
outputs = torch.tensor([[2.0, 0.5, -1.0], [0.1, 1.5, 0.3]])
target_data = torch.tensor([0, 1])

# Calculate the loss
loss = loss_function(outputs, target_data)

# Print the loss
print("Loss:", loss.item())


# Logits are the raw, unnormalized values produced by the model before applying any activation function like sigmoid or 
# softmax. They represent the model's predictions or scores for each class without being converted into probabilities 
# yet.


>> Optimizers

In [None]:
# Optimizers play a crucial role in training neural networks by updating the model's parameters to minimize the 
# loss function

# Step size shared by the examples below (example value; tune it for your problem).
learning_rate = 0.001

# The assignments below are a catalog: each one REPLACES the previous `optimizer`.
# Keep only the one you want to train with.

# Stochastic Gradient Descent (SGD):
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# SGD is a classic optimization algorithm that updates the model parameters based on the gradients computed on
# small subsets of the training data

# Adam:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Adam (Adaptive Moment Estimation) is an optimization algorithm that adapts the learning rate for each parameter
# based on the estimates of the first and second moments of the gradients

# Adagrad:
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)
# Adagrad (Adaptive Gradient) is an optimization algorithm that adapts the learning rate for each parameter based on
# the historical gradients for that parameter. It is often used in natural language processing tasks.

#RMSprop:
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
# RMSprop (Root Mean Square Propagation) is an optimization algorithm that adapts the learning rate for each parameter
# based on the moving average of squared gradients. It helps mitigate the diminishing learning rate problem

# Adadelta:
# NOTE(review): Adadelta's `lr` scales an already-adaptive update and defaults to 1.0 in
# PyTorch; confirm that a small value such as `learning_rate` is really intended here.
optimizer = torch.optim.Adadelta(model.parameters(), lr=learning_rate)
# Adadelta is an optimization algorithm that dynamically adapts the learning rate and accumulates only a limited 
# history of past gradients.


#the most popular are: Stochastic Gradient Descent (SGD), Adam, RMSprop, and Adagrad.


>> Training Loop

In [None]:
# The training loop consists of two nested loops. The outer loop iterates over the specified number of epochs. 
# Inside the epoch loop, the model is set to train mode (model.train()) and the running loss is initialized. 
# Then, we iterate over the training data in batches using the train_loader.

# For each batch, we perform the following steps:

    # Zero the gradients using optimizer.zero_grad().
    # Forward pass: Pass the input data through the model to obtain the predicted outputs.
    # Compute the loss between the predicted outputs and the true labels.
    # Backward pass: Compute the gradients of the loss with respect to the model parameters.
    # Update the weights using the optimizer's step() method.
    # Update the running loss by adding the current batch loss.

In [None]:
# Define your model, optimizer, and loss function.
# MyModel's constructor requires its layer sizes, so input_dim, hidden_dim and
# output_dim must already be defined before this cell runs.
model = MyModel(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Number of full passes over the training data (example value)
num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    # Set the model to train mode
    model.train()

    # Sum of the per-batch losses for this epoch
    running_loss = 0.0

    # Iterate over the training dataset in batches
    for inputs, labels in train_loader:
        # Zero the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Update the weights
        optimizer.step()

        # Update the running loss
        running_loss += loss.item()

    # Average per-batch loss for the epoch (guarded against an empty loader)
    epoch_loss = running_loss / max(len(train_loader), 1)

    # Print the loss for each epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")


>> Evaluating and Validation

In [None]:
# Validation Loop:
    # Evaluate the model on the validation dataset to monitor its performance and make any necessary adjustments.
    # Pass the input batch through the model to obtain predictions.
    # Calculate the validation loss and any desired evaluation metrics.
    
# Hyperparameter Tuning:
    # Experiment with different learning rates, batch sizes, architectures, activation functions, 
        # regularization techniques, and optimizer settings.
    # Use the validation set to evaluate different combinations of hyperparameters and choose the best-performing ones.

In [None]:
#Model Evaluation

# Set the model to evaluation mode
model.eval()

# Totals accumulated over ALL test batches, so the reported accuracy covers the
# whole test set rather than only the last batch.
test_loss = 0.0
correct = 0
total = 0

# Evaluate the model without tracking gradients
with torch.no_grad():
    for data, labels in test_loader:
        # Forward pass
        outputs = model(data)

        # Calculate loss; weight it by the batch size so the average below is per
        # sample (assumes `criterion` returns the mean loss of the batch)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * labels.size(0)

        # Count correct predictions (the class with the highest score wins)
        correct += (outputs.argmax(1) == labels).sum().item()
        total += labels.size(0)

# Guard against an empty test loader
test_loss = test_loss / total if total else 0.0
accuracy = correct / total if total else 0.0

print('Accuracy:', accuracy)

In [None]:
# Train with per-epoch validation, checkpointing of the best model, and early stopping.

best_loss = float('inf')  # Lowest validation loss seen so far
early_stopping_counter = 0  # Consecutive epochs without a validation-loss improvement
early_stopping_patience = 3  # Stop after this many consecutive epochs without improvement
checkpoint_saved = False  # True once 'best_model.pt' has been written during this run

for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_seen = 0  # samples processed; used instead of the dataset length

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_count = inputs.size(0)
        # Weight by batch size (assumes `criterion` returns the batch mean)
        train_loss += loss.item() * batch_count
        train_correct += (outputs.argmax(dim=1) == targets).sum().item()
        train_seen += batch_count

    train_loss /= max(train_seen, 1)
    train_accuracy = train_correct / max(train_seen, 1)

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_seen = 0

    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            batch_count = inputs.size(0)
            val_loss += loss.item() * batch_count
            val_correct += (outputs.argmax(dim=1) == targets).sum().item()
            val_seen += batch_count

    val_loss /= max(val_seen, 1)
    val_accuracy = val_correct / max(val_seen, 1)

    # Print epoch metrics
    print(f"Epoch: {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f}")
    print(f"Val Loss: {val_loss:.4f} | Val Accuracy: {val_accuracy:.4f}")

    if val_loss < best_loss:
        # Improvement: save the best model checkpoint and reset the patience counter
        best_loss = val_loss
        early_stopping_counter = 0
        torch.save(model.state_dict(), 'best_model.pt')
        checkpoint_saved = True
    else:
        # No improvement. The comparison must happen in the same branch as the
        # best-loss update: checking `val_loss >= best_loss` AFTER updating
        # best_loss is always true and would count every epoch.
        early_stopping_counter += 1
        if early_stopping_counter >= early_stopping_patience:
            print("Early stopping triggered!")
            break

# Load the best model checkpoint (only if one was written during this run)
if checkpoint_saved:
    model.load_state_dict(torch.load('best_model.pt'))

# Set the model to evaluation mode
model.eval()


>> Hyperparameter Tuning 

In [None]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# Define your model
class MyModel(nn.Module):
    """Configurable multi-layer perceptron.

    The network is ``num_layers`` repetitions of Linear -> activation -> Dropout,
    followed by a final Linear layer that produces ``output_size`` raw scores.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of every hidden layer.
        num_layers: number of hidden layers.
        output_size: number of output scores per sample.
        dropout_rate: dropout probability applied after each hidden activation.
        activation: one of 'relu', 'sigmoid' or 'tanh'.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    # Maps an activation name to the module class that implements it
    _ACTIVATIONS = {'relu': nn.ReLU, 'sigmoid': nn.Sigmoid, 'tanh': nn.Tanh}

    def __init__(self, input_size, hidden_size, num_layers, output_size,
                 dropout_rate=0.0, activation='relu'):
        super().__init__()
        if activation not in self._ACTIVATIONS:
            raise ValueError(
                f"Unknown activation {activation!r}; expected one of {sorted(self._ACTIVATIONS)}"
            )
        activation_cls = self._ACTIVATIONS[activation]

        layers = []
        in_features = input_size
        for _ in range(num_layers):
            layers.append(nn.Linear(in_features, hidden_size))
            layers.append(activation_cls())
            layers.append(nn.Dropout(dropout_rate))
            in_features = hidden_size  # later layers take the hidden width as input
        layers.append(nn.Linear(in_features, output_size))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw output scores for the batch ``x``."""
        return self.network(x)

# Define your objective function
def objective(trial):
    """Optuna objective: train a model with the trial's hyperparameters and score it.

    Relies on names defined elsewhere in the notebook: MyModel, MyDataset,
    input_size, output_size, train_data, train_labels and test_loader.

    Args:
        trial: the optuna trial used to sample hyperparameters and report progress.

    Returns:
        The best accuracy reached on ``test_loader`` during this trial.

    Raises:
        optuna.TrialPruned: when the pruner decides the trial should stop early.
    """
    # Hyperparameters to be tuned (each name is suggested exactly once)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    num_epochs = trial.suggest_int('num_epochs', 10, 30)
    patience = trial.suggest_int('patience', 5, 20)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    hidden_size = trial.suggest_categorical('hidden_size', [64, 128, 256])
    num_layers = trial.suggest_int('num_layers', 1, 4)
    optimizer_name = trial.suggest_categorical('optimizer', ['adam', 'sgd'])
    activation = trial.suggest_categorical('activation', ['relu', 'sigmoid', 'tanh'])

    # Define the model architecture with the hyperparameters
    model = MyModel(input_size, hidden_size, num_layers, output_size, dropout_rate, activation)

    # Build the optimizer the trial selected (kept in a separate name so the
    # suggested string is not confused with the optimizer object)
    if optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Define the loss function
    loss_function = nn.CrossEntropyLoss()

    # Define the dataset and dataloader
    train_dataset = MyDataset(train_data, train_labels)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    best_accuracy = 0.0
    best_epoch = 0  # epoch at which best_accuracy was last improved

    for epoch in range(num_epochs):
        # Training
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, targets)
            loss.backward()
            optimizer.step()

        # Evaluation after every epoch, so intermediate results can be reported.
        # Labels are collected from the loader itself so they always line up with
        # the predictions.
        # NOTE(review): tuning against test_loader leaks test data into model
        # selection; prefer a separate validation loader if one is available.
        model.eval()
        predictions = []
        true_labels = []
        with torch.no_grad():
            for inputs, targets in test_loader:
                outputs = model(inputs)
                predictions.extend(outputs.argmax(dim=1).tolist())
                true_labels.extend(targets.tolist())
        accuracy = accuracy_score(true_labels, predictions)

        # Report the intermediate result to Optuna and honor pruning decisions
        trial.report(accuracy, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

        # Early stopping: give up once `patience` epochs pass without a new best
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_epoch = epoch
        elif epoch - best_epoch > patience:
            break

    return best_accuracy

# Define the study (higher objective values are better)
study = optuna.create_study(direction='maximize')

# Run the optimization
study.optimize(objective, n_trials=100)

# Get the best hyperparameters
best_params = study.best_params     #or study.best_trial

# Rebuild the model with the best ARCHITECTURE hyperparameters only.
# best_params also contains training settings (learning_rate, batch_size, ...),
# which are not constructor arguments, so it cannot be unpacked with **best_params.
best_model = MyModel(
    input_size,
    best_params['hidden_size'],
    best_params['num_layers'],
    output_size,
    best_params['dropout_rate'],
    best_params['activation'],
)
# Train your model using the best hyperparameters
# (best_params['learning_rate'], best_params['batch_size'], best_params['optimizer'], ...)

# Evaluate your best model on the test set

# Print the best hyperparameters and the best score
print("Best Hyperparameters:", best_params)
print("Best Score:", study.best_value)


### Model Saving and Loading

In [None]:
import torch
from torchvision.models import resnet50  # or any other pre-defined model architecture you want to use 


# Save the model checkpoint (only the parameters, via the state dict)
torch.save(model.state_dict(), 'model_checkpoint.pth')

# Load the model checkpoint back into a model with the same architecture
model.load_state_dict(torch.load('model_checkpoint.pth'))
#or
# Build the architecture WITHOUT pretrained weights: the state dict loaded on the
# next line overwrites every parameter, so downloading pretrained weights first
# would be wasted work.
model = resnet50()
model.load_state_dict(torch.load('model.pth'))


# Load a whole model object that was saved with torch.save(model, path).
# SECURITY: this relies on pickle, which can execute arbitrary code while loading --
# only load files you trust. Prefer the state-dict approach above where possible.
model = torch.load('obinopaul/model.pth')
