This notebook fine-tunes a Vision Transformer (ViT) image classifier on CIFAR-10 using PyTorch Ignite.

In [90]:
# !pip install pytorch-ignite
# !pip install tensorboard

In [91]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from transformers import ViTFeatureExtractor, ViTForImageClassification,AutoFeatureExtractor,AdamW
from tqdm import tqdm
import torch
import numpy as np
from transformers import ViTImageProcessor, ViTModel
from PIL import Image
import gym
import numpy as np


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pylab as plt


%matplotlib inline

In [92]:
# Report whether PyTorch can see a CUDA device.
print(
    "CUDA is available. Using GPU."
    if torch.cuda.is_available()
    else "CUDA is not available. Using CPU."
)


CUDA is available. Using GPU.


In [None]:
# Image processor and (legacy-named) feature extractor for the same checkpoint
# that the model weights are loaded from.
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# Number of target classes for the new classification head.
num_classes = 10  # Update with your new number of classes

# Passing num_labels lets the library build the classification head itself, so
# model.config.num_labels and the head's output size stay in agreement.
# The config's image_size is left at the checkpoint's value: the data pipeline
# resizes inputs to 224x224, and the patch embeddings are already built by the
# time from_pretrained returns.
model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=num_classes,
)

# Move the model to the desired device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


In [94]:
# CIFAR-10 class names, listed in label-index order.
_CLASS_NAMES = (
    'Airplane', 'Automobile', 'Bird', 'Cat', 'Deer',
    'Dog', 'Frog', 'Horse', 'Ship', 'Truck',
)

# Maps an integer class index (0-9) to its human-readable name.
index_to_label = dict(enumerate(_CLASS_NAMES))

In [95]:
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy,Loss
from torch.utils.data import Subset

In [96]:
from ignite.handlers import ModelCheckpoint
from ignite.contrib.handlers import TensorboardLogger, global_step_from_engine

In [97]:
# Create the datasets and data loaders.
#
# Both pipelines resize CIFAR-10 images to 224x224 and normalize them with the
# mean/std stored on the checkpoint's image processor, so the model sees inputs
# on the scale it was pre-trained with. Random flipping is an augmentation and
# is applied to the training split only, which keeps evaluation deterministic.
_resize = transforms.Resize((224, 224))
_normalize = transforms.Normalize(mean=processor.image_mean, std=processor.image_std)

# Training-time transform (with augmentation).
transform = transforms.Compose([
    _resize,
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    _normalize,
])

# Evaluation-time transform (no augmentation).
eval_transform = transforms.Compose([
    _resize,
    transforms.ToTensor(),
    _normalize,
])

train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=eval_transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


In [98]:
# NOTE: this cell does not execute the subsetting code. It only prints the
# snippet below as text, so train_loader / test_loader are not reassigned here.
# To actually train on a small subset, run the snippet as code instead.
print('''# Choose the number of samples you want to keep
num_train_samples = 50
num_test_samples = 10

# Create subsets with a smaller number of samples
train_subset = Subset(train_dataset, list(range(num_train_samples)))
test_subset = Subset(test_dataset, list(range(num_test_samples)))
batch_size = 8

# Create data loaders for the subsets
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False, num_workers=4)''')

# Choose the number of samples you want to keep
num_train_samples = 50
num_test_samples = 10

# Create subsets with a smaller number of samples
train_subset = Subset(train_dataset, list(range(num_train_samples)))
test_subset = Subset(test_dataset, list(range(num_test_samples)))
batch_size = 8

# Create data loaders for the subsets
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False, num_workers=4)


In [99]:
# Loss function and optimizer for the `model` object created earlier in the notebook.
# CrossEntropyLoss takes raw logits and integer class targets.
criterion = nn.CrossEntropyLoss()
# NOTE(review): lr=0.001 is kept as-is; it may be high for fine-tuning a
# pre-trained transformer - confirm against the training curves.
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [100]:

def trainer_score_function(engine):
    """Return the ``'accuracy'`` entry from ``engine.state.metrics``.

    Used as the score function for checkpointing: the value returned here is
    the score associated with the engine's current state.
    """
    metrics = engine.state.metrics
    return metrics['accuracy']
# Validation metrics keyed by the name they are reported under:
# classification accuracy and the mean of `criterion` over the evaluated data.
val_metrics = dict(
    accuracy=Accuracy(),
    loss=Loss(criterion),
)

In [101]:


from ignite.engine import Engine
from torch.nn.utils import clip_grad_norm_
from ignite.engine import Events
from ignite.metrics import Accuracy
from ignite.contrib.handlers import TensorboardLogger, global_step_from_engine
from ignite.contrib.handlers import ProgressBar
# TensorBoard logger; its log directory is ./tb-logger (relative to the notebook's working directory).
tb_logger = TensorboardLogger(log_dir="tb-logger")

In [102]:
from ignite.metrics import Precision, Recall,ConfusionMatrix


In [103]:
# Progress bar handler (attached to an engine later); persist=True keeps the
# bar on screen after it finishes.
pbar = ProgressBar(persist=True)


In [104]:
def compute_accuracy(predictions, targets):
    """Return the fraction of rows whose arg-max class equals the target.

    Args:
        predictions: tensor of per-class scores; the predicted class is the
            arg-max along dimension 1.
        targets: tensor of class indices compared element-wise with the
            predicted classes.

    Returns:
        float: mean of the 0/1 correctness indicators.
    """
    hits = torch.eq(predictions.argmax(dim=1), targets)
    return hits.float().mean().item()

In [105]:
def train_step(engine, batch):
    """Run one optimization step on a single batch.

    Args:
        engine: the Ignite engine invoking this step (unused here).
        batch: an ``(inputs, targets)`` pair as yielded by the data loader.

    Returns:
        tuple: ``(logits, targets)`` on ``device``. The logits are detached so
        the engine's stored output does not keep the autograd graph alive.
    """
    model.train()
    inputs, targets = batch
    inputs, targets = inputs.to(device), targets.to(device)

    optimizer.zero_grad()
    outputs = model(inputs)
    logits = outputs.logits if hasattr(outputs, "logits") else outputs.last_hidden_state

    # CrossEntropyLoss applies log-softmax itself, so it must receive the raw
    # logits; feeding it softmax probabilities squashes the gradients.
    loss = criterion(logits, targets)
    loss.backward()

    # Clip between backward() and step() so the clipped gradients are the ones
    # the optimizer actually applies.
    clip_grad_norm_(model.parameters(), max_norm=0.9)  # Adjust max_norm as needed
    optimizer.step()

    return logits.detach(), targets



# Create the Ignite training engine, with train_step as its per-batch process function.
trainer = Engine(train_step)



In [106]:
# Metric objects: precision and recall averaged across classes (average=True),
# plus a confusion matrix over the 10 classes.
precision = Precision(average=True)
recall = Recall(average=True)
confusion_matrix = ConfusionMatrix(num_classes=10)

# Register the metrics on the *trainer* engine under the names they are
# reported as in trainer.state.metrics.
for metric_name, metric in (
    ("precision", precision),
    ("recall", recall),
    ("accuracy", Accuracy()),
    ("confusion_matrix", confusion_matrix),
):
    metric.attach(trainer, metric_name)

In [108]:
# Checkpoint handler: keeps the 2 best-scoring checkpoints under ./checkpoint,
# with file names prefixed "best" and tagged with the accuracy score and the
# trainer's current step.
model_checkpoint = ModelCheckpoint(
    "checkpoint",
    n_saved=2,
    filename_prefix="best",
    score_function=trainer_score_function,
    score_name="accuracy",
    global_step_transform=global_step_from_engine(trainer),
    # The default (require_empty=True) raises as soon as the directory already
    # holds checkpoint files, which breaks re-running the notebook.
    require_empty=False,
)

In [None]:

# @trainer.on(Events.EPOCH_COMPLETED)
# def log_training_results(engine):
#     metrics = trainer.state.metrics
    #print(metrics)
    # logits=engine.state.output[0].item()
    # probabilities = torch.nn.functional.softmax(logits, dim=1)
    
    # loss = criterion(probabilities, engine.state.output[1])
     # Log metrics to TensorBoard
    #global_step = engine.state.iteration
    # tb_logger.add_scalar("Loss", loss, global_step)
    #

# @trainer.on(Events.EPOCH_COMPLETED)
# def log_validation_results(evaluator):
#     evaluator.run(test_loader)
#     metrics = evaluator.state.metrics
#     print(f"Validation Results - Epoch[{evaluator.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}")

Let's declare the evaluator.

In [109]:
def validation_step(engine, batch):
    """Run the model on one batch in eval mode, without tracking gradients.

    Args:
        engine: the Ignite engine invoking this step (unused here).
        batch: an ``(inputs, labels)`` pair.

    Returns:
        tuple: ``(logits, labels)`` - the model's logits and the ground-truth
        labels moved to ``device``.
    """
    model.eval()
    model.to(device)
    with torch.no_grad():
        images, labels = batch
        labels = labels.to(device)
        logits = model(images.to(device)).logits
    return logits, labels
# Ignite evaluation engine, with validation_step as its per-batch process function.
evaluator = Engine(validation_step)


In [110]:
# Attach metrics to the evaluator engine.
# Fresh metric objects are created here rather than reusing the ones attached
# to the trainer: a metric instance accumulates state, so sharing one between
# two engines lets each engine reset or overwrite the other's running totals.
Precision(average=True).attach(evaluator, "precision")
Recall(average=True).attach(evaluator, "recall")
ConfusionMatrix(num_classes=10).attach(evaluator, "confusion_matrix")

# Attach the validation metrics declared in val_metrics (accuracy and loss), so
# that both names exist in evaluator.state.metrics for logging and checkpoint
# scoring.
for metric_name, metric in val_metrics.items():
    metric.attach(evaluator, metric_name)


In [111]:
# Accuracy values that improved on the previous best, in the order they occurred
# (appended by update_with_reward).
reward_window = []
# Best accuracy seen so far; update_with_reward compares against and overwrites it.
best_accuracy = 0.0
@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    """Evaluate on ``test_loader`` after each training epoch and print the accuracy.

    Args:
        engine: the engine that fired the event, i.e. the trainer.
    """
    evaluator.run(test_loader)
    metrics = evaluator.state.metrics
    # Report the trainer's epoch: the evaluator is re-run from scratch each
    # time, so its own epoch counter does not track training progress.
    print(f"Validation Results - Epoch[{engine.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f}")

    
@trainer.on(Events.EPOCH_COMPLETED)
def update_with_reward(engine):
    """Save the model weights whenever validation accuracy reaches a new best.

    Registered on the trainer, so ``engine`` is the trainer. The validation
    accuracy is read from the module-level ``evaluator``; the parameter is
    deliberately not named ``evaluator``, which would shadow that global and
    make this read the trainer's metrics instead.

    Side effects: updates ``best_accuracy``, appends to ``reward_window`` and
    writes ``best_model.pth`` when the accuracy improves.
    """
    global best_accuracy
    # The evaluator is not run here; log_validation_results is registered
    # first for the same event and has already run it for this epoch.
    reward = evaluator.state.metrics['accuracy']

    # Keep the weights only when the reward beats the best seen so far.
    if reward > best_accuracy:
        best_accuracy = reward
        reward_window.append(reward)
        print(f"Updating model with reward: {reward}")
        torch.save(model.state_dict(), "best_model.pth")



# Run the checkpoint handler every time an evaluator run completes, giving it
# the model as the object to save. It is attached to the evaluator, so the
# score function reads the evaluator's metrics.
evaluator.add_event_handler(Events.COMPLETED, model_checkpoint, {"model": model})

<ignite.engine.events.RemovableEventHandle at 0x2e0f70bc610>

In [115]:
def _batch_loss(output):
    """Return the cross-entropy of a training step's ``(logits, targets)`` output as a float."""
    logits, targets = output
    with torch.no_grad():
        return criterion(logits, targets).item()


# Log the batch loss to TensorBoard every 100 training iterations.
# The trainer's output is a (logits, targets) pair, not a loss value, so the
# scalar is computed from that pair here.
tb_logger.attach_output_handler(
    trainer,
    event_name=Events.ITERATION_COMPLETED(every=100),
    tag="training",
    output_transform=lambda output: {"batch_loss": _batch_loss(output)},
)

# Log every metric attached to each engine once per epoch, indexed by the
# trainer's epoch. One "all" handler per engine replaces the separate
# per-metric handlers, which wrote the same validation scalars several times.
# The loop variable is named `engine` so it does not rebind the global `evaluator`.
for tag, engine in [("training", trainer), ("validation", evaluator)]:
    tb_logger.attach_output_handler(
        engine,
        event_name=Events.EPOCH_COMPLETED,
        tag=tag,
        metric_names="all",
        global_step_transform=global_step_from_engine(trainer),
    )

# Show the running batch loss in the progress bar. The trainer has no metric
# named "loss", so the value is derived from the step output instead.
pbar.attach(trainer, output_transform=lambda output: {"loss": _batch_loss(output)})

In [None]:
# Train for 6 epochs over train_loader. The EPOCH_COMPLETED handlers registered
# on the trainer run the evaluator on test_loader after every epoch, so no
# separate evaluator.run call is needed here.
trainer.run(train_loader, max_epochs=6)
# evaluator.run(test_loader)

In [None]:
# Close the TensorBoard logger once training has finished.
tb_logger.close()

trainer vs. evaluator: In Ignite, the `trainer` refers to the engine responsible for the training loop, while the `evaluator` refers to the engine responsible for evaluation (validation or testing). They are separate engines with distinct roles.

The trainer is responsible for training the model using the training data loader and the specified training logic (train_step function).
The evaluator is responsible for evaluating the model using the validation or test data loader and the specified evaluation logic (validation_step function).

Ignite Event Management

* Events.STARTED: Triggered when the engine is started.
* Events.COMPLETED: Triggered when the engine is completed.
* Events.EPOCH_STARTED: Triggered at the beginning of each epoch.
* Events.EPOCH_COMPLETED: Triggered at the end of each epoch.
* Events.ITERATION_STARTED: Triggered at the beginning of each iteration (batch).
* Events.ITERATION_COMPLETED: Triggered at the end of each iteration (batch).
* Events.EXCEPTION_RAISED: Triggered when an exception is raised in the engine.
* Events.TERMINATE: Triggered when the engine should terminate.
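* Note: `Events.MODEL_CHECKPOINT` and `Events.REDUCE_LR_ON_PLATEAU` are not built-in Ignite events. Model checkpointing and learning-rate reduction on plateau are implemented as handlers (for example `ModelCheckpoint`) attached to one of the events above, such as `Events.COMPLETED` or `Events.EPOCH_COMPLETED`.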

In [None]:
!pip install jupyter-tensorboard


In [None]:
# Load the TensorBoard notebook extension, which provides the %tensorboard magic.
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
# Open TensorBoard inline on ./tb-logger, the log directory given to TensorboardLogger.
%tensorboard --logdir=./tb-logger