In [2]:
import torch
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split

class DataLoaderHelper:
    """Build train / validation / test ``DataLoader`` objects from image folders.

    The training directory is read with ``datasets.ImageFolder`` and split
    80/20 into train and validation subsets; the test directory is read as-is.

    Random augmentation (when enabled) is applied to the training subset only.
    Validation and test images always go through the deterministic
    resize + normalize pipeline, so reported metrics do not vary with the
    random augmentation draws.

    Args:
        train_data_dir: Root folder (one sub-folder per class) used for train/val.
        test_data_dir: Root folder (one sub-folder per class) used for testing.
        input_size: Target image size, a tuple like ``(400, 400)``.
        batch_size: Batch size used by all three loaders.
        augmentation: If truthy, augment the training images.
    """

    def __init__(self, train_data_dir,test_data_dir, input_size, batch_size, augmentation):
        self.data_dir = train_data_dir
        self.test_dir = test_data_dir
        self.batch_size = batch_size
        self.augmentation = augmentation
        self.input_size = input_size  # tuple like (400, 400)

        # Training pipeline (attribute name kept for existing callers).
        self.transform = self.get_transform()
        # Deterministic pipeline used for validation and test data.
        self.eval_transform = self.get_transform(train=False)
        self.train_data, self.val_data = self.load_train_val_data()
        self.test_data = self.load_test_data()

    def get_transform(self, train=True):
        """Return the preprocessing pipeline.

        Args:
            train: When True (default) and augmentation is enabled, the
                pipeline includes the random augmentations. When False the
                pipeline is always the plain resize pipeline.

        Returns:
            A ``transforms.Compose`` ending with ``ToTensor`` and ``Normalize``.
        """
        if self.augmentation and train:
            # Random crop/flip/rotation/colour jitter: training data only.
            transforms_list = [
                transforms.RandomResizedCrop(self.input_size),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(30),
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
                transforms.ToTensor(),
            ]
        else:
            transforms_list = [
                transforms.Resize(self.input_size),
                transforms.ToTensor(),
            ]

        # Normalize the input for better performance
        transforms_list.append(
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        )
        return transforms.Compose(transforms_list)

    def load_train_val_data(self):
        """Split the training folder 80/20 into (train, validation) subsets.

        Returns:
            ``(train_subset, val_subset)`` as ``Subset`` objects. The train
            subset uses ``self.transform``; the validation subset uses
            ``self.eval_transform``.
        """
        train_view = datasets.ImageFolder(root=self.data_dir, transform=self.transform)
        if self.augmentation:
            # Second view over the same folder with the deterministic transform.
            # ImageFolder lists classes and files in sorted order, so an index
            # refers to the same image in both views.
            val_view = datasets.ImageFolder(root=self.data_dir, transform=self.eval_transform)
        else:
            # Both pipelines are identical; no need to scan the folder twice.
            val_view = train_view

        indices = list(range(len(train_view)))

        # train_test_split does not accept the dataset itself, so split the indices
        train_idx, val_idx = train_test_split(indices, test_size=0.2, random_state=42, shuffle=True)

        return Subset(train_view, train_idx), Subset(val_view, val_idx)

    def load_test_data(self):
        """Return the test ``ImageFolder`` using the deterministic transform."""
        return datasets.ImageFolder(root=self.test_dir, transform=self.eval_transform)

    def get_dataloaders(self, num_workers=2):
        """Create the three loaders.

        Args:
            num_workers: Worker processes per loader (default 2).

        Returns:
            ``(train_loader, val_loader, test_loader)``. Only the training
            loader shuffles.
        """
        # Pinned host memory only helps host-to-GPU copies; skip it on CPU-only machines.
        pin_memory = torch.cuda.is_available()

        train_loader = DataLoader(self.train_data, batch_size=self.batch_size,
                                  shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
        val_loader = DataLoader(self.val_data, batch_size=self.batch_size,
                                shuffle=False, num_workers=num_workers, pin_memory=pin_memory)
        test_loader = DataLoader(self.test_data, batch_size=self.batch_size,
                                 shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

        return train_loader, val_loader, test_loader

In [19]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models

class FinetuneCNN():
    """ResNet-50 wrapper for fine-tuning only the final classification layer.

    The wrapped network is available as ``self.fine_tune_model``.

    Args:
        pt: If True (default) load ImageNet-pretrained weights, otherwise
            start from random initialisation.
    """

    def __init__(self,pt=True):
        weights_enum = getattr(models, "ResNet50_Weights", None)
        if weights_enum is None:
            # Older torchvision without the multi-weight API.
            self.fine_tune_model = models.resnet50(pretrained=pt)
        else:
            # IMAGENET1K_V1 is what ``pretrained=True`` resolves to, so the
            # loaded weights stay the same while avoiding the deprecated keyword.
            self.fine_tune_model = models.resnet50(
                weights=weights_enum.IMAGENET1K_V1 if pt else None
            )

    def Freezelayers(self, num_classes=10):
        """Freeze every existing parameter and install a new trainable head.

        Args:
            num_classes: Output size of the replacement ``fc`` layer (default 10).
        """
        for param in self.fine_tune_model.parameters():
            param.requires_grad = False

        # The new layer is created after freezing, so its parameters keep the
        # default requires_grad=True and are the only ones that get updated.
        in_features = self.fine_tune_model.fc.in_features
        self.fine_tune_model.fc = nn.Linear(in_features, num_classes)
            


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import wandb

class Trainer:
    """Train, validate and evaluate a classification model.

    Args:
        model: ``torch.nn.Module`` to train; moved to CUDA when available,
            otherwise CPU.
        train_loader, val_loader, test_loader: Iterables yielding
            ``(images, labels)`` batches.
        optimizer_name: ``'adam'``, ``'nadam'`` or ``'rmsprop'`` (case-insensitive).
        learning_rate: Learning rate passed to the optimizer.
        num_epochs: Number of epochs run by ``train()``.
        weight_decay: Weight decay passed to the optimizer.

    Raises:
        ValueError: If ``optimizer_name`` is not one of the supported names.
    """

    # Label names used by ``confusion_matrix`` when no ``class_names`` are given.
    DEFAULT_CLASS_NAMES = ("Amphibia", "Animalia", "Arachnida", "Aves", "Fungi",
                           "Insecta", "Mammalia", "Mollusca", "Plantae", "Reptilia")

    # Supported optimizer names -> attribute name inside ``torch.optim``.
    _OPTIMIZERS = {'adam': 'Adam', 'nadam': 'NAdam', 'rmsprop': 'RMSprop'}

    def __init__(self, model, train_loader, val_loader,test_loader, optimizer_name, learning_rate, num_epochs,weight_decay):
        # Set device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # self.model=torch.nn.DataParallel(model,device_ids = [0,1]).to(self.device)
        self.model = model.to(self.device)
        # Note: for multi-GPU training, comment out the line above and
        # uncomment the DataParallel line before it.

        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.num_epochs = num_epochs
        self.weight_decay = weight_decay
        self.learning_rate = learning_rate
        self.train_loss_history = []
        self.val_loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

        # Initialize optimizer
        optimizer_attr = self._OPTIMIZERS.get(optimizer_name.lower())
        if optimizer_attr is None:
            raise ValueError(f"Unsupported optimizer: {optimizer_name}")
        self.optimizer = getattr(optim, optimizer_attr)(
            self.model.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay
        )

        self.criterion = nn.CrossEntropyLoss()

    def train_epoch(self):
        """Run one optimisation pass over ``train_loader``.

        Returns:
            ``(epoch_loss, epoch_acc)``: the mean of the per-batch losses and
            the fraction of correctly classified samples. Both are 0.0 when
            the loader yields nothing.
        """
        self.model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in tqdm(self.train_loader, desc="Training"):
            images, labels = images.to(self.device), labels.to(self.device)

            self.optimizer.zero_grad()
            outputs = self.model(images)
            loss = self.criterion(outputs, labels)
            loss.backward()
            # weight update
            self.optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Guard against an empty loader instead of dividing by zero.
        num_batches = len(self.train_loader)
        epoch_loss = running_loss / num_batches if num_batches else 0.0
        epoch_acc = correct / total if total else 0.0
        return epoch_loss, epoch_acc

    def validate(self):
        """Evaluate on ``val_loader`` without gradient tracking.

        Returns:
            ``(epoch_loss, epoch_acc)`` computed as in ``train_epoch``.
        """
        # Set for evaluation
        self.model.eval()
        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in tqdm(self.val_loader, desc="Validating"):
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)

                running_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard against an empty loader instead of dividing by zero.
        num_batches = len(self.val_loader)
        epoch_loss = running_loss / num_batches if num_batches else 0.0
        epoch_acc = correct / total if total else 0.0
        return epoch_loss, epoch_acc

    def train(self):
        """Run ``num_epochs`` train/validate cycles and record the histories."""
        for epoch in range(self.num_epochs):
            print(f"\nEpoch {epoch+1}/{self.num_epochs}")

            train_loss, train_acc = self.train_epoch()
            val_loss, val_acc = self.validate()

            # Store history for analysis
            self.train_loss_history.append(train_loss)
            self.val_loss_history.append(val_loss)
            self.train_acc_history.append(train_acc)
            self.val_acc_history.append(val_acc)

            print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.4f}")
            print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc*100:.4f}")
            # Release cached GPU memory between epochs
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    def confusion_matrix(self,count=3,Capture_Img=True,plot=True,class_names=None):
        """Evaluate on ``test_loader`` and build a confusion matrix.

        Args:
            count: Maximum number of sample images captured per true class.
            Capture_Img: If True, also draw a ``len(class_names) x count``
                grid of captured test images with true/predicted labels.
            plot: If True, show the figures; otherwise save them as PNG files
                and log them to wandb.
            class_names: Optional sequence of label names, indexed by class
                id. Defaults to ``DEFAULT_CLASS_NAMES``.

        Returns:
            Integer ``numpy`` array where entry ``[t, p]`` counts test samples
            with true class ``t`` predicted as class ``p``.
        """
        if class_names is None:
            class_names = list(self.DEFAULT_CLASS_NAMES)
        num_classes = len(class_names)

        cm = np.zeros((num_classes, num_classes), dtype=int)
        # Set for evaluation
        self.model.eval()
        correct = 0
        total = 0

        # captured_samples[c] holds up to `count` (image, true, pred) tuples for true class c
        captured_samples = [[] for _ in range(num_classes)]

        with torch.no_grad():
            for images, labels in tqdm(self.test_loader, desc="testing"):
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = self.model(images)

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                true_ids = labels.cpu().tolist()
                pred_ids = predicted.cpu().tolist()
                for i, (true_label, pred_label) in enumerate(zip(true_ids, pred_ids)):
                    # Rows are true labels, columns are predictions
                    cm[true_label, pred_label] += 1

                    if Capture_Img and len(captured_samples[true_label]) < count:
                        img = images[i].cpu().numpy()
                        captured_samples[true_label].append((img, true_label, pred_label))

        Acc = correct / total if total else 0.0

        print(f"Test Acc: {Acc*100:.4f}")

        fig_cm = plt.figure(figsize=(10, 7))
        sns.heatmap(cm, annot=True, fmt='d', cmap="Greens", xticklabels=class_names, yticklabels=class_names)
        # Heatmap rows (y axis) are the true labels, columns (x axis) the predictions.
        plt.xlabel("Predicted Label")
        plt.ylabel("Actual Label")
        plt.title("Confusion Matrix")
        if plot:
            plt.show()
        else:
            Img_name = "confusion_matrix.png"
            fig_cm.savefig(Img_name)
            plt.close(fig_cm)
            wandb.log({"confusion_matrix": wandb.Image(Img_name)})

        # Draw the class x count grid when capturing is enabled and at least one image was captured
        if Capture_Img and any(captured_samples):
            rows = num_classes
            cols = count
            # squeeze=False keeps `axes` two-dimensional even when rows or cols is 1
            fig, axes = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)

            # Same statistics as the Normalize step applied to the inputs
            norm_mean = np.array([0.485, 0.456, 0.406])
            norm_std = np.array([0.229, 0.224, 0.225])

            for row_idx, class_samples in enumerate(captured_samples):
                for col_idx in range(cols):
                    ax = axes[row_idx, col_idx]
                    ax.axis('off')
                    if col_idx < len(class_samples):
                        img, true, pred = class_samples[col_idx]
                        # Channels-first -> channels-last, then undo the normalization
                        img = img.transpose(1, 2, 0)
                        img = (img * norm_std) + norm_mean
                        img = np.clip(img, 0, 1)
                        ax.imshow(img)
                        ax.set_title(f"True: {class_names[true]}\nPred: {class_names[pred]}")

            plt.tight_layout()

            if plot:
                plt.show()
            else:
                Img_name = "predictions.png"
                fig.savefig(Img_name, bbox_inches='tight', dpi=300, pad_inches=0.1)
                plt.close(fig)
                wandb.log({"predictions": wandb.Image(Img_name)})

        return cm
            
   

In [8]:
!pip install wandb



In [7]:
import wandb

# Authenticate this session with Weights & Biases.
wandb.login()
# When running this notebook on Kaggle, pass the API key explicitly, i.e. wandb.login(key='...').
# Prefer reading the key from a secret store or the WANDB_API_KEY environment variable
# over typing it into the notebook.

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcs24m042[0m ([33mcs24m042-iit-madras-foundation[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [25]:
!python /kaggle/input/model222/other/default/1/train.py -br "/kaggle/input/dataset1/inaturalist_12K"

[34m[1mwandb[0m: Currently logged in as: [33mcs24m042[0m ([33mcs24m042-iit-madras-foundation[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.19.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250418_193604-p3n8qzkh[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33moptimizer nadam   batch_size 32 augmentation false weight_decay 0 learning_rate 0.0001  [0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/cs24m042-iit-madras-foundation/DA6401-Assignment-2[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/cs24m042-iit-madras-foundation/DA6401-Assignment-2/runs/p3n8qzkh[0m

Epoch 1/10
Training: 100%|███████████████████████████████| 250/250 [00:52<00:00,  4.73it/s]
Validating: 100%|███████████████████████████████| 63/63 [00:12<00:00,  4.89it/s]
Train Loss: 1.7

In [None]:
import wandb


# Image size fed to the network and number of target classes
input_dim = (224, 224)
num_classes = 10

# Add your directory here
train_directory = '/kaggle/input/dataset1/inaturalist_12K/train'
test_directory = '/kaggle/input/dataset1/inaturalist_12K/val'
epochs = 10

# Sweep configuration dictionary for wandb:
# Bayesian search that maximizes the logged 'val_accuracy' metric.
sweep_configuration = dict(
    method='bayes',
    name='cnn-finetunining',
    metric=dict(name='val_accuracy', goal='maximize'),
    # Each hyperparameter is drawn from a discrete list of candidate values.
    parameters=dict(
        weight_decay=dict(values=[0, 0.0005, 0.5]),
        augmentation=dict(values=[True, False]),
        learning_rate=dict(values=[1e-3, 1e-4]),
        optimizer=dict(values=['nadam', 'adam']),
        batch_size=dict(values=[32, 64]),
    ),
)

def train_sweep(config=None):
    """Run one sweep trial: build loaders and model, train, then log the history.

    Args:
        config: Optional configuration forwarded to ``wandb.init``; the
            hyperparameters actually used are read back from ``wandb.config``.
    """
    with wandb.init(config=config) as run:
        config = wandb.config

        # Name the run after its hyperparameters so trials are easy to tell apart
        name_template = "optimizer {}   batch_size {} augmentation {} weight_decay {} learning_rate {}  "
        hyperparameters = (
            config.optimizer,
            config.batch_size,
            config.augmentation,
            config.weight_decay,
            config.learning_rate,
        )
        run.name = name_template.format(*hyperparameters)

        # Data loaders for the train / validation / test splits
        loader_helper = DataLoaderHelper(
            train_data_dir=train_directory,
            test_data_dir=test_directory,
            input_size=input_dim,
            batch_size=config.batch_size,
            augmentation=config.augmentation,
        )
        train_loader, val_loader, test_loader = loader_helper.get_dataloaders()

        # Pretrained network with frozen layers and a new classification head
        finetuner = FinetuneCNN()
        finetuner.Freezelayers()

        # Trainer wired with the sampled hyperparameters
        trainer = Trainer(
            model=finetuner.fine_tune_model,
            train_loader=train_loader,
            val_loader=val_loader,
            test_loader=test_loader,
            optimizer_name=config.optimizer,
            learning_rate=config.learning_rate,
            num_epochs=epochs,
            weight_decay=config.weight_decay,
        )

        # Train the model
        trainer.train()

        # After training has finished, log the recorded history one epoch at a time
        for epoch_idx in range(epochs):
            epoch_metrics = {
                'train_accuracy': trainer.train_acc_history[epoch_idx]*100,
                'train_loss': trainer.train_loss_history[epoch_idx],
                'val_accuracy': trainer.val_acc_history[epoch_idx]*100,
                'val_loss': trainer.val_loss_history[epoch_idx],
                'epoch' : epoch_idx,
            }
            wandb.log(epoch_metrics)

if __name__ == "__main__":
    sweep_project = "DA6401-Assignment-2"

    # Register the sweep and keep the id that wandb returns for it
    sweep_id = wandb.sweep(sweep_configuration, project=sweep_project)

    # Start the agent on the sweep that was just created (not a hard-coded id
    # from an earlier session), running at most 9 trials.
    wandb.agent(sweep_id, function=train_sweep, project=sweep_project, count=9)