In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Library Imports

In [2]:
# Imports necessary for training the model
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as dsets
import torchvision.models as models
import torchvision.transforms.v2 as transforms
import torchvision.utils as utils
from torch.utils.data import Dataset, DataLoader, random_split
from tqdm import tqdm

# Few other important libraries
import matplotlib.pyplot as plt
import numpy as np
import time
from sklearn.metrics import f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

torch.manual_seed(0)

<torch._C.Generator at 0x7d1730e5ad70>

In [3]:
torch.cuda.empty_cache()

## Data Augmentation

In [4]:

transform_train = transforms.Compose([
    transforms.ToImage(),
    transforms.ToDtype(torch.float32, scale=True),
    transforms.Resize(232),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                         std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.ToImage(),  # converts from HWC np to CHW Tensor
    transforms.ToDtype(torch.float32, scale=True),  # uint8 [0,255] -> float32 [0.0, 1.0]
    transforms.Resize(256),  
    transforms.CenterCrop(224),  # No random cropping here
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                         std=[0.229, 0.224, 0.225])  # ImageNet stats
])


## Data Loading

In [5]:
train_path = "/kaggle/input/deep-learning-practice-image-classification/train"
test_path = "/kaggle/input/deep-learning-practice-image-classification/test"


from PIL import Image  # Import the Image module from Pillow

class CustomTestDataset(Dataset):
    def __init__(self, test_dir, transform=None):
        self.test_dir = test_dir
        self.transform = transform
        self.image_paths = [os.path.join(test_dir, fname) for fname in os.listdir(test_dir)]
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")  # Open image and convert to RGB
        if self.transform:
            image = self.transform(image)  # Apply the transform if available
        return image, img_path  # Return the image and its path


# Create test data
test_data = CustomTestDataset(test_dir=test_path, transform=transform_test)



In [6]:
# Load Training and Test datasets
full_train_data = dsets.ImageFolder(root=train_path)

# Create a validation set from the training data
train_size_fraction = 0.9
train_size = int(train_size_fraction*len(full_train_data))  # 90% for training
val_size = len(full_train_data) - train_size  # 10% for validation
train_dataset, val_dataset = random_split(full_train_data, [train_size, val_size])

# Now apply transform_train and transform_test separately
train_dataset.dataset.transform = transform_train  # augmentation
val_dataset.dataset.transform = transform_test      # no augmentation

# Print data details
print("Train Data Shape: ", len(full_train_data))
print("Test Data Shape: ", len(test_data))


Train Data Shape:  9999
Test Data Shape:  2000


In [7]:
print("label_mapping",full_train_data.class_to_idx)

label_mapping {'Amphibia': 0, 'Animalia': 1, 'Arachnida': 2, 'Aves': 3, 'Fungi': 4, 'Insecta': 5, 'Mammalia': 6, 'Mollusca': 7, 'Plantae': 8, 'Reptilia': 9}


In [8]:
batch_size = 64 # Batch Size of the images

# Creating dataloaders
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True,num_workers=2)
val_loader = DataLoader(dataset = val_dataset, batch_size = batch_size, shuffle = False,num_workers=2)
test_loader = DataLoader(dataset = test_data, batch_size = batch_size, shuffle = False,num_workers=2)

# Printing the no. of samples in each dataset
print("No. of samples in training dataset:", len(train_loader.dataset))
print("No. of samples in validation dataset:", len(val_loader.dataset))
print("No. of samples in test dataset:", len(test_loader.dataset))

No. of samples in training dataset: 8999
No. of samples in validation dataset: 1000
No. of samples in test dataset: 2000


## Training and Validating Model 

In [9]:

# Function to calculate accuracy
def calculate_accuracy(preds, labels):
    correct = (preds == labels).sum().item()
    return correct / len(labels) * 100

# Define the training function
def train_model(model, train_loader, val_loader, num_epochs=10, learning_rate=0.0001,patience=2):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2)

    best_val_f1 = 0.0  # Track the best F1 score
    best_model_weights = None  # Store best model weights

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 20)
        
        # Training phase
        model.train()
        running_loss = 0.0
        running_corrects = 0
        all_train_preds = []
        all_train_labels = []

        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Get predictions
            _, preds = torch.max(outputs, 1)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_corrects += (preds == labels).sum().item()

            all_train_preds.extend(preds.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())

        # Calculate training accuracy and F1 score
        train_accuracy = calculate_accuracy(torch.tensor(all_train_preds), torch.tensor(all_train_labels))
        train_f1 = f1_score(all_train_labels, all_train_preds, average='weighted')

        avg_train_loss = running_loss / len(train_loader)
        print(f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Train F1: {train_f1:.4f}")

        # Validation phase
        model.eval()
        val_preds = []
        val_labels = []
        running_val_loss = 0.0
        running_val_corrects = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                _, preds = torch.max(outputs, 1)

                running_val_loss += loss.item()
                running_val_corrects += (preds == labels).sum().item()

                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())

        # Calculate validation accuracy and F1 score
        val_accuracy = calculate_accuracy(torch.tensor(val_preds), torch.tensor(val_labels))
        val_f1 = f1_score(val_labels, val_preds, average='weighted')

        avg_val_loss = running_val_loss / len(val_loader)
        print(f"Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%, Val F1: {val_f1:.4f}")
        
        # Step the learning rate scheduler
        scheduler.step(avg_val_loss)
        
        # Save the best model based on validation F1 score
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            best_model_weights = model.state_dict()
            epochs_since_improvement = 0  # Reset patience counter
            print(f"Saved best model with validation F1: {val_f1:.4f}")
        else:
            epochs_since_improvement += 1
            
        # Early stopping check
        if epochs_since_improvement >= patience:
            print(f"Stopping training early at epoch {epoch+1} due to no improvement in validation F1.")
            break

    # Load best model weights
    if best_model_weights:
        model.load_state_dict(best_model_weights)

    print(f"Training Complete. Best Validation F1 Score: {best_val_f1:.4f}")
    return model, best_val_f1



## Evaluating Test and Saving Predictions to CSV

In [10]:

def evaluate_and_save_predictions(model, test_loader, output_csv='21F1000641.csv'):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)  # Ensure model is on the correct device
    model.eval()  # Set model to evaluation mode
    predictions = []
    image_ids = []
    
    # Disable gradient computation for inference
    with torch.no_grad():
        for inputs, paths in tqdm(test_loader):  # Assuming test_loader provides file paths
            inputs = inputs.to(device)
            
            # Forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            # Extract Image_IDs (file names without extension)
            for path, pred in zip(paths, preds):
                image_id = os.path.splitext(os.path.basename(path))[0]  # Remove extension from the filename
                predictions.append(pred.item())  # Add the predicted label
                image_ids.append(image_id)  # Add the image ID

    # Create a DataFrame with Image_ID and predicted labels
    submission_df = pd.DataFrame({
        'Image_ID': image_ids,
        'Label': predictions
    })
    
    # Save the DataFrame to a CSV file
    output_csv_path = f"/kaggle/working/{output_csv}"
    submission_df.to_csv(output_csv_path, index=False)
    print(f"Predictions saved to {output_csv_path}")


In [11]:
# Load EfficientNet-V2 pretrained model
model = models.efficientnet_v2_s(pretrained=True)

# Modify the final fully connected layer for custom number of output classes
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 10)  # 10 classes

# Training the model
print("Training EfficientNet-V2:")
trained_model, best_f1 = train_model(model, train_loader, val_loader, num_epochs=30, learning_rate=0.0001)

Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:00<00:00, 175MB/s]


Training EfficientNet-V2:
Epoch 1/30
--------------------


100%|██████████| 141/141 [01:55<00:00,  1.23it/s]

Train Loss: 0.9203, Train Accuracy: 74.62%, Train F1: 0.7460





Val Loss: 0.3761, Val Accuracy: 88.80%, Val F1: 0.8883
Saved best model with validation F1: 0.8883
Epoch 2/30
--------------------


100%|██████████| 141/141 [01:40<00:00,  1.40it/s]

Train Loss: 0.2738, Train Accuracy: 91.89%, Train F1: 0.9190





Val Loss: 0.3177, Val Accuracy: 90.70%, Val F1: 0.9069
Saved best model with validation F1: 0.9069
Epoch 3/30
--------------------


100%|██████████| 141/141 [01:43<00:00,  1.36it/s]

Train Loss: 0.1342, Train Accuracy: 96.08%, Train F1: 0.9608





Val Loss: 0.3670, Val Accuracy: 89.50%, Val F1: 0.8949
Epoch 4/30
--------------------


100%|██████████| 141/141 [01:45<00:00,  1.34it/s]

Train Loss: 0.0846, Train Accuracy: 97.67%, Train F1: 0.9767





Val Loss: 0.4006, Val Accuracy: 89.60%, Val F1: 0.8958
Stopping training early at epoch 4 due to no improvement in validation F1.
Training Complete. Best Validation F1 Score: 0.9069


In [12]:
evaluate_and_save_predictions(trained_model, test_loader, output_csv='21F1000641.csv')

100%|██████████| 32/32 [00:25<00:00,  1.27it/s]

Predictions saved to /kaggle/working/21F1000641.csv





In [13]:
import pandas as pd
output_df = pd.read_csv("/kaggle/working/21F1000641.csv")
print(len(output_df))

2000
