In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Library Imports

In [2]:
# Imports necessary for training the model
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as dsets
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.utils as utils
from torch.utils.data import Dataset, DataLoader, random_split
from tqdm import tqdm

# Few other important libraries
import matplotlib.pyplot as plt
import numpy as np
import time
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

torch.manual_seed(0)

<torch._C.Generator at 0x7ffb9ecbae10>

In [3]:
torch.cuda.empty_cache()

In [4]:
# !pip install GPUtil

# import torch
# from GPUtil import showUtilization as gpu_usage
# from numba import cuda

# def free_gpu_cache():
#     print("Initial GPU Usage")
#     gpu_usage()                             

#     torch.cuda.empty_cache()

#     cuda.select_device(0)
#     cuda.close()
#     cuda.select_device(0)

#     print("GPU Usage after emptying the cache")
#     gpu_usage()

# free_gpu_cache()                           


## Data Augmentation

In [5]:
# Transform for Data Augmentation
transform_train = transforms.Compose([
    transforms.Resize(256),                        # Resize to 256x256
    transforms.RandomCrop(224),                     # Randomly crop to 224x224
    transforms.RandomHorizontalFlip(0.5),           # Random horizontal flip with 50% probability
    transforms.ToTensor(),                          # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet stats
])

transform_test = transforms.Compose([
    transforms.Resize(256),                    # Resize to 256x256 (same as training)
    transforms.CenterCrop(224),                 # Center crop to 224x224
    transforms.ToTensor(),                      # Convert to PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet stats
])



## Data Loading

In [6]:
train_path = "/kaggle/input/deep-learning-practice-image-classification/train"
test_path = "/kaggle/input/deep-learning-practice-image-classification/test"


# Load Training and Test datasets
train_data = dsets.ImageFolder(root=train_path, transform=transform_train)

from PIL import Image  # Import the Image module from Pillow

class CustomTestDataset(Dataset):
    def __init__(self, test_dir, transform=None):
        self.test_dir = test_dir
        self.transform = transform
        self.image_paths = [os.path.join(test_dir, fname) for fname in os.listdir(test_dir)]
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")  # Open image and convert to RGB
        if self.transform:
            image = self.transform(image)  # Apply the transform if available
        return image, img_path  # Return the image and its path

# Create test data
test_data = CustomTestDataset(test_dir=test_path, transform=transform_test)

# Print data details
print("Train Data Shape: ", len(train_data))
print("Test Data Shape: ", len(test_data))


Train Data Shape:  9999
Test Data Shape:  2000


In [7]:
# Create a validation set from the training data
train_size_fraction = 0.9
train_size = int(train_size_fraction*len(train_data))  # 90% for training
val_size = len(train_data) - train_size  # 10% for validation
train_dataset, val_dataset = random_split(train_data, [train_size, val_size])

In [8]:
print("label_mapping",train_data.class_to_idx)

label_mapping {'Amphibia': 0, 'Animalia': 1, 'Arachnida': 2, 'Aves': 3, 'Fungi': 4, 'Insecta': 5, 'Mammalia': 6, 'Mollusca': 7, 'Plantae': 8, 'Reptilia': 9}


In [9]:
batch_size = 64 # Batch Size of the images

# Creating dataloaders
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(dataset = val_dataset, batch_size = batch_size, shuffle = False)
test_loader = DataLoader(dataset = test_data, batch_size = batch_size, shuffle = False)

# Printing the no. of samples in each dataset
print("No. of samples in training dataset:", len(train_loader.dataset))
print("No. of samples in validation dataset:", len(val_loader.dataset))
print("No. of samples in test dataset:", len(test_loader.dataset))

No. of samples in training dataset: 8999
No. of samples in validation dataset: 1000
No. of samples in test dataset: 2000


## Training and Validating Model 

In [10]:
from torchvision import models
import torch.nn as nn
import torch
from tqdm import tqdm  # For a progress bar
from sklearn.metrics import f1_score

# Function to calculate accuracy
def calculate_accuracy(preds, labels):
    correct = (preds == labels).sum().item()
    return correct / len(labels) * 100

# Define the training function
def train_model(model, train_loader, val_loader, num_epochs=10, learning_rate=0.0001,patience=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    best_val_f1 = 0.0  # Track the best F1 score
    best_model_weights = None  # Store best model weights

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 20)
        
        # Training phase
        model.train()
        running_loss = 0.0
        running_corrects = 0
        all_train_preds = []
        all_train_labels = []

        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Get predictions
            _, preds = torch.max(outputs, 1)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_corrects += (preds == labels).sum().item()

            all_train_preds.extend(preds.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())

        # Calculate training accuracy and F1 score
        train_accuracy = calculate_accuracy(torch.tensor(all_train_preds), torch.tensor(all_train_labels))
        train_f1 = f1_score(all_train_labels, all_train_preds, average='weighted')

        avg_train_loss = running_loss / len(train_loader)
        print(f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Train F1: {train_f1:.4f}")

        # Validation phase
        model.eval()
        val_preds = []
        val_labels = []
        running_val_loss = 0.0
        running_val_corrects = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                _, preds = torch.max(outputs, 1)

                running_val_loss += loss.item()
                running_val_corrects += (preds == labels).sum().item()

                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())

        # Calculate validation accuracy and F1 score
        val_accuracy = calculate_accuracy(torch.tensor(val_preds), torch.tensor(val_labels))
        val_f1 = f1_score(val_labels, val_preds, average='weighted')

        avg_val_loss = running_val_loss / len(val_loader)
        print(f"Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%, Val F1: {val_f1:.4f}")

        # Save the best model based on validation F1 score
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            best_model_weights = model.state_dict()
            epochs_since_improvement = 0  # Reset patience counter
            print(f"Saved best model with validation F1: {val_f1:.4f}")
        else:
            epochs_since_improvement += 1
            
        # Early stopping check
        if epochs_since_improvement >= patience:
            print(f"Stopping training early at epoch {epoch+1} due to no improvement in validation F1.")
            break

    # Load best model weights
    if best_model_weights:
        model.load_state_dict(best_model_weights)

    print(f"Training Complete. Best Validation F1 Score: {best_val_f1:.4f}")
    return model, best_val_f1



## Evaluating Test and Saving Predictions to CSV

In [11]:

def evaluate_and_save_predictions(model, test_loader, output_csv='21F1000641.csv'):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)  # Ensure model is on the correct device
    model.eval()  # Set model to evaluation mode
    predictions = []
    image_ids = []
    
    # Disable gradient computation for inference
    with torch.no_grad():
        for inputs, paths in tqdm(test_loader):  # Assuming test_loader provides file paths
            inputs = inputs.to(device)
            
            # Forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            # Extract Image_IDs (file names without extension)
            for path, pred in zip(paths, preds):
                image_id = os.path.splitext(os.path.basename(path))[0]  # Remove extension from the filename
                predictions.append(pred.item())  # Add the predicted label
                image_ids.append(image_id)  # Add the image ID

    # Create a DataFrame with Image_ID and predicted labels
    submission_df = pd.DataFrame({
        'Image_ID': image_ids,
        'Label': predictions
    })
    
    # Save the DataFrame to a CSV file
    output_csv_path = f"/kaggle/working/{output_csv}"
    submission_df.to_csv(output_csv_path, index=False)
    print(f"Predictions saved to {output_csv_path}")


In [12]:
# Load EfficientNet-V2 pretrained model
model = models.efficientnet_v2_s(pretrained=True)

# Modify the final fully connected layer for custom number of output classes
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 10)  # 10 classes

# Training the model
print("Training EfficientNet-V2:")
trained_model, best_f1 = train_model(model, train_loader, val_loader, num_epochs=30, learning_rate=0.0001)




Training EfficientNet-V2:
Epoch 1/30
--------------------


100%|██████████| 141/141 [04:18<00:00,  1.83s/it]


Train Loss: 0.9687, Train Accuracy: 72.89%, Train F1: 0.7285
Val Loss: 0.4176, Val Accuracy: 87.30%, Val F1: 0.8732
Saved best model with validation F1: 0.8732
Epoch 2/30
--------------------


100%|██████████| 141/141 [03:36<00:00,  1.53s/it]


Train Loss: 0.3567, Train Accuracy: 89.39%, Train F1: 0.8940
Val Loss: 0.3748, Val Accuracy: 88.60%, Val F1: 0.8860
Saved best model with validation F1: 0.8860
Epoch 3/30
--------------------


100%|██████████| 141/141 [03:36<00:00,  1.53s/it]


Train Loss: 0.2393, Train Accuracy: 92.75%, Train F1: 0.9276
Val Loss: 0.3723, Val Accuracy: 89.90%, Val F1: 0.8989
Saved best model with validation F1: 0.8989
Epoch 4/30
--------------------


100%|██████████| 141/141 [03:38<00:00,  1.55s/it]


Train Loss: 0.1719, Train Accuracy: 94.92%, Train F1: 0.9492
Val Loss: 0.3405, Val Accuracy: 89.90%, Val F1: 0.8987
Epoch 5/30
--------------------


100%|██████████| 141/141 [03:36<00:00,  1.53s/it]


Train Loss: 0.1251, Train Accuracy: 96.23%, Train F1: 0.9623
Val Loss: 0.4021, Val Accuracy: 89.00%, Val F1: 0.8898
Epoch 6/30
--------------------


100%|██████████| 141/141 [03:37<00:00,  1.54s/it]


Train Loss: 0.0987, Train Accuracy: 97.09%, Train F1: 0.9709
Val Loss: 0.4285, Val Accuracy: 88.80%, Val F1: 0.8880
Epoch 7/30
--------------------


100%|██████████| 141/141 [03:37<00:00,  1.54s/it]


Train Loss: 0.0911, Train Accuracy: 97.22%, Train F1: 0.9722
Val Loss: 0.4398, Val Accuracy: 89.30%, Val F1: 0.8927
Epoch 8/30
--------------------


100%|██████████| 141/141 [03:37<00:00,  1.54s/it]


Train Loss: 0.0780, Train Accuracy: 97.54%, Train F1: 0.9754
Val Loss: 0.4630, Val Accuracy: 87.70%, Val F1: 0.8768
Stopping training early at epoch 8 due to no improvement in validation F1.
Training Complete. Best Validation F1 Score: 0.8989


In [13]:
evaluate_and_save_predictions(trained_model, test_loader, output_csv='21F1000641.csv')

100%|██████████| 32/32 [00:41<00:00,  1.31s/it]

Predictions saved to /kaggle/working/21F1000641.csv





In [None]:
# model = models.regnet_y_8gf(pretrained=True)

# # Modify the final fully connected layer for custom number of output classes
# model.fc = nn.Linear(model.fc.in_features, 10)  # 10 classes


# # Training the model
# print("Training RegNetY-8GF:")
# trained_model, best_f1 = train_model(model, train_loader, val_loader, num_epochs=30, learning_rate=0.001)


In [None]:
# # Load VGG16 pretrained model
# model = models.vgg16(pretrained=True)

# # Modify the final fully connected layer for custom number of output classes
# model.classifier[6] = nn.Linear(model.classifier[6].in_features, 10)  # 10 classes

# # Training the model
# print("Training VGG16:")
# trained_model, best_f1 = train_model(model, train_loader, val_loader, num_epochs=30, learning_rate=0.001)


In [None]:
# model1 = resnet18(pretrained=True)
# model1.fc = nn.Linear(model1.fc.in_features, 10)  # Modify final FC layer for 10 classes
# print("Training ResNet-18:")
# trained_model1, best_f1_model1 = train_model(model1, train_loader, val_loader, num_epochs=30, learning_rate=0.001)
# print(f"Best F1 Score for ResNet-18: {best_f1_model1}")


In [None]:
# evaluate_and_save_predictions(trained_model1, test_loader, output_csv='21F1000641.csv')

In [14]:
import pandas as pd
output_df = pd.read_csv("/kaggle/working/21F1000641.csv")
print(len(output_df))

2000
