In [1]:
# !pip install ipywidgets
# !pip install tensorflow-gpu
# !pip install torch
# !pip install torchvision
# !pip install transformers
# !pip install timm
# !pip install numpy
# !pip install matplotlib
# !pip install ipykernel

# !pip install jupyter

In [2]:
# jupyter notebook
# !pip install --upgrade numpy
# !pip uninstall tensorflow -y
# !pip install tensorflow transformers torch torchvision timm pandas scikit-learn


In [3]:
# import torch
# import numpy as np
# import tensorflow as tf
# import transformers
# import timm

# print("Torch version:", torch.__version__)
# print("NumPy version:", np.__version__)
# print("TensorFlow version:", tf.__version__)
# print("Transformers version:", transformers.__version__)
# print("Timm version:", timm.__version__)


# Install required libraries
# !pip install torch torchvision transformers timm pandas scikit-learn

# Import packages
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from transformers import ViTModel, ViTConfig
import timm
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
import numpy as np
import matplotlib.pyplot as plt

2025-03-04 18:15:02.377634: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# import os

# data_dir = '/content/drive/MyDrive/data'

# def check_empty_dirs(root_dir):
#     empty = []
#     for dirpath, dirnames, filenames in os.walk(root_dir):
#         if not filenames and "val" in dirpath:  # Check only train/val/test
#             empty.append(dirpath)
#     return empty

# empty_dirs = check_empty_dirs(data_dir)
# if empty_dirs:
#     print("🚨 Empty directories found:", empty_dirs)
# else:
#     print("✅ No empty directories found.")

# import os
# os.environ["HF_HOME"] = "/workspace/awadh/nvidia/Nitesh/huggingface_cache"
# os.environ["HUGGINGFACE_HUB_CACHE"] = "/workspace/awadh/nvidia/Nitesh/huggingface_cache"


In [5]:


import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Custom ImageFolder to skip empty directories
class SafeImageFolder(datasets.ImageFolder):
    """``ImageFolder`` variant whose class folders may be missing or contain no valid files.

    Such folders simply contribute no samples. Labels are taken from
    ``class_to_idx`` unchanged, so a skipped folder still keeps its index and
    the label space is not renumbered.
    """

    @staticmethod
    def make_dataset(
        directory, class_to_idx, extensions=None, is_valid_file=None, allow_empty=False
    ):
        """Collect ``(path, class_index)`` pairs for every valid file under ``directory``.

        Args:
            directory: Root folder containing one sub-folder per class.
            class_to_idx: Mapping from class (sub-folder) name to integer label.
            extensions: Allowed file extensions; used only when ``is_valid_file``
                is not given.
            is_valid_file: Optional callable ``path -> bool``. When provided it
                decides which files are kept (it takes precedence over
                ``extensions``).
            allow_empty: Accepted for signature compatibility. Class folders
                without valid files never raise here, whatever its value.

        Returns:
            List of ``(file_path, class_index)`` tuples, ordered by class name,
            then directory, then file name.

        Raises:
            ValueError: If neither ``extensions`` nor ``is_valid_file`` is given.
        """
        if is_valid_file is None:
            if extensions is None:
                raise ValueError("Either extensions or is_valid_file must be provided.")

            # Fall back to an extension check wrapped as a predicate so the
            # collection loop below has a single code path.
            def is_valid_file(path):
                return datasets.folder.has_file_allowed_extension(path, extensions)

        instances = []
        directory = os.path.expanduser(directory)

        for target_class in sorted(class_to_idx.keys()):
            target_dir = os.path.join(directory, target_class)
            if not os.path.isdir(target_dir):
                continue

            class_index = class_to_idx[target_class]
            # Sorting the walk keeps the sample order stable across runs and
            # file systems. A folder with no valid files adds nothing, so no
            # separate emptiness pre-scan is required.
            for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
                for fname in sorted(fnames):
                    path = os.path.join(root, fname)
                    if is_valid_file(path):
                        instances.append((path, class_index))
        return instances

# Per-channel mean / standard deviation passed to transforms.Normalize below.
IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406]
IMAGENET_DEFAULT_STD = [0.229, 0.224, 0.225]

# Side length of the square image produced by both pipelines.
img_size = 224


def _tensor_and_normalize():
    """Return the steps shared by both pipelines: tensor conversion, then normalisation."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
    ]


# Training: random crop / flip / rotation as augmentation before the shared tail.
train_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(img_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
    ]
    + _tensor_and_normalize()
)

# Validation / test: plain resize to a fixed square, no randomness.
val_transform = transforms.Compose(
    [transforms.Resize((img_size, img_size))] + _tensor_and_normalize()
)

# Root folder holding the train/, val/ and test/ splits.
data_dir = '../dataset/all-data'

# One SafeImageFolder per split; only the training split uses the augmenting transform.
train_dataset = SafeImageFolder(transform=train_transform, root=f'{data_dir}/train')
val_dataset = SafeImageFolder(transform=val_transform, root=f'{data_dir}/val')
test_dataset = SafeImageFolder(transform=val_transform, root=f'{data_dir}/test')

batch_size = 32


def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader using the shared batch size and two workers."""
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=2)


# Only the training loader shuffles; validation and test keep dataset order.
train_loader = _make_loader(train_dataset, shuffle=True)
val_loader = _make_loader(val_dataset, shuffle=False)
test_loader = _make_loader(test_dataset, shuffle=False)



In [None]:


# class ResNetForAnimalRecognition(nn.Module):
#     def __init__(self, num_classes=200):
#         super().__init__()
#         # Load pre-trained ResNet50 from timm
#         self.resnet = timm.create_model('resnet50', pretrained=True, num_classes=0)  # num_classes=0 removes final FC layer
#         self.global_pool = nn.AdaptiveAvgPool2d((1, 1))  # Global Average Pooling
#         self.classifier = nn.Linear(2048, num_classes)  # ResNet50 has 2048-dim features

#     def forward(self, x):
#         features = self.resnet.forward_features(x)  # Extract feature maps
#         features = self.global_pool(features)  # Apply Global Average Pooling
#         features = torch.flatten(features, 1)  # Flatten to (batch_size, 2048)
#         return self.classifier(features)

#     def extract_features(self, x):
#         """Extracts feature embeddings from the ResNet backbone (without classification layer)."""
#         features = self.resnet.forward_features(x)
#         features = self.global_pool(features)
#         return torch.flatten(features, 1)

# # Detect available device (CUDA for Colab, MPS for Mac)
# device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# # Initialize and move model to the device
# model = ResNetForAnimalRecognition(num_classes=1072).to(device)

# # If using multiple GPUs in Colab, enable DataParallel
# if torch.cuda.device_count() > 1:
#     print(f"Using {torch.cuda.device_count()} GPUs for training!")
#     model = nn.DataParallel(model)

# print(f"Model loaded on {device}")


class EfficientNetForAnimalRecognition(nn.Module):
    """EfficientNet-B3 backbone (from timm) followed by global average pooling and a linear head.

    Args:
        num_classes: Number of logits produced by ``classifier``.
    """

    def __init__(self, num_classes=1072):
        super().__init__()
        # num_classes=0 asks timm for the backbone without its classification layer.
        self.efficientnet = timm.create_model('efficientnet_b3', pretrained=True, num_classes=0)

        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))  # Global Average Pooling
        # Size the head from the backbone's reported feature width rather than
        # hard-coding it (1536 for EfficientNet-B3), so the two cannot drift apart.
        self.classifier = nn.Linear(self.efficientnet.num_features, num_classes)

    def extract_features(self, x):
        """Return pooled, flattened backbone embeddings for ``x`` (no classification layer).

        The result has one row per input sample; its width is the in-features
        of ``classifier``.
        """
        features = self.efficientnet.forward_features(x)  # Backbone feature maps
        features = self.global_pool(features)  # Collapse the spatial dimensions to 1x1
        return torch.flatten(features, 1)

    def forward(self, x):
        """Return class logits for ``x``: the embeddings passed through ``classifier``."""
        # Reuse extract_features so embeddings and logits always share one code path.
        return self.classifier(self.extract_features(x))

# Choose the compute backend in priority order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Build the classifier and place its parameters on the selected device.
model = EfficientNetForAnimalRecognition(num_classes=1072)
model = model.to(device)

# With more than one visible GPU, wrap the model so batches are split across them.
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs for training!")
    model = nn.DataParallel(model)

print(f"Model loaded on {device}")


In [7]:
# Classification loss applied to the model's logits.
criterion = nn.CrossEntropyLoss()

# AdamW over every model parameter with a small learning rate and weight decay.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=3e-5, weight_decay=0.01)

# mode='max': the monitored metric is expected to increase; patience of 2 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=2)

In [8]:
import torch

def train_model(model, num_epochs=10):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Uses the notebook-level ``train_loader``, ``val_loader``, ``criterion``,
    ``optimizer``, ``scheduler`` and ``device``. After every epoch the
    scheduler is stepped with the validation accuracy, the metrics are
    printed, and ``model.state_dict()`` is written to ``./best_model.pth``
    whenever the validation accuracy improves.

    Args:
        model: Network called as ``model(inputs)``; its output's second
            dimension is taken as the number of classes.
        num_epochs: Number of passes over ``train_loader``.

    Raises:
        ValueError: If a training batch contains a label outside
            ``[0, num_classes)``, or if either loader yields no samples.
    """
    best_acc = 0.0

    for epoch in range(num_epochs):
        # ---- Training phase ----
        model.train()
        running_loss = 0.0
        train_seen = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device).long()  # CrossEntropyLoss expects integer class indices

            optimizer.zero_grad()
            outputs = model(inputs)

            # Fail loudly on bad labels: stopping quietly here would leave a
            # half-trained model that looks like a finished run.
            num_classes = outputs.shape[1]
            label_min = labels.min().item()
            label_max = labels.max().item()
            if label_min < 0 or label_max >= num_classes:
                raise ValueError(
                    f"Label out of range! Max: {label_max}, Min: {label_min}. "
                    f"Expected range: 0 to {num_classes - 1}"
                )

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch value is a per-sample mean.
            running_loss += loss.item() * inputs.size(0)
            train_seen += inputs.size(0)

        if train_seen == 0:
            raise ValueError("train_loader yielded no samples; nothing to train on.")
        # Divide by the samples actually processed rather than a global dataset length.
        epoch_loss = running_loss / train_seen

        # ---- Validation phase ----
        model.eval()
        correct = 0
        total = 0
        val_loss_sum = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device).long()

                outputs = model(inputs)
                val_loss_sum += criterion(outputs, labels).item() * inputs.size(0)

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total == 0:
            raise ValueError("val_loader yielded no samples; cannot compute validation metrics.")
        val_acc = correct / total
        val_loss = val_loss_sum / total
        scheduler.step(val_acc)

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}')

        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), './best_model.pth')

    print(f'Best Validation Accuracy: {best_acc:.4f}')

# ✅ Add CUDA debugging environment variable (optional, for debugging)
# NOTE(review): the model was already moved to the device in an earlier cell, so
# CUDA may already be initialised when this is set. Presumably the variable is
# only read at CUDA start-up, in which case it has no effect here — confirm, and
# if it is needed set it before the first CUDA call.
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Run the training loop for 15 epochs on the model built above.
train_model(model, num_epochs=15)


Epoch 1/15
Train Loss: 6.5255 | Val Loss: 5.6510 | Val Acc: 0.1325
Epoch 2/15
Train Loss: 4.7781 | Val Loss: 3.6753 | Val Acc: 0.4935
Epoch 3/15
Train Loss: 3.2811 | Val Loss: 2.2988 | Val Acc: 0.7357
Epoch 4/15
Train Loss: 2.2580 | Val Loss: 1.4178 | Val Acc: 0.8436
Epoch 5/15
Train Loss: 1.5546 | Val Loss: 0.9211 | Val Acc: 0.8830
Epoch 6/15
Train Loss: 1.1166 | Val Loss: 0.6434 | Val Acc: 0.9027
Epoch 7/15
Train Loss: 0.8460 | Val Loss: 0.4891 | Val Acc: 0.9094
Epoch 8/15
Train Loss: 0.6662 | Val Loss: 0.3735 | Val Acc: 0.9157
Epoch 9/15
Train Loss: 0.5480 | Val Loss: 0.3096 | Val Acc: 0.9191
Epoch 10/15
Train Loss: 0.4665 | Val Loss: 0.2686 | Val Acc: 0.9245
Epoch 11/15
Train Loss: 0.4099 | Val Loss: 0.2427 | Val Acc: 0.9251
Epoch 12/15
Train Loss: 0.3555 | Val Loss: 0.2202 | Val Acc: 0.9212
Epoch 13/15
Train Loss: 0.3269 | Val Loss: 0.2048 | Val Acc: 0.9288
Epoch 14/15
Train Loss: 0.3017 | Val Loss: 0.1858 | Val Acc: 0.9315
Epoch 15/15
Train Loss: 0.2719 | Val Loss: 0.1815 | Val A

In [9]:
def extract_features(model, dataloader, device=None):
    """Run ``model.extract_features`` over ``dataloader`` and stack the results.

    Args:
        model: Module exposing ``extract_features(inputs)``. It may be wrapped
            in ``nn.DataParallel`` / ``DistributedDataParallel``; the wrapped
            module is then used directly.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        device: Device the inputs are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays concatenated along the
        first axis, in the order the batches were yielded.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    # Parallel wrappers only forward ``__call__``; custom methods such as
    # ``extract_features`` live on the wrapped module.
    parallel_wrappers = (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)
    backbone = model.module if isinstance(model, parallel_wrappers) else model

    if device is None:
        first_param = next(backbone.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    features = []
    labels_list = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            features_batch = backbone.extract_features(inputs)
            features.append(features_batch.cpu().numpy())
            labels_list.append(labels.cpu().numpy())

    if not features:
        # np.concatenate([]) would fail with a message that hides the real cause.
        raise ValueError("dataloader yielded no batches; nothing to extract.")

    return np.concatenate(features), np.concatenate(labels_list)

# Extract features from training data.
# train_loader shuffles and applies random crop / flip / rotation, so embeddings
# taken through it differ from run to run. Read the training images through the
# deterministic validation preprocessing, in dataset order, instead.
feature_dataset = SafeImageFolder(root=f'{data_dir}/train', transform=val_transform)
feature_loader = DataLoader(feature_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
train_features, train_labels = extract_features(model, feature_loader)

# Save features (row i of train_features corresponds to train_labels[i])
np.save('./train_features.npy', train_features)
np.save('./train_labels.npy', train_labels)

In [11]:
def evaluate(model, test_loader, device=None):
    """Print the top-1 accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module called as ``model(inputs)``; the predicted class is the
            index of the largest value along dimension 1 of its output.
        test_loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Accuracy is undefined without samples; avoid an opaque ZeroDivisionError.
        raise ValueError("test_loader yielded no samples; accuracy is undefined.")

    print(f'Test Accuracy: {correct / total:.4f}')

# Load best model
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written on a GPU can still be loaded when only CPU (or MPS) is available.
model.load_state_dict(torch.load('./best_model.pth', map_location=device))
evaluate(model, test_loader)

Test Accuracy: 0.9285
