<a href="https://colab.research.google.com/github/khadijasaeed683/APPLICATION/blob/main/AIProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import zipfile
# Unpack the raw image archive stored on Google Drive.
# NOTE(review): the archive is extracted into ".../Animal_Dataset_Unzipped", while the
# split cell below reads from ".../Animal_Dataset/raw-img" -- confirm which location
# is the intended dataset root.
zip_path = "/content/drive/MyDrive/Animal_Dataset/raw-img.zip"
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path="/content/drive/MyDrive/Animal_Dataset_Unzipped")


In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout


In [3]:
import os, shutil
import random
from tqdm import tqdm

original_dataset_dir = "/content/drive/MyDrive/Animal_Dataset/raw-img"
base_dir = "/content/drive/MyDrive/Animal_Dataset_Split"

# Fixed seed so that re-running this cell reproduces exactly the same split.
# With an unseeded shuffle, a second run assigns files to different splits and
# copies them on top of the first run's output, which leaks images between
# train/val/test. If base_dir already holds output from an earlier unseeded
# run, delete it before running this cell.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

# Fraction of each category assigned to every split
split_ratio = {'train': 0.7, 'val': 0.15, 'test': 0.15}

# Only sub-directories are treated as categories; sorting makes the order (and
# therefore the seeded shuffle) independent of the filesystem's listing order.
categories = sorted(
    entry for entry in os.listdir(original_dataset_dir)
    if os.path.isdir(os.path.join(original_dataset_dir, entry))
)

# Create folders
for split in split_ratio:
    for category in categories:
        os.makedirs(os.path.join(base_dir, split, category), exist_ok=True)

# Split data
for category in tqdm(categories):
    category_dir = os.path.join(original_dataset_dir, category)
    imgs = sorted(
        name for name in os.listdir(category_dir)
        if os.path.isfile(os.path.join(category_dir, name))
    )
    split_rng.shuffle(imgs)

    n_total = len(imgs)
    n_train = int(n_total * split_ratio['train'])
    n_val = int(n_total * split_ratio['val'])

    # The test split takes the remainder, so every image lands in exactly one split.
    split_imgs = {
        'train': imgs[:n_train],
        'val': imgs[n_train:n_train + n_val],
        'test': imgs[n_train + n_val:],
    }

    for split, names in split_imgs.items():
        for img_name in names:
            shutil.copy(os.path.join(category_dir, img_name),
                        os.path.join(base_dir, split, category, img_name))


# **Image Preprocessing**
Rescaling pixel values and resizing images to a fixed size

In [7]:
# Locations of the raw images and of the train/val/test split on Drive
original_dataset_dir = "/content/drive/MyDrive/Animal_Dataset/raw-img"
base_dir = "/content/drive/MyDrive/Animal_Dataset_Split"

# One generator per split; each one only multiplies pixel values by 1/255.
train_datagen, val_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# Settings shared by all three directory iterators
flow_kwargs = dict(target_size=(150, 150), batch_size=32, class_mode='categorical')

train_generator = train_datagen.flow_from_directory(base_dir + '/train', **flow_kwargs)

val_generator = val_datagen.flow_from_directory(base_dir + '/val', **flow_kwargs)

# shuffle=False is important for correct evaluation: test batches keep a fixed order
test_generator = test_datagen.flow_from_directory(
    base_dir + '/test',
    shuffle=False,
    **flow_kwargs
)


Found 18492 images belonging to 10 classes.
Found 3958 images belonging to 10 classes.
Found 3971 images belonging to 10 classes.


# **PyTorch + Hugging Face Implementation**

In [8]:
!pip install torch torchvision transformers datasets

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from transformers import ViTForImageClassification, ViTFeatureExtractor

In [10]:
from torchvision import transforms

# Preprocessing pipeline applied to every image before it reaches the model
transform = transforms.Compose([
    # ViT expects 224x224 images
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    # Same mean/std of 0.5 for each of the three channels
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])


In [16]:
from torch.utils.data import DataLoader
from torchvision import datasets

# Root folder of each split on Drive
train_path = "/content/drive/MyDrive/Animal_Dataset_Split/train"
val_path = "/content/drive/MyDrive/Animal_Dataset_Split/val"
test_path = "/content/drive/MyDrive/Animal_Dataset_Split/test"

# One ImageFolder per split, all sharing the same preprocessing transform
train_dataset, val_dataset, test_dataset = (
    datasets.ImageFolder(root=split_path, transform=transform)
    for split_path in (train_path, val_path, test_path)
)

# Only the training loader shuffles; validation and test keep a fixed order
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(held_out, batch_size=32, shuffle=False)
    for held_out in (val_dataset, test_dataset)
)

print(f"Train samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Test samples: {len(test_dataset)}")


Train samples: 18491
Validation samples: 3958
Test samples: 3971


In [14]:
import torch
from transformers import ViTForImageClassification

# The classification head gets one output per class found in the training dataset
num_classes = len(train_dataset.classes)
print(f"Number of Classes: {num_classes}")

# Use the GPU if available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pretrained ViT model and place it on the chosen device in one step
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=num_classes,
    ignore_mismatched_sizes=True,
).to(device)

print("Model loaded successfully on", device)


Number of Classes: 10


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded successfully on cuda


In [15]:
from PIL import Image, UnidentifiedImageError
import os

def clean_corrupted_images(folder_path):
    """Delete image files under ``folder_path`` that Pillow cannot verify.

    Walks ``folder_path`` recursively. Every file whose extension is ``.jpg``,
    ``.jpeg`` or ``.png`` (case-insensitive) is opened and checked with
    ``Image.verify()``. A file that fails the check is reported on stdout and
    removed from disk. Files with any other extension are left untouched.

    Args:
        folder_path: Root directory to scan.

    Returns:
        None.
    """
    image_extensions = {'.jpg', '.jpeg', '.png'}
    for root, _, files in os.walk(folder_path):
        for file in files:
            # Compare the real extension: a plain suffix test would also match
            # names such as "notes_png" and could delete non-image files.
            if os.path.splitext(file)[1].lower() not in image_extensions:
                continue
            file_path = os.path.join(root, file)
            try:
                # The context manager closes the file handle even when the
                # check fails, so the file is no longer open when it is removed.
                with Image.open(file_path) as img:
                    img.verify()  # Verify without fully loading
            except (UnidentifiedImageError, IOError, SyntaxError):
                print(f"Removing corrupted file: {file_path}")
                os.remove(file_path)

# Run once, over every split.
# NOTE(review): this deletes files, so it presumably has to run before the ImageFolder
# datasets are built from these folders -- confirm the intended cell order.
for split_dir in (
    '/content/drive/MyDrive/Animal_Dataset_Split/train',
    '/content/drive/MyDrive/Animal_Dataset_Split/val',
    '/content/drive/MyDrive/Animal_Dataset_Split/test',
):
    clean_corrupted_images(split_dir)


Removing corrupted file: /content/drive/MyDrive/Animal_Dataset_Split/train/cow/OIP-exouo3Sb_jBaZy3-nDnRtAAAAA.jpeg


In [17]:
import torch
import torch.nn as nn
import torch.optim as optim

# Make sure model is on the correct device
model = model.to(device)

# Define optimizer and loss function
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

# Number of epochs
num_epochs = 5

# Per-epoch history. The loss-curve cell further down reads `train_losses` and
# `val_losses`; without these lists it fails with a NameError. All four lists
# are appended together at the end of an epoch so they always have equal length,
# even if training is interrupted part-way through an epoch.
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Training loop
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    print("-" * 20)

    # --- Training ---
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images).logits  # the model output object exposes the raw scores as .logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Reported loss is the average of the per-batch losses
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100 * correct / total

    print(f"Train Loss: {epoch_loss:.4f} | Train Accuracy: {epoch_acc:.2f}%")

    # --- Validation ---
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images).logits
            loss = criterion(outputs, labels)

            val_loss += loss.item()

            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_epoch_loss = val_loss / len(val_loader)
    val_epoch_acc = 100 * val_correct / val_total

    print(f"Val Loss: {val_epoch_loss:.4f} | Val Accuracy: {val_epoch_acc:.2f}%\n")

    # Record this epoch only once both phases have finished
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc)
    val_losses.append(val_epoch_loss)
    val_accuracies.append(val_epoch_acc)


Epoch 1/5
--------------------
Train Loss: 0.6524 | Train Accuracy: 92.83%
Val Loss: 0.2121 | Val Accuracy: 98.11%

Epoch 2/5
--------------------
Train Loss: 0.1253 | Train Accuracy: 99.17%
Val Loss: 0.1172 | Val Accuracy: 98.51%

Epoch 3/5
--------------------


KeyboardInterrupt: 

In [18]:
# Save only the model's state_dict to 'animal_classifier.pth' (a relative path, so the
# file is written to the current working directory). Restoring it requires building
# the same ViTForImageClassification model first and then calling load_state_dict.
torch.save(model.state_dict(), 'animal_classifier.pth')

## Initializing Model


In [22]:
# Resolve the target device first so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model with the same architecture that produced the checkpoint
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",  # Pretrained ViT model
    num_labels=len(train_dataset.classes),  # Set number of classes based on your dataset
    ignore_mismatched_sizes=True
)

# Load the saved weights into the model. map_location lets a checkpoint that was
# saved from a CUDA model load on a CPU-only runtime instead of raising an error.
model.load_state_dict(torch.load('animal_classifier.pth', map_location=device))

# Move the model to the device (GPU or CPU)
model.to(device)

# Set the model to evaluation mode
model.eval()

print("Model loaded successfully!")


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded successfully!


In [23]:
# Put the model in evaluation mode before scoring the held-out test set
model.eval()

# Running totals accumulated over all test batches
test_loss, test_correct, test_total = 0.0, 0, 0

# No gradients are needed for evaluation
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and batch loss
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        # The highest-scoring class is the prediction
        predicted = outputs.argmax(dim=1)

        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

# Average the per-batch losses and convert the hit count to a percentage
test_loss = test_loss / len(test_loader)
test_accuracy = 100 * test_correct / test_total

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.2f}%")


Test Loss: 0.0880
Test Accuracy: 98.54%


# Image Prediction

In [27]:
from PIL import Image
import torch
from torchvision import transforms

# Inference-time preprocessing: resize, convert to tensor, normalize --
# the same steps as the transform defined for the datasets earlier in the notebook
transform = transforms.Compose([
    # ViT expects 224x224 images
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    # Same mean/std of 0.5 for each of the three channels
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])

# Function to predict the class of an image
def predict_image(image_path, model, class_names):
    """Classify a single image file and return its predicted label.

    Args:
        image_path: Path of the image file to classify.
        model: Model whose call result exposes ``.logits``; it is switched to
            evaluation mode as a side effect.
        class_names: Sequence mapping a class index to its label.

    Returns:
        The entry of ``class_names`` at the index of the highest logit.
    """
    # Run on whatever device the model's weights actually live on. Guessing it
    # from CUDA availability breaks when the model sits on the CPU of a GPU machine.
    device = next(model.parameters()).device

    # Convert to RGB so grayscale, RGBA or palette images still yield the three
    # channels the normalization step expects; the context manager closes the file.
    with Image.open(image_path) as img:
        image = transform(img.convert("RGB")).unsqueeze(0)  # Add a batch dimension
    image = image.to(device)

    # Put the model in evaluation mode and make the prediction
    model.eval()
    with torch.no_grad():
        outputs = model(image).logits

    # Index of the highest score -> corresponding class name
    predicted_class = outputs.argmax(dim=1).item()
    predicted_label = class_names[predicted_class]

    return predicted_label

# Example usage:
# List of class labels (from training dataset)
class_names = train_dataset.classes
# Replace with the path to your image
image_path = "/content/drive/MyDrive/Animal_Dataset_Split/test/squirrel/OIP-56uLj5ndFXTFV5ahbt5TNgHaKD.jpeg"

# Make a prediction and report it
predicted_label = predict_image(image_path=image_path, model=model, class_names=class_names)
print(f"Predicted class: {predicted_label}")


Predicted class: squirrel


# Loss Curve (Training vs Validation Loss)

In [24]:
# NOTE: this cell is disabled (fully commented out). It needs two lists,
# `train_losses` and `val_losses`, holding one value per epoch; running it
# without them raises NameError. `epochs` below spans all `num_epochs`, so both
# lists must contain exactly `num_epochs` entries (an interrupted training run
# would not satisfy that).

# # Assuming you have stored training and validation loss during training in lists
# # You should have `train_losses` and `val_losses` for each epoch

# import matplotlib.pyplot as plt

# # Plot the loss curve
# epochs = range(1, num_epochs + 1)
# plt.figure(figsize=(10, 5))
# plt.plot(epochs, train_losses, label='Train Loss', color='blue')
# plt.plot(epochs, val_losses, label='Validation Loss', color='red')
# plt.xlabel('Epochs')
# plt.ylabel('Loss')
# plt.title('Train vs Validation Loss')
# plt.legend()
# plt.show()


NameError: name 'train_losses' is not defined

<Figure size 1000x500 with 0 Axes>