In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
!pip install roboflow
!pip install torchvision
!pip install matplotlib


Collecting roboflow
  Downloading roboflow-1.1.66-py3-none-any.whl.metadata (9.7 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless==4.10.0.84 (from roboflow)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pillow-heif>=0.18.0 (from roboflow)
  Downloading pillow_heif-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting python-dotenv (from roboflow)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.1.66-py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.7/86.7 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading idna-3.7-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from roboflow import Roboflow

# Initialize Roboflow
rf = Roboflow(api_key="ntp7VqO5aAW6Ze0uGW0U")
project = rf.workspace("birds-dglcq").project("bird-classification-hp8xp")
version = project.version(1)

# Download dataset in classification format (ImageNet-style structure)
dataset = version.download("yolov5")  # 'folder' gives you ImageNet-like structure

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in bird-classification-1 to yolov5pytorch:: 100%|██████████| 31070/31070 [00:00<00:00, 47516.68it/s]





Extracting Dataset Version Zip to bird-classification-1 in yolov5pytorch:: 100%|██████████| 1750/1750 [00:00<00:00, 8119.30it/s]


In [5]:
import os
import cv2
from pathlib import Path
from tqdm import tqdm

# Root dataset location
dataset_path = dataset.location  # e.g., "bird-classification-1"

# Define input and output splits
splits = ['train', 'valid', 'test']
output_root = "classification_dataset"

for split in splits:
    image_dir = os.path.join(dataset_path, split, "images")
    label_dir = os.path.join(dataset_path, split, "labels")

    # Output directory for classification dataset
    output_dir = os.path.join(output_root, split)
    os.makedirs(output_dir, exist_ok=True)

    print(f"\nProcessing {split} split...")

    for label_file in tqdm(os.listdir(label_dir)):
        if not label_file.endswith(".txt"):
            continue

        image_name = label_file.replace(".txt", ".jpg")
        image_path = os.path.join(image_dir, image_name)
        label_path = os.path.join(label_dir, label_file)

        if not os.path.exists(image_path):
            continue

        image = cv2.imread(image_path)
        if image is None:
            continue
        h, w, _ = image.shape

        with open(label_path, "r") as f:
            for i, line in enumerate(f):
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                cls, x_center, y_center, bw, bh = map(float, parts)

                # Convert YOLO to pixel coords
                x1 = int((x_center - bw / 2) * w)
                y1 = int((y_center - bh / 2) * h)
                x2 = int((x_center + bw / 2) * w)
                y2 = int((y_center + bh / 2) * h)

                # Clip coordinates
                x1 = max(0, x1)
                y1 = max(0, y1)
                x2 = min(w, x2)
                y2 = min(h, y2)

                cropped = image[y1:y2, x1:x2]

                # Skip empty crops
                if cropped.size == 0:
                    continue

                class_dir = os.path.join(output_dir, str(int(cls)))
                os.makedirs(class_dir, exist_ok=True)

                out_path = os.path.join(class_dir, f"{label_file.replace('.txt','')}_{i}.jpg")
                cv2.imwrite(out_path, cropped)



Processing train split...


100%|██████████| 656/656 [00:01<00:00, 354.98it/s]



Processing valid split...


100%|██████████| 110/110 [00:00<00:00, 364.56it/s]



Processing test split...


100%|██████████| 103/103 [00:00<00:00, 346.43it/s]


In [6]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

train_dataset = datasets.ImageFolder("classification_dataset/train", transform=transform)
val_dataset = datasets.ImageFolder("classification_dataset/valid", transform=transform)
test_dataset = datasets.ImageFolder("classification_dataset/test", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print("Classes:", train_dataset.classes)


Classes: ['1', '2', '3', '4']


In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm

# Load pretrained ResNet18
model = models.resnet18(pretrained=True)
num_classes = len(train_dataset.classes)

# Replace the final layer
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    print(f"Epoch {epoch+1} Loss: {running_loss / len(train_loader):.4f}")


The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
Epoch 1/20: 100%|██████████| 21/21 [00:04<00:00,  4.74it/s]


Epoch 1 Loss: 0.3771


Epoch 2/20: 100%|██████████| 21/21 [00:04<00:00,  4.95it/s]


Epoch 2 Loss: 0.1594


Epoch 3/20: 100%|██████████| 21/21 [00:04<00:00,  4.95it/s]


Epoch 3 Loss: 0.0451


Epoch 4/20: 100%|██████████| 21/21 [00:04<00:00,  5.03it/s]


Epoch 4 Loss: 0.0191


Epoch 5/20: 100%|██████████| 21/21 [00:04<00:00,  4.95it/s]


Epoch 5 Loss: 0.1567


Epoch 6/20: 100%|██████████| 21/21 [00:04<00:00,  4.99it/s]


Epoch 6 Loss: 0.1308


Epoch 7/20: 100%|██████████| 21/21 [00:04<00:00,  4.95it/s]


Epoch 7 Loss: 0.1278


Epoch 8/20: 100%|██████████| 21/21 [00:04<00:00,  4.95it/s]


Epoch 8 Loss: 0.0668


Epoch 9/20: 100%|██████████| 21/21 [00:04<00:00,  4.95it/s]


Epoch 9 Loss: 0.0240


Epoch 10/20: 100%|██████████| 21/21 [00:04<00:00,  4.90it/s]


Epoch 10 Loss: 0.0185


Epoch 11/20: 100%|██████████| 21/21 [00:04<00:00,  4.88it/s]


Epoch 11 Loss: 0.0168


Epoch 12/20: 100%|██████████| 21/21 [00:04<00:00,  4.86it/s]


Epoch 12 Loss: 0.0114


Epoch 13/20: 100%|██████████| 21/21 [00:04<00:00,  4.93it/s]


Epoch 13 Loss: 0.0298


Epoch 14/20: 100%|██████████| 21/21 [00:04<00:00,  4.89it/s]


Epoch 14 Loss: 0.0446


Epoch 15/20: 100%|██████████| 21/21 [00:04<00:00,  4.88it/s]


Epoch 15 Loss: 0.0559


Epoch 16/20: 100%|██████████| 21/21 [00:04<00:00,  4.89it/s]


Epoch 16 Loss: 0.0392


Epoch 17/20: 100%|██████████| 21/21 [00:04<00:00,  4.85it/s]


Epoch 17 Loss: 0.0266


Epoch 18/20: 100%|██████████| 21/21 [00:04<00:00,  4.85it/s]


Epoch 18 Loss: 0.0232


Epoch 19/20: 100%|██████████| 21/21 [00:04<00:00,  4.81it/s]


Epoch 19 Loss: 0.0053


Epoch 20/20: 100%|██████████| 21/21 [00:04<00:00,  4.86it/s]

Epoch 20 Loss: 0.0031





In [32]:
from sklearn.metrics import classification_report

labels = list(range(len(train_dataset.classes)))  # e.g., [0, 1, 2, 3]

print(classification_report(
    all_labels, all_preds,
    labels=labels,
    target_names=train_dataset.classes
))


              precision    recall  f1-score   support

           1       0.10      0.01      0.02        98
           2       0.00      0.00      0.00         9
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0

   micro avg       0.01      0.01      0.01       107
   macro avg       0.03      0.00      0.00       107
weighted avg       0.09      0.01      0.02       107



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


In [18]:
from sklearn.metrics import classification_report
from tqdm import tqdm

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Evaluating"):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(classification_report(all_labels, all_preds, target_names=train_dataset.classes))



Evaluating: 100%|██████████| 4/4 [00:00<00:00,  7.39it/s]

              precision    recall  f1-score   support

           1       0.25      0.03      0.05        98
           2       0.00      0.00      0.00         9
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0

    accuracy                           0.03       107
   macro avg       0.06      0.01      0.01       107
weighted avg       0.23      0.03      0.05       107




Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


In [38]:
from PIL import Image
from torchvision import transforms

# Define same transform as during training
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
image_path="/kaggle/working/bird-classification-1/test/images/100_jpg.rf.60312e2103e13b5b4c8fe29eca7d2db6.jpg"
def predict_image(image_path, model, class_names):
    image = Image.open(image_path).convert('RGB')
    img_tensor = transform(image).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        outputs = model(img_tensor)
        _, predicted = torch.max(outputs, 1)
    
    predicted_class = class_names[predicted.item()]
    return predicted_class
