In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import os

In [3]:
# Root folders of the three dataset splits (relative paths)
train_dir, valid_dir, test_dir = 'train', 'valid', 'test'

In [15]:
# Data transformations
# Per-channel mean / std handed to Normalize; the same values are used for every split.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _eval_transform():
    """Build the deterministic resize -> centre-crop pipeline used by 'valid' and 'test'."""
    return transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])


data_transforms = {
    # Training is the only split with random augmentation (random crop + horizontal flip).
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
    # Each evaluation split gets its own, identically configured Compose instance.
    'valid': _eval_transform(),
    'test': _eval_transform(),
}

In [16]:
# Load data
# Map every split name to its root folder, then build one ImageFolder per split
# using the transform registered under the same key.
_split_dirs = {'train': train_dir, 'valid': valid_dir, 'test': test_dir}
image_datasets = {
    split: datasets.ImageFolder(root, transform=data_transforms[split])
    for split, root in _split_dirs.items()
}

In [17]:
# One DataLoader per split; only the training split is shuffled.
dataloaders = {
    split: DataLoader(
        dataset,
        batch_size=32,
        shuffle=(split == 'train'),
        num_workers=4,
    )
    for split, dataset in image_datasets.items()
}

In [18]:
# Define the model
class BirdClassifier(nn.Module):
    """ResNet-18 whose final fully connected layer is replaced by a ``num_classes``-way head.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.
        pretrained: Forwarded to ``models.resnet18``. Defaults to ``True``
            (the original behaviour). Pass ``False`` when the weights are
            about to be overwritten by ``load_state_dict`` so the pretrained
            weights do not have to be fetched.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        # The attribute must stay named ``model``: state_dict keys are prefixed with it.
        self.model = models.resnet18(pretrained=pretrained)
        # Replace the stock classification head with one sized for this dataset.
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped ResNet and return the output of its ``fc`` head."""
        return self.model(x)

In [19]:
# The classifier head gets one output per class that ImageFolder found in the training split.
num_classes = len(image_datasets['train'].classes)
model = BirdClassifier(num_classes=num_classes)



In [20]:
# Specify loss function and optimizer
# Cross-entropy over the class logits; Adam updates every parameter of the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [21]:
# Train the model
num_epochs = 25
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')
    print('-' * 10)

    # Every epoch is a full training pass followed by a full validation pass.
    for phase in ['train', 'valid']:
        is_train = phase == 'train'
        # train(True) / train(False) is the same switch as train() / eval().
        model.train(is_train)

        running_loss = 0.0
        # Kept as a plain Python int for the whole pass, so the accuracy
        # computation below never depends on the loop having run at least once.
        running_corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            # Gradients are only tracked while training.
            with torch.set_grad_enabled(is_train):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                if is_train:
                    # Clearing gradients is only needed right before a backward pass.
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

            # The criterion yields a per-batch mean (default reduction); weighting by
            # the batch size turns the running total into a per-sample sum.
            running_loss += loss.item() * inputs.size(0)
            # Compare tensors directly instead of going through the legacy `.data` attribute.
            running_corrects += (preds == labels).sum().item()

        dataset_size = len(image_datasets[phase])
        epoch_loss = running_loss / dataset_size
        epoch_acc = running_corrects / dataset_size

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

Epoch 1/25
----------
train Loss: 5.0247 Acc: 0.0702
valid Loss: 3.1224 Acc: 0.2956
Epoch 2/25
----------
train Loss: 3.1238 Acc: 0.3218
valid Loss: 1.5400 Acc: 0.6152
Epoch 3/25
----------
train Loss: 2.3057 Acc: 0.4776
valid Loss: 0.9393 Acc: 0.7482
Epoch 4/25
----------
train Loss: 1.9083 Acc: 0.5619
valid Loss: 0.7788 Acc: 0.7832
Epoch 5/25
----------
train Loss: 1.6459 Acc: 0.6149
valid Loss: 0.6250 Acc: 0.8324
Epoch 6/25
----------
train Loss: 1.4990 Acc: 0.6461
valid Loss: 0.5597 Acc: 0.8438
Epoch 7/25
----------
train Loss: 1.3590 Acc: 0.6779
valid Loss: 0.4552 Acc: 0.8792
Epoch 8/25
----------
train Loss: 1.2570 Acc: 0.6985
valid Loss: 0.4314 Acc: 0.8895
Epoch 9/25
----------
train Loss: 1.1831 Acc: 0.7163
valid Loss: 0.3395 Acc: 0.9101
Epoch 10/25
----------
train Loss: 1.1168 Acc: 0.7304
valid Loss: 0.3461 Acc: 0.9051
Epoch 11/25
----------
train Loss: 1.0619 Acc: 0.7433
valid Loss: 0.3104 Acc: 0.9097
Epoch 12/25
----------
train Loss: 1.0019 Acc: 0.7564
valid Loss: 0.3140 A

In [22]:
# Evaluate the model on the test set
model.eval()
# Plain Python int: stays a valid number even if the loader yields no batches,
# whereas an int -> tensor accumulator would break on `.double()` in that case.
test_corrects = 0

with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        # Direct tensor comparison; the legacy `.data` attribute is not needed.
        test_corrects += (preds == labels).sum().item()

test_acc = test_corrects / len(image_datasets['test'])
print(f'Test accuracy: {test_acc:.4f}')

Test accuracy: 0.9657


In [23]:
# Write the model's state_dict (parameters and buffers) to disk
torch.save(obj=model.state_dict(), f='bird_classifier.pth')

In [24]:
import torch
from torchvision import transforms
from PIL import Image

In [25]:
# Inference-time preprocessing. It repeats the deterministic Resize(256) ->
# CenterCrop(224) steps of the 'valid' / 'test' pipelines so that a single image
# reaches the network at the same scale and size as the data it was evaluated on.
# NOTE: this rebinds `data_transforms` (until now the per-split dict) because
# predict() reads this name. Re-run the original transforms cell before
# rebuilding the datasets.
data_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [26]:
class BirdClassifier(nn.Module):
    """ResNet-18 whose final fully connected layer is replaced by a ``num_classes``-way head.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.
        pretrained: Forwarded to ``models.resnet18``. Defaults to ``True``;
            pass ``False`` when a checkpoint is loaded right afterwards.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        # The attribute must stay named ``model``: checkpoint keys are prefixed with it.
        self.model = models.resnet18(pretrained=pretrained)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped ResNet and return the output of its ``fc`` head."""
        return self.model(x)

num_classes = len(image_datasets['train'].classes)  # Assuming you have this from your training script
# Every weight is replaced by the checkpoint below, so there is no point in
# fetching the pretrained weights first.
model = BirdClassifier(num_classes, pretrained=False)
# map_location lets a checkpoint that was saved from a CUDA model load on a
# CPU-only machine (and vice versa) instead of failing on the stored device.
model.load_state_dict(torch.load('bird_classifier.pth', map_location=device))
model.eval()
model.to(device)

BirdClassifier(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, trac

In [27]:
def predict(image_path):
    """Classify one image file and return the name of the predicted class.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (typically a file path).

    Returns:
        The entry of ``image_datasets['train'].classes`` selected by the
        highest-scoring model output.
    """
    # Force three RGB channels: grayscale, palette or RGBA files would otherwise
    # not match the 3-channel Normalize step. torchvision's default ImageFolder
    # loader applies the same conversion to the training data. The `with` block
    # closes the underlying file once the pixels are loaded.
    with Image.open(image_path) as img:
        image = img.convert('RGB')

    # Add the batch dimension the model expects and move to the model's device.
    image = data_transforms(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(image)
        _, preds = torch.max(outputs, 1)

    # Convert the 0-dim tensor to a plain int before indexing the class list.
    return image_datasets['train'].classes[preds[0].item()]

In [29]:
# Example usage: classify one image file and print the predicted label
image_path = 'pigeon.jpg'
prediction = predict(image_path=image_path)
print(f'Predicted class: {prediction}')

Predicted class: ROCK DOVE
