In [1]:
!pip install torch pillow torchvision numpy matplotlib



In [6]:
from torch.utils.data import Dataset, DataLoader, random_split
import os
from PIL import Image
import xml.etree.ElementTree as ET
from torchvision import transforms
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch

In [7]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []
        self.family_to_idx = {}
        self.idx_to_family = {}
        self.family_count = 0

        for subdir in os.listdir(root_dir):
            subdir_path = os.path.join(root_dir, subdir)
            if os.path.isdir(subdir_path):
                for file in os.listdir(subdir_path):
                    if file.endswith('.jpg'):
                        img_path = os.path.join(subdir_path, file)
                        xml_path = os.path.join(subdir_path, file.replace(".jpg", ".xml"))
                        if os.path.exists(xml_path):
                            if self.parse(xml_path):
                                self.data.append((img_path, xml_path))

    def parse(self, xml_path):
        try:
            tree = ET.parse(xml_path)
            root = tree.getroot()
            family = root.find('Family').text
            if family not in self.family_to_idx:
                self.family_to_idx[family] = self.family_count
                self.idx_to_family[self.family_count] = family
                self.family_count += 1
            return True
        except ET.ParseError:
            print(f'Warning: Could not parse XML file {xml_path}')
            return False


    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, xml_path = self.data[idx]
        image = Image.open(img_path).convert("RGB")

        try:
            tree = ET.parse(xml_path)
            root = tree.getroot()
            family = root.find('Family').text
        except ET.ParseError:
            print(f"Warning: Could not parse XML file {xml_path}")

        if self.transform:
            image = self.transform(image)
        
        return image, self.family_to_idx[family] 


In [4]:
root_dir = "data"
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

dataset = CustomDataset(root_dir = root_dir, transform=transform)



In [5]:
dataset_size = len(dataset)
train_size = int(0.7*dataset_size)
val_size = int(0.1*dataset_size)
test_size = dataset_size - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

In [6]:
train_dataloader = DataLoader(dataset=train_dataset, batch_size=1024, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=1024, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1024, shuffle=True)

In [7]:
class FamilyModel(nn.Module):
    def __init__(self, num_classes):
        super(FamilyModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(200704, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        #x = self.pool(self.relu(self.conv2(x)))
        #x = self.pool(self.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [8]:
learning_rates = [0.0001, 0.001, 0.01]
num_epochs = 2

In [9]:
models = {lr: FamilyModel(dataset.family_count) for lr in learning_rates}

In [10]:
criterion = nn.CrossEntropyLoss()
optimizers = {lr: optim.Adam(models[lr].parameters(), lr=lr) for lr in learning_rates}

In [11]:
train_loss_values = {lr: [] for lr in learning_rates}
val_loss_values = {lr: [] for lr in learning_rates}

In [12]:

for epoch in range(num_epochs):
    for lr in learning_rates:
        models[lr].train()

    running_loss = {lr: 0.0 for lr in learning_rates}
    num_batches = len(train_dataloader)
    print_interval = max(1, num_batches // 10)

    for batch_idx, (images, labels) in enumerate(train_dataloader):

        for lr in learning_rates:
            optimizers[lr].zero_grad()
            outputs = models[lr](images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizers[lr].step()
            running_loss[lr] += loss.item()
            
            train_loss_values[lr].append(loss.item())

            if (batch_idx + 1) % print_interval == 0 or (batch_idx + 1) == num_batches:
                progress = (batch_idx + 1) / num_batches * 100
                print(f"LR {lr} Training progress {progress:.2f}% Loss: {loss.item():.4f}")

    for lr in learning_rates:
        print(f'LR {lr} Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss[lr]/len(train_dataloader):.4f}')
        
        models[lr].eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_dataloader:

                outputs = models[lr](images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
            
        val_loss_values[lr].append(val_loss / len(val_dataloader))
        print(f'LR {lr} Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss/len(val_dataloader):.4f}')
    

print("Finished Training")


LR 0.0001 Training progress 9.83% Loss: 5.0053
LR 0.001 Training progress 9.83% Loss: 5.6325
LR 0.01 Training progress 9.83% Loss: 5.5885
LR 0.0001 Training progress 19.65% Loss: 4.8375
LR 0.001 Training progress 19.65% Loss: 5.1333
LR 0.01 Training progress 19.65% Loss: 4.8633
LR 0.0001 Training progress 29.48% Loss: 4.7526
LR 0.001 Training progress 29.48% Loss: 4.8037
LR 0.01 Training progress 29.48% Loss: 4.6315
LR 0.0001 Training progress 39.31% Loss: 4.7532
LR 0.001 Training progress 39.31% Loss: 4.8013
LR 0.01 Training progress 39.31% Loss: 4.6572
LR 0.0001 Training progress 49.13% Loss: 4.6594
LR 0.001 Training progress 49.13% Loss: 4.6747
LR 0.01 Training progress 49.13% Loss: 4.5684
LR 0.0001 Training progress 58.96% Loss: 4.7162
LR 0.001 Training progress 58.96% Loss: 4.7068
LR 0.01 Training progress 58.96% Loss: 4.6276
LR 0.0001 Training progress 68.79% Loss: 4.5987
LR 0.001 Training progress 68.79% Loss: 4.6464
LR 0.01 Training progress 68.79% Loss: 4.5820


In [4]:
import matplotlib.pyplot as plt

In [5]:
plt.figure(figsize=(12, 6))
for lr in learning_rates:
    plt.plot(train_loss_values[lr], label=f'Train Loss (LR={lr})')
    plt.plot([len(train_loss_values[lr]) // num_epochs * i for i in range(num_epochs)], val_loss_values[lr], 'o-', label=f'Val Loss (LR={lr})')

plt.xlabel('Batch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for Different Learning Rates')
plt.legend()
plt.show()

NameError: name 'learning_rates' is not defined

<Figure size 1200x600 with 0 Axes>

In [None]:
torch.save(model_family.state_dict(), 'model_family.pth')