## import

In [1]:
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
from torchvision import transforms
import pandas as pd
import os
from PIL import Image
from tqdm import tqdm
import torch.optim as optim
import torch.nn.functional as F

In [2]:
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

## data loader

In [3]:
class ButterflyMothDataset(Dataset):
    def __init__(
        self,
        data_folder="data",
        csv_file="butterflies and moths.csv",
        dataset="train",
    ):
        self.data_folder = data_folder
        self.df = pd.read_csv(os.path.join(self.data_folder, csv_file))
        self.df = self.df[self.df["class id"] < 95]
        min_class_id = self.df["class id"].min()
        max_class_id = self.df["class id"].max()
        self.num_classes = max_class_id - min_class_id + 1
        self.df = self.df[self.df["data set"] == dataset]
        self.transform = transforms.Compose(
            [
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    def __getitem__(self, index):
        item = self.df.iloc[index]
        image = Image.open(os.path.join(self.data_folder, item["filepaths"])).convert(
            "RGB"
        )
        image = self.transform(image)
        class_id = item["class id"]
        label = item["labels"]
        return image, class_id

    def __len__(self):
        return len(self.df)


class ButterflyMothLoader(DataLoader):
    def __init__(self, dataset="train", **kwargs):
        self.dataset = ButterflyMothDataset(dataset=dataset)
        super().__init__(dataset=self.dataset, **kwargs)

## model

In [4]:
class VGG19(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG19, self).__init__()
        self.features = nn.Sequential(
            # 第一塊
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 第二塊
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 第三塊
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 第四塊
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # 第五塊
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


## train

In [5]:
class Trainer:
    def __init__(
        self,
        model: nn.Module,
        criterion,
        optimizer,
        train_loader,
        valid_loader,
        test_loader,
        device,
    ):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.valid_loader = valid_loader
        self.test_loader = test_loader
        self.device = device
        self.epoch = -1
        self.num_epochs = -1

    def validate_one_epoch(self, mode="valid"):
        self.model.eval()
        total_loss = 0
        total_correct = 0
        if mode == "valid":
            progress_bar = tqdm(self.valid_loader, desc="Validating", leave=False)
        else:
            progress_bar = tqdm(self.test_loader, desc="Testing", leave=False)
        with torch.no_grad():
            for idx, (inputs, labels) in enumerate(progress_bar):
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                total_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_correct += (predicted == labels).sum().item()
                progress_bar.set_postfix(loss=loss.item(), accuracy=total_correct / (idx + 1) * self.valid_loader.batch_size)
        avg_loss = total_loss / len(self.valid_loader)
        accuracy = total_correct / len(self.valid_loader.dataset)
        return avg_loss, accuracy

    def train_one_epoch(self):
        self.model.train()
        total_loss = 0.0
        total_correct = 0
        progress_bar = tqdm(
            self.train_loader,
            desc=f"Epoch {self.epoch + 1}/{self.num_epochs}",
            leave=False,
        )
        for idx, (inputs, labels) in enumerate(progress_bar):
            inputs, labels = inputs.to(self.device), labels.to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)
            loss.backward()
            self.optimizer.step()
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_correct += (predicted == labels).sum().item()
            progress_bar.set_postfix(
                loss=loss.item(),
                accuracy=total_correct / ((idx + 1) * self.train_loader.batch_size),
            )
        avg_loss = total_loss / len(self.train_loader)
        accuracy = total_correct / len(self.train_loader.dataset)
        return avg_loss, accuracy

    def train_model(self, num_epochs=10):
        print(f"Training for {num_epochs} epochs...")
        self.num_epochs = num_epochs
        for epoch in range(self.num_epochs):
            self.epoch = epoch
            train_loss, train_accuracy = self.train_one_epoch()
            print(
                f"Epoch {epoch+1}, Average Loss: {train_loss:.4f},"
                f"Accuracy: {train_accuracy:.4f}"
            )
            valid_loss, valid_accuracy = self.validate_one_epoch(mode="valid")
            print(
                f"Validation, Average Loss: {valid_loss:.4f},"
                f"Accuracy: {valid_accuracy:.4f}"
            )
        test_loss, test_accuracy = self.validate_one_epoch(mode="test")
        print(f"Test, Average Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.4f}")

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


train_loader = ButterflyMothLoader("train", batch_size=32, shuffle=True)
test_loader = ButterflyMothLoader("test", batch_size=32, shuffle=False)
valid_loader = ButterflyMothLoader("valid", batch_size=32, shuffle=False)

num_classes = train_loader.dataset.num_classes
print(f"Number of classes: {num_classes}")

model = VGG19(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

Number of classes: 95


## Note

| lr | softmax | batch_norm | epoch | classes | acc |
|----|---------|------------|-------|---------|-----|
| 1e-4 | 0 | 2 | 57 | 100 | 0.01 |
| 1e-4 | 0 | 2 | 57 | 5 | 0.9~1 |
| 1e-4 | 0 | 2 | 57 | 30 | 0.7, 0.9 |
| 1e-4 | 0 | 2 | 12 | 50 | 0.69, 0.82 |
| 1e-4 | 0 | 2 | 3 | 90 | 0.011, 0.015 |
| 1e-4 | 0 | 2 | 65 | 70 | 0.6714, 0.9884 |




In [7]:
trainer = Trainer(
    model, criterion, optimizer, train_loader, valid_loader, test_loader, device
)
trainer.train_model(num_epochs=300)

Training for 300 epochs...


                                                                                          

Epoch 1, Average Loss: 4.5535,Accuracy: 0.0136


                                                                                     

Validation, Average Loss: 4.5552,Accuracy: 0.0105


                                                                                          

Epoch 2, Average Loss: 4.5516,Accuracy: 0.0150


                                                                                     

Validation, Average Loss: 4.5558,Accuracy: 0.0105


                                                                                          

Epoch 3, Average Loss: 4.5500,Accuracy: 0.0153


                                                                                     

Validation, Average Loss: 4.5585,Accuracy: 0.0105


                                                                                          

Epoch 4, Average Loss: 4.5497,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5576,Accuracy: 0.0105


                                                                                          

Epoch 5, Average Loss: 4.5493,Accuracy: 0.0154


                                                                                     

Validation, Average Loss: 4.5585,Accuracy: 0.0105


                                                                                          

Epoch 6, Average Loss: 4.5488,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5577,Accuracy: 0.0105


                                                                                          

Epoch 7, Average Loss: 4.5492,Accuracy: 0.0154


                                                                                     

Validation, Average Loss: 4.5586,Accuracy: 0.0105


                                                                                          

Epoch 8, Average Loss: 4.5489,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5582,Accuracy: 0.0105


                                                                                          

Epoch 9, Average Loss: 4.5488,Accuracy: 0.0151


                                                                                     

Validation, Average Loss: 4.5604,Accuracy: 0.0105


                                                                                           

Epoch 10, Average Loss: 4.5487,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5591,Accuracy: 0.0105


                                                                                           

Epoch 11, Average Loss: 4.5486,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5587,Accuracy: 0.0105


                                                                                           

Epoch 12, Average Loss: 4.5487,Accuracy: 0.0151


                                                                                     

Validation, Average Loss: 4.5597,Accuracy: 0.0105


                                                                                           

Epoch 13, Average Loss: 4.5484,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5610,Accuracy: 0.0105


                                                                                           

Epoch 14, Average Loss: 4.5487,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5591,Accuracy: 0.0105


                                                                                           

Epoch 15, Average Loss: 4.5485,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5596,Accuracy: 0.0105


                                                                                           

Epoch 16, Average Loss: 4.5484,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5601,Accuracy: 0.0105


                                                                                           

Epoch 17, Average Loss: 4.5483,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5603,Accuracy: 0.0105


                                                                                           

Epoch 18, Average Loss: 4.5484,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5599,Accuracy: 0.0105


                                                                                           

Epoch 19, Average Loss: 4.5484,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5596,Accuracy: 0.0105


                                                                                           

Epoch 20, Average Loss: 4.5485,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5595,Accuracy: 0.0105


                                                                                           

Epoch 21, Average Loss: 4.5482,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5603,Accuracy: 0.0105


                                                                                           

Epoch 22, Average Loss: 4.5483,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5602,Accuracy: 0.0105


                                                                                           

Epoch 23, Average Loss: 4.5481,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5604,Accuracy: 0.0105


                                                                                           

Epoch 24, Average Loss: 4.5484,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5600,Accuracy: 0.0105


                                                                                           

Epoch 25, Average Loss: 4.5481,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5599,Accuracy: 0.0105


                                                                                           

Epoch 26, Average Loss: 4.5483,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5596,Accuracy: 0.0105


                                                                                           

Epoch 27, Average Loss: 4.5482,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5605,Accuracy: 0.0105


                                                                                           

Epoch 28, Average Loss: 4.5483,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5598,Accuracy: 0.0105


                                                                                           

Epoch 29, Average Loss: 4.5482,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5604,Accuracy: 0.0105


                                                                                           

Epoch 30, Average Loss: 4.5483,Accuracy: 0.0156


                                                                                     

Validation, Average Loss: 4.5602,Accuracy: 0.0105


                                                                                           

Epoch 31, Average Loss: 4.5481,Accuracy: 0.0154


                                                                                     

Validation, Average Loss: 4.5602,Accuracy: 0.0105


Epoch 32/300:  54%|█████▍    | 203/376 [00:53<00:45,  3.81it/s, accuracy=0.0162, loss=4.54]