In [1]:
import torch
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import cv2
from torchvision import transforms
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import torchvision.models as models
import torch.nn as nn
import os
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("preprocessed_data/description.csv", index_col = [0])
bad_idxs = []
for idxs, file in enumerate(df['image_name']):
    path = f"{'preprocessed_data/images'}/{file}"
    if( not os.path.isfile(path)):
        bad_idxs.append(idxs)
df = df.drop(bad_idxs)
df = df.sample(frac=1).reset_index(drop=True)


In [3]:
class SwanDataset(Dataset):
    def __init__(self, img_folder, description_df) -> None:
        super().__init__()
        self.size = (256, 256)
        self.img_folder = img_folder
        self.description_df = description_df
        self.labels = self.description_df.swan_id.to_list()
        self.imgs = self.description_df.image_name.to_list()
        self.transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize(self.size),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ]
        )
    
    def __len__(self):
        return len(self.description_df)
        
    def __getitem__(self, idx):
        img = cv2.imread(f"{self.img_folder}/{self.imgs[idx]}")
        if(len(img.shape) > 3):
            img = img[:,:,:3]
        label = self.labels[idx]
        x = self.transforms(img)
        return (x, label)

In [4]:
train_df, val_df = train_test_split(df, test_size=0.2)
train_ds = SwanDataset("preprocessed_data/images", train_df)
val_ds = SwanDataset("preprocessed_data/images", val_df)

batch_size = 32
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(dataset=val_ds, batch_size=batch_size, shuffle=True)

In [5]:
train_df['swan_id'].value_counts()

1    4834
0    3465
2    2963
Name: swan_id, dtype: int64

In [6]:
val_df['swan_id'].value_counts()

1    1189
0     892
2     735
Name: swan_id, dtype: int64

In [7]:
vgg16 = models.vgg16(pretrained=True)
vgg16.classifier[6] = nn.Linear(4096, 3)



In [8]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, scheduler = None):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        model.train(True)  # Set the model to training mode

        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        for images, labels in tqdm(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            predicted = outputs.argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = correct_predictions / total_samples

        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {epoch_loss:.4f} | Train Accuracy: {epoch_accuracy*100:.2f}%")
        if(scheduler is not None):
            scheduler.step()
        # Validation evaluation
        model.eval()  # Set the model to evaluation mode

        val_loss = 0.0
        val_correct_predictions = 0
        val_total_samples = 0

        with torch.no_grad():
            for val_images, val_labels in tqdm(val_loader):
                val_images = val_images.to(device)
                val_labels = val_labels.to(device)

                val_outputs = model(val_images)
                val_loss += criterion(val_outputs, val_labels).item()

                val_predicted = val_outputs.argmax(dim=1)
                val_total_samples += val_labels.size(0)
                val_correct_predictions += (val_predicted == val_labels).sum().item()

        val_epoch_loss = val_loss / len(val_loader)
        val_epoch_accuracy = val_correct_predictions / val_total_samples

        print(f"Epoch {epoch+1}/{num_epochs} | Val Loss: {val_epoch_loss:.4f} | Val Accuracy: {val_epoch_accuracy*100:.2f}%")

    print("Training completed.")

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg16.parameters(), lr = 1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)
num_epoch = 30

In [11]:
train_model(vgg16, train_dl, val_dl, criterion, optimizer, num_epoch, scheduler)

  0%|          | 0/352 [00:00<?, ?it/s]

Epoch 1/30 | Train Loss: 0.9368 | Train Accuracy: 55.26%


  0%|          | 0/88 [00:00<?, ?it/s]

Epoch 1/30 | Val Loss: 0.6635 | Val Accuracy: 72.62%


  0%|          | 0/352 [00:00<?, ?it/s]

Epoch 2/30 | Train Loss: 0.6602 | Train Accuracy: 71.52%


  0%|          | 0/88 [00:00<?, ?it/s]

Epoch 2/30 | Val Loss: 0.6341 | Val Accuracy: 70.85%


  0%|          | 0/352 [00:00<?, ?it/s]

Epoch 3/30 | Train Loss: 0.6033 | Train Accuracy: 75.61%


  0%|          | 0/88 [00:00<?, ?it/s]

Epoch 3/30 | Val Loss: 0.5826 | Val Accuracy: 77.27%


  0%|          | 0/352 [00:00<?, ?it/s]

Epoch 4/30 | Train Loss: 0.5088 | Train Accuracy: 81.42%


  0%|          | 0/88 [00:00<?, ?it/s]

Epoch 4/30 | Val Loss: 0.5008 | Val Accuracy: 82.17%


  0%|          | 0/352 [00:00<?, ?it/s]

In [10]:
torch.save(vgg16.state_dict(), 'vgg_0.8.pth')

In [11]:
def calculate_model_perfomance(model, val_loader):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    predicted_labels = []
    real_labels = []
    with torch.no_grad():
        for val_images, val_labels in tqdm(val_loader):
            val_images = val_images.to(device)
            val_labels = val_labels.to(device)

            val_outputs = model(val_images)

            _, val_predicted = torch.max(val_outputs.data, 1)
            predicted_labels += list(val_predicted.cpu().numpy())
            real_labels += list(val_labels.cpu().numpy())
    return predicted_labels, real_labels

In [12]:
y_pred, y_true = calculate_model_perfomance(vgg16, val_dl)

  0%|          | 0/88 [00:00<?, ?it/s]

In [14]:
y_pred = np.array(y_pred)
y_true = np.array(y_true)

In [15]:
(y_pred == y_true).sum()/len(y_pred)

0.5916193181818182

In [18]:
from sklearn.metrics import confusion_matrix

In [16]:
cm = confusion_matrix(y_true,y_pred)
cm

array([[215, 634,   2],
       [249, 997,   4],
       [113, 148, 454]])