## ResNet results:

Test Loss: 0.6026

Test Accuracy: 0.5964

## ViT results:

Test Loss: 0.6563

Test Accuracy: 0.6036

## Swin results:

Test Loss: 0.6000

Test Accuracy: 0.6036

In [None]:
# Unpack the dataset
import zipfile
import os

# Extract every member of the dataset archive into the current working directory.
zip_path = 'shiny_dataset.zip'
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path='.')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

# Run on CUDA when PyTorch reports a usable GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

In [None]:
# Dataset root and its three split sub-directories.
data_dir = Path('dataset')
train_dir, val_dir, test_dir = (data_dir / split for split in ('train', 'val', 'test'))

# Training hyperparameters shared by the cells below.
batch_size, num_epochs, learning_rate = 32, 50, 0.01

# Training pipeline: random crop / flip / rotation / colour jitter for
# augmentation, then tensor conversion and per-channel normalisation.
train_transforms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation / test pipeline: deterministic resize + centre crop, followed by
# the same tensor conversion and normalisation as the training pipeline.
val_test_transforms = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# One ImageFolder per split; only the training split uses the augmenting pipeline.
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
val_dataset = datasets.ImageFolder(root=val_dir, transform=val_test_transforms)
test_dataset = datasets.ImageFolder(root=test_dir, transform=val_test_transforms)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Quick summary of the class names and split sizes.
print(
    f'Classes: {train_dataset.classes}',
    f'Train samples: {len(train_dataset)}',
    f'Validation samples: {len(val_dataset)}',
    f'Test samples: {len(test_dataset)}',
    sep='\n',
)

In [None]:
def frozen_parameters(model):
    """Disable gradient tracking for every parameter of ``model`` (in place)."""
    # Module.requires_grad_ sets requires_grad on all of the module's parameters.
    model.requires_grad_(False)

def last_layer(num_features, *, hidden_features=256, num_classes=2, dropout=0.3):
    """Build the replacement classification head for a pretrained backbone.

    The head is ``Linear -> ReLU -> Dropout -> Linear``. With the default
    arguments it is the original binary-classification head
    (256 hidden units, dropout 0.3, 2 output logits).

    Args:
        num_features: Size of the feature vector produced by the backbone.
        hidden_features: Width of the intermediate linear layer.
        num_classes: Number of output logits.
        dropout: Dropout probability applied after the ReLU.

    Returns:
        The newly created ``nn.Sequential`` head (also printed to stdout).
    """
    layer = nn.Sequential(
        nn.Linear(num_features, hidden_features),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_features, num_classes)
    )
    print("New classification layer:")
    print(layer)
    return layer

def resnet_initialization():
    """Create a frozen ImageNet-pretrained ResNet-152 with a new trainable head.

    Returns:
        A ``(model, optimizer)`` tuple: the model moved to the module-level
        ``device``, and an Adam optimizer over the parameters of the new
        ``model.fc`` head only, using the module-level ``learning_rate``.
    """
    # `pretrained=True` is deprecated in favour of the `weights` argument
    # (torchvision >= 0.13); IMAGENET1K_V1 is the checkpoint the old flag
    # resolved to, so the loaded weights are unchanged.
    model = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)
    frozen_parameters(model)
    # The head is replaced after freezing, so its fresh parameters stay trainable.
    model.fc = last_layer(model.fc.in_features)
    model = model.to(device)
    optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)
    return model, optimizer

def vit_initialization():
    """Create a frozen ImageNet-pretrained ViT-B/16 with a new trainable head.

    Returns:
        A ``(model, optimizer)`` tuple: the model moved to the module-level
        ``device``, and an Adam optimizer over the parameters of the new
        ``model.heads.head`` only, using the module-level ``learning_rate``.
    """
    # `pretrained=True` is deprecated in favour of the `weights` argument
    # (torchvision >= 0.13); IMAGENET1K_V1 is the checkpoint the old flag
    # resolved to, so the loaded weights are unchanged.
    model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_V1)
    frozen_parameters(model)
    # The head is replaced after freezing, so its fresh parameters stay trainable.
    model.heads.head = last_layer(model.heads.head.in_features)
    model = model.to(device)
    optimizer = optim.Adam(model.heads.head.parameters(), lr=learning_rate)
    return model, optimizer

def swin_initialization():
    """Create a frozen ImageNet-pretrained Swin-B with a new trainable head.

    Returns:
        A ``(model, optimizer)`` tuple: the model moved to the module-level
        ``device``, and an Adam optimizer over the parameters of the new
        ``model.head`` only, using the module-level ``learning_rate``.
    """
    # `pretrained=True` is deprecated in favour of the `weights` argument
    # (torchvision >= 0.13); IMAGENET1K_V1 is the checkpoint the old flag
    # resolved to, so the loaded weights are unchanged.
    model = models.swin_b(weights=models.Swin_B_Weights.IMAGENET1K_V1)
    frozen_parameters(model)
    # The head is replaced after freezing, so its fresh parameters stay trainable.
    model.head = last_layer(model.head.in_features)
    model = model.to(device)
    optimizer = optim.Adam(model.head.parameters(), lr=learning_rate)
    return model, optimizer

In [None]:
# Loss used for training, validation and testing.
criterion = nn.CrossEntropyLoss()

# Pick exactly one backbone; the other two are kept as commented-out switches.
model, optimizer = resnet_initialization()
#model, optimizer = vit_initialization()
#model, optimizer = swin_initialization()

# Halve the learning rate once the monitored value (validation loss in the
# training loop) has failed to decrease for `patience` epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
)

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean of the batch
        losses and the fraction of samples whose arg-max output equals the label.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_inputs, batch_labels in dataloader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight each batch loss by its batch size so the final division by the
        # sample count yields a per-sample average (assumes `criterion` returns
        # a batch mean).
        loss_sum += batch_loss.item() * batch_inputs.size(0)
        predictions = torch.max(logits.data, 1).indices
        n_seen += batch_labels.size(0)
        n_correct += (predictions == batch_labels).sum().item()

    return loss_sum / n_seen, n_correct / n_seen

# Evaluation counterpart of the training pass: no parameter updates are made.
def validate_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on every batch of ``dataloader`` without gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean of the batch
        losses and the fraction of samples whose arg-max output equals the label.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            batch_loss = criterion(logits, batch_labels)

            # Weight by batch size so the final division is a per-sample
            # average (assumes `criterion` returns a batch mean).
            loss_sum += batch_loss.item() * batch_inputs.size(0)
            predictions = torch.max(logits.data, 1).indices
            n_seen += batch_labels.size(0)
            n_correct += (predictions == batch_labels).sum().item()

    return loss_sum / n_seen, n_correct / n_seen

In [None]:
# Per-epoch history (read by the plotting cell) and the best validation loss so far.
train_losses, train_accs, val_losses, val_accs = [], [], [], []
best_val_loss = float('inf')

print("Starting training...")
print("-" * 60)

for epoch in range(num_epochs):
    # Optimisation pass over the training split.
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # Evaluation pass over the validation split.
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # The plateau scheduler is driven by the validation loss; read the learning
    # rate back afterwards so the report shows the value now in effect.
    scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']

    print(
        f'Epoch [{epoch+1}/{num_epochs}]',
        f'Learning Rate: {current_lr:.6f}',
        f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}',
        f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}',
        "-" * 60,
        sep='\n',
    )

    # Checkpoint whenever the validation loss reaches a new minimum.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')
        print(f'Best model saved! Val Loss: {val_loss:.4f}', "-" * 60, sep='\n')

print("Training completed!")

In [None]:
# Left panel: loss curves. Right panel: accuracy curves.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Curves (one point per epoch).
ax1.plot(train_losses, label='Train Loss', marker='o')
ax1.plot(val_losses, label='Validation Loss', marker='o')
ax2.plot(train_accs, label='Train Accuracy', marker='o')
ax2.plot(val_accs, label='Validation Accuracy', marker='o')

# Titles and axis labels.
ax1.set_title('Training and Validation Loss')
ax2.set_title('Training and Validation Accuracy')
ax1.set_xlabel('Epoch')
ax2.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax2.set_ylabel('Accuracy')

# Axis limits: cap the loss axis at 1, anchor the accuracy axis at 0.
ax1.set_ylim(top=1)
ax2.set_ylim(bottom=0)

# Legends and grids.
ax1.legend()
ax2.legend()
ax1.grid(True)
ax2.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Load the best checkpoint (lowest validation loss) for testing.
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU runtime still loads on a CPU-only one.
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.eval()

# Report loss and accuracy on the held-out test split.
test_loss, test_acc = validate_epoch(model, test_loader, criterion, device)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_acc:.4f}')

In [None]:
def predict_with_probabilities(model, image_path, device):
    """Classify a single image file and print the per-class probabilities.

    The image is preprocessed with the module-level ``val_test_transforms``;
    class names are taken from the module-level ``train_dataset``.

    Args:
        model: Classifier to run; switched to evaluation mode.
        image_path: Path of the image file to load.
        device: Device on which inference is performed.

    Returns:
        ``(predicted_class, probabilities)``: the arg-max class index and the
        softmax probabilities as a NumPy array.
    """
    model.eval()

    # Load the file and turn it into a batch containing that single image.
    pil_image = datasets.folder.default_loader(image_path)
    batch = val_test_transforms(pil_image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)
        probabilities = torch.softmax(logits, dim=1)[0].cpu().numpy()
        predicted_class = torch.argmax(logits, 1).item()

    labels = train_dataset.classes

    print(f"Predicted class: {labels[predicted_class]}")
    print(f"Probabilities:")
    for position, label in enumerate(labels):
        prob = probabilities[position]
        print(f"  {label}: {prob:.4f} ({prob*100:.2f}%)")

    return predicted_class, probabilities

import random
# Sample from the files ImageFolder actually indexed instead of globbing for
# '*.png': a test split stored in another image format would leave the glob
# empty and make random.choice raise IndexError.
test_image = Path(random.choice(test_dataset.samples)[0])
print(f"Testing on image: {test_image}")
predict_with_probabilities(model, test_image, device)

In [None]:
# Visualize several predictions
def visualize_predictions(model, dataset, device, num_images=8):
    """Show a grid of randomly chosen images with the model's predictions.

    Each panel displays the untransformed image; its title gives the true
    class and the probability of class index 1, coloured green when the
    arg-max prediction matches the true label and red otherwise.

    Args:
        model: Classifier to run; switched to evaluation mode.
        dataset: Dataset exposing ``samples`` (``(path, label)`` pairs),
            ``classes`` and ``transform``.
        device: Device on which inference is performed.
        num_images: Number of images to show. Values larger than the dataset
            are clamped to ``len(dataset)``; nothing is drawn for values < 1.
    """
    import math  # local import: only needed to size the grid

    model.eval()

    # random.sample raises ValueError when asked for more items than exist.
    num_images = min(num_images, len(dataset))
    if num_images < 1:
        return

    # Select random images
    indices = random.sample(range(len(dataset)), num_images)

    # Size the grid from the request (up to 4 columns) rather than a fixed
    # 2x4; the default of 8 images still yields a 2x4 grid of 16x8 inches.
    cols = min(4, num_images)
    rows = math.ceil(num_images / cols)
    fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False)
    axes = axes.ravel()

    # Blank out the trailing panels of a partially filled last row.
    for ax in axes[num_images:]:
        ax.axis('off')

    class_names = dataset.classes

    for idx, ax in zip(indices, axes):
        image_path, true_label = dataset.samples[idx]

        original_image = datasets.folder.default_loader(image_path)
        image_tensor = dataset.transform(original_image).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(image_tensor)
            probabilities = torch.softmax(output, dim=1)[0].cpu().numpy()
            predicted_label = torch.argmax(output, 1).item()

        ax.imshow(original_image)
        ax.axis('off')

        true_class = class_names[true_label]

        # Show probability of the second class (shiny)
        # If >0.5 - prediction is shiny, if <0.5 - normal
        # NOTE(review): assumes class index 1 is the "shiny" class - confirm
        # against dataset.classes.
        shiny_prob = probabilities[1]

        color = 'green' if predicted_label == true_label else 'red'
        title = f'True: {true_class}\nShiny prob: {shiny_prob:.3f}'
        ax.set_title(title, color=color, fontsize=10)

    plt.tight_layout()
    plt.show()

# Draw eight randomly chosen test-set images together with their predictions.
visualize_predictions(
    model=model,
    dataset=test_dataset,
    device=device,
    num_images=8,
)