In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [2]:
device = torch.device('cuda:0')

In [3]:
train_df = pd.read_csv('data/train.csv', index_col=0)
test_df = pd.read_csv('data/valid.csv', index_col=0)
train_df.shape, test_df.shape

((7000, 5), (3000, 5))

In [4]:
IMG_SIZE = 256
BATCH_SIZE = 32

In [5]:
data_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])
full_train_dataset = datasets.ImageFolder(
    root='data/rvf10k/train',
    transform=data_transform
)

train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset, [train_size, val_size]
)

train_data_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4
)
val_data_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4
)

test_dataset = datasets.ImageFolder(
    root='data/rvf10k/valid',
    transform=data_transform
)
test_data_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4
)

In [7]:
# Define the CNN architecture
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5, padding='same')
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5, padding='same')
        self.fc1 = nn.Linear(16 * (IMG_SIZE//4) * (IMG_SIZE//4), 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2) # 2 for binary classification

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model, loss function, and optimizer
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
for epoch in range(8): # Number of epochs
    running_loss = 0.0
    for inputs, labels in train_data_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for inputs, labels in val_data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    print(
        f"Epoch {epoch + 1}",
        "Training Loss:", '{:.5f}'.format(running_loss / len(train_data_loader)),
        "Validation Loss:", '{:.5f}'.format(val_loss / len(val_data_loader))
    )

torch.save(model.state_dict(), 'model.pt')

Epoch 1 Training Loss: 0.66477 Validation Loss: 0.58825
Epoch 2 Training Loss: 0.51229 Validation Loss: 0.53835
Epoch 3 Training Loss: 0.34110 Validation Loss: 0.54084
Epoch 4 Training Loss: 0.16849 Validation Loss: 0.69874
Epoch 5 Training Loss: 0.07419 Validation Loss: 0.90802
Epoch 6 Training Loss: 0.02577 Validation Loss: 1.22364
Epoch 7 Training Loss: 0.02204 Validation Loss: 1.48247


KeyboardInterrupt: 

In [None]:
from sklearn.metrics import accuracy_score

# Set the model to evaluation mode
model.eval()

# Disable gradient calculation
with torch.no_grad():
    predictions = []
    for inputs, _ in test_data_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        # Assuming the model outputs logits, apply softmax to get probabilities
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        # Round probabilities to get class predictions
        _, predicted = torch.max(probabilities, 1)
        predictions.extend(predicted.tolist())

accuracy_score(test_df.label, predictions)

0.499

In [None]:
accuracy_score(test_df.label, np.random.randint(0, 2, size=3000))

0.497