In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from PIL import Image
import os
import pandas as pd

# Custom dataset class with labels
class SportBallsDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data_info = self.load_data_info()

    def __len__(self):
        return len(self.data_info)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.data_info.iloc[idx, 0] + '.png')  # Append '.png'
        if not os.path.exists(img_name):
            print(f"File not found: {img_name}")
        image = Image.open(img_name).convert('RGB')
        label = int(self.data_info.iloc[idx, 1])  # Ensure label is an integer
        label = torch.tensor(label, dtype=torch.long)  # Convert to tensor
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

    def load_data_info(self):
        labels_path = os.path.join(self.root_dir, 'labels.csv')
        data_info = pd.read_csv(labels_path, header=None)
        return data_info

# Transforms for resizing and normalization
transform = ToTensor()

# Load datasets
train_dataset = SportBallsDataset(root_dir='Sportballs/Train/', transform=transform)
test_dataset = SportBallsDataset(root_dir='Sportballs/Test/', transform=transform)

# Example usage of the datasets
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Print the length of the datasets
print(f"Number of training examples: {len(train_dataset)}")
print(f"Number of test examples: {len(test_dataset)}")


  warn(


Number of training examples: 10000
Number of test examples: 100


In [2]:
# Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(32 * 32 * 3, 256),
            nn.ReLU(),
            nn.Linear(256, 2)
        )
        self.decoder = nn.Sequential(
            nn.Linear(2, 256),
            nn.ReLU(),
            nn.Linear(256, 32 * 32 * 3),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x = self.decoder(z)
        x = x.view(x.size(0), 3, 32, 32)
        return x

# Instantiate and train the autoencoder
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Autoencoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images = images.to(device)
        outputs = model(images)
        loss = criterion(outputs, images)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Test and calculate reconstruction loss
model.eval()
test_loss = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        loss = criterion(outputs, images)
        test_loss += loss.item()

test_loss /= len(test_loader)
print(f'Test Reconstruction Loss: {test_loss:.4f}')


Epoch [1/5], Loss: 0.0188
Epoch [2/5], Loss: 0.0123
Epoch [3/5], Loss: 0.0134
Epoch [4/5], Loss: 0.0152
Epoch [5/5], Loss: 0.0159
Test Reconstruction Loss: 0.0129


In [3]:
# Variational Autoencoder model
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(32 * 32 * 3, 256),
            nn.ReLU(),
            nn.Linear(256, 4)  # 2 for mean and 2 for log variance
        )
        self.decoder = nn.Sequential(
            nn.Linear(2, 256),
            nn.ReLU(),
            nn.Linear(256, 32 * 32 * 3),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        x = x.view(x.size(0), -1)
        h = self.encoder(x)
        mu, logvar = h[:, :2], h[:, 2:]
        z = self.reparameterize(mu, logvar)
        x = self.decoder(z)
        x = x.view(x.size(0), 3, 32, 32)
        return x, mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

# Instantiate and train the VAE
vae_model = VAE().to(device)
vae_optimizer = optim.Adam(vae_model.parameters(), lr=1e-3)

# Training loop for VAE
for epoch in range(num_epochs):
    vae_model.train()
    for images, labels in train_loader:
        images = images.to(device)
        recon_images, mu, logvar = vae_model(images)
        loss = vae_loss(recon_images, images, mu, logvar)
        vae_optimizer.zero_grad()
        loss.backward()
        vae_optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Test and calculate reconstruction loss for VAE
vae_model.eval()
vae_test_loss = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        recon_images, mu, logvar = vae_model(images)
        loss = vae_loss(recon_images, images, mu, logvar)
        vae_test_loss += loss.item()

vae_test_loss /= len(test_loader.dataset)
print(f'VAE Test Reconstruction Loss: {vae_test_loss:.4f}')


Epoch [1/5], Loss: 9641.1221
Epoch [2/5], Loss: 8153.7861
Epoch [3/5], Loss: 8372.4766
Epoch [4/5], Loss: 8220.1201
Epoch [5/5], Loss: 8310.2217
VAE Test Reconstruction Loss: 519.3874


In [7]:
def compute_accuracy(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            
            z = model.encoder(images.view(images.size(0), -1))
            # recon_images = model.decoder(z)
            _, predicted = torch.max(z.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

# Calculate accuracy for Autoencoder
ae_accuracy = compute_accuracy(model, test_loader)
print(f'Autoencoder Accuracy: {ae_accuracy:.4f}')

# Calculate accuracy for VAE
vae_accuracy = compute_accuracy(vae_model, test_loader)
print(f'VAE Accuracy: {vae_accuracy:.4f}')


Autoencoder Accuracy: 0.2400
VAE Accuracy: 0.2200
