In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import torchvision

# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
device  # echo the selected device as the cell output



In [None]:
class PinGroupDataset(Dataset):
    """Image dataset backed by a pandas DataFrame.

    Every row describes one sample. Rows and columns are addressed by
    position (``iloc``), so the frame's index does not have to be contiguous:
    column 0 is read as the image path and column 3 as the label
    (expected to be the ``group_id`` column).

    Args:
        data_frame: Frame holding the image path in column 0 and the label
            in column 3.
        transform: Callable applied to the RGB ``PIL.Image`` of each sample.
            When ``None``, images are resized to 128x128 and converted to a
            tensor.
    """

    def __init__(self, data_frame, transform=None):
        self.data = data_frame
        # Compare against None explicitly: a valid transform may be falsy
        # (for example an empty nn.Sequential) and must not be replaced.
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((128, 128)),  # Resize images to 128x128
                transforms.ToTensor()
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the underlying frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            A ``(image, group_id)`` pair: the transformed RGB image and the
            value stored in column 3 of the row.
        """
        img_path = self.data.iloc[idx, 0]
        group_id = self.data.iloc[idx, 3]  # group_id

        # convert() returns an independent in-memory copy, so the underlying
        # file can be closed right away instead of waiting for garbage
        # collection.
        with Image.open(img_path) as img_file:
            image = img_file.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, group_id

In [3]:
# Read the annotation table from disk
csv_file = 'annotated_data.csv'
full_dataset = pd.read_csv(csv_file)

# Replace the raw group_id values with integer class indices; the fitted
# encoder is kept so the indices can be mapped back later
label_encoder = LabelEncoder()
encoded_group_ids = label_encoder.fit_transform(full_dataset['group_id'])
full_dataset['group_id'] = encoded_group_ids

# Hold out 20% of the rows for validation (fixed seed for a repeatable split)
train_data, val_data = train_test_split(
    full_dataset,
    test_size=0.2,
    random_state=42,
)

# Wrap each split in a dataset that uses the default image transform
train_dataset, val_dataset = (
    PinGroupDataset(data_frame=frame) for frame in (train_data, val_data)
)

# Batch both splits; only the training batches are shuffled
BATCH_SIZE = 16
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [4]:
# Function to show an image
def imshow(img, unnormalize=False):
    """Display a channel-first image tensor with matplotlib.

    Args:
        img: Tensor laid out as (channels, height, width); it is transposed
            to (height, width, channels) for plotting.
        unnormalize: When True, apply ``img / 2 + 0.5`` first, i.e. undo a
            ``Normalize`` with mean 0.5 and std 0.5. Defaults to False because
            the datasets in this notebook only apply ``ToTensor``, whose
            output is already in [0, 1]; shifting it again washes the image
            out.
    """
    if unnormalize:
        img = img / 2 + 0.5  # unnormalize
    # detach()/cpu() make this safe for CUDA tensors and tensors that
    # require grad, both of which reject a direct .numpy() call.
    npimg = img.detach().cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.axis('off')  # Hide axes
    plt.show()

# Pull one batch from the (shuffled) training loader
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Tile the batch into a single grid image and display it
image_grid = torchvision.utils.make_grid(images)
imshow(image_grid)

# Map the encoded labels back to their original values and print them
# side by side, each padded to at least five characters
classes = label_encoder.inverse_transform(labels.numpy())
print(' '.join(f'{name:5s}' for name in classes))





In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN classifier: two conv-conv-pool stages and three linear layers.

    Args:
        num_classes: Number of output logits. The second hidden layer has
            ``num_classes * 50`` units.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 128, the
            size the layer dimensions were originally computed for.

    Raises:
        ValueError: If ``input_size`` is too small to survive the
            convolution and pooling stack (each side must be at least 16).
    """

    def __init__(self, num_classes=10, input_size=128):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3)  # Conv layer with 3x3 kernel, depth=8
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3)  # Conv layer with 3x3 kernel, depth=16
        self.pool1 = nn.MaxPool2d(kernel_size=2)  # Max pooling with 2x2 kernel
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3)  # Conv layer with 3x3 kernel, depth=32
        self.conv4 = nn.Conv2d(32, 64, kernel_size=3)  # Conv layer with 3x3 kernel, depth=64
        self.pool2 = nn.MaxPool2d(kernel_size=2)  # Max pooling with 2x2 kernel

        # Derive the flattened feature size from the input size instead of
        # hard-coding it. For the default 128x128 input:
        #   conv1 -> 126, conv2 -> 124, pool1 -> 62,
        #   conv3 -> 60,  conv4 -> 58,  pool2 -> 29
        feat_h, feat_w = self._feature_map_size(input_size)

        self.fc1 = nn.Linear(64 * feat_h * feat_w, 4096)  # Fully connected with 4096 nodes
        self.fc2 = nn.Linear(4096, num_classes * 50)  # Fully connected with num_classes * 50 nodes
        self.fc3 = nn.Linear(num_classes * 50, num_classes)  # One output logit per class

    @staticmethod
    def _feature_map_size(input_size):
        """Return the (height, width) of the feature map after pool2."""
        try:
            height, width = input_size
        except TypeError:
            # A single number means a square input.
            height = width = input_size

        dims = []
        for side in (height, width):
            side = side - 2 - 2  # conv1 and conv2: each 3x3 unpadded conv trims 2
            side = side // 2     # pool1: 2x2 max pooling, floor division
            side = side - 2 - 2  # conv3 and conv4
            side = side // 2     # pool2
            if side < 1:
                raise ValueError(
                    f'input_size {input_size!r} is too small for this network; '
                    'each side must be at least 16'
                )
            dims.append(side)
        return tuple(dims)

    def forward(self, x):
        """Map a batch of images (N, 3, H, W) to class logits (N, num_classes)."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = torch.flatten(x, 1)  # Flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [None]:


import torch.optim as optim

# Model with one output logit per encoded class, trained with cross-entropy
# and SGD with momentum.
net = Net(num_classes=len(label_encoder.classes_)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

NUM_EPOCHS = 10

for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times
    net.train()  # make sure any mode-dependent layers are in training mode

    running_loss = 0.0
    num_batches = 0
    for i, (inputs, labels) in enumerate(train_loader):
        # move the batch to the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics; keep the float under its own name instead of
        # rebinding `loss`
        batch_loss = loss.item()
        running_loss += batch_loss
        num_batches += 1
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {batch_loss:.3f}')

        # No per-batch torch.cuda.empty_cache(): it does not give PyTorch
        # more memory and forces the caching allocator to re-allocate on
        # every step, which only slows training down.

    if num_batches:
        print(f'[{epoch + 1}] mean loss: {running_loss / num_batches:.3f}')

print('Finished Training')





In [18]:
# Per-class top-k evaluation on the validation set. A sample counts as
# correct when its true class is among the model's k highest-scoring classes
# (k = 3, capped at the number of output classes). One correctly classified
# image per class is kept for display.
classes = label_encoder.classes_
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}
correct_examples = {classname: None for classname in classes}

net.eval()  # evaluation mode for any mode-dependent layers

# Again, no gradients needed
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)  # same device as the model
        outputs = net(inputs)

        # topk raises if k exceeds the number of classes, so cap it
        top_k = min(3, outputs.size(1))
        top_indices = torch.topk(outputs, top_k, dim=1).indices

        # Convert once per batch to plain Python ints; this avoids a
        # device synchronisation for every per-sample membership test.
        for img, label, top in zip(inputs, labels.tolist(), top_indices.tolist()):
            classname = classes[label]
            total_pred[classname] += 1
            if label in top:
                correct_pred[classname] += 1
                # Store one correct example per class
                if correct_examples[classname] is None:
                    correct_examples[classname] = img.cpu()

# Print accuracy for each class (this is the top-k hit rate computed above)
for classname, correct_count in correct_pred.items():
    if total_pred[classname] > 0:  # Avoid division by zero
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
    else:
        print(f'Accuracy for class: {classname:5s} is N/A (no predictions)')

# Show one correct image for each class
for classname, img in correct_examples.items():
    if img is not None:
        print(f"Correct example for class {classname}:")
        imshow(img)












