In [17]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [18]:
class CustomImageDataset(Dataset):
    def __init__(self, img_dir, dataframe, transform=None):
        self.img_labels = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        # img_name = self.img_labels.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, f"{idx}.png")
        image = Image.open(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        return image, label

In [19]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 32 * 32, 512)  # Adjust the input features to match your image size
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 32 * 32)  # Flatten the tensor
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

In [20]:
# Define a transform to normalize the data
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize images if they are not the same size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalization parameters for pre-trained models
])

In [21]:
# Load CSV file
csv_file = 'generated_datasets/animals/stable_diffusion_1_0/256/labels.csv'
img_dir = 'generated_datasets/animals/stable_diffusion_1_0/256'
label_frame = pd.read_csv(csv_file)

# Map string labels to integer indices
class_to_idx = {'dog': 0, 'cat': 1, 'horse': 2, 'spider': 3, 'butterfly': 4, 
                'chicken': 5, 'sheep': 6, 'cow': 7, 'squirrel': 8, 'elephant': 9}

# Add a new column to the DataFrame with the mapped integer labels
label_frame['label_idx'] = label_frame['label'].map(class_to_idx)

# Split the data into training and validation sets
train_df, val_df = train_test_split(label_frame, test_size=0.2, random_state=42)

# Create the custom datasets
train_dataset = CustomImageDataset(img_dir=img_dir, dataframe=train_df, transform=transform)
val_dataset = CustomImageDataset(img_dir=img_dir, dataframe=val_df, transform=transform)

# Instantiate the model
num_classes = 10  # Change this to your number of classes
model = SimpleCNN(num_classes)

In [22]:
# Create the custom datasets
train_dataset = CustomImageDataset(img_dir=img_dir, dataframe=train_df, transform=transform)
val_dataset = CustomImageDataset(img_dir=img_dir, dataframe=val_df, transform=transform)

# DataLoader for batching operations
batch_size = 4
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Choose an optimizer and criterion
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for multi-class classification

# Transfer the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=131072, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [23]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    # Training pass
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validation pass
    model.eval()
    val_running_loss = 0.0
    correct_preds = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            correct_preds += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    val_loss = val_running_loss / len(val_loader)
    val_accuracy = correct_preds / len(val_dataset)
    print(f'Epoch {epoch + 1}, Training Loss: {train_loss}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')

print('Finished Training')

Epoch 1, Training Loss: 2.3059205758571624, Validation Loss: 2.3032289657592773, Validation Accuracy: 0.092
Epoch 2, Training Loss: 2.3012804951667785, Validation Loss: 2.303448634147644, Validation Accuracy: 0.096
Epoch 3, Training Loss: 2.295215271472931, Validation Loss: 2.3047938690185545, Validation Accuracy: 0.098
Epoch 4, Training Loss: 2.2749225630760193, Validation Loss: 2.319379331588745, Validation Accuracy: 0.086
Epoch 5, Training Loss: 2.2062627804279327, Validation Loss: 2.4266917839050293, Validation Accuracy: 0.1
Finished Training
