<a href="https://colab.research.google.com/github/meisisoiisme/FH-ML_PyTorch/blob/main/FullPipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from sklearn.metrics import confusion_matrix
from PIL import Image
import os
import numpy as np

from google.colab import files
import zipfile

In [18]:
# Upload the synthetic dataset zip file to Colab
uploaded = files.upload()

# Extract the contents of the zip file
dataset_zip_path = next(iter(uploaded))
dataset_root = "./synthetic_dataset"
with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
    zip_ref.extractall(dataset_root)


StopIteration: 

In [52]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = os.listdir(root_dir)
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
        self.images = self.make_dataset()

    def make_dataset(self):
        images = []
        for cls in self.classes:
            class_path = os.path.join(self.root_dir, cls)
            if os.path.isdir(class_path):  # Check if it's a directory
                for img_name in os.listdir(class_path):
                    img_path = os.path.join(class_path, img_name)
                    if os.path.isfile(img_path):  # Check if it's a file
                        item = (img_path, self.class_to_idx[cls])
                        images.append(item)
        return images

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path, label = self.images[idx]
        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, label

In [46]:
class TestDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = self.make_dataset()

    def make_dataset(self):
        images = []
        for img_name in os.listdir(self.root_dir):
            img_path = os.path.join(self.root_dir, img_name)
            item = (img_path, -1)  # Use -1 as a placeholder label for testing
            images.append(item)
        return images

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path, _ = self.images[idx]
        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img

In [48]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=6):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

class LeNet5(nn.Module):
    def __init__(self, num_classes=6):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 120, kernel_size=5),
            nn.Tanh(),
        )
        self.classifier = nn.Sequential(
            nn.Linear(120 * 53 * 53, 84),
            nn.Tanh(),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

class ResNetModel(nn.Module):
    def __init__(self, num_classes=6):
        super(ResNetModel, self).__init__()
        self.model = models.resnet50(pretrained=True)
        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Linear(in_features, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes)
        )

    def forward(self, x):
        return self.model(x)

In [49]:
class ModelTrainer:
    def __init__(self, model, train_loader, val_loader, test_loader, criterion, optimizer, num_epochs=30):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.criterion = criterion
        self.optimizer = optimizer
        self.num_epochs = num_epochs
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def train(self):
        self.model.to(self.device)

        for epoch in range(self.num_epochs):
            self.model.train()
            for inputs, labels in self.train_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()

            self.model.eval()
            val_loss = 0.0
            correct = 0
            total = 0

            with torch.no_grad():
                for inputs, labels in self.val_loader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)

                    outputs = self.model(inputs)
                    loss = self.criterion(outputs, labels)
                    val_loss += loss.item()

                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            print(f'Epoch {epoch+1}/{self.num_epochs}, Loss: {val_loss/len(self.val_loader)}, Validation Accuracy: {correct/total}')

    def evaluate_and_save_results(self):
        self.model.eval()
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in self.test_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                _, preds = torch.max(outputs, 1)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        confusion_matrix_result = confusion_matrix(all_labels, all_preds)
        np.savetxt("CM.csv", confusion_matrix_result)
        return confusion_matrix_result

    def visualize_interpretations(self):
        self.model.eval()

        for inputs in self.test_loader:
            inputs = inputs.to(self.device)
            outputs = self.model(inputs)

            # Visualization code for Grad-CAM or other interpretation methods goes here
            # ...


In [51]:
if __name__ == "__main__":
    # Data transformations
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Load datasets
    train_dataset = CustomDataset(root_dir=dataset_root, transform=transform)
    test_dataset = TestDataset(root_dir=dataset_root, transform=transform)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

    # Build models
    alexnet_model = AlexNet(num_classes=len(train_dataset.classes))
    lenet5_model = LeNet5(num_classes=len(train_dataset.classes))
    resnet_model = ResNetModel(num_classes=len(train_dataset.classes))

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer_alexnet = optim.RMSprop(alexnet_model.parameters(), lr=1e-4)
    optimizer_lenet5 = optim.RMSprop(lenet5_model.parameters(), lr=1e-4)
    optimizer_resnet = optim.RMSprop(resnet_model.parameters(), lr=1e-4)

    # Train and evaluate AlexNet
    trainer_alexnet = ModelTrainer(alexnet_model, train_loader, test_loader, test_loader, criterion, optimizer_alexnet)
    trainer_alexnet.train()

    # Evaluate and save results for AlexNet
    trainer_alexnet.evaluate_and_save_results()

    # Visualize interpretations for AlexNet
    trainer_alexnet.visualize_interpretations()

    # Repeat the same for LeNet5
    trainer_lenet5 = ModelTrainer(lenet5_model, train_loader, test_loader, test_loader, criterion, optimizer_lenet5)
    trainer_lenet5.train()
    trainer_lenet5.evaluate_and_save_results()
    trainer_lenet5.visualize_interpretations()

    # Repeat the same for ResNet
    trainer_resnet = ModelTrainer(resnet_model, train_loader, test_loader, test_loader, criterion, optimizer_resnet)
    trainer_resnet.train()
    trainer_resnet.evaluate_and_save_results()
    trainer_resnet.visualize_interpretations()

IsADirectoryError: [Errno 21] Is a directory: './synthetic_dataset/class3'