# Importing Libraries

In [None]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models, utils
from torchvision.models import inception_v3
from torchvision.datasets import ImageFolder, DatasetFolder
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch.autograd import Function
import torchvision.models as models
import torchvision.transforms as transforms
import shutil
import cv2
import random
from tqdm.notebook import tqdm
import cv2
import torch.nn as nn
import torch.nn.functional as F
import torchvision.utils as vutils
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

# Setup Dataset

In [None]:
# Notebook-wide setup: silence warnings, enable module auto-reloading and
# inline plots, then configure the seaborn / matplotlib look.
# NOTE: this silences *every* warning for the rest of the session.
warnings.filterwarnings('ignore')
# IPython magics (valid only inside a notebook/IPython session).
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Statement order matters below: the seaborn theme is applied first, then the
# matplotlib style sheet, then the individual rcParams override.
sns.set()
sns.set_palette('bwr')
SNS_CMAP = 'bwr'
plt.style.use("dark_background")
plt.rcParams['grid.color'] = '#444444'
# Colours of the 'bwr' palette, kept for later plotting code.
colors = sns.palettes.color_palette(SNS_CMAP)
# Turn off pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None

# Generated Dataset

In [None]:
# Preprocessing for the generated images:
#   1. force every image to 256x256,
#   2. convert the PIL image to a tensor,
#   3. normalise the single channel with mean 0.5 and std 0.5.
transform_generated = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
)

In [None]:
class GeneratedDataset(Dataset):
    """Grayscale image dataset backed by one sub-folder of ``root_dir``.

    ``subset`` selects which folder below ``root_dir`` is listed
    (``generated``, ``real_used`` or ``real_not_used``); the selected folder
    must exist.  Each sample is returned as ``(image, label)`` where
    ``label`` is the subset name.

    Args:
        root_dir: Directory that holds the subset folders.
        transform: Optional callable applied to the PIL image of each sample.
        subset: Name of the folder to load; defaults to ``"generated"``.

    Raises:
        ValueError: If ``subset`` is not one of the known folder names.
    """

    def __init__(self, root_dir, transform=None, subset="generated"):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.subset = subset

        self.folders = {
            "real_used": os.path.join(root_dir, "real_used"),
            "real_not_used": os.path.join(root_dir, "real_not_used"),
            "generated": os.path.join(root_dir, "generated")
        }

        # Fail loudly instead of silently producing an empty dataset.
        if subset not in self.folders:
            raise ValueError(
                f"Unknown subset {subset!r}; expected one of {sorted(self.folders)}"
            )

        folder = self.folders[subset]
        # os.listdir returns entries in arbitrary order, so sort to keep
        # sample indices reproducible between runs.
        for img_name in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, img_name)
            # Skip sub-directories (e.g. ".ipynb_checkpoints"); they are not images.
            if os.path.isfile(img_path):
                self.image_paths.append((img_path, subset))

    def __len__(self):
        """Return the number of image files found for the selected subset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        Raises:
            OSError: If the file cannot be read or decoded as an image.
        """
        img_path, label = self.image_paths[idx]
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise OSError(f"Could not read image file: {img_path}")

        # convert NumPy array to PIL Image for transformation
        image = Image.fromarray(image)

        if self.transform is not None:
            image = self.transform(image)

        return image, label

Note: The actual dataset is not included in the repository, to maintain privacy. Replace the placeholder path below with the path to your local copy of the dataset.

In [None]:
# Dataset over the "generated" images and a loader that yields shuffled
# mini-batches of 16 samples.
dataset_generated = GeneratedDataset(
    root_dir="include dataset path here",
    subset="generated",
    transform=transform_generated,
)
dataloader_generated = DataLoader(
    dataset=dataset_generated,
    shuffle=True,
    batch_size=16,
)

print(f"Total generated images: {len(dataset_generated)}")

In [None]:
def show(dataset, num_images=10):
    """Display the first ``num_images`` samples of ``dataset`` in one row.

    Args:
        dataset: Indexable, sized collection whose items are
            ``(image, label)``; ``image`` must support
            ``.squeeze(0).numpy()`` (presumably a 1xHxW tensor).
        num_images: Maximum number of samples to draw.  Fewer are drawn when
            the dataset is smaller; nothing is drawn when the count is zero.
    """
    # Never index past the end of the dataset.
    num_images = min(num_images, len(dataset))
    if num_images <= 0:
        # plt.subplots cannot create a grid with zero columns.
        return

    # squeeze=False keeps `axes` two-dimensional even for a single image,
    # where the default would hand back a bare Axes object.
    fig, axes = plt.subplots(1, num_images, figsize=(20, 20), squeeze=False)
    for i, ax in enumerate(axes[0]):
        image, label = dataset[i]

        image = image.squeeze(0).numpy()

        # normalize for display; a constant image has zero range, so only
        # shift it (all zeros) instead of dividing by zero.
        lowest = image.min()
        value_range = image.max() - lowest
        image = image - lowest
        if value_range > 0:
            image = image / value_range

        ax.imshow(image, cmap='gray')
        ax.axis('off')
        ax.set_title(f"{label}")

    plt.show()

# Preview the first samples of the generated dataset (default: 10 images).
show(dataset_generated)

# Real Used Dataset

In [3]:
# Preprocessing for the "real used" images: resize to a fixed 256x256,
# convert to a tensor, then normalise the single channel (mean 0.5, std 0.5).
transform_real_used = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
)

In [None]:
class RealUsedDataset(Dataset):
    """Grayscale image dataset over an explicit list of file paths.

    Every sample is returned as ``(image, "real_used")``.

    Args:
        image_paths: Sequence of image file paths, one per sample.
        transform: Optional callable applied to the PIL image of each sample.
    """

    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, "real_used")``.

        Raises:
            OSError: If the file cannot be read or decoded as an image.
        """
        img_path = self.image_paths[idx]
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise OSError(f"Could not read image file: {img_path}")
        # convert NumPy array to PIL Image for transformation
        image = Image.fromarray(image)

        if self.transform is not None:
            image = self.transform(image)

        return image, "real_used"

In [None]:
# RealUsedDataset takes an explicit list of image paths (it has no
# root_dir/subset parameters), so gather the files from the "real_used"
# folder under the dataset root first.
real_used_dir = os.path.join("include dataset path here", "real_used")
real_used_paths = sorted(
    os.path.join(real_used_dir, name)
    for name in os.listdir(real_used_dir)
    # keep regular files only; sub-directories are not images
    if os.path.isfile(os.path.join(real_used_dir, name))
)

dataset_real_used = RealUsedDataset(image_paths=real_used_paths, transform=transform_real_used)
dataloader_real_used = DataLoader(dataset_real_used, batch_size=16, shuffle=True)

print(f"Total real used images: {len(dataset_real_used)}")

# Real Not Used Dataset

In [None]:
# Preprocessing for the "real not used" images: resize to a fixed 256x256,
# convert to a tensor, then normalise the single channel (mean 0.5, std 0.5).
transform_real_not_used = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
)

In [None]:
class RealNotUsedDataset(Dataset):
    """Grayscale image dataset over an explicit list of file paths.

    Every sample is returned as ``(image, "real_not_used")``.

    Args:
        image_paths: Sequence of image file paths, one per sample.
        transform: Optional callable applied to the PIL image of each sample.
    """

    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, "real_not_used")``.

        Raises:
            OSError: If the file cannot be read or decoded as an image.
        """
        img_path = self.image_paths[idx]
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise OSError(f"Could not read image file: {img_path}")
        # convert NumPy array to PIL Image for transformation
        image = Image.fromarray(image)

        if self.transform is not None:
            image = self.transform(image)

        return image, "real_not_used"

In [None]:
# RealNotUsedDataset takes an explicit list of image paths (it has no
# root_dir/subset parameters), so gather the files from the "real_not_used"
# folder under the dataset root first.
real_not_used_dir = os.path.join("include dataset path here", "real_not_used")
real_not_used_paths = sorted(
    os.path.join(real_not_used_dir, name)
    for name in os.listdir(real_not_used_dir)
    # keep regular files only; sub-directories are not images
    if os.path.isfile(os.path.join(real_not_used_dir, name))
)

dataset_real_not_used = RealNotUsedDataset(image_paths=real_not_used_paths, transform=transform_real_not_used)
dataloader_real_not_used = DataLoader(dataset_real_not_used, batch_size=16, shuffle=True)

print(f"Total real not used images: {len(dataset_real_not_used)}")