In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image

import shutil

In [2]:
# Resolve the input dataset and both output datasets relative to the
# notebook's current working directory.
cwd = os.getcwd()
original_dataset_dir, resnet_dataset_dir, efficientnet_dataset_dir = (
    os.path.join(cwd, folder)
    for folder in ("dataset", "resnet_dataset", "efficientnet_dataset")
)

# Make sure each output dataset has its train/test/val sub-folders
# (exist_ok=True keeps the cell safe to re-run).
for dataset_dir in (resnet_dataset_dir, efficientnet_dataset_dir):
    for split in ("train", "test", "val"):
        os.makedirs(os.path.join(dataset_dir, split), exist_ok=True)

In [3]:
# Read the header-less mapping file of each split; every row is given the
# column names "filename" and "class".
train_df, test_df, val_df = (
    pd.read_csv(
        os.path.join(original_dataset_dir, part, f"{part}_mapping.csv"),
        header=None,
        names=["filename", "class"],
    )
    for part in ("train", "test", "val")
)

In [4]:
# Normalization functions
def normalize_resnet(image):
    """Round-trip an image through the [-1, 1] range and back to 8-bit.

    Pixels are scaled to [0, 1], mapped to [-1, 1], then mapped back to
    [0, 255] so the result can be stored as an ordinary image. The [-1, 1]
    representation only exists inside this function; for 8-bit input the
    returned image carries the same pixel values it was given.

    Args:
        image: A PIL image, or any array-like of pixel values in [0, 255].

    Returns:
        A new ``PIL.Image.Image`` built from the uint8 pixel array.
    """
    pixels = np.array(image) / 255.0        # [0, 1]
    centered = 2 * pixels - 1               # [-1, 1]
    restored = (centered + 1) / 2 * 255     # back to [0, 255], still float
    # Round before casting: astype(np.uint8) truncates toward zero, and the
    # subtract/add round trip leaves some values a hair below their integer
    # (grey level 1 comes back as 0.999..., which truncation turns into 0).
    restored = np.clip(np.rint(restored), 0, 255)
    return Image.fromarray(restored.astype(np.uint8))

def normalize_efficientnet(image):
    """Scale an image to [0, 1] and convert it straight back to 8-bit.

    Args:
        image: A PIL image, or any array-like of pixel values in [0, 255].

    Returns:
        A new ``PIL.Image.Image`` built from the uint8 pixel array.
    """
    unit_scaled = np.asarray(image) / 255.0            # [0, 1]
    as_bytes = (unit_scaled * 255).astype(np.uint8)    # back to [0, 255]
    return Image.fromarray(as_bytes)

In [5]:
# General processing function
def process_images(df, source_dir, dest_dir, split_name, normalize_fn, resize_size, to_rgb=True):
    """Preprocess every image listed in ``df`` and save it under ``dest_dir``.

    Each file ``<source_dir>/<split_name>/<filename>`` is converted to
    grayscale, optionally expanded to three channels, resized, passed through
    ``normalize_fn`` and written to ``<dest_dir>/<split_name>/<filename>``.

    Args:
        df: DataFrame with a "filename" column; no other column is read.
        source_dir: Root folder of the original dataset.
        dest_dir: Root folder of the dataset being produced.
        split_name: Sub-folder name used under both roots (e.g. "train").
        normalize_fn: Callable taking a PIL image and returning a PIL image.
        resize_size: ``(width, height)`` tuple handed to ``Image.resize``.
        to_rgb: When True, the grayscale image is converted to "RGB".

    Returns:
        None. The destination split folder is created even when ``df`` is
        empty.
    """
    # Both folders are the same for every row, so resolve them once.
    src_folder = os.path.join(source_dir, split_name)
    dest_folder = os.path.join(dest_dir, split_name)
    os.makedirs(dest_folder, exist_ok=True)

    for img_name in df["filename"]:
        # convert() returns a new in-memory image, so the source file can be
        # closed as soon as the grayscale copy exists instead of staying open.
        with Image.open(os.path.join(src_folder, img_name)) as src_img:
            img = src_img.convert("L")           # Convert to grayscale
        if to_rgb:
            img = img.convert("RGB")             # Convert to 3 channels
        img = img.resize(resize_size)
        img = normalize_fn(img)
        img.save(os.path.join(dest_folder, img_name))

In [6]:
# Build the ResNet copy of every split: images resized to 224x224 and passed
# through normalize_resnet. Splits are handled in train, test, val order.
for split, df in (("train", train_df), ("test", test_df), ("val", val_df)):
    process_images(
        df,
        original_dataset_dir,
        resnet_dataset_dir,
        split,
        normalize_resnet,
        resize_size=(224, 224),
    )

print("ResNet dataset processed and normalized successfully.")

ResNet dataset processed and normalized successfully.


In [7]:
# Build the EfficientNet B3 copy of every split: images resized to 300x300 and
# passed through normalize_efficientnet. Splits run in train, test, val order.
for split, df in (("train", train_df), ("test", test_df), ("val", val_df)):
    process_images(
        df,
        original_dataset_dir,
        efficientnet_dataset_dir,
        split,
        normalize_efficientnet,
        resize_size=(300, 300),
    )

print("EfficientNet B3 dataset processed and normalized successfully.")

EfficientNet B3 dataset processed and normalized successfully.


In [8]:
# Write each split's mapping table next to its processed images, once per
# output dataset, in the same header-less layout the tables were read in.
split_csvs = dict(train=train_df, test=test_df, val=val_df)

for dataset_dir in (resnet_dataset_dir, efficientnet_dataset_dir):
    for split in split_csvs:
        df = split_csvs[split]
        csv_path = os.path.join(dataset_dir, split, split + "_mapping.csv")
        df.to_csv(csv_path, header=False, index=False)
        print("Saved mapping CSV to {}".format(csv_path))

print("All mapping CSVs added to each split folder.")


Saved mapping CSV to C:\Users\Safi\Desktop\Masters\UTArlington\Sem2\ML\Project\resnet_dataset\train\train_mapping.csv
Saved mapping CSV to C:\Users\Safi\Desktop\Masters\UTArlington\Sem2\ML\Project\resnet_dataset\test\test_mapping.csv
Saved mapping CSV to C:\Users\Safi\Desktop\Masters\UTArlington\Sem2\ML\Project\resnet_dataset\val\val_mapping.csv
Saved mapping CSV to C:\Users\Safi\Desktop\Masters\UTArlington\Sem2\ML\Project\efficientnet_dataset\train\train_mapping.csv
Saved mapping CSV to C:\Users\Safi\Desktop\Masters\UTArlington\Sem2\ML\Project\efficientnet_dataset\test\test_mapping.csv
Saved mapping CSV to C:\Users\Safi\Desktop\Masters\UTArlington\Sem2\ML\Project\efficientnet_dataset\val\val_mapping.csv
All mapping CSVs added to each split folder.
