In [37]:
#import library
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import pickle

In [38]:
# Per-channel mean / std used for normalization (the "Normalize for VGG16"
# values from the original cell), shared by both pipelines so they cannot drift.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Side length, in pixels, of the square crop produced by RandomResizedCrop.
TRAIN_CROP_SIZE = 128

# Preprocessing transformations: deterministic conversion of a PIL image to a
# normalized tensor, with no randomness.
# NOTE(review): there is no Resize/CenterCrop here, so images keep their
# original size while the augmented pipeline outputs TRAIN_CROP_SIZE crops.
# Confirm that this size mismatch is intended.
preprocessing_transforms = transforms.Compose([
    # transforms.CenterCrop(224),  # disabled: crop the central 224x224 region
    transforms.ToTensor(),  # PIL image -> float tensor
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Augmentation transformations: random augmentation followed by the same
# tensor conversion and normalization as above. It takes a PIL image as input.
augmentation_transforms = transforms.Compose([
    # --- steps applied while the sample is still a PIL image ---
    transforms.RandomRotation(30),  # rotate by up to +/-30 degrees
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomResizedCrop(TRAIN_CROP_SIZE),  # random crop, resized to 128x128
    transforms.RandomAffine(degrees=30, translate=(0.1, 0.1), scale=(0.8, 1.2)),
    # interpolation=3 is the PIL integer code for bicubic resampling.
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5, interpolation=3),
    # transforms.RandomGrayscale(p=0.1),  # disabled: grayscale with 10% probability

    # --- conversion to a normalized tensor ---
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),

    # --- tensor-only steps ---
    # RandomErasing works on tensors, not PIL images, so it must come after
    # ToTensor; it was previously placed before the conversion.
    transforms.RandomErasing(p=0.5),
])

In [39]:
# Training pipeline. augmentation_transforms already contains its own ToTensor
# and Normalize steps, so it is used on its own. Chaining it after
# preprocessing_transforms would normalize twice and hand an already-converted
# tensor to the second ToTensor, which only accepts PIL images / ndarrays.
train_transform = augmentation_transforms

In [40]:
# Validation pipeline: only the deterministic preprocessing, with no random
# augmentation. This is an alias of preprocessing_transforms, not a copy.
valid_transform = preprocessing_transforms

In [41]:
# Dataset root. The class folders ('Keratoconus', 'Normal', 'Suspect') sit one
# level down, inside a nested directory that repeats its parent's name, so the
# folder name is joined onto the parent path once more.
dataset_path = os.path.join(
    r'C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation sets',
    'Train_Validation sets',
)

In [42]:
# Index the images under dataset_path with ImageFolder; class labels are taken
# from the sub-directory names. preprocessing_transforms is applied to each
# image when a sample is fetched, not when this index is built.
dataset = datasets.ImageFolder(root=dataset_path, transform=preprocessing_transforms)

In [43]:
# Collect the path of every .jpg file in the dataset (case-insensitive match on
# the extension). The paths are read straight from the ImageFolder index
# (dataset.imgs holds (path, class_index) pairs), so no image is decoded or
# transformed just to inspect its filename.
jpg_files = [
    image_path
    for image_path, _ in dataset.imgs
    if image_path.lower().endswith('.jpg')
]

# Report the count, then list each matching path.
print(f'Found {len(jpg_files)}.jpg images in the dataset:')
for file in jpg_files:
    print(file)

Found 2961.jpg images in the dataset:
C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation sets\Train_Validation sets\Keratoconus\case1\KCN_1_CT_A.jpg
C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation sets\Train_Validation sets\Keratoconus\case1\KCN_1_EC_A.jpg
C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation sets\Train_Validation sets\Keratoconus\case1\KCN_1_EC_P.jpg
C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation sets\Train_Validation sets\Keratoconus\case1\KCN_1_Elv_A.jpg
C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation sets\Train_Validation sets\Keratoconus\case1\KCN_1_Elv_P.jpg
C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation sets\Train_Validation sets\Keratoconus\case1\KCN_1_Sag_A.jpg
C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation sets\Train_Validation sets\Keratoconus\case1\KCN_1_Sag_P.jpg
C:\Users\user\Documents\!TA\!TA\cornealtopography\Train_Validation se

In [44]:
# Split the images 80% / 20% into training and validation subsets.
# NOTE(review): the file listing shows several images per case folder
# (e.g. Keratoconus/case1). An image-level random split can place images of the
# same case in both subsets; consider splitting by case instead.
SPLIT_SEED = 42  # fixed so the split is the same after Restart & Run All

train_size = int(0.8 * len(dataset))  # 80% for training
valid_size = len(dataset) - train_size  # remainder (about 20%) for validation

# random_split is used only to draw the two disjoint index lists.
train_split, valid_split = random_split(
    dataset,
    [train_size, valid_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Both subsets returned by random_split wrap the SAME `dataset` object, so
# assigning `subset.dataset.transform` for one split also changes the other
# (the last assignment wins, leaving training with the validation transform).
# Each split therefore gets its own ImageFolder carrying its own transform, and
# only the drawn indices are reused. ImageFolder lists files in sorted order,
# so an index refers to the same file in every instance built from
# dataset_path.
# train_transform / valid_transform must accept the PIL images that
# ImageFolder's loader produces.
train_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=dataset_path, transform=train_transform),
    train_split.indices,
)
valid_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=dataset_path, transform=valid_transform),
    valid_split.indices,
)

In [45]:
# Mini-batch size shared by both loaders (the alternative value left in the
# original cell was 32).
batch = 64

# Build both loaders in one pass: the training loader shuffles its samples,
# the validation loader keeps them in order.
train_loader, valid_loader = (
    DataLoader(subset, batch_size=batch, shuffle=reshuffle)
    for subset, reshuffle in ((train_dataset, True), (valid_dataset, False))
)

In [46]:
# Serialize each DataLoader object (not the image data itself) to its own
# pickle file in the current working directory.
for pickle_name, loader in (
    ("train_loader.pkl", train_loader),
    ("valid_loader.pkl", valid_loader),
):
    with open(pickle_name, "wb") as handle:
        pickle.dump(loader, handle)

In [47]:
# Show the class names discovered by ImageFolder.
# They are read from the full `dataset`, not from train_dataset / valid_dataset:
# the subsets returned by random_split are torch Subset wrappers, which do not
# expose a `.classes` attribute of their own.

class_names = dataset.classes
print(f'Classes : {class_names}')

Classes : ['Keratoconus', 'Normal', 'Suspect']


In [48]:
# # Check the number of images in each class
# class_names = dataset.classes
# print(f"Classes found: {class_names}")