## <span style='color: darkgreen'> This portion of code is for finding the images which are under represented for one-hot encodeing. <span>

In [1]:
import torch
from collections import defaultdict
import os


In [2]:
# === Step 1: Load the .pt file ===
# This assumes your .pt file is saved as a list of dicts, each like:
# {'filename': 'mask_XYZ.png', one_hot label}
training_graph_dataset = torch.load('../DTDataset_Class/Graphs/1024/FullDataset/20250410_TrainGraphAugmentedDatasetWithLabels_and_filenames_one_hot.pt')
print(f'\nA sample of the Dataset: \n', training_graph_dataset[0],'\n\n\n', training_graph_dataset[0][0].y)


  training_graph_dataset = torch.load('../DTDataset_Class/Graphs/1024/FullDataset/20250410_TrainGraphAugmentedDatasetWithLabels_and_filenames_one_hot.pt')



A sample of the Dataset: 
 [Data(x=[1024, 7], pos=[1024, 2], edge_index=[2, 6250], y=1), 'ZT76_39_A_4_12'] 


 1


In [11]:
# === step 2: reading the training dataset and storing the file names which indicated as the benign cores
benign_file_names = []
for entry in training_graph_dataset:
    if entry[0].y==0:
        benign_file_names.append(entry[1])
        # print(entry[1])
print('The number of benign images with label healthy is :', len(benign_file_names))
print(f'The number of total images in the dataset is {len(training_graph_dataset)}')
print(f'The number of cancerous images {len(training_graph_dataset) - len(benign_file_names)} ')
print(f'Thus, For each benign image, we need to upsample it with 4 different rigid augmentation. after upsampling, the number of benign images will be {len(benign_file_names) + 4 * len(benign_file_names)}')
print(f'sample of benign file names: {benign_file_names[:5]}')

The number of benign images with label healthy is : 103
The number of total images in the dataset is 641
The number of cancerous images 538 
Thus, For each benign image, we need to upsample it with 4 different rigid augmentation. after upsampling, the number of benign images will be 515
sample of benign file names: ['ZT76_39_A_1_4', 'ZT199_1_A_5_8', 'ZT76_39_A_1_1', 'ZT199_1_A_7_3', 'ZT199_1_B_7_1']


 === Step 3: copy the under-represented images (here is benign files) into the Uder_rep_images folder ===
import shutil
from pathlib import Path

SOURCE_FOLDER = f'../../dataset/images/'
DEST_FOLDER = f'../DTDataset_Class/Under_rep_images/'

for filename in benign_file_names:
    filename = filename + '.jpg'
    if not Path(DEST_FOLDER + filename).exists():
        shutil.copy(SOURCE_FOLDER + filename, DEST_FOLDER)
        print(f'file has been copied: {filename}')

In [14]:
# === Read the benign images from the dataset and apply the rigid augmentation. Then, save them in the augmented folder ===
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import os

SOURCE_FOLDER = f'../../dataset/images/'
DEST_FOLDER = f'../DTDataset_class/AugmentedBenignImages/'
AugmentationNumber = 4

# Define the augmentation pipeline
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.GaussNoise(p=0.2),
    A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5)
])

# read the benign images and apply the augmentation
if not os.path.exists(DEST_FOLDER):
    os.makedirs(DEST_FOLDER)

for i, benign_file_name in enumerate(benign_file_names):
    # load the benign image
    benign_image_path = os.path.join(SOURCE_FOLDER, benign_file_name+'.jpg')
    benign_image = Image.open(benign_image_path)
    benign_image = benign_image.convert('RGB')  # Ensure the image is in RGB format
    benign_image = np.array(benign_image)
    # Apply the augmentation
    for j in range(AugmentationNumber):
        AugmentedImage = transform(image=benign_image)['image']
        # Save the augmented image
        augmented_image_path = os.path.join(DEST_FOLDER, f'{benign_file_name}_aug_{j}.jpg')
        Image.fromarray(AugmentedImage).save(augmented_image_path)
        print(f'Augmented image {j} saved at {augmented_image_path}')



  A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=0.5),


Augmented image 0 saved at ../DTDataset_class/AugmentedBenignImages/ZT76_39_A_1_4_aug_0.jpg
Augmented image 1 saved at ../DTDataset_class/AugmentedBenignImages/ZT76_39_A_1_4_aug_1.jpg
Augmented image 2 saved at ../DTDataset_class/AugmentedBenignImages/ZT76_39_A_1_4_aug_2.jpg
Augmented image 3 saved at ../DTDataset_class/AugmentedBenignImages/ZT76_39_A_1_4_aug_3.jpg
Augmented image 0 saved at ../DTDataset_class/AugmentedBenignImages/ZT199_1_A_5_8_aug_0.jpg
Augmented image 1 saved at ../DTDataset_class/AugmentedBenignImages/ZT199_1_A_5_8_aug_1.jpg
Augmented image 2 saved at ../DTDataset_class/AugmentedBenignImages/ZT199_1_A_5_8_aug_2.jpg
Augmented image 3 saved at ../DTDataset_class/AugmentedBenignImages/ZT199_1_A_5_8_aug_3.jpg
Augmented image 0 saved at ../DTDataset_class/AugmentedBenignImages/ZT76_39_A_1_1_aug_0.jpg
Augmented image 1 saved at ../DTDataset_class/AugmentedBenignImages/ZT76_39_A_1_1_aug_1.jpg
Augmented image 2 saved at ../DTDataset_class/AugmentedBenignImages/ZT76_39_A_1_