In [None]:
import os
import pandas as pd

def get_image_list(image_dir):
    """
    Collect the names of all non-hidden entries found in a directory.

    Parameters:
    image_dir (str): Path to the directory that holds the image files.

    Returns:
    list: Entry names in the order reported by os.listdir, leaving out any
          name that begins with a dot (e.g. .DS_Store).
    """
    visible_names = []
    for entry_name in os.listdir(image_dir):
        # Dot-prefixed entries are hidden/system files, not images.
        if entry_name.startswith('.'):
            continue
        visible_names.append(entry_name)
    return visible_names

def strip_augmentation_suffix(filename, extension='.jpeg'):
    """
    Map an (optionally augmented) image filename back to its original name.

    Only the first two underscore-separated parts of the filename are kept,
    so a name such as "10_left_aug1.jpeg" becomes "10_left.jpeg", while an
    un-augmented "10_left.jpeg" is returned unchanged.

    Parameters:
    filename (str): Image filename, with or without an augmentation suffix.
    extension (str): Extension the returned name must end with.

    Returns:
    str: The first two underscore-separated parts joined by "_", ending in
         exactly one copy of `extension`.
    """
    base_name = '_'.join(filename.split('_')[:2])
    # Remove the extension as a whole suffix. str.rstrip('.jpeg') treats its
    # argument as a *set of characters* and would keep stripping trailing
    # '.', 'j', 'p', 'e', 'g' characters, corrupting names such as
    # "5_edge" (-> "5_ed") or "12_image" (-> "12_ima").
    if extension and base_name.endswith(extension):
        base_name = base_name[:-len(extension)]
    return base_name + extension


if __name__ == '__main__':
    # Load the train labels CSV.
    # NOTE: this is a machine-specific absolute path; adjust it before running
    # the script on another machine.
    labels_file_path = r"C:\Users\janan\OneDrive\Desktop\AI\AI Project\Final Project\diabetic-retinopathy-detection\trainLabels_original.csv"
    train_labels = pd.read_csv(labels_file_path)

    # Get the list of image files in the resized image directory
    resized_image_dir = './train_resized/resized-256/'
    image_list = get_image_list(resized_image_dir)

    # Add ".jpeg" suffix to image names in the train labels DataFrame so they
    # can be matched against the filenames found on disk
    train_labels['image'] = train_labels['image'] + '.jpeg'

    # Create a new DataFrame from the image list
    augmented_labels = pd.DataFrame({'image': image_list})

    # Create a new column holding the original image name for every file on
    # disk (augmentation suffixes removed)
    augmented_labels['original_image'] = augmented_labels['image'].apply(
        strip_augmentation_suffix
    )

    # Rename columns for clarity: the on-disk filename becomes
    # 'train_image_name' and the original name becomes the merge key 'image'
    augmented_labels.columns = ['train_image_name', 'image']

    # Merge the original train labels with the augmented image data
    merged_labels = pd.merge(train_labels, augmented_labels, how='outer', on='image')

    # Drop rows with missing values: labels without a file on disk, files
    # without a label, and any label row that already contained a missing value
    merged_labels = merged_labels.dropna()

    # Print the shape of the final DataFrame
    print(f"Final merged dataset shape: {merged_labels.shape}")

    # Write the merged DataFrame to a new CSV file
    output_csv_path = './trainLabels_augmented.csv'
    print("Writing to CSV...")
    merged_labels.to_csv(output_csv_path, index=False, header=True)
    print(f"CSV saved at {output_csv_path}")
