In [6]:
import os
import shutil
import glob
import numpy as np
import pandas as pd



In [None]:
# Root folders: per-participant CSV exports, and the image tree that holds the
# 'train' and 'val' subfolders.
data_path = '/glucose-prediction/Dataset/CSV'
image_path = '/glucose-prediction/RP_Images/'

# Accumulates (image path, glucose level) pairs across all participants.
glucose_mappings = list()

# Sort key for image paths, based on the numerical identifiers in the filename.
def sort_key(filepath):
    """Return the two integer identifiers embedded in an image filename.

    The filename (last path component) is split on underscores; the second
    field and the third field (up to its first '.') are parsed as integers,
    so a name shaped like ``COMBO_001_12.<ext>`` yields ``(1, 12)``.

    Args:
        filepath: Path to an image whose filename has at least three
            underscore-separated fields, the 2nd and 3rd starting with digits.

    Returns:
        A ``(int, int)`` tuple usable as a sort key.

    Raises:
        IndexError: If the filename has fewer than three underscore fields.
        ValueError: If either identifier field is not an integer.
    """
    # Use os.path.basename rather than splitting on '/': the paths passed in
    # are built with os.path.join, so the separator is platform-dependent.
    filename = os.path.basename(filepath)
    parts = filename.split('_')
    return int(parts[1]), int(parts[2].split('.')[0])

# List each split folder once up front: the listing does not depend on the
# participant, so re-reading it inside the participant loop is wasted I/O.
# Order ('train' then 'val') is kept so ties in the sort below resolve the same way.
split_listings = []
for split in ['train', 'val']:
    images_folder = os.path.join(image_path, split)
    split_listings.append((images_folder, os.listdir(images_folder)))

for i in range(1, 15):
    participant_id = str(i).zfill(3)
    participant_dir = os.path.join(data_path, f"P{i}")
    dexcom_file = os.path.join(participant_dir, f"Dexcom_{participant_id}.csv")
    dexcom_df = pd.read_csv(dexcom_file)
    glucose_values = dexcom_df['Glucose Value (mg/dL)']

    # The trailing underscore makes this an exact match on the identifier field;
    # a bare 'COMBO_001' prefix would also accept any longer identifier that
    # merely starts with the same digits.
    participant_prefix = f'COMBO_{participant_id}_'
    all_image_files = [
        os.path.join(images_folder, img_f)
        for images_folder, folder_files in split_listings
        for img_f in folder_files
        if img_f.startswith(participant_prefix)
    ]

    # Order the images by the numeric identifiers in their filenames.
    all_image_files.sort(key=sort_key)

    # Images are paired with CSV rows purely by position (j-th image <-> j-th row).
    # NOTE(review): this assumes the CSV rows are in the same order as the image
    # indices and start at the same sample -- confirm against how the images
    # were generated.
    if len(all_image_files) > len(glucose_values):
        raise IndexError(
            f"Participant {participant_id}: found {len(all_image_files)} images but "
            f"only {len(glucose_values)} rows in {dexcom_file}"
        )

    # Pair each image path with the glucose value at the same position.
    glucose_mappings.extend(
        zip(all_image_files, glucose_values.iloc[:len(all_image_files)])
    )

# Persist the full image -> glucose table for the downstream train/val split.
glucose_levels_df = pd.DataFrame(glucose_mappings, columns=['image_name', 'glucose_level'])
glucose_levels_df.to_csv('/glucose-prediction/Mappings/glucose_mappings.csv', index=False)


In [None]:

glucose_levels_df = pd.read_csv('/glucose-prediction/Mappings/glucose_mappings.csv')

# Assign each row to a split from the name of the folder that directly contains
# the image. A plain substring test on the whole path would also fire on a base
# directory or filename that happens to contain 'train' or 'val', which could
# place a row in the wrong split or in both.
split_names = glucose_levels_df['image_name'].map(
    lambda image_file: os.path.basename(os.path.dirname(image_file))
)
train_df = glucose_levels_df[split_names == 'train']
val_df = glucose_levels_df[split_names == 'val']

# Write one mapping file per split.
train_df.to_csv('/glucose-prediction/Mappings/train_mappings.csv', index=False)

val_df.to_csv('/glucose-prediction/Mappings/val_mappings.csv', index=False)
