In [3]:
import pandas as pd

df = pd.read_excel('breast_fastMRI_final.xlsx')
print(df[['Patient Coded Name', 'Data split (0=training, 1=testing)', 'Lesion status (0 = negative, 1= malignancy, 2= benign)', 'Laterality (1=right, 2=left)']].head())

   Patient Coded Name  Data split (0=training, 1=testing)  \
0  fastMRI_breast_001                                   0   
1  fastMRI_breast_002                                   0   
2  fastMRI_breast_003                                   0   
3  fastMRI_breast_004                                   0   
4  fastMRI_breast_005                                   0   

   Lesion status (0 = negative, 1= malignancy, 2= benign)  \
0                                                  2        
1                                                  2        
2                                                  2        
3                                                  2        
4                                                  2        

  Laterality (1=right, 2=left)  
0                          NaN  
1                          NaN  
2                          NaN  
3                          NaN  
4                          NaN  


# Split Train/Test and Calculate Counts

In [40]:

# List of incompatible patient names
incompatible_dirs = ['fastMRI_breast_272', 'fastMRI_breast_070', 'fastMRI_breast_237', 
                         'fastMRI_breast_114', 'fastMRI_breast_156', 'fastMRI_breast_279', 
                         'fastMRI_breast_289', 'fastMRI_breast_299', 'fastMRI_breast_159', 
                         'fastMRI_breast_048', 'fastMRI_breast_194', 'fastMRI_breast_240', 'fastMRI_breast_281']


# Step 1: Remove incompatible patient names
df_filtered = df[~df["Patient Coded Name"].isin(incompatible_dirs)]

# Step 2: Create separate lists for training and test sets
train_patients = df_filtered[df_filtered["Data split (0=training, 1=testing)"] == 0]["Patient Coded Name"].tolist()
test_patients = df_filtered[df_filtered["Data split (0=training, 1=testing)"] == 1]["Patient Coded Name"].tolist()

train_lesion_counts = df_filtered[df_filtered["Data split (0=training, 1=testing)"] == 0]["Lesion status (0 = negative, 1= malignancy, 2= benign)"].value_counts()
test_lesion_counts = df_filtered[df_filtered["Data split (0=training, 1=testing)"] == 1]["Lesion status (0 = negative, 1= malignancy, 2= benign)"].value_counts()

train_lesion_laterality = df_filtered[df_filtered["Data split (0=training, 1=testing)"] == 0][df_filtered["Lesion status (0 = negative, 1= malignancy, 2= benign)"] == 1]["Laterality (1=right, 2=left)"].value_counts()
test_lesion_laterality = df_filtered[df_filtered["Data split (0=training, 1=testing)"] == 1][df_filtered["Lesion status (0 = negative, 1= malignancy, 2= benign)"] == 1]["Laterality (1=right, 2=left)"].value_counts()

# Print results
print("Training Patients:", train_patients)
print("Test Patients:", test_patients)

print("Training Patients:", len(train_patients))
print("Test Patients:", len(test_patients))

print("Training Lesion Counts:", train_lesion_counts)
print("Test Lesion Counts:", test_lesion_counts)

print("Training Laterality Counts:", train_lesion_laterality)
print("Test Laterality Counts:", test_lesion_laterality)


Training Patients: ['fastMRI_breast_001', 'fastMRI_breast_002', 'fastMRI_breast_003', 'fastMRI_breast_004', 'fastMRI_breast_005', 'fastMRI_breast_007', 'fastMRI_breast_008', 'fastMRI_breast_009', 'fastMRI_breast_010', 'fastMRI_breast_012', 'fastMRI_breast_013', 'fastMRI_breast_014', 'fastMRI_breast_015', 'fastMRI_breast_016', 'fastMRI_breast_020', 'fastMRI_breast_022', 'fastMRI_breast_024', 'fastMRI_breast_027', 'fastMRI_breast_031', 'fastMRI_breast_032', 'fastMRI_breast_033', 'fastMRI_breast_036', 'fastMRI_breast_037', 'fastMRI_breast_038', 'fastMRI_breast_039', 'fastMRI_breast_040', 'fastMRI_breast_041', 'fastMRI_breast_042', 'fastMRI_breast_043', 'fastMRI_breast_044', 'fastMRI_breast_045', 'fastMRI_breast_046', 'fastMRI_breast_047', 'fastMRI_breast_049', 'fastMRI_breast_050', 'fastMRI_breast_051', 'fastMRI_breast_052', 'fastMRI_breast_053', 'fastMRI_breast_054', 'fastMRI_breast_055', 'fastMRI_breast_056', 'fastMRI_breast_057', 'fastMRI_breast_059', 'fastMRI_breast_061', 'fastMRI_bre

  train_lesion_laterality = df_filtered[df_filtered["Data split (0=training, 1=testing)"] == 0][df_filtered["Lesion status (0 = negative, 1= malignancy, 2= benign)"] == 1]["Laterality (1=right, 2=left)"].value_counts()
  test_lesion_laterality = df_filtered[df_filtered["Data split (0=training, 1=testing)"] == 1][df_filtered["Lesion status (0 = negative, 1= malignancy, 2= benign)"] == 1]["Laterality (1=right, 2=left)"].value_counts()


# Move Test Data to Separate Folder

In [49]:
import os
import shutil

# Define directories
parent_directory = "/ess/scratch/scratch1/rachelgordon/pre_contrast_fs/cs_maps"  # Directory containing all patient folders
destination_directory = "/ess/scratch/scratch1/rachelgordon/test_data/pre_contrast_fs/cs_maps"  # Where test patient folders will be moved


# Ensure the destination directory exists
os.makedirs(destination_directory, exist_ok=True)

# Move matching directories
for patient in test_patients:
    patient_dir = os.path.join(parent_directory, patient + '_1_cs_maps')
    
    # Check if directory exists before moving
    if os.path.isdir(patient_dir):
        shutil.move(patient_dir, os.path.join(destination_directory, patient + '_1_cs_maps'))
        print(f"Moved: {patient_dir} -> {destination_directory}")
    else:
        print(f"Directory not found: {patient_dir}")

print("All test patient directories have been moved.")


Moved: /ess/scratch/scratch1/rachelgordon/pre_contrast_fs/cs_maps/fastMRI_breast_006_1_cs_maps -> /ess/scratch/scratch1/rachelgordon/test_data/pre_contrast_fs/cs_maps
Moved: /ess/scratch/scratch1/rachelgordon/pre_contrast_fs/cs_maps/fastMRI_breast_011_1_cs_maps -> /ess/scratch/scratch1/rachelgordon/test_data/pre_contrast_fs/cs_maps
Moved: /ess/scratch/scratch1/rachelgordon/pre_contrast_fs/cs_maps/fastMRI_breast_017_1_cs_maps -> /ess/scratch/scratch1/rachelgordon/test_data/pre_contrast_fs/cs_maps
Moved: /ess/scratch/scratch1/rachelgordon/pre_contrast_fs/cs_maps/fastMRI_breast_018_1_cs_maps -> /ess/scratch/scratch1/rachelgordon/test_data/pre_contrast_fs/cs_maps
Moved: /ess/scratch/scratch1/rachelgordon/pre_contrast_fs/cs_maps/fastMRI_breast_019_1_cs_maps -> /ess/scratch/scratch1/rachelgordon/test_data/pre_contrast_fs/cs_maps
Moved: /ess/scratch/scratch1/rachelgordon/pre_contrast_fs/cs_maps/fastMRI_breast_021_1_cs_maps -> /ess/scratch/scratch1/rachelgordon/test_data/pre_contrast_fs/cs_map

# Rename/Reorganize Data

In [None]:
# rename cs maps directories
import os

# Define the parent directory containing the subdirectories
parent_directory = "/ess/scratch/scratch1/rachelgordon/complex_fully_sampled/cs_maps"  # Change this to your directory

# Iterate through all items in the directory
for dir_name in os.listdir(parent_directory):
    old_path = os.path.join(parent_directory, dir_name)

    # Ensure it is a directory
    if os.path.isdir(old_path):
        # Check if it already ends with '_cs_maps'
        if not dir_name.endswith("_cs_maps"):
            new_name = dir_name + "_cs_maps"
            new_path = os.path.join(parent_directory, new_name)

            # Rename the directory
            os.rename(old_path, new_path)
            print(f"Renamed: {dir_name} -> {new_name}")

print("Renaming completed.")


Renamed: fastMRI_breast_096_1 -> fastMRI_breast_096_1_cs_maps
Renamed: fastMRI_breast_050_1 -> fastMRI_breast_050_1_cs_maps
Renamed: fastMRI_breast_141_1 -> fastMRI_breast_141_1_cs_maps
Renamed: fastMRI_breast_251_1 -> fastMRI_breast_251_1_cs_maps
Renamed: fastMRI_breast_111_1 -> fastMRI_breast_111_1_cs_maps
Renamed: fastMRI_breast_014_1 -> fastMRI_breast_014_1_cs_maps
Renamed: fastMRI_breast_032_1 -> fastMRI_breast_032_1_cs_maps
Renamed: fastMRI_breast_004_1 -> fastMRI_breast_004_1_cs_maps
Renamed: fastMRI_breast_270_1 -> fastMRI_breast_270_1_cs_maps
Renamed: fastMRI_breast_161_1 -> fastMRI_breast_161_1_cs_maps
Renamed: fastMRI_breast_040_1 -> fastMRI_breast_040_1_cs_maps
Renamed: fastMRI_breast_173_1 -> fastMRI_breast_173_1_cs_maps
Renamed: fastMRI_breast_288_1 -> fastMRI_breast_288_1_cs_maps
Renamed: fastMRI_breast_091_1 -> fastMRI_breast_091_1_cs_maps
Renamed: fastMRI_breast_179_1 -> fastMRI_breast_179_1_cs_maps
Renamed: fastMRI_breast_285_1 -> fastMRI_breast_285_1_cs_maps
Renamed: