In [1]:
import pandas as pd
import re

In [2]:

def _parse_score_line(line, expected_count=3):
    """
    Parse one bracketed line such as '[0.90, 0.85, 0.88]' into a list of floats.

    :param line: Stripped line that starts with '[' and ends with ']'.
    :param expected_count: Number of comma-separated values a score line must hold.
    :return: List of floats, or None when the line is not a score line (wrong
             number of values, or a value that float() cannot parse, e.g. a
             '[INFO] ... [ok]' log tag). 'nan' entries parse to float NaN.
    """
    fields = line.strip("[]").split(",")
    if len(fields) != expected_count:
        return None
    try:
        return [float(field.strip()) for field in fields]
    except ValueError:
        return None


def get_subject_dict(file_path):
    """
    Reads a file line-by-line, extracts three dice scores (TC, WT, ET) from lines
    in bracket format (e.g., '[0.90, 0.85, 0.88]'), and associates them with
    subject IDs found in lines beginning with './prediction_results' that match
    pattern 'BraTS-\\w+-\\d+-\\d+'.

    Each score line is paired with the NEXT subject line only. Once assigned,
    the scores are consumed, so a subject line that has no score line of its
    own is skipped instead of inheriting the previous subject's scores.
    Bracketed lines that do not hold exactly three numeric values are ignored.

    :param file_path: Path to the text file containing dice scores and subjects.
    :return: Dictionary mapping subject_id -> [TC_score, WT_score, ET_score]
    """
    subject_dict = {}
    # Scores read but not yet assigned to a subject; None means nothing pending.
    pending_scores = None

    # Extracts the subject ID from lines that start with './prediction_results'
    subject_regex = re.compile(r'BraTS-\w+-\d+-\d+')

    with open(file_path, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()

            if line.startswith("[") and line.endswith("]"):
                parsed = _parse_score_line(line)
                # Non-score bracketed lines leave any pending scores untouched.
                if parsed is not None:
                    pending_scores = parsed

            elif line.startswith("./prediction_results"):
                match = subject_regex.search(line)
                if match and pending_scores is not None:
                    subject_dict[match.group()] = pending_scores
                    # Consume the scores so they cannot leak onto a later subject.
                    pending_scores = None

    return subject_dict


In [3]:
# Input files.
# NOTE(review): these are absolute, machine-specific paths; the notebook will not
# run elsewhere until they are pointed at a local copy of the data.
combined_path = r"C:\Users\abjaw\OneDrive\Documents\GitHub\mshead_3d\playground\brats_organwise_score\combined_sizes.csv"

test_brats_ablation_max_pool = r"C:\Users\abjaw\OneDrive\Documents\GitHub\mshead_3d\playground\brats_organwise_score\test_brats_ablation_max_pool.59565521.txt"
test_brats_residual_up_idwt_dec = r"C:\Users\abjaw\OneDrive\Documents\GitHub\mshead_3d\playground\brats_organwise_score\test_brats_residual_up_idwt_dec.59391381.txt"
test_brats_residual_up_simple_ref = r"C:\Users\abjaw\OneDrive\Documents\GitHub\mshead_3d\playground\brats_organwise_score\test_brats_residual_up_simple_ref.59623506.txt"
test_brats_simple_up_idwt_dec = r"C:\Users\abjaw\OneDrive\Documents\GitHub\mshead_3d\playground\brats_organwise_score\test_brats_simple_up_idwt_dec.59614094.txt"


# Per-case TC / WT / ET sizes
df = pd.read_csv(combined_path)
# Per-case dice scores parsed from the selected run's log
# (swap the argument for one of the other test_brats_* paths to analyse another run)
subject_dict = get_subject_dict(test_brats_simple_up_idwt_dec)

# One row per subject; the dict keys (subject IDs) become the 'Case Name' column
# so the frame can later be joined to `df` on that column.
subject_df = (
    pd.DataFrame.from_dict(subject_dict, orient='index', columns=['TC_Dice', 'WT_Dice', 'ET_Dice'])
    .rename_axis('Case Name')
    .reset_index()
)

# NaN check
print(subject_df.isnull().sum())
# Only a cell's last expression is rendered automatically, so the first preview
# has to be displayed explicitly (display() is provided by the IPython kernel).
display(subject_df.head())
df.head()

Case Name    0
TC_Dice      1
WT_Dice      0
ET_Dice      2
dtype: int64


Unnamed: 0,Case Name,TC size,WT size,ET size
0,BraTS-GLI-01295-000,24987.0,38684.0,15671.0
1,BraTS-GLI-00099-001,16998.0,97770.0,8384.0
2,BraTS-GLI-00568-000,63310.0,224832.0,42721.0
3,BraTS-GLI-01035-000,905.0,3745.0,800.0
4,BraTS-GLI-00736-000,16797.0,44711.0,11613.0


In [4]:
# Label given to rows whose size is missing (NaN compares False against every
# threshold, so such rows never receive S / M / L). One spelling is used for all
# three regions so these rows form a single, consistent group.
MISSING_SIZE_LABEL = 'NaN'


def categorize_by_size(sizes, small_cutoff, medium_cutoff):
    """
    Label each size as Small / Medium / Large.

    :param sizes: pandas Series of sizes.
    :param small_cutoff: Sizes strictly below this value are labelled "S".
    :param medium_cutoff: Sizes in [small_cutoff, medium_cutoff) are labelled "M";
                          sizes at or above this value are labelled "L".
    :return: Series of labels aligned with `sizes`; entries matching no range
             (e.g. NaN sizes) keep MISSING_SIZE_LABEL.
    """
    labels = pd.Series(MISSING_SIZE_LABEL, index=sizes.index)
    labels.loc[sizes < small_cutoff] = "S"
    labels.loc[(sizes >= small_cutoff) & (sizes < medium_cutoff)] = "M"
    labels.loc[sizes >= medium_cutoff] = "L"
    return labels


# TC: fixed cut-offs.
# Alternative (percentile-based): df["TC size"].quantile(0.33) / .quantile(0.66)
small_threshold_tc = 20000
medium_threshold_tc = 40000
print("TC Thresholds: ", small_threshold_tc, medium_threshold_tc)
df['TC_category'] = categorize_by_size(df["TC size"], small_threshold_tc, medium_threshold_tc)

# WT: cut-offs taken from the data itself
small_threshold = df["WT size"].quantile(0.33) # 33rd percentile
medium_threshold = df["WT size"].quantile(0.66) # 66th percentile
print("WT Thresholds: ", small_threshold, medium_threshold)
df['WT_category'] = categorize_by_size(df["WT size"], small_threshold, medium_threshold)

# ET: fixed cut-offs ("observed" values).
# Alternative (percentile-based): df["ET size"].quantile(0.33) / .quantile(0.66)
small_threshold_et = 12000
medium_threshold_et = 30000
print("ET Thresholds: ", small_threshold_et, medium_threshold_et)
df['ET_category'] = categorize_by_size(df["ET size"], small_threshold_et, medium_threshold_et)

# save the dataframe to a csv file
df.to_csv("Size_wise_categories.csv", index=False)

# Preview; kept as the last expression so the notebook actually renders it.
df.head()


TC Thresholds:  20000 40000
WT Thresholds:  70989.13 118825.38
ET Thresholds:  12000 30000


In [5]:
# combine the two dataframes based on the Case Name
combined_df = pd.merge(df, subject_df, on='Case Name') 
combined_df['ET_Dice'] = combined_df['ET_Dice'].fillna(0)
combined_df['WT_Dice'] = combined_df['WT_Dice'].fillna(0)
combined_df['TC_Dice'] = combined_df['TC_Dice'].fillna(0)
combined_df.head()
# generate csv for combined_df
# combined_df.to_csv('combined_df.csv', index=False)

Unnamed: 0,Case Name,TC size,WT size,ET size,TC_category,WT_category,ET_category,TC_Dice,WT_Dice,ET_Dice
0,BraTS-GLI-01295-000,24987.0,38684.0,15671.0,M,S,M,0.96606,0.949927,0.937995
1,BraTS-GLI-00099-001,16998.0,97770.0,8384.0,S,M,S,0.895147,0.946903,0.938851
2,BraTS-GLI-00568-000,63310.0,224832.0,42721.0,L,L,L,0.949258,0.966584,0.928768
3,BraTS-GLI-01035-000,905.0,3745.0,800.0,S,S,S,0.856014,0.526647,0.828996
4,BraTS-GLI-00736-000,16797.0,44711.0,11613.0,S,S,S,0.952795,0.855557,0.909364


In [6]:
def _mean_dice(frame, category_column, dice_column):
    """Return (mean dice per size category, mean dice over all rows) for one region."""
    per_category = frame.groupby(category_column)[dice_column].mean()
    overall = frame[dice_column].mean()
    return per_category, overall


# ET: mean dice per size category, followed by the overall mean
mean_et, mean_et_from_df = _mean_dice(combined_df, 'ET_category', 'ET_Dice')
print(mean_et, mean_et_from_df)

# WT
mean_wt, mean_wt_from_df = _mean_dice(combined_df, 'WT_category', 'WT_Dice')
print(mean_wt, mean_wt_from_df)

# TC
mean_tc, mean_tc_from_df = _mean_dice(combined_df, 'TC_category', 'TC_Dice')
print(mean_tc, mean_tc_from_df)


ET_category
L    0.919181
M    0.913733
S    0.773357
Name: ET_Dice, dtype: float64 0.871139259883113
WT_category
L    0.949159
M    0.939949
S    0.906059
Name: WT_Dice, dtype: float64 0.9318287084526227
TC_category
L    0.936968
M    0.928515
S    0.838105
Name: TC_Dice, dtype: float64 0.9029650079887149
