In [1]:
import pandas as pd
import re

In [2]:

def get_subject_dict(file_path):
    r"""
    Parse an evaluation log into a mapping of subject ID -> dice scores.

    The file is read line by line. A line that is entirely bracketed and holds
    comma-separated numbers (e.g. '[0.90, 0.85, 0.88]') is remembered as the
    pending scores. The next line that starts with './prediction_results' and
    contains an ID matching 'BraTS-\w+-\d+-\d+' receives those scores.

    Bracketed lines that are not purely numeric (e.g. '[INFO]' or '[]') are
    skipped instead of aborting the parse. Pending scores are consumed once
    assigned, so a subject line without a score line of its own never inherits
    the scores of the previous subject.

    :param file_path: Path to the text file containing dice scores and subjects.
    :return: Dictionary mapping subject_id -> list of floats parsed from the
             score line (documented by the caller as [TC, WT, ET]).
    """
    subject_dict = {}
    # Scores read from the most recent bracketed line, waiting for a subject line
    last_scores = None

    # Extracts the subject ID from a './prediction_results...' path line
    subject_regex = re.compile(r'BraTS-\w+-\d+-\d+')

    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()

            # Candidate score line: fully enclosed in brackets
            if line.startswith("[") and line.endswith("]"):
                try:
                    parsed = [float(x.strip()) for x in line.strip("[]").split(",")]
                except ValueError:
                    # Bracketed but not numeric (log tag, empty list, ...):
                    # ignore it and keep whatever scores are still pending.
                    continue
                last_scores = parsed

            # Subject path line: attach the pending scores, if any
            elif line.startswith("./prediction_results"):
                match = subject_regex.search(line)
                if match and last_scores is not None:
                    subject_dict[match.group()] = last_scores
                    # Consume the scores so they cannot leak onto a later subject
                    last_scores = None

    return subject_dict


In [3]:
# Input files: the per-case size table (CSV) and the evaluation log holding dice scores
combined_path = r"C:\Users\abjaw\OneDrive\Documents\GitHub\mshead_3d\playground\brats_organwise_score\combined_sizes.csv"
test_log_path = r"C:\Users\abjaw\OneDrive\Documents\GitHub\mshead_3d\playground\brats_organwise_score\test_brats_residual_up_idwt_dec.59391381.txt"

# Size table, one row per case
df = pd.read_csv(combined_path)

# Parse the log into {case name: scores}, then lay it out as one row per case
# with the case name moved from the index into a 'Case Name' column
subject_dict = get_subject_dict(test_log_path)
subject_df = (
    pd.DataFrame.from_dict(subject_dict, orient='index', columns=['TC_Dice', 'WT_Dice', 'ET_Dice'])
    .reset_index()
    .rename(columns={'index': 'Case Name'})
)

# Missing-value count per column
print(subject_df.isna().sum())
subject_df.head()

Case Name    0
TC_Dice      0
WT_Dice      0
ET_Dice      3
dtype: int64


Unnamed: 0,Case Name,TC_Dice,WT_Dice,ET_Dice
0,BraTS-GLI-01055-000,0.975964,0.967935,0.949857
1,BraTS-GLI-00736-000,0.94634,0.827341,0.89978
2,BraTS-GLI-01115-000,0.979522,0.951861,0.970134
3,BraTS-GLI-01272-000,0.952981,0.938604,0.895306
4,BraTS-GLI-01392-000,0.994134,0.973888,0.98488


In [13]:
def categorize_by_size(sizes, low_q=0.33, high_q=0.66, missing_label="NaN"):
    """
    Label each value of a numeric Series as small, medium, or large.

    Thresholds are the `low_q` and `high_q` quantiles of `sizes` itself.
    Values below the lower threshold become "S", values from the lower up to
    (but excluding) the upper threshold become "M", and values at or above the
    upper threshold become "L". Entries matched by none of the three
    comparisons (e.g. missing sizes) keep `missing_label`.

    :param sizes: pandas Series of sizes.
    :param low_q: Quantile separating "S" from "M".
    :param high_q: Quantile separating "M" from "L".
    :param missing_label: Placeholder for entries that fall in no category.
    :return: Tuple (categories, lower threshold, upper threshold); `categories`
             is a Series sharing the index of `sizes`.
    """
    small_threshold = sizes.quantile(low_q)
    medium_threshold = sizes.quantile(high_q)

    categories = pd.Series(missing_label, index=sizes.index, dtype=object)
    categories.loc[sizes < small_threshold] = "S"
    categories.loc[(sizes >= small_threshold) & (sizes < medium_threshold)] = "M"
    categories.loc[sizes >= medium_threshold] = "L"
    return categories, small_threshold, medium_threshold


# Add ET_category, WT_category and TC_category (in that column order), each
# derived from the matching "<region> size" column. All three share a single
# placeholder spelling ("NaN"), which pd.read_csv also treats as missing when
# the saved CSV is loaded again.
for region in ("ET", "WT", "TC"):
    categories, small_threshold, medium_threshold = categorize_by_size(df[f"{region} size"])
    df[f"{region}_category"] = categories
    print(f"{region} Thresholds: ", small_threshold, medium_threshold)

# save the dataframe to a csv file
df.to_csv("Size_wise_categories.csv", index=False)


ET Thresholds:  12660.14 29463.86
WT Thresholds:  70989.13 118825.38
TC Thresholds:  20574.2 44159.9


In [5]:
# Join the size/category table with the dice scores on 'Case Name'.
# This is an inner join: only cases present in both tables are kept.
# Missing dice values are deliberately left unfilled (no fillna(0)),
# and the merged table is not written to disk here.
combined_df = df.merge(subject_df, how="inner", on='Case Name')
combined_df.head()

Unnamed: 0,Case Name,TC size,WT size,ET size,ET_category,WT_category,TC_category,TC_Dice,WT_Dice,ET_Dice
0,BraTS-GLI-01295-000,24987.0,38684.0,15671.0,M,S,M,0.972407,0.961154,0.946566
1,BraTS-GLI-00099-001,16998.0,97770.0,8384.0,S,M,S,0.973439,0.943915,0.931894
2,BraTS-GLI-00568-000,63310.0,224832.0,42721.0,L,L,L,0.96897,0.965737,0.948388
3,BraTS-GLI-01035-000,905.0,3745.0,800.0,S,S,S,0.883316,0.814574,0.852891
4,BraTS-GLI-00736-000,16797.0,44711.0,11613.0,S,S,S,0.94634,0.827341,0.89978


In [12]:
def _report_dice(frame, region):
    """
    Print and return dice statistics for one region.

    Prints, in order: the number of rows per size category, the mean dice
    score per size category, and the mean dice score over all rows.

    :param frame: DataFrame with '<region>_category' and '<region>_Dice' columns.
    :param region: Region prefix used in the column names (e.g. 'ET').
    :return: Tuple (per-category mean Series, overall mean).
    """
    category_col = f"{region}_category"
    dice_col = f"{region}_Dice"

    print(frame[category_col].value_counts())

    per_category = frame.groupby(category_col)[dice_col].mean()
    print(per_category)

    overall = frame[dice_col].mean()
    print(overall)

    return per_category, overall


mean_et, mean_et_from_df = _report_dice(combined_df, 'ET')
mean_wt, mean_wt_from_df = _report_dice(combined_df, 'WT')
mean_tc, mean_tc_from_df = _report_dice(combined_df, 'TC')



ET_category
L    85
S    83
M    82
Name: count, dtype: int64
ET_category
L    0.932000
M    0.926672
S    0.818350
Name: ET_Dice, dtype: float64
0.8934213828651077
WT_category
L    85
S    83
M    82
Name: count, dtype: int64
WT_category
L    0.952419
M    0.945777
S    0.914077
Name: WT_Dice, dtype: float64
0.937510774825614
TC_category
L    85
S    83
M    82
Name: count, dtype: int64
TC_category
L    0.942590
M    0.954285
S    0.859221
Name: TC_Dice, dtype: float64
0.9187473466995455
