In [1]:
import pandas as pd 
import numpy as np

In [2]:
methods = ['CTBET', 'Robust-CTBET', 'SynthStrip', 'HD-CTBET', 'CT_BET', 'Brainchop', 'CTbet_Docker']
exclude_prefixes = ("6046", "6084", "6096", "6246", "6315", "6342", "6499")

brainchop_exclude = ['6109-317_20150302_0647_ct.png', '6142-308_20150610_0707_ct.png', '6193-324_20150924_1431_ct.png', '6257-335_20160118_1150_ct.png',
                     '6418-193_20161228_1248_ct.png', '6470-296_20170602_0607_ct.png', '6480-154_20170622_0937_ct.png']

dockerctbet_exclude = ['6109-317_20150302_0647_ct.png', '6142-308_20150610_0707_ct.png', '6193-324_20150924_1431_ct.png', '6257-335_20160118_1150_ct.png',
                       '6418-193_20161228_1248_ct.png', '6470-296_20170602_0607_ct.png', '6480-154_20170622_0937_ct.png']

all_results = []

for method in methods:
    print(f"Processing method: {method}")
    # Read the single CSV file with the new columns
    csv_file = pd.read_csv(f'/Users/rushil/brain_extraction/data/qc/{method}/{method}_final_v2.csv')
    if method == 'Brainchop':
        csv_file = csv_file[~csv_file['Filename'].isin(brainchop_exclude)]
        print(f"Filtered out {len(brainchop_exclude)} files from brainchop")
        
    if method == 'CTbet_Docker':
        csv_file = csv_file[~csv_file['Filename'].isin(dockerctbet_exclude)]
        print(f"Filtered out {len(dockerctbet_exclude)} files from dockerctbet")    

    
    # Apply exclusions based on filename prefixes
    csv_file["basename"] = csv_file["Filename"].str.split("/").str[-1]

    # Apply exclusions on the basename
    mask = ~csv_file["basename"].str.startswith(exclude_prefixes)
    print(f"Excluding {len(csv_file) - mask.sum()} files from {method}")
    csv_file = csv_file[mask]
            
    # Define the failure columns to check
    failure_columns = ['1 - Volumetrics', '2 - Registration', '3 - DL']
    available_columns = [col for col in failure_columns if col in csv_file.columns]
    print(f"Available failure columns for {method}: {available_columns}")
    
    if method != 'CT_BET':
        total_count = 499
    else:
        total_count = 461
        
    csv_file['Subject_ID'] = csv_file['Filename'].apply(lambda x: x.split('_')[0])
    csv_file['Scan_Date'] = csv_file['Filename'].apply(lambda x: x.split('_')[1])
    csv_file['Scan_Time'] = csv_file['Filename'].apply(lambda x: x.split('_')[2])
    csv_file = csv_file.groupby('Subject_ID').agg({
        '1 - Volumetrics': lambda x: 'yes' if 'yes' in x.values else 'no',
        '2 - Registration': lambda x: 'yes' if 'yes' in x.values else 'no',
        '3 - DL': lambda x: 'yes' if 'yes' in x.values else 'no'
    }).reset_index()
    
    # Count failures for each individual column
    volumetrics_count = len(csv_file[csv_file['1 - Volumetrics'] == 'yes']) if '1 - Volumetrics' in available_columns else 0
    registration_count = len(csv_file[csv_file['2 - Registration'] == 'yes']) if '2 - Registration' in available_columns else 0
    dl_count = len(csv_file[csv_file['3 - DL'] == 'yes']) if '3 - DL' in available_columns else 0
    
    # Count total failures (any failure in any column)
    total_failures_count = len(csv_file[csv_file[available_columns].eq('yes').any(axis=1)]) if available_columns else 0
    
    # Count multiple failures (failures in 2 or more columns)
    yes_counts_per_row = csv_file[available_columns].eq('yes').sum(axis=1) if available_columns else pd.Series([0] * len(csv_file))
    multiple_failures_count = len(csv_file[yes_counts_per_row >= 2])
    
    # Create results dataframe
    df = pd.DataFrame({
        'Method': [method],
        'Total_Count': total_count,
        'Total_Failure_Count': [total_failures_count],
        'Total_Failure_Rate': [total_failures_count / total_count * 100 if total_count > 0 else 0],
        'Volumetrics_Count': [volumetrics_count],
        'Volumetrics_Rate': [volumetrics_count / total_count * 100 if total_count > 0 else 0],
        'Registration_Count': [registration_count],
        'Registration_Rate': [registration_count / total_count * 100 if total_count > 0 else 0],
        'DL_Count': [dl_count],
        'DL_Rate': [dl_count / total_count * 100 if total_count > 0 else 0],
        'Multiple_Failures_Count': [multiple_failures_count],
        'Multiple_Failures_Rate': [multiple_failures_count / total_count * 100 if total_count > 0 else 0]
    })
    
    all_results.append(df)

# Combine all results
final_results = pd.concat(all_results, ignore_index=True)

# Define column order
column_order = [
    "Method",
    "Total_Count",
    "Total_Failure_Count", "Total_Failure_Rate",
    "Volumetrics_Count", "Volumetrics_Rate",
    "Registration_Count", "Registration_Rate",
    "DL_Count", "DL_Rate",
    "Multiple_Failures_Count", "Multiple_Failures_Rate"
]

final_results = final_results[column_order]
final_results.to_csv('/Users/rushil/brain_extraction/results/qualitative/subj_lvl/Rushil_QC_subj_results.csv', index=False)
print("Results saved to CSV file")
final_results

Processing method: CTBET
Excluding 2 files from CTBET
Available failure columns for CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Processing method: Robust-CTBET
Excluding 0 files from Robust-CTBET
Available failure columns for Robust-CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Processing method: SynthStrip
Excluding 40 files from SynthStrip
Available failure columns for SynthStrip: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Processing method: HD-CTBET
Excluding 15 files from HD-CTBET
Available failure columns for HD-CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Processing method: CT_BET
Excluding 5 files from CT_BET
Available failure columns for CT_BET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Processing method: Brainchop
Filtered out 7 files from brainchop
Excluding 5 files from Brainchop
Available failure columns for Brainchop: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Processing method: CTbet_Docker
Filtered out 7 files fro

Unnamed: 0,Method,Total_Count,Total_Failure_Count,Total_Failure_Rate,Volumetrics_Count,Volumetrics_Rate,Registration_Count,Registration_Rate,DL_Count,DL_Rate,Multiple_Failures_Count,Multiple_Failures_Rate
0,CTBET,499,169,33.867735,168,33.667335,118,23.647295,126,25.250501,126,25.250501
1,Robust-CTBET,499,14,2.805611,14,2.805611,8,1.603206,9,1.803607,9,1.803607
2,SynthStrip,499,308,61.723447,307,61.523046,136,27.254509,182,36.472946,190,38.076152
3,HD-CTBET,499,105,21.042084,105,21.042084,0,0.0,0,0.0,0,0.0
4,CT_BET,461,315,68.329718,314,68.112798,249,54.013015,126,27.331887,271,58.785249
5,Brainchop,499,222,44.488978,222,44.488978,167,33.466934,60,12.024048,167,33.466934
6,CTbet_Docker,499,52,10.420842,52,10.420842,22,4.408818,28,5.611222,38,7.61523


In [3]:
# Craniotomy Subject-Level Analysis
methods = ['CTBET', 'Robust-CTBET', 'SynthStrip', 'HD-CTBET', 'CT_BET', 'Brainchop', 'CTbet_Docker']
exclude_prefixes = ("6046", "6084", "6096", "6246", "6315", "6342", "6499")

brainchop_exclude = ['6109-317_20150302_0647_ct.png', '6142-308_20150610_0707_ct.png', '6193-324_20150924_1431_ct.png', '6257-335_20160118_1150_ct.png',
                     '6418-193_20161228_1248_ct.png', '6470-296_20170602_0607_ct.png', '6480-154_20170622_0937_ct.png']

dockerctbet_exclude = ['6109-317_20150302_0647_ct.png', '6142-308_20150610_0707_ct.png', '6193-324_20150924_1431_ct.png', '6257-335_20160118_1150_ct.png',
                       '6418-193_20161228_1248_ct.png', '6470-296_20170602_0607_ct.png', '6480-154_20170622_0937_ct.png']

craniotomy_results = []

for method in methods:
    print(f"Processing craniotomy subject-level results for method: {method}")
    # Read the CSV file with the new columns
    csv_file = pd.read_csv(f'/Users/rushil/brain_extraction/data/qc/{method}/{method}_final_v2.csv')
    
    if method == 'Brainchop':
        csv_file = csv_file[~csv_file['Filename'].isin(brainchop_exclude)]
        print(f"Filtered out {len(brainchop_exclude)} files from brainchop")
        
    if method == 'CTbet_Docker':
        csv_file = csv_file[~csv_file['Filename'].isin(dockerctbet_exclude)]
        print(f"Filtered out {len(dockerctbet_exclude)} files from dockerctbet")    

    # Apply exclusions based on filename prefixes
    mask = ~csv_file['Filename'].str.startswith(exclude_prefixes)
    csv_file = csv_file[mask]
    
    # Filter for craniotomy cases only
    craniotomy_cases = csv_file[csv_file['craniotomy'] == 1]
    print(f"Found {len(craniotomy_cases)} craniotomy scans for {method}")
    
    # Define the failure columns to check
    failure_columns = ['1 - Volumetrics', '2 - Registration', '3 - DL']
    available_columns = [col for col in failure_columns if col in craniotomy_cases.columns]
    print(f"Available failure columns for {method}: {available_columns}")
    
    # Set total count based on method (subject-level counts)
    if method != 'CT_BET':
        total_count = 14  # Total subjects with craniotomy
    else:
        total_count = 7   # Total subjects with craniotomy for ctbet
    
    # Extract subject information and aggregate at subject level
    craniotomy_cases['Subject_ID'] = craniotomy_cases['Filename'].apply(lambda x: x.split('_')[0])
    craniotomy_cases['Scan_Date'] = craniotomy_cases['Filename'].apply(lambda x: x.split('_')[1])
    craniotomy_cases['Scan_Time'] = craniotomy_cases['Filename'].apply(lambda x: x.split('_')[2])
    
    # Group by subject and aggregate (if any scan for a subject has failure, subject has failure)
    craniotomy_subj = craniotomy_cases.groupby('Subject_ID').agg({
        '1 - Volumetrics': lambda x: 'yes' if 'yes' in x.values else 'no',
        '2 - Registration': lambda x: 'yes' if 'yes' in x.values else 'no',
        '3 - DL': lambda x: 'yes' if 'yes' in x.values else 'no'
    }).reset_index()
    
    print(f"Found {len(craniotomy_subj)} unique subjects with craniotomy for {method}")
    
    # Count failures for each individual column
    volumetrics_count = len(craniotomy_subj[craniotomy_subj['1 - Volumetrics'] == 'yes']) if '1 - Volumetrics' in available_columns else 0
    registration_count = len(craniotomy_subj[craniotomy_subj['2 - Registration'] == 'yes']) if '2 - Registration' in available_columns else 0
    dl_count = len(craniotomy_subj[craniotomy_subj['3 - DL'] == 'yes']) if '3 - DL' in available_columns else 0
    
    # Count total failures (any failure in any column)
    total_failures_count = len(craniotomy_subj[craniotomy_subj[available_columns].eq('yes').any(axis=1)]) if available_columns else 0
    
    # Count multiple failures (failures in 2 or more columns)
    yes_counts_per_row = craniotomy_subj[available_columns].eq('yes').sum(axis=1) if available_columns else pd.Series([0] * len(craniotomy_subj))
    multiple_failures_count = len(craniotomy_subj[yes_counts_per_row >= 2])
    
    # Create results dataframe
    df = pd.DataFrame({
        'Method': [method],
        'Total_Count': [total_count],
        'Total_Failure_Count': [total_failures_count],
        'Total_Failure_Rate': [total_failures_count / total_count * 100 if total_count > 0 else 0],
        'Volumetrics_Count': [volumetrics_count],
        'Volumetrics_Rate': [volumetrics_count / total_count * 100 if total_count > 0 else 0],
        'Registration_Count': [registration_count],
        'Registration_Rate': [registration_count / total_count * 100 if total_count > 0 else 0],
        'DL_Count': [dl_count],
        'DL_Rate': [dl_count / total_count * 100 if total_count > 0 else 0],
        'Multiple_Failures_Count': [multiple_failures_count],
        'Multiple_Failures_Rate': [multiple_failures_count / total_count * 100 if total_count > 0 else 0]
    })
    
    craniotomy_results.append(df)

# Combine all craniotomy results
final_craniotomy_results = pd.concat(craniotomy_results, ignore_index=True)

# Define column order
column_order = [
    "Method",
    "Total_Count",
    "Total_Failure_Count", "Total_Failure_Rate",
    "Volumetrics_Count", "Volumetrics_Rate",
    "Registration_Count", "Registration_Rate",
    "DL_Count", "DL_Rate",
    "Multiple_Failures_Count", "Multiple_Failures_Rate"
]

final_craniotomy_results = final_craniotomy_results[column_order]
final_craniotomy_results.to_csv('/Users/rushil/brain_extraction/results/qualitative/subj_lvl/Rushil_QC_craniotomy_subj_results.csv', index=False)
print("Craniotomy subject-level results saved to CSV file")
final_craniotomy_results

Processing craniotomy subject-level results for method: CTBET
Found 7 craniotomy scans for CTBET
Available failure columns for CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 6 unique subjects with craniotomy for CTBET
Processing craniotomy subject-level results for method: Robust-CTBET
Found 0 craniotomy scans for Robust-CTBET
Available failure columns for Robust-CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 0 unique subjects with craniotomy for Robust-CTBET
Processing craniotomy subject-level results for method: SynthStrip
Found 31 craniotomy scans for SynthStrip
Available failure columns for SynthStrip: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 10 unique subjects with craniotomy for SynthStrip
Processing craniotomy subject-level results for method: HD-CTBET
Found 10 craniotomy scans for HD-CTBET
Available failure columns for HD-CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 5 unique subjects with craniotomy for HD-CTBET
P

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  craniotomy_cases['Subject_ID'] = craniotomy_cases['Filename'].apply(lambda x: x.split('_')[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  craniotomy_cases['Scan_Date'] = craniotomy_cases['Filename'].apply(lambda x: x.split('_')[1])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  craniotomy_cases

Unnamed: 0,Method,Total_Count,Total_Failure_Count,Total_Failure_Rate,Volumetrics_Count,Volumetrics_Rate,Registration_Count,Registration_Rate,DL_Count,DL_Rate,Multiple_Failures_Count,Multiple_Failures_Rate
0,CTBET,14,4,28.571429,4,28.571429,2,14.285714,2,14.285714,3,21.428571
1,Robust-CTBET,14,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2,SynthStrip,14,7,50.0,7,50.0,4,28.571429,5,35.714286,6,42.857143
3,HD-CTBET,14,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
4,CT_BET,7,6,85.714286,6,85.714286,4,57.142857,6,85.714286,6,85.714286
5,Brainchop,14,2,14.285714,2,14.285714,1,7.142857,0,0.0,1,7.142857
6,CTbet_Docker,14,1,7.142857,1,7.142857,0,0.0,0,0.0,0,0.0


In [4]:
# CTA Subject-Level Analysis
cta_results = []

for method in methods:
    print(f"Processing CTA subject-level results for method: {method}")
    # Read the CSV file with the new columns
    csv_file = pd.read_csv(f'/Users/rushil/brain_extraction/data/qc/{method}/{method}_final_v2.csv')
    
    if method == 'Brainchop':
        csv_file = csv_file[~csv_file['Filename'].isin(brainchop_exclude)]
        print(f"Filtered out {len(brainchop_exclude)} files from brainchop")
        
    if method == 'CTbet_Docker':
        csv_file = csv_file[~csv_file['Filename'].isin(dockerctbet_exclude)]
        print(f"Filtered out {len(dockerctbet_exclude)} files from dockerctbet")    

    # Apply exclusions based on filename prefixes
    mask = ~csv_file['Filename'].str.startswith(exclude_prefixes)
    csv_file = csv_file[mask]
    
    # Filter for CTA cases only
    cta_cases = csv_file[csv_file['cta'] == 1]
    print(f"Found {len(cta_cases)} CTA scans for {method}")
    
    # Define the failure columns to check
    failure_columns = ['1 - Volumetrics', '2 - Registration', '3 - DL']
    available_columns = [col for col in failure_columns if col in cta_cases.columns]
    print(f"Available failure columns for {method}: {available_columns}")
    
    # Set total count based on method (subject-level counts)
    if method != 'CT_BET':
        total_count = 10  # Total subjects with CTA
    else:
        total_count = 7   # Total subjects with CTA for ctbet
    
    # Extract subject information and aggregate at subject level
    cta_cases['Subject_ID'] = cta_cases['Filename'].apply(lambda x: x.split('_')[0])
    cta_cases['Scan_Date'] = cta_cases['Filename'].apply(lambda x: x.split('_')[1])
    cta_cases['Scan_Time'] = cta_cases['Filename'].apply(lambda x: x.split('_')[2])
    
    # Group by subject and aggregate (if any scan for a subject has failure, subject has failure)
    cta_subj = cta_cases.groupby('Subject_ID').agg({
        '1 - Volumetrics': lambda x: 'yes' if 'yes' in x.values else 'no',
        '2 - Registration': lambda x: 'yes' if 'yes' in x.values else 'no',
        '3 - DL': lambda x: 'yes' if 'yes' in x.values else 'no'
    }).reset_index()
    
    print(f"Found {len(cta_subj)} unique subjects with CTA for {method}")
    
    # Count failures for each individual column
    volumetrics_count = len(cta_subj[cta_subj['1 - Volumetrics'] == 'yes']) if '1 - Volumetrics' in available_columns else 0
    registration_count = len(cta_subj[cta_subj['2 - Registration'] == 'yes']) if '2 - Registration' in available_columns else 0
    dl_count = len(cta_subj[cta_subj['3 - DL'] == 'yes']) if '3 - DL' in available_columns else 0
    
    # Count total failures (any failure in any column)
    total_failures_count = len(cta_subj[cta_subj[available_columns].eq('yes').any(axis=1)]) if available_columns else 0
    
    # Count multiple failures (failures in 2 or more columns)
    yes_counts_per_row = cta_subj[available_columns].eq('yes').sum(axis=1) if available_columns else pd.Series([0] * len(cta_subj))
    multiple_failures_count = len(cta_subj[yes_counts_per_row >= 2])
    
    # Create results dataframe
    df = pd.DataFrame({
        'Method': [method],
        'Total_Count': [total_count],
        'Total_Failure_Count': [total_failures_count],
        'Total_Failure_Rate': [total_failures_count / total_count * 100 if total_count > 0 else 0],
        'Volumetrics_Count': [volumetrics_count],
        'Volumetrics_Rate': [volumetrics_count / total_count * 100 if total_count > 0 else 0],
        'Registration_Count': [registration_count],
        'Registration_Rate': [registration_count / total_count * 100 if total_count > 0 else 0],
        'DL_Count': [dl_count],
        'DL_Rate': [dl_count / total_count * 100 if total_count > 0 else 0],
        'Multiple_Failures_Count': [multiple_failures_count],
        'Multiple_Failures_Rate': [multiple_failures_count / total_count * 100 if total_count > 0 else 0]
    })
    
    cta_results.append(df)

# Combine all CTA results
final_cta_results = pd.concat(cta_results, ignore_index=True)

# Define column order
column_order = [
    "Method",
    "Total_Count",
    "Total_Failure_Count", "Total_Failure_Rate",
    "Volumetrics_Count", "Volumetrics_Rate",
    "Registration_Count", "Registration_Rate",
    "DL_Count", "DL_Rate",
    "Multiple_Failures_Count", "Multiple_Failures_Rate"
]

final_cta_results = final_cta_results[column_order]
final_cta_results.to_csv('/Users/rushil/brain_extraction/results/qualitative/subj_lvl/Rushil_QC_cta_subj_results.csv', index=False)
print("CTA subject-level results saved to CSV file")
final_cta_results

Processing CTA subject-level results for method: CTBET
Found 3 CTA scans for CTBET
Available failure columns for CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 3 unique subjects with CTA for CTBET
Processing CTA subject-level results for method: Robust-CTBET
Found 0 CTA scans for Robust-CTBET
Available failure columns for Robust-CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 0 unique subjects with CTA for Robust-CTBET
Processing CTA subject-level results for method: SynthStrip
Found 4 CTA scans for SynthStrip
Available failure columns for SynthStrip: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 4 unique subjects with CTA for SynthStrip
Processing CTA subject-level results for method: HD-CTBET
Found 1 CTA scans for HD-CTBET
Available failure columns for HD-CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 1 unique subjects with CTA for HD-CTBET
Processing CTA subject-level results for method: CT_BET
Found 6 CTA scans for CT_BET
Ava

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cta_cases['Subject_ID'] = cta_cases['Filename'].apply(lambda x: x.split('_')[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cta_cases['Scan_Date'] = cta_cases['Filename'].apply(lambda x: x.split('_')[1])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cta_cases['Scan_Time'] = cta_cases['Filename

Unnamed: 0,Method,Total_Count,Total_Failure_Count,Total_Failure_Rate,Volumetrics_Count,Volumetrics_Rate,Registration_Count,Registration_Rate,DL_Count,DL_Rate,Multiple_Failures_Count,Multiple_Failures_Rate
0,CTBET,10,3,30.0,3,30.0,2,20.0,2,20.0,2,20.0
1,Robust-CTBET,10,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2,SynthStrip,10,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
3,HD-CTBET,10,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
4,CT_BET,7,5,71.428571,5,71.428571,2,28.571429,2,28.571429,3,42.857143
5,Brainchop,10,1,10.0,1,10.0,0,0.0,0,0.0,0,0.0
6,CTbet_Docker,10,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0


In [5]:
# Artifact Subject-Level Analysis
artifact_results = []

for method in methods:
    print(f"Processing artifact subject-level results for method: {method}")
    # Read the CSV file with the new columns
    csv_file = pd.read_csv(f'/Users/rushil/brain_extraction/data/qc/{method}/{method}_final_v2.csv')
    
    if method == 'Brainchop':
        csv_file = csv_file[~csv_file['Filename'].isin(brainchop_exclude)]
        print(f"Filtered out {len(brainchop_exclude)} files from brainchop")
        
    if method == 'CTbet_Docker':
        csv_file = csv_file[~csv_file['Filename'].isin(dockerctbet_exclude)]
        print(f"Filtered out {len(dockerctbet_exclude)} files from dockerctbet")    

    # Apply exclusions based on filename prefixes
    mask = ~csv_file['Filename'].str.startswith(exclude_prefixes)
    csv_file = csv_file[mask]
    
    # Filter for artifact cases only
    artifact_cases = csv_file[csv_file['artifact'] == 1]
    print(f"Found {len(artifact_cases)} artifact scans for {method}")
    
    # Define the failure columns to check
    failure_columns = ['1 - Volumetrics', '2 - Registration', '3 - DL']
    available_columns = [col for col in failure_columns if col in artifact_cases.columns]
    print(f"Available failure columns for {method}: {available_columns}")
    
    # Set total count based on method (subject-level counts)
    if method != 'CT_BET':
        total_count = 46  # Total subjects with artifact
    else:
        total_count = 27  # Total subjects with artifact for ctbet
    
    # Extract subject information and aggregate at subject level
    artifact_cases['Subject_ID'] = artifact_cases['Filename'].apply(lambda x: x.split('_')[0])
    artifact_cases['Scan_Date'] = artifact_cases['Filename'].apply(lambda x: x.split('_')[1])
    artifact_cases['Scan_Time'] = artifact_cases['Filename'].apply(lambda x: x.split('_')[2])
    
    # Group by subject and aggregate (if any scan for a subject has failure, subject has failure)
    artifact_subj = artifact_cases.groupby('Subject_ID').agg({
        '1 - Volumetrics': lambda x: 'yes' if 'yes' in x.values else 'no',
        '2 - Registration': lambda x: 'yes' if 'yes' in x.values else 'no',
        '3 - DL': lambda x: 'yes' if 'yes' in x.values else 'no'
    }).reset_index()
    
    print(f"Found {len(artifact_subj)} unique subjects with artifact for {method}")
    
    # Count failures for each individual column
    volumetrics_count = len(artifact_subj[artifact_subj['1 - Volumetrics'] == 'yes']) if '1 - Volumetrics' in available_columns else 0
    registration_count = len(artifact_subj[artifact_subj['2 - Registration'] == 'yes']) if '2 - Registration' in available_columns else 0
    dl_count = len(artifact_subj[artifact_subj['3 - DL'] == 'yes']) if '3 - DL' in available_columns else 0
    
    # Count total failures (any failure in any column)
    total_failures_count = len(artifact_subj[artifact_subj[available_columns].eq('yes').any(axis=1)]) if available_columns else 0
    
    # Count multiple failures (failures in 2 or more columns)
    yes_counts_per_row = artifact_subj[available_columns].eq('yes').sum(axis=1) if available_columns else pd.Series([0] * len(artifact_subj))
    multiple_failures_count = len(artifact_subj[yes_counts_per_row >= 2])
    
    # Create results dataframe
    df = pd.DataFrame({
        'Method': [method],
        'Total_Count': [total_count],
        'Total_Failure_Count': [total_failures_count],
        'Total_Failure_Rate': [total_failures_count / total_count * 100 if total_count > 0 else 0],
        'Volumetrics_Count': [volumetrics_count],
        'Volumetrics_Rate': [volumetrics_count / total_count * 100 if total_count > 0 else 0],
        'Registration_Count': [registration_count],
        'Registration_Rate': [registration_count / total_count * 100 if total_count > 0 else 0],
        'DL_Count': [dl_count],
        'DL_Rate': [dl_count / total_count * 100 if total_count > 0 else 0],
        'Multiple_Failures_Count': [multiple_failures_count],
        'Multiple_Failures_Rate': [multiple_failures_count / total_count * 100 if total_count > 0 else 0]
    })
    
    artifact_results.append(df)

# Combine all artifact results
final_artifact_results = pd.concat(artifact_results, ignore_index=True)

# Define column order
column_order = [
    "Method",
    "Total_Count",
    "Total_Failure_Count", "Total_Failure_Rate",
    "Volumetrics_Count", "Volumetrics_Rate",
    "Registration_Count", "Registration_Rate",
    "DL_Count", "DL_Rate",
    "Multiple_Failures_Count", "Multiple_Failures_Rate"
]

final_artifact_results = final_artifact_results[column_order]
final_artifact_results.to_csv('/Users/rushil/brain_extraction/results/qualitative/subj_lvl//Rushil_QC_artifact_subj_results.csv', index=False)
print("Artifact subject-level results saved to CSV file")
final_artifact_results

Processing artifact subject-level results for method: CTBET
Found 12 artifact scans for CTBET
Available failure columns for CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 11 unique subjects with artifact for CTBET
Processing artifact subject-level results for method: Robust-CTBET
Found 2 artifact scans for Robust-CTBET
Available failure columns for Robust-CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 2 unique subjects with artifact for Robust-CTBET
Processing artifact subject-level results for method: SynthStrip
Found 47 artifact scans for SynthStrip
Available failure columns for SynthStrip: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 40 unique subjects with artifact for SynthStrip
Processing artifact subject-level results for method: HD-CTBET
Found 14 artifact scans for HD-CTBET
Available failure columns for HD-CTBET: ['1 - Volumetrics', '2 - Registration', '3 - DL']
Found 12 unique subjects with artifact for HD-CTBET
Processing artifact su

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  artifact_cases['Subject_ID'] = artifact_cases['Filename'].apply(lambda x: x.split('_')[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  artifact_cases['Scan_Date'] = artifact_cases['Filename'].apply(lambda x: x.split('_')[1])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  artifact_cases['Scan_Tim

Unnamed: 0,Method,Total_Count,Total_Failure_Count,Total_Failure_Rate,Volumetrics_Count,Volumetrics_Rate,Registration_Count,Registration_Rate,DL_Count,DL_Rate,Multiple_Failures_Count,Multiple_Failures_Rate
0,CTBET,46,8,17.391304,8,17.391304,6,13.043478,7,15.217391,7,15.217391
1,Robust-CTBET,46,2,4.347826,2,4.347826,2,4.347826,2,4.347826,2,4.347826
2,SynthStrip,46,27,58.695652,27,58.695652,15,32.608696,19,41.304348,20,43.478261
3,HD-CTBET,46,3,6.521739,3,6.521739,0,0.0,0,0.0,0,0.0
4,CT_BET,27,17,62.962963,17,62.962963,12,44.444444,11,40.740741,14,51.851852
5,Brainchop,46,20,43.478261,20,43.478261,17,36.956522,6,13.043478,17,36.956522
6,CTbet_Docker,46,3,6.521739,3,6.521739,2,4.347826,1,2.173913,3,6.521739
