In [1]:
!pip install pandas
!pip install numpy
!pip install pydicom
!pip install matplotlib
!pip install pillow
!pip install dotenv


%cd /home/azureuser/cloudfiles/code/rwep_experiments/jsam/mtb_sample_data/libs/sam_mtb_utils/
!pip install -e .

Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m40.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-3.0.1
/mnt/batch/tasks/shared/LS_root/mounts/clusters/alyssa-test/code/rwep_experiments/jsam/mtb_sample_data/libs/sam_mtb_utils
Obtaining file:///mnt/batch/tasks/shared/LS_root/mounts/clusters/alyssa-test/code/rwep_experiments/jsam/mtb_sample_data/libs/sam_mtb_utils
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: sam_mtb_utils
  Building editable for sam_mtb_utils (pyproject.toml) ... [?25ldone
[?25h  Created wheel for sam_mtb_utils: filename=sam_mtb_utils-0.1.0-0.editable-p

In [2]:
import pandas as pd
from pathlib import Path

import numpy as np


def read_parquet_from_directory(parquet_dir):
    """
    Reads the Parquet file from the specified directory.

    Args:
        base_path (str): The base path where the directories are located.
        directory_name (str): The name of the directory containing the Parquet file.

    Returns:
        pd.DataFrame: The data from the Parquet file as a pandas DataFrame.
    """
    file_frames = []
    # parquet_dir = base_path / directory_name
    for file in Path(parquet_dir).glob("*.parquet"):
        df = pd.read_parquet(file)
        file_frames.append(df)
    if file_frames:
        return pd.concat(file_frames, ignore_index=True)
    raise FileNotFoundError(f"No Parquet files found in {parquet_dir}")

In [3]:
diagnosis = "/home/azureuser/data/rwesdataforge/protege/2025-06-16-clinical_cohort_2196total+mtb_pathnotes/protege/clinical-cohort/structured-ehr/DD48_DIAGNOSIS"
df = read_parquet_from_directory(diagnosis)
lung_patients = df[df['DX_CODE'] == 'C34']

# Then exclude small cell lung cancer from histology
nsclc_patients1 = lung_patients[
    ~lung_patients['HISTOLOGY'].str.contains('small cell', case=False, na=False)
]

print(f"Number of patients with non-small cell lung cancer: {len(nsclc_patients1)}")
target_patient_ids = nsclc_patients1['PTID'].unique()

Number of patients with non-small cell lung cancer: 1464


In [5]:
parquet_dir="/home/azureuser/data/rwesdataforge/protege/2025-06-16-clinical_cohort_2196total+mtb_pathnotes/protege/clinical-cohort/structured-ehr/"

#iterate through subdirectories and read parquet files
all_dataframes = []
if not Path(parquet_dir).exists():
    raise FileNotFoundError(f"The directory {parquet_dir} does not exist.")
for subdir in Path(parquet_dir).iterdir():
    if subdir.is_dir():
        print(f"Reading Parquet files from {subdir}")
        name = str(subdir).split('/')[-1]
        try:
            file_frames = read_parquet_from_directory(subdir)
            # Filter the DataFrame to include only target patients
            file_frames = file_frames[file_frames['PTID'].isin(target_patient_ids)]
            #collapse the dataframe so that there is only one row per patient, combinging all columns   
            file_frames = file_frames.groupby('PTID').agg(
    lambda x: ', '.join([str(val) for val in x.dropna() if pd.notna(val)])
).reset_index()
            #add file name to the data frame columns as an "_name" suffix
            file_frames.columns = [f"{col}_{name}" if col != 'PTID' else col for col in file_frames.columns]
            # Append the filtered DataFrame to the list
            if file_frames.empty:
                print(f"No relevant data found in {subdir} for target patients.")
                continue
            all_dataframes.append(file_frames)
            print(f"Successfully read {len(file_frames)} rows from {subdir}")
        except FileNotFoundError as e:
            print(e)

Reading Parquet files from /home/azureuser/data/rwesdataforge/protege/2025-06-16-clinical_cohort_2196total+mtb_pathnotes/protege/clinical-cohort/structured-ehr/DD48_BIOMARKER


Successfully read 1451 rows from /home/azureuser/data/rwesdataforge/protege/2025-06-16-clinical_cohort_2196total+mtb_pathnotes/protege/clinical-cohort/structured-ehr/DD48_BIOMARKER
Reading Parquet files from /home/azureuser/data/rwesdataforge/protege/2025-06-16-clinical_cohort_2196total+mtb_pathnotes/protege/clinical-cohort/structured-ehr/DD48_COMORBIDITIES
Successfully read 1451 rows from /home/azureuser/data/rwesdataforge/protege/2025-06-16-clinical_cohort_2196total+mtb_pathnotes/protege/clinical-cohort/structured-ehr/DD48_COMORBIDITIES
Reading Parquet files from /home/azureuser/data/rwesdataforge/protege/2025-06-16-clinical_cohort_2196total+mtb_pathnotes/protege/clinical-cohort/structured-ehr/DD48_COMPLETE_PATIENTS
Successfully read 1451 rows from /home/azureuser/data/rwesdataforge/protege/2025-06-16-clinical_cohort_2196total+mtb_pathnotes/protege/clinical-cohort/structured-ehr/DD48_COMPLETE_PATIENTS
Reading Parquet files from /home/azureuser/data/rwesdataforge/protege/2025-06-16-cl

In [6]:
# Script to extract columns relevant for oncological treatment decisions

import pandas as pd

def extract_oncological_decision_columns(df):
    """
    Extract columns relevant for oncological treatment decisions from the main dataframe.
    
    Parameters:
    df (pd.DataFrame): The main dataframe with all columns
    
    Returns:
    pd.DataFrame: Dataframe with only oncological decision-relevant columns
    """
    
    # Define columns used for oncological treatment decisions
    oncological_columns = [
        # Patient ID (required for linking)
        'PTID',
        
        # Primary Tumor Characteristics
        'DX_CODE_DD48_DIAGNOSIS',
        'HISTOLOGY_DD48_DIAGNOSIS', 
        'GRADE_DD48_DIAGNOSIS',
        'ANATOMIC_SITE_DD48_DIAGNOSIS',
        
        # Initial Staging Information
        'INITIAL_AJCC_STAGE_DD48_DIAGNOSIS',
        'INITIAL_T_STAGE_DD48_DIAGNOSIS',
        'INITIAL_N_STAGE_DD48_DIAGNOSIS', 
        'INITIAL_M_STAGE_DD48_DIAGNOSIS',
        'INITIAL_DX_YEAR_DD48_DIAGNOSIS',
        'INITIAL_DX_INTERVAL_DD48_DIAGNOSIS',
        
        # Metastatic Disease Information
        'METS_AJCC_STAGE_DD48_DIAGNOSIS',
        'METS_T_STAGE_DD48_DIAGNOSIS',
        'METS_N_STAGE_DD48_DIAGNOSIS',
        'METS_M_STAGE_DD48_DIAGNOSIS',
        'METDX_INTERVAL_DD48_DIAGNOSIS',
        'METS_ANATOMIC_SITE_DD48_METASTATIC_DISEASE',
        'METS_INTERVAL_DD48_METASTATIC_DISEASE',
        
        # Biomarkers (Critical for Targeted Therapy)
        'BIOMARKER_DD48_BIOMARKER',
        'BIOMARKER_RESULT_DD48_BIOMARKER',
        'TEST_RESULT_INTERVAL_DD48_BIOMARKER',
        
        # Performance Status
        'ECOG_DD48_PERFORMANCE',
        'ECOG_DT_INTERVAL_DD48_PERFORMANCE',
        'KPS_DD48_PERFORMANCE', 
        'KPS_DT_INTERVAL_DD48_PERFORMANCE',
        
        # Patient Demographics & Characteristics
        'AGE_DD48_PATIENT_CHARACTERISTICS',
        'SEX_DD48_PATIENT_CHARACTERISTICS',
        'RACE_DD48_PATIENT_CHARACTERISTICS',
        'ETHNICITY_DD48_PATIENT_CHARACTERISTICS',
        'SMOKING_STATUS_DD48_PATIENT_CHARACTERISTICS',
        'SMOKING_PACKYEARS_DD48_PATIENT_CHARACTERISTICS',
        'VITAL_STATUS_DD48_PATIENT_CHARACTERISTICS',
        'DEATH_INTERVAL_DD48_PATIENT_CHARACTERISTICS',
        'CAUSE_OF_DEATH_DD48_PATIENT_CHARACTERISTICS',
        'LAST_ALIVE_INTERVAL_DD48_PATIENT_CHARACTERISTICS',
        
        # Comorbidities (Treatment Eligibility)
        'CCI_DD48_COMORBIDITIES',
        'MI_DD48_COMORBIDITIES',
        'CHF_DD48_COMORBIDITIES',
        'PVD_DD48_COMORBIDITIES',
        'CEVD_DD48_COMORBIDITIES',
        'DEMENTIA_DD48_COMORBIDITIES',
        'CPD_DD48_COMORBIDITIES',
        'RHEUMD_DD48_COMORBIDITIES',
        'PUD_DD48_COMORBIDITIES',
        'MLD_DD48_COMORBIDITIES',
        'DIAB_DD48_COMORBIDITIES',
        'DIABWC_DD48_COMORBIDITIES',
        'HP_DD48_COMORBIDITIES',
        'REND_DD48_COMORBIDITIES',
        'CANC_DD48_COMORBIDITIES',
        'MSLD_DD48_COMORBIDITIES',
        'METACANC_DD48_COMORBIDITIES',
        'AIDS_DD48_COMORBIDITIES',
        
        # Treatment History & Response
        'ANTINEOPLASTIC_DD48_TREATMENT',
        'ANTINEOPLASTIC_START_INTERVAL_DD48_TREATMENT',
        'ANTINEOPLASTIC_END_INTERVAL_DD48_TREATMENT',
        'DISCONTINUATION_REASON_DD48_TREATMENT',
        'INTENT_DD48_TREATMENT',
        'LOT_DD48_TREATMENT',
        'RESPONSE_DD48_RESPONSE',
        'RESPONSE_INTERVAL_DD48_RESPONSE',
        
        # Surgery Information
        'SURGERY_DD48_SURGERY',
        'SURGERY_INTERVAL_DD48_SURGERY',
        'MARGINS_DD48_SURGERY',
        
        # Radiation Information
        'RADIATION_TYPE_DD48_RADIATION',
        'RADIATION_LOCATION_DD48_RADIATION',
        'DOSE_DD48_RADIATION',
        'FRACTIONS_DD48_RADIATION',
        'RADIATION_START_INTERVAL_DD48_RADIATION',
        'RADIATION_END_INTERVAL_DD48_RADIATION',
        
        # Registry Information (Additional Staging Details)
        'T_STAGE_DD48_REGISTRY',
        'N_STAGE_DD48_REGISTRY',
        'M_STAGE_DD48_REGISTRY',
        'STAGE_EDITION_DD48_REGISTRY',
        'HISTOLOGY_DD48_REGISTRY',
        'HISTOLOGY_CODE_DD48_REGISTRY',
        'GRADE_DD48_REGISTRY',
        'TUMOR_SIZE_IN_MM_DD48_REGISTRY',
        'LYMPH_VASCULAR_INVASION_LVI_DD48_REGISTRY'
    ]
    
    # Check which columns actually exist in the dataframe
    available_columns = [col for col in oncological_columns if col in df.columns]
    missing_columns = [col for col in oncological_columns if col not in df.columns]
    
    if missing_columns:
        print(f"Warning: The following columns were not found in the dataframe:")
        for col in missing_columns:
            print(f"  - {col}")
        print()
    
    print(f"Extracting {len(available_columns)} oncological decision columns out of {len(df.columns)} total columns")
    
    # Extract the relevant columns
    oncological_df = df[available_columns].copy()
    
    return oncological_df

In [7]:
# Combine all dataframes into one
combined_df = pd.concat(all_dataframes, ignore_index=True)

# Group by PTID and collapse all columns into lists
collapsed_df = (
    combined_df
    .groupby('PTID')
    .agg(lambda x: x.tolist())
    .reset_index()
)

print(f"Collapsed DataFrame has {len(collapsed_df)} unique PTIDs.")

# Remove NaNs from list-type columns
for col in collapsed_df.columns:
    if collapsed_df[col].apply(lambda x: isinstance(x, list)).all():
        collapsed_df[col] = collapsed_df[col].apply(lambda lst: [item for item in lst if pd.notna(item)])

collapsed_df


Collapsed DataFrame has 1451 unique PTIDs.


Unnamed: 0,PTID,BIOMARKER_DD48_BIOMARKER,BIOMARKER_RESULT_DD48_BIOMARKER,TEST_RESULT_INTERVAL_DD48_BIOMARKER,CCI_DD48_COMORBIDITIES,MI_DD48_COMORBIDITIES,CHF_DD48_COMORBIDITIES,PVD_DD48_COMORBIDITIES,CEVD_DD48_COMORBIDITIES,DEMENTIA_DD48_COMORBIDITIES,...,LOT_DD48_TREATMENT,ENCOUNTER_ID_DD48_VITALS,HEIGHT_DD48_VITALS,WEIGHT_DD48_VITALS,BP_SYSTOLIC_DD48_VITALS,BP_DIASTOLIC_DD48_VITALS,TEMPERATURE_DD48_VITALS,FILE_NAME_DD48_WSI_INVENTORY_FILE,PATHOLOGY_IMG_FILENAME_DD48_WSI_LIST,DEID_ACCESSION_NUMBER_DD48_WSI_LIST
0,+/eccpz6rstpkCdP17sKly0E3bV/qVD5PX7MJknt9lFAH6...,[],[],[],[5.0],[N],[N],[N],[N],[N],...,[],"[436:616475142, 436:590551040, 436:619279341, ...","[158.09, 158.12, 160.71, 160.02, 160.02]","[83.46000000000001, 82.60000000000001, 83.01, ...","[171.0, 128.0, 118.0, 126.0, 138.0, 182.0]","[84.0, 76.0, 70.0, 74.0, 80.0, 82.0]","[97.0, 98.0, 99.0]",[2070960.tif],[2070960.tif],[2070960]
1,+5F3XdH7tY8YZ8L1IGFLZHz16GqG8+4kI6Yuy1vTLjyyUo...,[],[],[],[5.0],[N],[N],[Y],[N],[N],...,[],"[309:65719226, 309:52873467, 309:59023053, 309...","[180.34, 182.88, 182.88, 182.88, 182.88, 182.8...","[72.08, 77.56, 76.2, 74.84, 78.47, 72.57000000...","[100.0, 132.0, 120.0, 92.0, 124.0, 84.0, 134.0...","[72.0, 68.0, 62.0, 66.0, 66.0, 64.0, 80.0, 79....","[97.0, 98.0, 98.0, 98.0, 97.0, 98.0, 98.0, 98....",[2054027.tif],[2054027.tif],[2054027]
2,+7i+hDlYf2iXP4HNo3mt7ls3ntKr78unyHuznyD7poO7ST...,[ALK],[NEGATIVE],[21099.0],[8.0],[N],[N],[N],[N],[N],...,"[1.0, 1.0, 1.0]","[436:631607099, 436:631623828, 436:631291468, ...","[187.96, 187.96]","[99.34, 99.88, 98.88, 105.19, 95.25]","[140.0, 125.0, 114.0, 120.0, 115.0, 106.0, 107...","[64.0, 71.0, 58.0, 56.0, 78.0, 62.0, 60.0, 68....","[98.0, 98.0, 98.0, 98.0, 98.0, 98.0, 99.0, 98....",[2055328.tif],[2055328.tif],[2055328]
3,+BV2xwvaB0HKRtNsoA/RBWf1Q32BUTH+7zSq7Y1zuy7HX0...,[PDL1],[POSITIVE],[17055.0],[2.0],[N],[N],[N],[N],[N],...,[],"[199:117161376, 199:116776145, 309:2458639, 19...",[186.0],[79.4],"[134.0, 147.0, 159.0, 161.0]","[81.0, 86.0, 102.0, 101.0]",[],[2084107.tif],[2084107.tif],[2084107]
4,+C6RiK6gNv+JLW5zZsIVE8L/byYWlIkRK/1wKgFPPU1Xj5...,[ROS1],[FUSION],[27895.0],[3.0],[N],[N],[N],[Y],[N],...,[],"[436:523868569, 436:540599473, 436:377058961]","[162.56, 162.56]","[72.62, 72.57000000000001]","[148.0, 137.0, 146.0]","[67.0, 82.0, 67.0]","[98.0, 99.0, 97.0]",[2070975.tif],[2070975.tif],[2070975]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1446,zkHcOdGrSVgzvh200NcSXV8f3cDun/sJXWz4DdmVS/6Inv...,"[ALK, BRAF, BRAF, ROS1, ROS1, BRAF]","[NEGATIVE, Mutant, No mutation, POSITIVE, NEGA...","[28944.0, 28944.0, 28944.0, 28944.0, 28944.0, ...",[7.0],[N],[N],[Y],[N],[N],...,[],"[309:74545208, 309:73012427, 309:61345922, 309...","[175.26, 176.53, 175.26, 175.26, 176.53, 175.2...","[83.01, 84.37, 88.72, 87.45, 87.5, 82.10000000...","[100.0, 112.0, 120.0, 146.0, 147.0, 110.0, 119...","[60.0, 70.0, 82.0, 80.0, 87.0, 70.0, 77.0, 77....","[97.0, 98.0, 97.0, 98.0, 98.0, 97.0, 97.0, 98....","[2050507.tif, 2055180.tif]","[2055180.tif, 2050507.tif]","[2055180, 2050507]"
1447,zkvZY8u69yrm682kmu+/w9xqhidRJ4bztZf087V3L2T8aV...,[],[],[],[2.0],[N],[N],[N],[N],[N],...,[],"[436:478943018, 436:579941389, 436:462068016, ...","[164.47, 165.1, 165.1, 164.47]","[64.91, 65.32000000000001, 65.77, 67.63, 66.59...","[122.0, 122.0, 124.0, 122.0, 130.0, 110.0, 124.0]","[70.0, 72.0, 70.0, 80.0, 80.0, 60.0, 78.0]","[98.0, 98.0]",[2070871.tif],[2070871.tif],[2070871]
1448,znxJ27FZ3LNDEjWk6wnHrm2h0GEz6zrBJHlAsAIjozvwn1...,[PDL1],[NEGATIVE],[29170.0],[3.0],[N],[N],[N],[N],[N],...,"[1.0, 1.0]","[436:431371675, 436:47530530, 436:449077620, 4...","[157.48, 154.94, 156.21, 156.21, 157.48, 154.9...","[95.71000000000001, 96.48, 79.33, 79.61, 90.58...","[110.0, 130.0, 117.0, 116.0, 104.0, 110.0, 118...","[64.0, 78.0, 58.0, 66.0, 58.0, 64.0, 70.0, 58....","[99.0, 98.0, 98.0, 98.0, 98.0, 98.0, 97.0]",[2070909.tif],[2070909.tif],[2070909]
1449,zubM7k+BaZUJaoSkZs2r5fieYjX1tmEjigytJLvO/mrkwM...,[],[],[],[4.0],[N],[N],[N],[N],[N],...,[],"[199:111423147, 199:111611471, 199:111254549, ...","[158.0, 170.0, 158.20000000000002, 157.5]","[105.10000000000001, 95.0, 103.4, 102.10000000...","[111.0, 102.0, 162.0, 173.0]","[56.0, 58.0, 77.0, 74.0]","[97.88, 98.06, 97.16, 98.24000000000001]",[2084032.tif],[2084032.tif],[2084032]


In [8]:
# Usage example:
oncological_data = extract_oncological_decision_columns(collapsed_df)
print(f"Original dataframe shape: {file_frames.shape}")
print(f"Oncological dataframe shape: {oncological_data.shape}")

# Optional: Save to file
oncological_data.to_csv('/home/azureuser/cloudfiles/code/rwep_experiments/alyssa/RLFollow_clean/data/oncological_structured_data.csv', index=False)
print("Oncological decision data saved to 'oncological_structured_data.csv'")

Extracting 79 oncological decision columns out of 165 total columns
Original dataframe shape: (1451, 3)
Oncological dataframe shape: (1451, 79)
Oncological decision data saved to 'oncological_structured_data.csv'


In [9]:
def extract_nlp_data(df):
    """
    Extract columns containing unstructured text data suitable for NLP processing.
    
    Parameters:
    df (pd.DataFrame): The main dataframe with all columns
    
    Returns:
    pd.DataFrame: Dataframe with text data and relevant metadata for NLP processing
    """
    
    # Define columns for NLP processing
    nlp_columns = [
        # Patient ID (required for linking)
        'PTID',
        
        # Imaging Reports
        'DEID_REPORT_TXT_DD48_IMAGING',
        'RESULT_INTERVAL_DD48_IMAGING',
        'REPORT_TYPE_DD48_IMAGING',
        'REPORT_SUBTYPE_DD48_IMAGING',
        'ENCOUNTER_ID_DD48_IMAGING',
        'DEID_ACCESSION_NUMBER_DD48_IMAGING',
        
        # Clinical Notes
        'DEID_NOTE_TXT_DD48_NOTES',
        'NOTE_INTERVAL_DD48_NOTES',
        'NOTE_TYPE_DD48_NOTES',
        'NOTE_SUBTYPE_DD48_NOTES',
        'ENCOUNTER_ID_DD48_NOTES',
        
        # General Reports
        'DEID_REPORT_TXT_DD48_REPORTS',
        'RESULT_INTERVAL_DD48_REPORTS',
        'REPORT_TYPE_DD48_REPORTS',
        'REPORT_SUBTYPE_DD48_REPORTS',
        'ENCOUNTER_ID_DD48_REPORTS',
        'DEID_ACCESSION_NUMBER_DD48_REPORTS',
        
        # Essential clinical context for NLP (helps with interpretation)
        'DX_CODE_DD48_DIAGNOSIS',
        'HISTOLOGY_DD48_DIAGNOSIS',
        'ANATOMIC_SITE_DD48_DIAGNOSIS',
        'INITIAL_AJCC_STAGE_DD48_DIAGNOSIS',
        'AGE_DD48_PATIENT_CHARACTERISTICS',
        'SEX_DD48_PATIENT_CHARACTERISTICS'
    ]
    
    # Check which columns actually exist in the dataframe
    available_columns = [col for col in nlp_columns if col in df.columns]
    missing_columns = [col for col in nlp_columns if col not in df.columns]
    
    if missing_columns:
        print(f"Warning: The following NLP columns were not found in the dataframe:")
        for col in missing_columns:
            print(f"  - {col}")
        print()
    
    print(f"Extracting {len(available_columns)} NLP-relevant columns out of {len(df.columns)} total columns")
    
    # Extract the relevant columns
    nlp_df = df[available_columns].copy()
    
    # Count non-null text entries for each text column
    text_columns = [col for col in available_columns if 'TXT' in col]
    print("\nText data summary:")
    for col in text_columns:
        if col in nlp_df.columns:
            non_null_count = nlp_df[col].notna().sum()
            total_count = len(nlp_df)
            print(f"  {col}: {non_null_count:,} non-null entries ({non_null_count/total_count*100:.1f}%)")
    
    return nlp_df

In [10]:
# Usage example:
nlp_data = extract_nlp_data(collapsed_df)
print(f"Original dataframe shape: {file_frames.shape}")
print(f"Oncological dataframe shape: {nlp_data.shape}")

# Optional: Save to file
nlp_data.to_csv('/home/azureuser/cloudfiles/code/rwep_experiments/alyssa/RLFollow_clean/data/unstructured_oncological_decision_data.csv', index=False)
print("Oncological decision data saved to 'unstructured_oncological_decision_data.csv'")

Extracting 24 NLP-relevant columns out of 165 total columns

Text data summary:
  DEID_REPORT_TXT_DD48_IMAGING: 1,451 non-null entries (100.0%)
  DEID_NOTE_TXT_DD48_NOTES: 1,451 non-null entries (100.0%)
  DEID_REPORT_TXT_DD48_REPORTS: 1,451 non-null entries (100.0%)
Original dataframe shape: (1451, 3)
Oncological dataframe shape: (1451, 24)
Oncological decision data saved to 'unstructured_oncological_decision_data.csv'
