In [1]:
import os
import pandas as pd
import datetime
import random
import numpy as np
from tqdm import tqdm
from functools import partial
from typing import NamedTuple
from collections import defaultdict, Counter
from scipy.stats.contingency import crosstab, relative_risk, odds_ratio
from c_index_util import get_censoring_dist, concordance_index
from scipy._lib._util import rng_integers

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [None]:
class Metrics(NamedTuple):
    """Immutable bundle of the three summary statistics reported per analysis."""

    # risk ratio of the exposed group relative to the control group
    relative_risk: float
    # odds ratio estimated from the 2x2 exposure-by-outcome table
    odds_ratio: float
    # concordance index of the score against the (censored) event times
    c_index: float

def format_data(data: pd.DataFrame, max_followup: int, include_age: bool=False) -> tuple:
    """
    Prepare the data for the evaluation.

    The input frame is never modified: all work happens on a private copy,
    so the function can safely be called repeatedly on the same frame or on
    a slice of a larger frame.

    Parameters
    ----------
    data : pd.DataFrame
        The input data. The columns read are "Cancer (YES | NO)",
        "Exam Date", "Date of Last Negative Mammogram",
        "Date of Cancer Diagnosis", "Density", "Patient ID" and, when
        ``include_age`` is set, "Age at Exam". The three date columns must
        already be datetime-typed.
    max_followup : int
        The truncation time in years
    include_age : bool
        When True, the ages of the retained rows are returned as a fifth
        element.

    Returns
    -------
    tuple
        ``(density, cancer_label, censor_time, patient_id)`` restricted to
        the valid rows, followed by ``ages`` when ``include_age`` is True.
    """
    # work on a copy so the caller's frame (or slice) is left untouched
    data = data.copy()

    # patients without cancer get a diagnosis date 100 years after the exam,
    # which places them beyond any follow-up window
    no_cancer = data["Cancer (YES | NO)"] == 0
    data.loc[no_cancer, "Date of Cancer Diagnosis"] = data.loc[
        no_cancer, "Exam Date"
    ] + datetime.timedelta(days=int(100 * 365))

    if include_age:
        ages = data["Age at Exam"]
    density = np.array(data["Density"])
    date = data["Exam Date"]
    ever_has_cancer = data["Cancer (YES | NO)"]
    last_negative_date = data["Date of Last Negative Mammogram"]
    cancer_date = data["Date of Cancer Diagnosis"]

    # time of censoring relative to the exam date, capped at max_followup
    days_to_last_negative = (last_negative_date - date).dt.days
    years_to_last_negative = np.minimum(days_to_last_negative // 365, max_followup)

    # time of cancer diagnosis relative to the exam date, in whole years
    years_to_cancer = (cancer_date - date).dt.days // 365

    # positives are those who developed cancer within the follow-up period
    cancer_label = (years_to_cancer <= max_followup) & ever_has_cancer

    # a row is valid when it is a negative with complete follow-up (and a
    # last-negative exam strictly after the index exam), or a positive whose
    # diagnosis does not precede the exam
    valid_rows = (
        (years_to_last_negative >= max_followup) & (days_to_last_negative > 0)
    ) | (cancer_label & (years_to_cancer >= 0))

    # event time for positives, censoring time for everyone else
    censor_time = np.where(cancer_label, years_to_cancer, years_to_last_negative)

    # remove invalid rows
    density = density[valid_rows]
    cancer_label = cancer_label[valid_rows]
    censor_time = censor_time[valid_rows]
    patient_id = data["Patient ID"][valid_rows]
    if include_age:
        ages = ages[valid_rows]
        return density, cancer_label, censor_time, patient_id, ages

    return density, cancer_label, censor_time, patient_id


def calculate_metrics(density, cancer_label, censor_time, density_cutoff=3) -> Metrics:
    """
    Compute relative risk, odds ratio and c-index of breast density as a
    cancer predictor.

    Parameters
    ----------
    density : array-like
        Density score per exam; must support ``>=``, ``.min()`` and ``.max()``
    cancer_label : array-like
        True cancer labels
    censor_time : array-like
        Time of censoring (cast to int32 before use)
    density_cutoff : int
        Scores greater than or equal to this value count as dense.
        Default is 3.

    Returns
    -------
    Metrics
        A named tuple with
        - relative_risk : float
            Risk of cancer in the dense group relative to the non-dense group
        - odds_ratio : float
            Odds ratio of the dense-by-cancer 2x2 table
        - c_index : float
            Concordance index of the min-max normalised density

    Raises
    ------
    KeyError
        If one of the four dense/cancer combinations cannot be formed
        because a whole level (e.g. no dense exams) is absent from the input.
    """
    censor_time = np.int32(censor_time)
    binary_density = np.int16(density >= density_cutoff)

    # cross-tabulate exposure (dense / not dense) against outcome
    xtab = crosstab(binary_density, cancer_label)
    density_levels, outcome_levels = xtab.elements
    cells = {
        ("dense" if level == 1 else "not_dense")
        + "_"
        + ("cancer" if outcome == 1 else "no_cancer"): xtab.count[row][col]
        for row, level in enumerate(density_levels)
        for col, outcome in enumerate(outcome_levels)
    }

    dense_pos = cells["dense_cancer"]
    dense_neg = cells["dense_no_cancer"]
    sparse_pos = cells["not_dense_cancer"]
    sparse_neg = cells["not_dense_no_cancer"]

    rr = relative_risk(
        exposed_cases=dense_pos,
        exposed_total=dense_pos + dense_neg,
        control_cases=sparse_pos,
        control_total=sparse_pos + sparse_neg,
    )
    oratio = odds_ratio(np.array([[dense_pos, dense_neg], [sparse_pos, sparse_neg]]))

    # c-index: the censoring distribution is estimated on the same (test) data
    censoring_dist = get_censoring_dist(censor_time, cancer_label)
    lowest, highest = density.min(), density.max()
    normalized_density = (density - lowest) / (highest - lowest)
    c_index = concordance_index(
        event_times=censor_time,
        predicted_scores=normalized_density,
        event_observed=cancer_label,
        censoring_dist=censoring_dist,
    )
    return Metrics(rr.relative_risk, oratio.statistic, c_index)


def calculate_age_metrics(
    ages, cancer_label, censor_time, age_cutoffs=(50, 60, 70)
) -> list:
    """
    Compute relative risk, odds ratio and c-index of "age at or above a
    cutoff" as a cancer predictor, once per cutoff.

    Parameters
    ----------
    ages : array-like
        Age of the patients; must support ``>=`` against a scalar
    cancer_label : array-like
        True cancer labels
    censor_time : array-like
        Time of censoring (cast to int32 before use)
    age_cutoffs : iterable of numbers
        Thresholds to evaluate. A patient counts as "old" when
        ``age >= cutoff``. Default is (50, 60, 70).

    Returns
    -------
    list of Metrics
        One entry per cutoff, in the order given, each with
        - relative_risk : float
            Risk of cancer in the old group relative to the young group
        - odds_ratio : float
            Odds ratio of the old-by-cancer 2x2 table
        - c_index : float
            Concordance index of the binary old/young indicator

    Raises
    ------
    KeyError
        If for some cutoff a whole level (e.g. nobody at or above the
        cutoff) is absent, so a 2x2 cell cannot be formed.
    """
    censor_time = np.int32(censor_time)
    results = []
    for age in age_cutoffs:
        # 1 for patients at or above the cutoff, 0 otherwise
        binary_ages = np.int16(ages >= age)
        contingency_table = crosstab(binary_ages, cancer_label)
        contingency_table_dict = {}
        age_values, cancer_values = contingency_table.elements
        for i, binary_age in enumerate(age_values):
            age_key = "old" if binary_age == 1 else "young"
            for j, cancer in enumerate(cancer_values):
                cancer_key = "cancer" if cancer == 1 else "no_cancer"
                contingency_table_dict[f"{age_key}_{cancer_key}"] = (
                    contingency_table.count[i][j]
                )

        rr = relative_risk(
            exposed_cases=contingency_table_dict["old_cancer"],
            exposed_total=contingency_table_dict["old_cancer"]
            + contingency_table_dict["old_no_cancer"],
            control_cases=contingency_table_dict["young_cancer"],
            control_total=contingency_table_dict["young_cancer"]
            + contingency_table_dict["young_no_cancer"],
        )
        table = np.array(
            [
                [
                    contingency_table_dict["old_cancer"],
                    contingency_table_dict["old_no_cancer"],
                ],
                [
                    contingency_table_dict["young_cancer"],
                    contingency_table_dict["young_no_cancer"],
                ],
            ]
        )
        oratio = odds_ratio(table)

        # calculate c-index
        censoring_dist = get_censoring_dist(
            censor_time, cancer_label
        )  # NOTE: computed on test set
        c_index = concordance_index(
            event_times=censor_time,
            predicted_scores=binary_ages,
            event_observed=cancer_label,
            censoring_dist=censoring_dist,
        )
        results.append(Metrics(rr.relative_risk, oratio.statistic, c_index))

    return results


def ci_bounds(mean, deltas, confidence_interval, num_resamples):
    '''
    Returns the lower and upper bounds of the confidence interval.

    mean : the empirical mean (point estimate)
    deltas : deltas from the empirical mean from different samples
    confidence_interval : coverage of the interval as a fraction, e.g. 0.95
    num_resamples : number of resamples used to size the tail that is cut
        off on each side; should equal len(deltas)

    The bounds are mean - upper_delta and mean - lower_delta, where the two
    deltas are read from the sorted deltas at the tail offset.

    Raises IndexError when deltas is empty or shorter than the tail offset.
    '''
    deltas = np.sort(deltas)
    index_offset = int((1 - confidence_interval)/2. * num_resamples)
    lower_delta = deltas[index_offset]
    # With a zero offset `deltas[-0]` is `deltas[0]`, i.e. the smallest delta,
    # which would collapse the interval; the largest delta is the right value.
    upper_delta = deltas[-index_offset] if index_offset > 0 else deltas[-1]
    lower_bound, upper_bound = mean - upper_delta, mean - lower_delta
    return lower_bound, upper_bound

def sample_two_stage_clustered_bootstrap(cluster_ids, n_bootstrap=1000, random_state=None):
    """
    Two-stage clustered bootstrap: resample clusters with replacement, then
    resample the members of every drawn cluster with replacement.

    Parameters:
    - cluster_ids: 1D array-like with same length as data; equal values mark
      rows belonging to the same cluster
    - n_bootstrap: number of bootstrap replicates
    - random_state: anything accepted by ``np.random.default_rng`` (None, an
      int seed, or an existing Generator, which is then advanced in place)

    Returns:
    - resampled_indices_list: list of ``n_bootstrap`` integer arrays, each
      holding the positional row indices of one bootstrap replicate. Replicates
      may differ in length because clusters differ in size.

    Raises:
    - ValueError: if ``cluster_ids`` is empty
    """
    rng = np.random.default_rng(random_state)
    cluster_ids = np.asarray(cluster_ids)
    if cluster_ids.size == 0:
        raise ValueError("cluster_ids is empty; nothing to resample")

    # Map every row to the ordinal of its cluster
    unique_clusters, inverse_indices = np.unique(cluster_ids, return_inverse=True)
    inverse_indices = inverse_indices.reshape(-1)
    n_clusters = len(unique_clusters)

    # First stage: cluster ordinals for all replicates at once,
    # shape (n_bootstrap, n_clusters)
    sampled_cluster_ids = rng.integers(0, n_clusters, size=(n_bootstrap, n_clusters))

    # Group row indices by cluster with one stable sort instead of one full
    # scan per cluster; stability keeps each group's indices in ascending order.
    order = np.argsort(inverse_indices, kind="stable")
    cluster_sizes = np.bincount(inverse_indices, minlength=n_clusters)
    cluster_to_indices = np.split(order, np.cumsum(cluster_sizes)[:-1])

    resampled_indices_list = []
    for b in tqdm(range(n_bootstrap), ncols=100, desc="Bootstrap samples", leave=False):
        pieces = []
        for cid in sampled_cluster_ids[b]:
            members = cluster_to_indices[cid]
            # Second stage: redraw members within the cluster. Singletons are
            # passed through without consuming random numbers.
            if len(members) > 1:
                members = members[rng.integers(0, len(members), size=len(members))]
            pieces.append(members)
        resampled_indices_list.append(np.concatenate(pieces))
    return resampled_indices_list

def get_confidence_interval(density, cancer_label, censor_time, patient_ids, confidence_level=0.95, n_bootstrap=1000, random_state=0, statistic_func = None):
    """
    Bootstrap confidence intervals for relative risk, odds ratio and c-index,
    resampling patients (clusters) and then exams within patients.

    Parameters
    ----------
    density : np.ndarray
        Score per exam, indexed with integer position arrays. Holds density
        scores, or ages when ``statistic_func == "age"``.
    cancer_label : pd.Series
        Cancer label per exam (accessed positionally through ``.iloc``)
    censor_time : np.ndarray
        Censoring / event time per exam
    patient_ids : array-like
        Cluster identifier per exam
    confidence_level : float
        Coverage of the interval, e.g. 0.95
    n_bootstrap : int
        Number of bootstrap replicates requested
    random_state : None, int or np.random.Generator
        Seed for the resampling. One generator is created from it and shared
        by the initial draw and all top-up draws, so top-ups never repeat
        earlier replicates.
    statistic_func : str or None
        "age" evaluates ``calculate_age_metrics`` at the cutoff 70; anything
        else evaluates ``calculate_metrics`` with ``density_cutoff=3``.

    Returns
    -------
    tuple
        ``(stats, n_used)`` where ``stats`` maps each metric name to
        ``(point_estimate, lower_bound, upper_bound)`` and ``n_used`` is the
        number of replicates that entered the interval.

    Raises
    ------
    ValueError
        If no replicate containing a positive case could be drawn.
    """
    rng = np.random.default_rng(random_state)

    def _has_positive(sample_idx):
        # replicates without any cancer case cannot yield the 2x2 table
        return cancer_label.iloc[sample_idx].sum() > 0

    indices = [
        i
        for i in sample_two_stage_clustered_bootstrap(patient_ids, n_bootstrap=n_bootstrap, random_state=rng)
        if _has_positive(i)
    ]

    # top up in batches until enough usable replicates exist (at most 100 rounds)
    batch_size = max(1, n_bootstrap // 10)
    retry = 0
    while (len(indices) < n_bootstrap) and (retry < 100):
        new_indices = sample_two_stage_clustered_bootstrap(
            patient_ids,
            n_bootstrap=batch_size,
            random_state=rng,
        )
        indices.extend(i for i in new_indices if _has_positive(i))
        retry += 1

    indices = indices[:n_bootstrap]
    if not indices:
        raise ValueError("no bootstrap replicate contained a positive case")

    if statistic_func == "age":
        metrics = [ calculate_age_metrics(density[i], cancer_label.iloc[i], censor_time[i], [70])[0]  for i in indices]
        emp_mean = calculate_age_metrics(density, cancer_label, censor_time, [70])[0]
    else:
        metrics = [ calculate_metrics(density[i], cancer_label.iloc[i], censor_time[i], density_cutoff=3)  for i in indices]
        emp_mean = calculate_metrics(density, cancer_label, censor_time, 3)

    stats = {}
    for metric_name in ['relative_risk', 'odds_ratio', 'c_index']:
        metric_bootstrap = np.array([getattr(m, metric_name) for m in metrics])
        metric_mean = getattr(emp_mean, metric_name)
        deltas = metric_bootstrap - metric_mean
        # size the tails by the replicates actually available, which can be
        # fewer than n_bootstrap when the top-up loop gave up
        lower_bound, upper_bound = ci_bounds(metric_mean, deltas, confidence_level, len(metrics))
        stats[metric_name] = (metric_mean, lower_bound, upper_bound)
    return stats, len(indices)


In [None]:
# Number of bootstrap replicates requested for every confidence interval below.
NBOOTSTRAP = 1000

In [None]:
# Cohort label -> list of median ages; turned into a DataFrame and exported at the end.
median_ages = defaultdict(list)

# MGH

In [None]:
# Column name -> list of cell values; one entry per column is appended for every result row.
minitable = defaultdict(list)

## MGH Data - AI

In [None]:
# Load the MGH exams; the cells below read the date, cancer, density, age,
# ethnicity and patient-id columns from this frame.
mgh_ai_data = pd.read_csv("mgh_ai_data.csv") # change path accordingly

In [None]:
# Median age at exam over all loaded rows (computed before any validity filtering).
median_ages['MGH-AI'].append( np.median(np.array(mgh_ai_data["Age at Exam"])))

In [None]:
# Parse the date columns once; the conversion does not depend on the follow-up window.
for col in [
    "Exam Date",
    "Date of Last Negative Mammogram",
    "Date of Cancer Diagnosis",
]:
    mgh_ai_data[col] = pd.to_datetime(mgh_ai_data[col])

# NOTE(review): random_state=None makes every interval below differ between runs.
for max_followup in [0,4]:
    density, cancer_label, censor_time, patient_ids = format_data(mgh_ai_data, max_followup)

    # density CI on the whole cohort; the table reports the horizon as max_followup + 1
    metrics, empirical_nboots = get_confidence_interval(density, cancer_label, censor_time, patient_ids, confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None)
    minitable["max_followup"].append(max_followup+1)
    for metric_name in ("relative_risk", "odds_ratio", "c_index"):
        estimate, lower, upper = metrics[metric_name]
        minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
    minitable["group"].append("AI density")
    minitable["nbootstraps"].append(empirical_nboots)

    # age groups CI: a group whose metrics cannot be computed is reported and skipped
    for group in ["30-40", "40-50", "50-60", "60-70", "70-80"]:
        age_lower, age_upper = (int(bound) for bound in group.split("-"))
        # copy so the helper never operates on a view of the full frame
        group_data = mgh_ai_data[
            (mgh_ai_data["Age at Exam"] >= age_lower) & (mgh_ai_data["Age at Exam"] < age_upper)
        ].copy()
        if len(group_data) == 0:
            continue
        try:
            density, cancer_label, censor_time, patient_ids = format_data(group_data, max_followup)
            metrics, empirical_nboots = get_confidence_interval(density, cancer_label, censor_time, patient_ids, confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None)
        except Exception as exc:
            # best effort, but say why the row is missing instead of hiding it
            print(f"Skipping AI density age group {group} (max_followup={max_followup}): {exc!r}")
            continue
        minitable["group"].append("AI density - Age: " + str(age_lower) + "-" + str(age_upper))
        minitable["max_followup"].append(max_followup+1)
        for metric_name in ("relative_risk", "odds_ratio", "c_index"):
            estimate, lower, upper = metrics[metric_name]
            minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
        minitable["nbootstraps"].append(empirical_nboots)

    # race / ethnicity CI
    for group in mgh_ai_data["Ethnicity"].unique():
        # a missing ethnicity matches no row under `==`, so it cannot form a group
        if pd.isna(group) or group in ["Ninguno", "Otro", "Amerindio", "Gitano (Rom)"]:
            continue
        group_data = mgh_ai_data[mgh_ai_data["Ethnicity"] == group].copy()
        density, cancer_label, censor_time, patient_ids = format_data(group_data, max_followup)
        metrics, empirical_nboots = get_confidence_interval(density, cancer_label, censor_time, patient_ids, confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None)
        minitable["group"].append(f"AI density - race:{group}")
        minitable["max_followup"].append(max_followup+1)
        for metric_name in ("relative_risk", "odds_ratio", "c_index"):
            estimate, lower, upper = metrics[metric_name]
            minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
        minitable["nbootstraps"].append(empirical_nboots)



In [None]:
# Drop the AI frame; none of the cells that follow read it.
del mgh_ai_data

## MGH Data - Radiologist

In [None]:
# Load the MGH radiologist exams (no explicit dtypes here, unlike the combined-data load further down).
mgh_rad_data = pd.read_csv("mgh_radiologist_data.csv") # change path accordingly

In [None]:
# median age at exam over all loaded rows (computed before any validity filtering)
median_ages['MGH-Rad'].append( np.median(np.array(mgh_rad_data["Age at Exam"])))

In [None]:
# Radiologist density and age analyses for each follow-up window.
for max_followup in [0, 4]:
    for col in ("Exam Date", "Date of Last Negative Mammogram", "Date of Cancer Diagnosis"):
        mgh_rad_data[col] = pd.to_datetime(mgh_rad_data[col])
    density, cancer_label, censor_time, patient_ids, ages = format_data(
        mgh_rad_data, max_followup, include_age=True
    )

    # density CI
    metrics, empirical_nboots = get_confidence_interval(
        density, cancer_label, censor_time, patient_ids,
        confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None,
    )
    minitable["max_followup"].append(max_followup + 1)
    for metric_name in ("relative_risk", "odds_ratio", "c_index"):
        estimate, lower, upper = metrics[metric_name]
        minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
    minitable["group"].append("Radiologist density")
    minitable["nbootstraps"].append(empirical_nboots)

    # age CI
    # NOTE(review): the helper thresholds with `ages >= 70` while the row label says "> 70".
    ages = np.array(ages)
    metrics, empirical_nboots = get_confidence_interval(
        ages, cancer_label, censor_time, patient_ids,
        confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None,
        statistic_func="age",
    )
    minitable["max_followup"].append(max_followup + 1)
    for metric_name in ("relative_risk", "odds_ratio", "c_index"):
        estimate, lower, upper = metrics[metric_name]
        minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
    minitable["group"].append("Radiologist Age > 70")
    minitable["nbootstraps"].append(empirical_nboots)

In [None]:
# Freeze the accumulated rows into a table and store it as sheet "MGH".
minitable = pd.DataFrame(minitable)
# Append mode requires an existing workbook and, by default, refuses to write a
# sheet that is already there. Create the file on a first run and replace the
# sheet on later runs so the cell can be re-executed.
if os.path.exists('mgh_output.xlsx'):
    excel_writer_options = {"mode": "a", "if_sheet_exists": "replace"}
else:
    excel_writer_options = {"mode": "w"}
# openpyxl is named explicitly because it is the engine that supports append mode
with pd.ExcelWriter('mgh_output.xlsx', engine="openpyxl", **excel_writer_options) as writer:
    minitable.to_excel(writer, sheet_name='MGH', index=False)

In [None]:
# Drop the radiologist frame; the combined-data section below reloads the same CSV with explicit dtypes.
del mgh_rad_data

# Combined Data

In [None]:
# Start a fresh result table for the combined-data section (the MGH table was exported above).
minitable = defaultdict(list)

In [None]:
# Reload the MGH radiologist exams with explicit column types, tag them with
# their site, lower-case the ethnicity labels and parse the date columns.
mgh_rad_data = pd.read_csv(
    "mgh_radiologist_data.csv",  # change path accordingly
    dtype={'Patient ID': str, 'Exam ID': str, 'Density': int, 'Age at Exam': float},
)
mgh_rad_data['SITE'] = 'MGH'
mgh_rad_data['Ethnicity'] = mgh_rad_data['Ethnicity'].str.lower()
for col in ("Exam Date", "Date of Last Negative Mammogram", "Date of Cancer Diagnosis"):
    mgh_rad_data[col] = pd.to_datetime(mgh_rad_data[col])

In [None]:
# Load the exams of the remaining sites with the same column types as the
# MGH frame and parse the date columns.
other_sites_data = pd.read_csv(
    "all_sites_data.csv",  # change path accordingly
    dtype={'Patient ID': str, 'Exam ID': str, 'Density': int, 'Age at Exam': float},
)
for col in ("Exam Date", "Date of Last Negative Mammogram", "Date of Cancer Diagnosis"):
    other_sites_data[col] = pd.to_datetime(other_sites_data[col])

In [None]:
# Stack MGH and the other sites into one cohort. A fresh 0..n-1 index is
# assigned because both inputs carry their own row numbers from read_csv,
# and duplicated labels make label-aligned operations on the result fragile.
aggregate_data = pd.concat([mgh_rad_data, other_sites_data], ignore_index=True)

In [None]:
# Median age at exam over the combined cohort (computed before any validity filtering).
median_ages['Aggregate'].append( np.median(np.array(aggregate_data["Age at Exam"])))

In [None]:
# NOTE(review): random_state=None makes every interval below differ between runs.
for max_followup in [0,4]:

    density, cancer_label, censor_time, patient_ids, ages = format_data(aggregate_data, max_followup, include_age=True)

    # density CI on the whole cohort; the table reports the horizon as max_followup + 1
    metrics, empirical_nboots = get_confidence_interval(density, cancer_label, censor_time, patient_ids, confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None)
    minitable["max_followup"].append(max_followup+1)
    for metric_name in ("relative_risk", "odds_ratio", "c_index"):
        estimate, lower, upper = metrics[metric_name]
        minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
    minitable["group"].append("Density")
    minitable["nbootstraps"].append(empirical_nboots)

    # age CI
    # NOTE(review): the helper thresholds with `ages >= 70` while the row label says "> 70".
    ages = np.array(ages)
    metrics, empirical_nboots = get_confidence_interval(ages, cancer_label, censor_time, patient_ids, confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None, statistic_func="age")
    minitable["max_followup"].append(max_followup+1)
    for metric_name in ("relative_risk", "odds_ratio", "c_index"):
        estimate, lower, upper = metrics[metric_name]
        minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
    minitable["group"].append("Age > 70")
    minitable["nbootstraps"].append(empirical_nboots)

    # age groups CI: a group whose metrics cannot be computed is reported and skipped
    for group in ["30-40", "40-50", "50-60", "60-70", "70-80"]:
        age_lower, age_upper = (int(bound) for bound in group.split("-"))
        # copy so the helper never operates on a view of the full frame
        group_data = aggregate_data[
            (aggregate_data["Age at Exam"] >= age_lower) & (aggregate_data["Age at Exam"] < age_upper)
        ].copy()
        if len(group_data) == 0:
            continue
        try:
            density, cancer_label, censor_time, patient_ids = format_data(group_data, max_followup)
            metrics, empirical_nboots = get_confidence_interval(density, cancer_label, censor_time, patient_ids, confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None)
        except Exception as exc:
            # best effort, but say why the row is missing instead of hiding it
            print(f"Skipping age group {group} (max_followup={max_followup}): {exc!r}")
            continue
        minitable["group"].append("Age: " + str(age_lower) + "-" + str(age_upper))
        minitable["max_followup"].append(max_followup+1)
        for metric_name in ("relative_risk", "odds_ratio", "c_index"):
            estimate, lower, upper = metrics[metric_name]
            minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
        minitable["nbootstraps"].append(empirical_nboots)

    # race / ethnicity CI
    for group in aggregate_data["Ethnicity"].unique():
        # a missing ethnicity matches no row under `==`, so it cannot form a group
        if pd.isna(group) or group in ["Ninguno", "Otro", "Amerindio", "Gitano (Rom)"]:
            continue
        group_data = aggregate_data[aggregate_data["Ethnicity"] == group].copy()
        density, cancer_label, censor_time, patient_ids = format_data(group_data, max_followup)
        metrics, empirical_nboots = get_confidence_interval(density, cancer_label, censor_time, patient_ids, confidence_level=0.95, n_bootstrap=NBOOTSTRAP, random_state=None)
        minitable["group"].append(f"race:{group}")
        minitable["max_followup"].append(max_followup+1)
        for metric_name in ("relative_risk", "odds_ratio", "c_index"):
            estimate, lower, upper = metrics[metric_name]
            minitable[metric_name].append(f"{estimate:.2f} ({lower:.2f}-{upper:.2f})")
        minitable["nbootstraps"].append(empirical_nboots)

In [None]:
# Freeze the accumulated rows and the median ages into tables and store them
# as sheets "Aggregate" and "Median-Ages".
minitable = pd.DataFrame(minitable)
median_ages = pd.DataFrame(median_ages)
# Append mode requires an existing workbook and, by default, refuses to write a
# sheet that is already there. Create the file on a first run and replace the
# sheets on later runs so the cell can be re-executed.
if os.path.exists('combined_output.xlsx'):
    excel_writer_options = {"mode": "a", "if_sheet_exists": "replace"}
else:
    excel_writer_options = {"mode": "w"}
# openpyxl is named explicitly because it is the engine that supports append mode
with pd.ExcelWriter('combined_output.xlsx', engine="openpyxl", **excel_writer_options) as writer:
    minitable.to_excel(writer, sheet_name='Aggregate', index=False)
    median_ages.to_excel(writer, sheet_name='Median-Ages', index=False)