## Demographic tables

### Imports

In [3]:
import json
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

### Local dataset configs

In [38]:
# Location of the JSON file describing the local dataset store.
ds_config_path = Path("../../PD-challenge-ds-store/ds_config.json")

# Parse the config first; the values of interest are pulled out afterwards so
# the file handle is held only for as long as the read takes.
with ds_config_path.open("r") as f:
    ds_config = json.load(f)

# Root directory of the dataset store, and the per-dataset settings keyed by
# dataset name.
ds_store_path = Path(ds_config["ds_store_path"])
data_sources = ds_config["data_sources"]

# Names of every dataset configured in the store.
PD_challenge_datasets = data_sources.keys()
print("PD_challenge_datasets:", PD_challenge_datasets)

PD_challenge_datasets: dict_keys(['qpn', 'calgary', 'nimhans-metal', 'nimhans-serb', 'nimhans-ylo'])


### Load and merge demographic tables

In [42]:
def load_tabular_data(data_sources, ds_name):
    tabular_data = data_sources[ds_name]["tabular_data"]
    print("Tabular files:", tabular_data)

    index_columns = data_sources[ds]["index_columns"]
    print("Index columns:", index_columns)

    # merge all tabular files
    demographics_df = pd.DataFrame()
    for table_name in tabular_data.keys():
        table_info = tabular_data[table_name]
        table_file_path = ds_store_path / ds / table_info["file_name"]
        table_column_map = table_info["column_map"]
        # invert the column map to get original column names
        inverted_column_map = {v: k for k, v in table_column_map.items()}
        # read the table
        df = pd.read_csv(table_file_path)[index_columns + list(table_column_map.values())]
        # rename columns to original names
        df = df.rename(columns=inverted_column_map)
        # merge with demographics_df
        if demographics_df.empty:
            demographics_df = df
        else:
            demographics_df = pd.merge(demographics_df, df, on=index_columns, how='inner')
            
    return demographics_df

In [44]:
# Dataset to tabulate. Any key of `data_sources` is valid here, e.g. "qpn" or
# "nimhans-metal".
ds = "nimhans-metal"

# Merge all configured tabular files for the selected dataset.
demographics_df = load_tabular_data(data_sources=data_sources, ds_name=ds)

# Preview the first five rows of the merged table.
demographics_df.head(5)

Tabular files: {'demographics': {'file_name': 'demographics.csv', 'column_map': {'sex': 'sex', 'diagnosis': 'diagnosis_group_for_analysis'}}}
Index columns: ['participant_id']


KeyError: "['diagnosis_group_for_analysis'] not in index"