In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

import funcs.utils as utils
import funcs.plotting as plot
import funcs.amyloid as amyloid
from tqdm import tqdm

import matplotlib
# Use font type 42 (TrueType) for both PDF and PostScript output so that text
# in saved figures is embedded as real text instead of Type 3 glyph outlines.
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

In [2]:
# Directory (relative to the working directory) holding the processed TSV tables
# that this notebook reads.
PROCESSED_DIR = "data/processed"

### 1. Load Data
---

In [3]:
# Read the processed dataset (first column is the index) and map its column
# names through amyloid.ddict_unclean.
dataset_path = os.path.join(PROCESSED_DIR, "dataset_processed.tsv")
data_df = pd.read_csv(dataset_path, sep='\t', index_col=0).rename(columns=amyloid.ddict_unclean)

# Fix Dates: parse every column listed in amyloid.dates to datetime
# (format="mixed" infers the format per value) and place the parsed columns
# in front of the remaining, untouched columns.
raw_dates = data_df[amyloid.dates]
parsed_dates = pd.concat(
    [pd.to_datetime(raw_dates[var], format="mixed") for var in amyloid.dates],
    axis=1,
    keys=amyloid.dates,
)
data_df = parsed_dates.join(data_df.drop(amyloid.dates, axis=1))

# Add clusters: attach the 'fna3_cluster_n' label by index (DataFrame.join
# defaults to a left join, so rows without a label get NaN here).
clusters_path = os.path.join(PROCESSED_DIR, "AL_with_ccp_03.tsv")
cluster_labels = pd.read_csv(clusters_path, sep="\t", index_col=0).rename(columns=amyloid.ddict_unclean)['fna3_cluster_n']
data_df = data_df.join(cluster_labels)

# Keep only the rows that have a cluster assignment.
has_cluster = data_df['fna3_cluster_n'].notna()
data_df = data_df[has_cluster]

### 2. Comparisons
---

In [5]:
# Names of the data_df columns holding the staging labels; the cells below
# compare these columns against a stage label to select subgroups.
cardiac_stage = "BU (BNP-based) cardiac staging"
renal_stage = "Renal Stage (Palladini)"

In [7]:
# Number of rows per cluster among those labelled cardiac "stage IIIb".
(
    data_df
    .loc[lambda frame: frame[cardiac_stage] == "stage IIIb"]
    .groupby("fna3_cluster_n")
    .size()
)

fna3_cluster_n
High            92
Intermediate    29
Low             45
dtype: int64

In [15]:
# Restrict to rows labelled renal "Stage I", then run the cluster comparisons
# on that subgroup. The third argument is the directory handed to the helper
# (presumably where it writes results, with `tag` marking the subgroup —
# confirm in funcs.utils).
renal_stage_1_df = data_df[data_df[renal_stage] == "Stage I"]
utils.run_statistical_comparisons(
    renal_stage_1_df,
    "fna3_cluster_n",
    "data/clustering/full_na_dataset/subgroup_comparisons",
    tag="_renal_stage_1",
)

100%|██████████| 3/3 [00:00<00:00, 82.40it/s]


In [18]:
# Run the same cluster comparison once per advanced cardiac stage, in the
# order IIIb then III. Each pair is (label matched in the cardiac staging
# column, tag passed to the helper). The directory is handed to the helper
# unchanged (presumably its output location — confirm in funcs.utils).
subgroup_dir = "data/clustering/full_na_dataset/subgroup_comparisons"
for stage_label, stage_tag in (
    ("stage IIIb", "_cardiac_stage_3b"),
    ("stage III", "_cardiac_stage_3"),
):
    utils.run_statistical_comparisons(
        data_df[data_df[cardiac_stage] == stage_label],
        "fna3_cluster_n",
        subgroup_dir,
        tag=stage_tag,
    )

100%|██████████| 3/3 [00:00<00:00, 104.02it/s]
100%|██████████| 3/3 [00:00<00:00, 108.98it/s]


In [17]:
# Cluster comparisons restricted to rows labelled cardiac "stage II".
utils.run_statistical_comparisons(
    data_df.loc[lambda frame: frame[cardiac_stage] == "stage II"],
    "fna3_cluster_n",
    "data/clustering/full_na_dataset/subgroup_comparisons",
    tag="_cardiac_stage_2",
)

100%|██████████| 3/3 [00:00<00:00, 95.14it/s]
