In [1]:
from matplotlib import pyplot as plt 
import numpy as np 
import pandas as pd 
import plotly.express as px
import plotly.io as pio
import plotly.graph_objs as go
import plotly.offline as pyo
import pickle

from collections import Counter

import plotly.graph_objects as go
import plotly.express as px

from matplotlib import rc

### Load pickle

In [2]:
# NOTE: pickle.load can execute arbitrary code while unpickling; only load files
# from a trusted source.
# The context manager closes the file even if unpickling raises.
with open('data/ADC_FTLD_subtypes_agecorrected_zscore_final.pickle', 'rb') as read_input:
    load_inputs = pickle.load(read_input)

# The pickle holds a 3-item sequence, unpacked here as T, S and X.
T, S, X = load_inputs

- T --> timeline (timeline object)

- S --> subjects (dict)

In [3]:
# Number of entries in the 'staging' field of the subjects dict.
len(S['staging'])

1080

### Create diagnosis variable

- FTD = Frontotemporal Dementia

In [10]:
# Per-subject subtype and stage values taken from the subjects dict.
subtypes, staging = S['subtypes'], S['staging']

# A NaN subtype is labelled 'Control'; any other value is labelled "FTD".
diagnosis = [
    "FTD" if not np.isnan(subtype) else 'Control'
    for subtype in subtypes
]

In [11]:
# Number of records carrying each diagnosis label, as a plain dict keyed by label.
counter = {label: n for label, n in Counter(diagnosis).items()}
counter

{'FTD': 399, 'Control': 681}

In [14]:
# check = ['Control' if np.isnan(stage) else "FTD" for stage in staging]
# counter = dict(Counter(check))
# counter

### Convert NaNs to 0.0

In [45]:
# NaN stage values are replaced by 0.0; every other stage is kept unchanged.
staging = [stage if not np.isnan(stage) else np.float64(0.0) for stage in staging]

# dic = {'Diagnosis': diagnosis, 'Staging': staging}
# dic

# data = pd.DataFrame(dic)
# data

### Get labels

In [77]:
# Sort the unique labels so their order is deterministic. Iteration order of a set
# of strings can change between interpreter runs (hash randomisation), which would
# silently change which label `labels[i]` refers to in later cells.
labels = sorted(set(diagnosis))
labels

['Control', 'FTD']

### Get indexes

In [23]:
# Switch to arrays so the labels can be compared element-wise and used for indexing.
diagnosis = np.array(diagnosis)
staging = np.array(staging)

# Positions of the records in each diagnostic group
# (np.where returns a 1-tuple for these 1-D label arrays).
(idx_Control,) = np.where(diagnosis == 'Control')
(idx_FTD,) = np.where(diagnosis == 'FTD')
len(idx_FTD)

399

In [24]:
# Histogram of the control records' stages over 10 equal-width bins,
# expressed as a fraction of all records (not only the controls).
freq, binc = np.histogram(staging[idx_Control], bins=10)
freq = freq / len(staging)
freq

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.63055556, 0.        , 0.        , 0.        , 0.        ])

In [25]:
# FOR TESTING
# Scratch state read by the plotting cell further down.
num_bins = 10
count = -1
freq_all = []

# One bar width per bin.
width = np.repeat(0.02, num_bins)
width

array([0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02])

In [149]:
# Controls & FTD

# Rebuild both arrays straight from the subjects dict:
# NaN subtype -> 'Control', anything else -> "FTD"; NaN stage -> 0.0.
diagnosis = np.array(["FTD" if not np.isnan(subtype) else 'Control' for subtype in S['subtypes']])
staging = np.array([stage if not np.isnan(stage) else np.float64(0.0) for stage in S['staging']])

# Positions of the records in each diagnostic group.
(idx_Control,) = np.where(diagnosis == 'Control')
(idx_FTD,) = np.where(diagnosis == 'FTD')

In [101]:
# Number of staging values.
len(staging)

1080

In [131]:
# Grouped bar chart of the stage distribution for controls and FTD records.
# Each bar is the fraction of ALL records that fall in that bin for that group.

# Pair every index array with its own label explicitly, instead of looking the
# label up by position in a separately built (and unordered) `labels` list.
groups = [('Control', idx_Control), ('FTD', idx_FTD)]

# One set of bin edges shared by every group, so all groups are drawn at the same
# x positions. Binning each group on its own min/max range puts the bars of
# different groups on different, non-comparable x grids.
binc = np.histogram(staging, bins=num_bins)[1]

fig = go.Figure()

for label, idx in groups:
    if len(idx) == 0:
        # An empty group has nothing to draw and must not borrow another label.
        continue

    freq = np.histogram(staging[idx], bins=binc)[0]
    # Per-group frequencies are plotted side by side (barmode='group'), so they
    # are NOT accumulated across groups.
    freq = freq / len(staging)

    fig.add_trace(go.Bar(
        x=binc[:-1],  # bars are positioned at the left edge of their bin
        y=freq,
        name=f'{label} (n = {len(idx)})',
        width=width))

fig.update_layout(
    title="Patient Staging",
    xaxis_title="Disease Stage",
    yaxis_title="Frequency of occurences",
    xaxis=dict(
        tickmode='linear',
        tick0=0.0,
        dtick=0.1
    ),
    barmode='group',
    legend_font_size=16,
    legend=dict(
        yanchor="top",
        y=0.95,
        xanchor="right",
        x=0.95),
    autosize=False,
    width=1000,
    height=800
)

fig.update_xaxes(range=[-0.05, 1.0])

fig.show()

In [159]:
# Raw (un-normalised) per-bin stage counts for the control records.
freq,binc=np.histogram(staging[idx_Control],bins=num_bins)
freq

array([  0,   0,   0,   0,   0, 681,   0,   0,   0,   0])

In [160]:
# Raw (un-normalised) per-bin stage counts for the FTD records.
freq,binc=np.histogram(staging[idx_FTD],bins=num_bins)
freq

array([15, 15, 38, 50, 65, 51, 63, 50, 35, 17])

## Patient staging function

In [202]:
def patient_staging(S, diagnosis, num_bins=10, bin_width=0.02):
    """
    Build a grouped bar chart of disease-stage frequencies, one trace per diagnosis label.

    NaN stages are plotted at stage 0.0. All groups are binned on the same
    ``num_bins`` equal-width bins spanning the full range of stages, and each bar
    height is the fraction of ALL records (not of the group) that fall in that
    bin. Bars are positioned at the left edge of their bin.

    :param S: dictionary, Snowphlake output; only ``S['staging']`` is read
    :param diagnosis: np.array or list; with diagnosis labels corresponding to records in S
    :param num_bins: int, how many bins should be displayed
    :param bin_width: float, width of every bar in x-axis units
    :return: plotly figure with one Bar trace per diagnosis label present in
        ``diagnosis`` ('Control' first when present, remaining labels sorted)
    :raises ValueError: if ``diagnosis`` and ``S['staging']`` differ in length
    """
    # Convert NaNs to 0.0
    staging = np.asarray(S['staging'], dtype=float)
    staging = np.where(np.isnan(staging), 0.0, staging)

    diagnosis = np.asarray(diagnosis)
    if len(diagnosis) != len(staging):
        raise ValueError(
            f"diagnosis has {len(diagnosis)} labels but S['staging'] has "
            f"{len(staging)} records")

    # Count number of each label's occurrences (keys are plain Python values)
    counter = dict(Counter(diagnosis.tolist()))

    # Deterministic trace order: 'Control' first, then the other labels sorted.
    # (Iterating a set of strings has no stable order between interpreter runs.)
    labels = sorted(counter, key=lambda label: (label != 'Control', str(label)))

    # One set of bin edges shared by every group so their bars line up on the
    # same x positions; per-group ranges would shift bars between groups.
    bin_edges = np.histogram(staging, bins=num_bins)[1]
    bar_widths = np.repeat(bin_width, num_bins)

    fig = go.Figure()

    # Only labels that occur in `diagnosis` are iterated, so no group is empty.
    for label in labels:
        group_counts = np.histogram(staging[diagnosis == label], bins=bin_edges)[0]

        fig.add_trace(go.Bar(
            x=bin_edges[:-1],
            y=group_counts / len(staging),
            name=f'{label} (n = {counter[label]})',
            width=bar_widths))

    fig.update_layout(
        title="Patient Staging",
        xaxis_title="Disease Stage",
        yaxis_title="Frequency of occurences",
        xaxis=dict(
            tickmode='linear',
            tick0=0.0,
            dtick=0.1
        ),
        barmode='group',
        legend_font_size=16,
        legend=dict(
            yanchor="top",
            y=0.95,
            xanchor="right",
            x=0.95),
        autosize=False,
        width=1000,
        height=800
    )

    fig.update_xaxes(range=[-0.05, 1.0])

    return fig

In [None]:
# FOR TESTING

# Scratch inputs: NaN subtype -> 'Control', anything else -> "FTD".
diagnosis = np.array(["FTD" if not np.isnan(subtype) else 'Control' for subtype in S['subtypes']])

bin_width, num_bins = 0.02, 10


In [204]:
# Build the staging chart with 10 bins and 0.04-wide bars; the trailing `fig`
# displays it as the cell output.
fig = patient_staging(S=S,
                      diagnosis=diagnosis, 
                      num_bins=10, 
                      bin_width=0.04)
fig