In [1]:
from matplotlib import pyplot as plt 
import numpy as np 
import pandas as pd 
import plotly.express as px
import plotly.io as pio
import plotly.graph_objs as go
import plotly.offline as pyo
import pickle

from collections import Counter

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

from matplotlib import rc

### Load pickle

In [2]:
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
# The context manager closes the file even if unpickling raises.
with open('data/ADC_FTLD_subtypes_agecorrected_zscore_final.pickle', 'rb') as read_input:
    load_inputs = pickle.load(read_input)

# The pickle holds three objects, unpacked in this order
T, S, X = load_inputs

- T --> timeline (timeline object)

- S --> subjects (dict)

In [3]:
# Number of entries stored under S['atypicality']
len(S['atypicality'])

1080

In [4]:
# Display the raw atypicality values (the array contains NaN entries)
S['atypicality']

array([3.77340938, 9.41622155, 4.62668588, ...,        nan,        nan,
              nan])

### Create diagnosis variable

- FTD = Frontotemporal Dementia

In [39]:
subtypes, atypical = S['subtypes'], S['atypicality']

# A record whose subtype is NaN is labelled 'Control'; every other record is labelled "FTD"
diagnosis = [
    "FTD" if not np.isnan(subtype) else 'Control'
    for subtype in subtypes
]

In [40]:
# Number of records per diagnosis label
counter = {label: n_records for label, n_records in Counter(diagnosis).items()}
counter

{'FTD': 399, 'Control': 681}

In [41]:
# check = ['Control' if np.isnan(stage) else "FTD" for stage in staging]
# counter = dict(Counter(check))
# counter

In [42]:
# Display the first 50 atypicality values
S['atypicality'][0:50]

array([ 3.77340938,  9.41622155,  4.62668588,  7.33794599, 16.59190552,
        3.44938702, 12.24217853, 12.23141099,  6.0023154 , 12.59100386,
        9.04929997, 12.08461199,  5.72702371, 17.13140078,  9.68853373,
        9.49001125, 21.90321315,  6.6561032 ,  3.7936146 ,  3.94649783,
        2.31856854,  4.23023695,  9.74827541,  3.81768523,  2.4377852 ,
        2.23540365, 18.23339926,  5.18047789,  5.4147483 ,  7.13871566,
        3.86258724, 12.68981967,  7.18920314, 11.52189123,  8.20195775,
       14.17164068,  5.31789805, 12.7504213 , 12.78733112,  1.39922972,
        3.36835973,  4.62393041, 10.71174243,  4.10906568, 11.39750226,
       11.27105022,  6.80940824,  2.11410596,  7.6231642 , 20.05737424])

### Convert NaNs to 0.0

In [47]:
# Read the values straight from S: no `atypicality` variable is defined above
# this cell, so iterating over that name fails on a fresh kernel.
atypical = [np.float64(0.0) if np.isnan(x) else x for x in S['atypicality']]
np.max(atypical)

37.735109628692555

### Get labels

In [12]:
# Sort the unique labels so their order is identical on every kernel run
# (the iteration order of a set of strings is not stable between runs).
labels = sorted(set(diagnosis))
labels

['Control', 'FTD']

In [16]:
# Hex colour palette for the plots
color_list = ['#4daf4a','#377eb8','#e41a1c', '#ffff00']

### Get indexes

In [17]:
diagnosis = np.array(diagnosis)
# Build the array from S rather than from a pre-existing `atypicality`
# variable, which is not defined by any earlier cell.
atypicality = np.array(S['atypicality'])

# Positions of the records belonging to each diagnosis group
idx_Control = np.where(diagnosis == 'Control')[0]
idx_FTD = np.where(diagnosis == 'FTD')[0]
len(idx_FTD)

399

In [18]:
# Unique diagnosis labels, sorted so the displayed order is reproducible
sorted(set(diagnosis))

['Control', 'FTD']

## Atypicality function

In [74]:
def atypicality(S, diagnosis, color_list=['#000000'], num_bins=10, bin_width=0.02):
    """
    Creates a grouped barplot of the atypicality distribution per diagnosis label.

    NaN atypicality values are replaced by 0.0 before binning. Each diagnosis
    group is binned separately with ``np.histogram`` (bin edges are therefore
    computed per group) and the bin counts are divided by the total number of
    records in ``S['atypicality']``.

    :param S: dictionary, Snowphlake output; the key 'atypicality' is read
    :param diagnosis: np.array or list; with diagnosis labels corresponding to records in S
    :param color_list: list with color hex values (a single hex string is also accepted);
                       colors are reused cyclically when there are more labels than colors
    :param num_bins: int, how many bins should be displayed
    :param bin_width: float, width of every bar in x-axis units
    :return: plotly go Bar figure
    :raises ValueError: if diagnosis and S['atypicality'] differ in length
    """

    # Convert NaNs to 0.0
    raw_values = np.asarray(S['atypicality'], dtype=float)
    atypical = np.where(np.isnan(raw_values), 0.0, raw_values)

    # Count number of records per diagnostic label
    counter = Counter(diagnosis)

    # Sorted labels keep the trace/colour/legend order identical between runs
    # (plain set iteration order is not stable for strings).
    labels = sorted(set(diagnosis), key=str)

    diagnosis = np.asarray(diagnosis)
    if len(diagnosis) != len(atypical):
        raise ValueError(
            f"diagnosis has {len(diagnosis)} labels but S['atypicality'] "
            f"has {len(atypical)} values"
        )

    # Normalise the colour argument; the (mutable) default list is never modified
    colors = [color_list] if isinstance(color_list, str) else list(color_list)
    if not colors:
        colors = ['#000000']

    # Every bar gets the same width
    bar_widths = np.repeat(bin_width, num_bins)

    fig = go.Figure()

    for position, label in enumerate(labels):
        idx = np.where(diagnosis == label)[0]

        # Histogram of this group only, expressed as a fraction of ALL records
        freq, bin_edges = np.histogram(atypical[idx], bins=num_bins)
        freq = freq / len(atypical)

        fig.add_trace(go.Bar(
            x=bin_edges[:-1],  # bars are positioned at the left bin edges
            y=freq,
            name=f'{label} (n = {counter[label]})',
            width=bar_widths,
            # wrap around instead of failing when labels outnumber colours
            marker_color=colors[position % len(colors)]
        ))

    fig.update_layout(
        title="Atypicality",
        title_font_size=34,
        title_x=0.5,
        xaxis_title="Value",
        yaxis_title="Frequency of occurences",
        xaxis=dict(
            tickmode='linear',
            tick0=0.0,
            dtick=2
        ),
        barmode='group',
        legend_font_size=16,
        legend=dict(
            yanchor="top",
            y=0.95,
            xanchor="right",
            x=0.95),
        autosize=False,
        width=1000,
        height=800
    )

    # np.min / np.max raise on empty input, so only set the range when there is data
    if atypical.size > 0:
        fig.update_xaxes(range=[np.min(atypical) - 0.05, np.max(atypical)])

    fig.update_yaxes(title_font_size=18,
                     tickfont_size=14)

    fig.update_xaxes(title_font_size=18,
                     tickfont_size=14)

    return fig

In [75]:
# FOR TESTING
# Records with a NaN subtype are labelled 'Control', all others "FTD"
diagnosis = np.array(
    ['Control' if np.isnan(subtype) else "FTD" for subtype in S['subtypes']]
)

# Example plot settings
num_bins, bin_width = 10, 0.02
color_list = ['#4daf4a', '#377eb8', '#e41a1c', '#ffff00']


In [76]:
# Atypicality barplot per diagnosis group: 20 bins, bars 1.2 wide
fig = atypicality(
    S=S,
    diagnosis=diagnosis,
    color_list=color_list,
    num_bins=20,
    bin_width=1.2,
)
fig

## Atypicality Boxplots function

In [84]:
def atypicality_boxes(S, diagnosis, color_list='#000000'):
    """
    Creates a horizontal boxplot of the atypicality values, one box per diagnosis label.

    NaN atypicality values are replaced by 0.0 before plotting.

    :param S: dictionary, Snowphlake output; the key 'atypicality' is read
    :param diagnosis: np.array or list; with diagnosis labels corresponding to records in S
    :param color_list: list with color hex values, or a single hex string applied to
                       every box; colors are reused cyclically when there are more
                       labels than colors
    :return: plotly go Box figure
    :raises ValueError: if diagnosis and S['atypicality'] differ in length
    """

    # Convert NaNs to 0.0
    raw_values = np.asarray(S['atypicality'], dtype=float)
    atypical = np.where(np.isnan(raw_values), 0.0, raw_values)

    # Sorted labels keep the box/colour order identical between runs
    # (plain set iteration order is not stable for strings).
    labels = sorted(set(diagnosis), key=str)

    diagnosis = np.asarray(diagnosis)
    if len(diagnosis) != len(atypical):
        raise ValueError(
            f"diagnosis has {len(diagnosis)} labels but S['atypicality'] "
            f"has {len(atypical)} values"
        )

    # A bare string (the default) is one colour, not a sequence of characters:
    # indexing it directly would hand single characters such as '#' to plotly.
    colors = [color_list] if isinstance(color_list, str) else list(color_list)
    if not colors:
        colors = ['#000000']

    fig = go.Figure()

    for position, label in enumerate(labels):
        idx = np.where(diagnosis == label)[0]
        fig.add_trace(go.Box(x=atypical[idx], name=label,
                             fillcolor=colors[position % len(colors)],
                             line_color='#000000'))

    # np.min / np.max raise on empty input, so only set the range when there is data
    if atypical.size > 0:
        fig.update_xaxes(range=[np.min(atypical) - 0.05, np.max(atypical)])

    fig.update_layout(
        title="Atypicality - Boxplots",
        title_font_size=34,
        title_x=0.5,
        xaxis_title="Value",
        yaxis_title="Diagnosis",
        xaxis=dict(
            tickmode='linear',
            tick0=0.0,
            dtick=2
        ),
        legend_font_size=16,
        legend=dict(
            yanchor="top",
            y=0.97,
            xanchor="right",
            x=0.97),
        autosize=False,
        width=1000,
        height=600
    )

    fig.update_yaxes(title_font_size=18,
                     tickfont_size=14)

    fig.update_xaxes(title_font_size=18,
                     tickfont_size=14)

    return fig

In [85]:
# FOR TESTING
# Records with a NaN subtype are labelled 'Control', all others "FTD"
diagnosis = np.array(
    ['Control' if np.isnan(subtype) else "FTD" for subtype in S['subtypes']]
)
color_list = ['#4daf4a', '#377eb8', '#e41a1c', '#ffff00']

In [86]:
# Atypicality boxplots, one box per diagnosis group
fig = atypicality_boxes(
    S=S,
    diagnosis=diagnosis,
    color_list=color_list,
)
fig