# Behavioral profile stratification via unsupervised learning

In [1]:
# Project-local helpers: database access and cohort statistics from `dataset`,
# token/vocabulary/feature builders from `features`.
# NOTE(review): the recorded run of this cell printed "Created directory /odf-data: ...",
# so one of these imports appears to create a directory at import time — confirm which.
# NOTE(review): `data_path` is not used by any cell visible in this section.
from dataset import access_db, demographics, data_path
# NOTE(review): wildcard import hides where names come from. The cells below rely on
# create_tokens, behr_level1..behr_level4, create_vocabulary, create_behr and
# create_features_data; consider importing them explicitly.
from features import *

Created directory /odf-data: 2019-07-05-15-02-10



### Package `dataset`

**Access odf-lab database**

:return:
- `list` of Patient class objects
- `dictionary` of Pencounters objects per patient
- `dictionary` of pandas dataframes per table in the DB

**Compute standard demographic statistics**

In [2]:
# Load everything from the lab database in one call: the list of patients, the
# per-patient encounters, and one dataframe per database table.
subj_list, p_enc, df_dict = access_db()

In [3]:
# Print the cohort summary: period span, number of subjects, assessment and
# encounter statistics, instrument list, age and sex distribution.
# NOTE(review): in the recorded output the median/max/min lines that follow
# "Average number of encounters" are labelled "assessments" — looks like a label
# slip inside `demographics`; confirm and fix it there.
demographics(subj_list, p_enc)

Period span: 01/05/2013 -- 31/10/2018

N of subjects: 205

Average number of assessments: 5.522
Median number of assessments: 5.0
Maximum number of assessments: 24
Minimum number of assessments: 1

Average number of encounters: 1.439
Median number of assessments: 1.0
Maximum number of assessments: 5
Minimum number of assessments: 1

Instrument list:
emotionalavailabilityscales
griffithsmentaldevelopmentscales
leiterinternationalperformancescale-revised
ados-2modulo2
ados-2modulo1
wisc-iv
ados-2modulo3
wisc-iii
srs
wais-iv
wppsi-iiifascia26-311
psi-sf
wppsi-iiifascia40-73
ados-2modulotoddler
wppsi
vineland-ii
N of selected instruments: 16


Mean age of the subjects: 11.045999008009547 -- Standard deviation: 5.097775697784719
N Female: 34 -- N Male: 171



### Package `features`

**Create raw behavioral EHRs**

:parameters:

`dictionary` of dataframes per table in the DB

:return:

`dictionary` list of [Pinfo, behavioral tokens ordered by date of assessment] per subject

**Filter tokens from raw behavioral EHRs according to depth level**

:parameters:

`dictionary` list of [Pinfo, behavioral tokens] per subject

:return:

`dictionary` list of [Pinfo, filtered tokens wrt the level]

**Create BEHRs and vocabulary**

* `create_vocabulary`

    :parameters:

    `dictionary` list of [Pinfo, tokens] per subject
    
    `int` level

    :return:

    `dictionaries` idx to term, term to idx
    
* `create_behr`

    :parameters:
    
    `dictionary` list of [Pinfo, tokens] per subject
    
    `dictionary` vocabulary (the first value returned by `create_vocabulary`)
    
    `int` level
    
    :return:
    
    `dictionary` of list of tuples (DOA, [instrument tokens]) per subject
    
**Create feature data (quantitative scores)**

* `create_features_data`

    :parameters:
    
    `dictionary` output of behr_level4
    
    :return:
    
    mean-imputed dataframe
    
    normalized (column-wise) dataframe

In [4]:
# Build the raw behavioral token sequences from the per-table dataframes.
# The call also prints the average sequence length.
raw_behr = create_tokens(df_dict)

Average length of behavioral sequences: 5.522



### Behavioral EHRs Level-1

In [5]:
# Filter the raw tokens down to depth level 1.
lev1 = behr_level1(raw_behr)

In [6]:
# Build the level-1 vocabulary; only the first returned mapping is kept and the
# second is discarded. The call prints the vocabulary size.
# NOTE(review): binding `_` in a notebook shadows IPython's last-output variable;
# a named throwaway would avoid that.
v1, _ = create_vocabulary(lev1, level=1)
# Build the level-1 BEHRs from the filtered tokens and that vocabulary.
out_behr_lev1 = create_behr(lev1, v1, level=1)

Vocabulary size:1349



### Behavioral EHRs Level-2

In [7]:
# Filter the raw tokens down to depth level 2.
lev2 = behr_level2(raw_behr)

In [8]:
# Build the level-2 vocabulary (first returned mapping only), then the level-2
# BEHRs from the filtered tokens and that vocabulary.
v2, _ = create_vocabulary(lev2, level=2)
out_behr_lev2 = create_behr(lev2, v2, level=2)

Vocabulary size:1198



### Behavioral EHRs Level-3

In [9]:
# Filter the raw tokens down to depth level 3.
lev3 = behr_level3(raw_behr)

In [10]:
# Build the level-3 vocabulary (first returned mapping only), then the level-3
# BEHRs from the filtered tokens and that vocabulary.
v3, _ = create_vocabulary(lev3, level=3)
out_behr_lev3 = create_behr(lev3, v3, level=3)

Vocabulary size:514



### Behavioral EHRs Level-4

In [11]:
# Filter the raw tokens down to depth level 4; this is also the input for the
# quantitative feature tables built further down.
lev4 = behr_level4(raw_behr)

In [12]:
# Build the level-4 vocabulary (first returned mapping only), then the level-4
# BEHRs from the filtered tokens and that vocabulary.
v4, _ = create_vocabulary(lev4, level=4)
out_behr_lev4 = create_behr(lev4, v4, level=4)

Vocabulary size:1167



### Feature data Level-4

In [13]:
# Build the quantitative score tables from the level-4 tokens: `feat_df` is the
# mean-imputed frame, `feat_scaled_df` its column-wise normalized counterpart.
feat_df, feat_scaled_df = create_features_data(lev4)

In [14]:
# Show the normalized feature table.
# NOTE(review): entries that are ~0 (e.g. 3.777446e-15) line up with cells that
# hold the repeated column value in `feat_df`, presumably the imputed mean — confirm.
feat_scaled_df

Unnamed: 0,F1::ados::comparison_score,F1::ados::rrb_tot,F1::ados::sa_tot,F1::gmds::GQ,F1::gmds::q_A,F1::gmds::q_B,F1::gmds::q_C,F1::gmds::q_D,F1::gmds::q_E,F1::gmds::q_F,...,F5::leiter::BIQ,F5::leiter::composite_fr,F5::srs::caretaker::raw_rirb,F5::srs::caretaker::raw_tot,F5::vineland::caretaker::standard_ABC,F5::vineland::caretaker::standard_CD,F5::vineland::caretaker::standard_DLSD,F5::vineland::caretaker::standard_MSD,F5::vineland::caretaker::standard_SD,F5::wechsler::FSIQ
lab1439,3.777446e-15,0.000000,-3.151313e-15,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab1060,3.777446e-15,0.000000,-3.151313e-15,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab1365,3.777446e-15,0.000000,-3.151313e-15,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab1191,3.777446e-15,0.000000,-3.151313e-15,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab0993,3.777446e-15,3.517871,-3.681116e+00,5.465014e+00,5.872477,5.221796e+00,1.619905e+00,4.801251e+00,3.667214e+00,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab1224,-2.835351e+00,-0.185151,-1.020068e+00,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab1222,3.777446e-15,0.000000,-3.151313e-15,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab1218,3.777446e-15,-0.185151,2.527995e+00,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab1217,3.777446e-15,0.000000,-3.151313e-15,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000
lab0879,3.777446e-15,0.000000,-3.151313e-15,-3.733775e-15,0.000000,3.304729e-15,2.281285e-15,-3.427072e-15,2.593922e-15,0.000000,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,-2.897639e-15,0.0,0.000000,0.000000


In [15]:
# Show the mean-imputed (unscaled) feature table for comparison with the normalized one.
feat_df

Unnamed: 0,F1::ados::comparison_score,F1::ados::rrb_tot,F1::ados::sa_tot,F1::gmds::GQ,F1::gmds::q_A,F1::gmds::q_B,F1::gmds::q_C,F1::gmds::q_D,F1::gmds::q_E,F1::gmds::q_F,...,F5::leiter::BIQ,F5::leiter::composite_fr,F5::srs::caretaker::raw_rirb,F5::srs::caretaker::raw_tot,F5::vineland::caretaker::standard_ABC,F5::vineland::caretaker::standard_CD,F5::vineland::caretaker::standard_DLSD,F5::vineland::caretaker::standard_MSD,F5::vineland::caretaker::standard_SD,F5::wechsler::FSIQ
lab1439,5.666667,3.05,12.15,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab1060,5.666667,3.05,12.15,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab1365,5.666667,3.05,12.15,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab1191,5.666667,3.05,12.15,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab0993,5.666667,4.00,8.00,95.0,111.0,93.000000,65.000000,96.000000,113.000000,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab1224,5.000000,3.00,11.00,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab1222,5.666667,3.05,12.15,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab1218,5.666667,3.00,15.00,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab1217,5.666667,3.05,12.15,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
lab0879,5.666667,3.05,12.15,74.2,87.0,70.545455,54.909091,76.090909,92.909091,72.0,...,59.5,59.5,22.0,169.0,49.0,34.0,48.666667,22.0,37.0,67.0
