In [2]:
import pandas as pd
import csv
import os
import time
from collections import defaultdict
import numpy as np
import itertools
import db_structure
import db_structure_v2
import networkx as nx
from imp import reload
import matplotlib.pyplot as plt
import utilities as u
from flask import jsonify
import logging
# import exceptions
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype

logging.basicConfig(format=' %(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
%load_ext autoreload
%aimport -web
%autoreload 2

 2019-12-19 10:25:02,321 - DEBUG - backend module://ipykernel.pylab.backend_inline version unknown


In [None]:
draw_db = db

G = nx.DiGraph()
G.add_nodes_from(draw_db.table_names)
for table_name in draw_db.table_names:
    for sibling in draw_db.find_table_siblings(table_name):
        G.add_edge(table_name, sibling)
        G.add_edge(sibling, table_name)
    for child in draw_db.find_table_children(table_name):
        G.add_edge(table_name, child)
plt.figure(figsize=(10,10))

#draw with networkx built-ins
nx.draw_networkx(G, node_shape="None", width=0.2)

#OR

#draw using pygraphviz
#A = nx.nx_agraph.to_agraph(G)
#H = nx.nx_agraph.from_agraph(A)
#nx.draw_spring(H, node_shape="None", with_labels=True, width=0.2)

In [150]:
x = db_structure_v2.DBMaker(directory_path='datasets\\TOPICC')

In [151]:
x.create_db(overwrite=True)

 2019-12-17 13:51:43,159 - INFO - Writing CARDIACPROCEDURES to db
 2019-12-17 13:51:44,634 - INFO - Writing CARDIACSURGERY to db
 2019-12-17 13:51:45,263 - INFO - Writing CAREPROCESSES to db
 2019-12-17 13:51:46,342 - INFO - Writing CATASTROPHICEVENTS to db
 2019-12-17 13:51:46,656 - INFO - Writing CPR to db
 2019-12-17 13:51:47,021 - INFO - Writing DEATH to db
 2019-12-17 13:51:47,562 - INFO - Writing HOSPITALADMIT to db
 2019-12-17 13:51:49,261 - INFO - Writing HOSPITALADMIT_CE to db
 2019-12-17 13:51:49,696 - INFO - Writing HOSPITALDISCHARGE to db
 2019-12-17 13:51:51,045 - INFO - Writing LIMITOFCARE to db
 2019-12-17 13:51:51,511 - INFO - Writing PHYSIOSTATUS to db
 2019-12-17 13:51:53,854 - INFO - Writing PICUADMIT to db
 2019-12-17 13:51:55,175 - INFO - Writing PICUADMIT_ADMITSECONDDX to db
 2019-12-17 13:51:55,802 - INFO - Writing PICUADMIT_AHD to db
 2019-12-17 13:51:56,174 - INFO - Writing PICUADMIT_CHD to db
 2019-12-17 13:51:56,622 - INFO - Writing PICUADMIT_CHRONICDX to db


In [32]:
y = db_structure_v2.DBLinker(directory_path='datasets\\TOPICC')

In [33]:
y.add_global_fk('PudID')

 2019-12-19 10:50:28,554 - DEBUG - ['CARDIACPROCEDURES', 'CARDIACSURGERY', 'CAREPROCESSES', 'CATASTROPHICEVENTS', 'CPR', 'DEATH', 'HOSPITALADMIT', 'HOSPITALADMIT_CE', 'HOSPITALDISCHARGE', 'LIMITOFCARE', 'PHYSIOSTATUS', 'PICUADMIT', 'PICUADMIT_ADMITSECONDDX', 'PICUADMIT_AHD', 'PICUADMIT_CHD', 'PICUADMIT_CHRONICDX', 'PICUDISCHARGE', 'PICUDISCHARGE_DISCHRONICDX', 'PICUDISCHARGE_DISSECONDDX', 'SURGERY']


In [34]:
y.finalize()

In [38]:
z = db_structure_v2.DBExtractor(directory_path='datasets\\TOPICC')

In [166]:
z.find_paths_between_tables('CPR', 'CARDIACPROCEDURES')

[]

In [171]:
z.find_paths_multi_tables(['HOSPITALADMIT', 'DEATH', 'CPR'])

[['CPR', 'HOSPITALADMIT', 'DEATH'], ['CPR', 'DEATH', 'HOSPITALADMIT']]

In [47]:
path = ['HOSPITALADMIT', 'CAREPROCESSES', 'DEATH']
start = time.time()
df = z.get_df_from_path(path, table_columns_of_interest=[('HOSPITALADMIT', 'PudID'), ('HOSPITALADMIT', 'Sex'), ('CAREPROCESSES', 'MechVent'), ('DEATH', 'DeathMode')])
end = time.time()
print(end-start)

 2019-12-19 10:55:54,171 - INFO - SELECT HOSPITALADMIT.PudID AS PatientID, HOSPITALADMIT.Sex AS Sex, CAREPROCESSES.MechVent AS MechVent, DEATH.DeathMode AS DeathMode FROM HOSPITALADMIT JOIN CAREPROCESSES ON HOSPITALADMIT.PudID = CAREPROCESSES.PudID JOIN DEATH ON CAREPROCESSES.PudID = DEATH.PudID 


0.04999995231628418


In [48]:
df

Unnamed: 0,PatientID,Sex,MechVent,DeathMode
0,27,Female,Yes,Withdrawal of care
1,78,Male,Yes,Withdrawal of care
2,125,Female,Yes,Failed resuscitation
3,154,Female,Yes,Failed resuscitation
4,155,Male,Yes,Withdrawal of care
...,...,...,...,...
270,9925,Female,Yes,Brain death
271,9932,Male,No,Withdrawal of care
272,9964,Male,Yes,Brain death
273,9990,Male,No,Withdrawal of care


In [41]:
df['DeathMode'].value_counts()

Withdrawal of care      141
Failed resuscitation     53
Limitation of care       46
Brain death              35
Name: DeathMode, dtype: int64

In [260]:
filters = {
    'AdmitThroughED': {'type': 'list', 'filter': ['Yes', 'No']},
    'Sex': {'type': 'list', 'filter': ['Male', 'Female']}
}
new_df = z.aggregate_df(df, groupby_columns=['AdmitThroughED', 'Sex'], filters=filters)

 2019-12-18 10:21:04,282 - DEBUG - Aggregate by ['AdmitThroughED', 'Sex']


In [278]:
sorted(new_df.loc[:,'groupby_labels'].unique(), key=lambda x: x.upper())

['No_Female', 'No_Male', 'Yes_Female', 'Yes_Male']

In [281]:
x, y = pd.cut(new_df['Count'], bins=2)

ValueError: too many values to unpack (expected 2)

In [283]:
for i in u.pairwise([1,2]):
    print(i)

(1, 2)


In [284]:
'3'.split('.')

['3']

In [285]:
str((5, 3))

'(5, 3)'

In [286]:
path = ['HOSPITALADMIT', 'CAREPROCESSES', 'PHYSIOSTATUS']
df = z.get_df_from_path(path, table_columns_of_interest=['HOSPITALADMIT.Sex', 'CAREPROCESSES.MechVent', 'PHYSIOSTATUS.LowpH'])

 2019-12-18 13:21:56,268 - INFO - SELECT HOSPITALADMIT.Sex, CAREPROCESSES.MechVent, PHYSIOSTATUS.LowpH FROM HOSPITALADMIT JOIN CAREPROCESSES ON HOSPITALADMIT.PudID = CAREPROCESSES.PudID JOIN PHYSIOSTATUS ON CAREPROCESSES.PudID = PHYSIOSTATUS.PudID 


In [305]:
filters = {
    'MechVent': {'type': 'list', 'filter': ['Yes']},
    'LowpH': {'type': 'range', 'filter': {'min': 6.8, 'max': 6.9, 'bins': 4}},
    'Sex': None
}

z.aggregate_df(df, groupby_columns=['MechVent', 'LowpH'], filters=filters, aggregate_column='Sex')

 2019-12-18 13:50:48,644 - DEBUG - Aggregate by ['MechVent', 'LowpH']
 2019-12-18 13:50:48,646 - DEBUG - Aggregate for Sex


Sex,Female,Male,groupby_labels
0,4,3,"Yes_(6.79, 6.81]"
1,0,2,"Yes_(6.81, 6.83]"
2,5,1,"Yes_(6.83, 6.85]"
3,5,8,"Yes_(6.85, 6.9]"


In [297]:
len(f)

20

In [298]:
f.sort_values(by='LowpH')

Unnamed: 0,Sex,MechVent,LowpH
320,Male,Yes,6.8
9104,Male,Yes,6.8
5035,Male,Yes,6.8
6915,Male,Yes,6.82
6707,Male,Yes,6.83
7723,Male,Yes,6.85
2975,Male,Yes,6.86
517,Male,Yes,6.87
986,Male,Yes,6.87
9827,Male,Yes,6.89


In [35]:
w = db_structure_v2.DBCustomizer(directory_path='datasets\\TOPICC')

In [36]:
w.rename_column('CARDIACPROCEDURES', 'PudID', 'PatientID')

In [37]:
w.dump_customization()

In [25]:
x = [(1, 2), (3, 4)]
for i, j in x:
    print(i)
    print(j)

1
2
3
4
