In [7]:
import pandas as pd
import csv
import os
import time
from collections import defaultdict
import numpy as np
import itertools
import db_structure
import db_structure_v2
import networkx as nx
from imp import reload
import matplotlib.pyplot as plt
import utilities as u
from flask import jsonify
import logging
# import exceptions
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype

logging.basicConfig(format=' %(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
%load_ext autoreload
%aimport -web
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Visualise how the database tables relate to one another as a directed graph.
# NOTE(review): `db` is not created in any cell visible here — confirm where it comes from.
draw_db = db

G = nx.DiGraph()
G.add_nodes_from(draw_db.table_names)
for parent in draw_db.table_names:
    # Sibling tables are connected in both directions.
    for peer in draw_db.find_table_siblings(parent):
        G.add_edges_from([(parent, peer), (peer, parent)])
    # Child tables only get a parent -> child edge.
    G.add_edges_from((parent, kid) for kid in draw_db.find_table_children(parent))
plt.figure(figsize=(10, 10))

# Option 1: networkx's built-in matplotlib drawing.
nx.draw_networkx(G, node_shape="None", width=0.2)

# Option 2 (disabled): round-trip the graph through pygraphviz and draw it
# with a spring layout instead.
#A = nx.nx_agraph.to_agraph(G)
#H = nx.nx_agraph.from_agraph(A)
#nx.draw_spring(H, node_shape="None", with_labels=True, width=0.2)

In [150]:
# Create the DBMaker for the TOPICC dataset directory. os.path.join yields the
# same 'datasets\TOPICC' on Windows while remaining valid on other platforms.
x = db_structure_v2.DBMaker(directory_path=os.path.join('datasets', 'TOPICC'))

In [151]:
# Build the database; the recorded run logs one "Writing <TABLE> to db" line per table.
# NOTE(review): overwrite=True presumably replaces an existing database — confirm in DBMaker.create_db.
x.create_db(overwrite=True)

 2019-12-17 13:51:43,159 - INFO - Writing CARDIACPROCEDURES to db
 2019-12-17 13:51:44,634 - INFO - Writing CARDIACSURGERY to db
 2019-12-17 13:51:45,263 - INFO - Writing CAREPROCESSES to db
 2019-12-17 13:51:46,342 - INFO - Writing CATASTROPHICEVENTS to db
 2019-12-17 13:51:46,656 - INFO - Writing CPR to db
 2019-12-17 13:51:47,021 - INFO - Writing DEATH to db
 2019-12-17 13:51:47,562 - INFO - Writing HOSPITALADMIT to db
 2019-12-17 13:51:49,261 - INFO - Writing HOSPITALADMIT_CE to db
 2019-12-17 13:51:49,696 - INFO - Writing HOSPITALDISCHARGE to db
 2019-12-17 13:51:51,045 - INFO - Writing LIMITOFCARE to db
 2019-12-17 13:51:51,511 - INFO - Writing PHYSIOSTATUS to db
 2019-12-17 13:51:53,854 - INFO - Writing PICUADMIT to db
 2019-12-17 13:51:55,175 - INFO - Writing PICUADMIT_ADMITSECONDDX to db
 2019-12-17 13:51:55,802 - INFO - Writing PICUADMIT_AHD to db
 2019-12-17 13:51:56,174 - INFO - Writing PICUADMIT_CHD to db
 2019-12-17 13:51:56,622 - INFO - Writing PICUADMIT_CHRONICDX to db


In [157]:
# Create the DBLinker for the TOPICC dataset directory. os.path.join yields the
# same 'datasets\TOPICC' on Windows while remaining valid on other platforms.
y = db_structure_v2.DBLinker(directory_path=os.path.join('datasets', 'TOPICC'))

In [158]:
# Register 'PudID' with the linker (presumably as a foreign key shared across tables — confirm in DBLinker).
# NOTE: in the recorded run this raised "Linking has been finalized"; per that message,
# the .metadata file must be deleted before linking can be redone.
y.add_global_fk('PudID')

Exception: Linking has been finalized. Delete the .metadata file if you want to re-do it

In [None]:
# Finalize the linking step on the DBLinker.
# NOTE(review): this cell has no execution count in the saved notebook — confirm whether it still needs to run.
y.finalize()

In [198]:
# Create the DBExtractor for the TOPICC dataset directory. os.path.join yields the
# same 'datasets\TOPICC' on Windows while remaining valid on other platforms.
z = db_structure_v2.DBExtractor(directory_path=os.path.join('datasets', 'TOPICC'))

In [166]:
# Ask the extractor for paths between the CPR and CARDIACPROCEDURES tables.
# The recorded run returned an empty list.
z.find_paths_between_tables('CPR', 'CARDIACPROCEDURES')

[]

In [171]:
# Ask the extractor for paths that cover all three tables.
# The recorded run returned two orderings, both starting at CPR.
z.find_paths_multi_tables(['HOSPITALADMIT', 'DEATH', 'CPR'])

[['CPR', 'HOSPITALADMIT', 'DEATH'], ['CPR', 'DEATH', 'HOSPITALADMIT']]

In [239]:
# Fetch three columns along the HOSPITALADMIT -> CAREPROCESSES path and report
# how long the extraction takes, in seconds.
path = ['HOSPITALADMIT', 'CAREPROCESSES']
columns_of_interest = [
    'HOSPITALADMIT.AdmitThroughED',
    'HOSPITALADMIT.Sex',
    'CAREPROCESSES.MechVent',
]
# perf_counter is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock changes.
start = time.perf_counter()
df = z.get_df_from_path(path, table_columns_of_interest=columns_of_interest)
end = time.perf_counter()
print(end - start)

 2019-12-17 15:27:55,869 - INFO - SELECT HOSPITALADMIT.AdmitThroughED, HOSPITALADMIT.Sex, CAREPROCESSES.MechVent FROM HOSPITALADMIT JOIN CAREPROCESSES ON HOSPITALADMIT.PudID = CAREPROCESSES.PudID 


0.09100008010864258


In [240]:
# Display the extracted frame (columns AdmitThroughED, Sex, MechVent in the recorded output).
# NOTE(review): consider df.head() to keep the saved output small.
df

Unnamed: 0,AdmitThroughED,Sex,MechVent
0,No,Male,No
1,Yes,Female,No
2,Yes,Male,No
3,Yes,Male,No
4,Yes,Female,Yes
...,...,...,...
10073,No,Male,No
10074,No,Female,No
10075,No,Male,Yes
10076,No,Male,No


In [251]:
# Allowed values per group-by column; each becomes a 'list'-type filter entry.
allowed_values = {
    'AdmitThroughED': ['Yes', 'No'],
    'Sex': ['Male', 'Female'],
}
filters = {
    column: {'type': 'list', 'filter': values}
    for column, values in allowed_values.items()
}
# Count MechVent within each (AdmitThroughED, Sex) group, applying the filters above.
new_df = z.aggregate_df(
    df,
    groupby_columns=['AdmitThroughED', 'Sex'],
    filters=filters,
    aggregate_column='MechVent',
    aggregate_fxn='Count',
)

 2019-12-17 15:31:23,948 - DEBUG - Aggregate by ['AdmitThroughED', 'Sex']
 2019-12-17 15:31:23,950 - DEBUG - Aggregate for MechVent


In [253]:
# NOTE: in the recorded run this raised KeyError. The recorded `new_df.columns` output shows
# 'MechVent' as the *name* of the columns index (labels are 'No', 'Yes', 'groupby_labels'),
# not as a column label, so there is nothing called 'MechVent' to drop.
# NOTE(review): if the goal is to strip that axis name, new_df.rename_axis(columns=None)
# would do it — confirm the intent before changing this cell.
new_df.drop(columns='MechVent')

KeyError: "['MechVent'] not found in axis"

In [257]:
# Inspect the row index of the aggregated frame (recorded run: Int64Index([3, 2, 1, 0])).
new_df.index

Int64Index([3, 2, 1, 0], dtype='int64')

In [258]:
# Inspect the column labels of the aggregated frame; in the recorded run the
# columns index itself is named 'MechVent'.
new_df.columns

Index(['No', 'Yes', 'groupby_labels'], dtype='object', name='MechVent')