In [1]:
import pandas as pd
import csv
import os
import time
from collections import defaultdict
import numpy as np
import itertools
import db_structure
import networkx as nx
import matplotlib.pyplot as plt
import utilities as u
from flask import jsonify
import logging
# import exceptions
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from web.models import *

# Configure the root logger: timestamp / level / message format, level DEBUG.
logging.basicConfig(format=' %(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Load IPython's autoreload extension. `%aimport -web` excludes the `web`
# package from automatic reloading; `%autoreload 2` reloads all other modules
# before each cell is executed.
%load_ext autoreload
%aimport -web
%autoreload 2

In [None]:
# Draw the table-relationship graph of the database object bound to `db`.
draw_db = db

G = nx.DiGraph()
G.add_nodes_from(draw_db.table_names)
for table_name in draw_db.table_names:
    # Sibling tables are connected in both directions.
    for other in draw_db.find_table_siblings(table_name):
        G.add_edges_from([(table_name, other), (other, table_name)])
    # Child tables get a single edge pointing from this table to the child.
    G.add_edges_from(
        (table_name, kid) for kid in draw_db.find_table_children(table_name)
    )
plt.figure(figsize=(10, 10))

#draw with networkx built-ins
nx.draw_networkx(G, node_shape="None", width=0.2)

#OR

#draw using pygraphviz
#A = nx.nx_agraph.to_agraph(G)
#H = nx.nx_agraph.from_agraph(A)
#nx.draw_spring(H, node_shape="None", with_labels=True, width=0.2)

In [8]:
# Create a DBMaker for the TOPICC dataset, pointed at the datasets/TOPICC
# directory, and call create_db_metadata with dump_to_data_db=True.
# os.path.join keeps the path portable instead of hard-coding the Windows
# separator (the result is unchanged on Windows).
# NOTE(review): the recorded execution counts show the DBDestroyer cell that
# follows ran first (In [7]) and this cell second (In [8]); run top to bottom,
# the database would be created and then removed.
x = db_structure.DBMaker(
    dataset_name='TOPICC',
    directory_path=os.path.join('datasets', 'TOPICC'),
)
x.create_db_metadata(dump_to_data_db=True)

In [7]:
# Call remove_db() on a DBDestroyer for the TOPICC dataset.
# NOTE(review): this cell was executed as In [7], i.e. before the DBMaker cell
# above it (In [8]); in top-to-bottom order it runs after the create step and
# before the cells that use the TOPICC dataset — confirm the intended order.
db_structure.DBDestroyer('TOPICC').remove_db()

In [9]:
# DBLinker handle for the TOPICC dataset (used below to add a global FK).
y = db_structure.DBLinker(dataset_name='TOPICC')

In [10]:
# Presumably registers PudID as a foreign key across every table that has
# that column — confirm against DBLinker.add_global_fk.
y.add_global_fk('PudID')

In [11]:
# DBExtractor handle for the TOPICC dataset (used below to build data frames).
z = db_structure.DBExtractor(dataset_name='TOPICC')

In [12]:
# Time a get_df_from_path call along HOSPITALADMIT -> CAREPROCESSES -> DEATH,
# requesting four (table, column) pairs.
path = ['HOSPITALADMIT', 'CAREPROCESSES', 'DEATH']
# perf_counter() is the monotonic, highest-resolution clock meant for measuring
# short durations; time.time() can be coarse and is affected by system clock
# adjustments.
start = time.perf_counter()
df = z.get_df_from_path(
    path,
    table_columns_of_interest=[
        ('HOSPITALADMIT', 'PudID'),
        ('HOSPITALADMIT', 'Sex'),
        ('CAREPROCESSES', 'MechVent'),
        ('DEATH', 'DeathMode'),
    ],
)
end = time.perf_counter()
# Elapsed wall time in seconds.
print(end - start)

0.040000200271606445


In [13]:
# Display the extracted frame; per the output below, its columns are named
# <TABLE>_<column> (e.g. HOSPITALADMIT_Sex, DEATH_DeathMode).
df

Unnamed: 0,HOSPITALADMIT_PudID,HOSPITALADMIT_Sex,CAREPROCESSES_MechVent,DEATH_DeathMode
0,27,Female,Yes,Withdrawal of care
1,78,Male,Yes,Withdrawal of care
2,125,Female,Yes,Failed resuscitation
3,154,Female,Yes,Failed resuscitation
4,155,Male,Yes,Withdrawal of care
...,...,...,...,...
270,9925,Female,Yes,Brain death
271,9932,Male,No,Withdrawal of care
272,9964,Male,Yes,Brain death
273,9990,Male,No,Withdrawal of care


In [None]:
# Frequency of each death mode. The frame built by get_df_from_path names its
# columns <TABLE>_<column> (see the displayed frame above), so the column is
# 'DEATH_DeathMode'; a bare 'DeathMode' key is not present.
df['DEATH_DeathMode'].value_counts()

In [None]:
# Filter spec keyed by column name; each entry has a 'type' and a 'filter'
# payload (here: lists of accepted values). The spec is passed to aggregate_df
# together with the same two columns as group-by keys.
# NOTE(review): the `df` displayed earlier in this notebook has no
# 'AdmitThroughED' column and names its sex column 'HOSPITALADMIT_Sex' —
# confirm which frame this cell was written against.
filters = {
    'AdmitThroughED': {'type': 'list', 'filter': ['Yes', 'No']},
    'Sex': {'type': 'list', 'filter': ['Male', 'Female']}
}
new_df = z.aggregate_df(df, groupby_columns=['AdmitThroughED', 'Sex'], filters=filters)

In [None]:
# Distinct values of the 'groupby_labels' column, ordered case-insensitively.
sorted(new_df['groupby_labels'].unique(), key=lambda label: label.upper())

In [None]:
# Split the Count column into 2 equal-width bins. retbins=True makes pd.cut
# return a (binned values, bin edges) pair; without it a single object is
# returned and the two-name unpacking does not do what is intended.
# NOTE: this rebinds x and y, which earlier cells bound to the DBMaker and
# DBLinker objects.
x, y = pd.cut(new_df['Count'], bins=2, retbins=True)

In [None]:
# Scratch check of the project helper u.pairwise: print every item it yields
# for the two-element list [1, 2].
for i in u.pairwise([1,2]):
    print(i)

In [None]:
# Scratch check: splitting a string that contains no '.' yields a one-element
# list, ['3'].
'3'.split('.')

In [None]:
# Scratch check: str() of a tuple gives its repr, '(5, 3)'.
str((5, 3))

In [None]:
# Build a frame along HOSPITALADMIT -> CAREPROCESSES -> PHYSIOSTATUS.
# NOTE(review): the columns of interest are given here as 'TABLE.column'
# strings, whereas the other get_df_from_path calls in this notebook pass
# (table, column) tuples — confirm which form the current API accepts.
path = ['HOSPITALADMIT', 'CAREPROCESSES', 'PHYSIOSTATUS']
df = z.get_df_from_path(path, table_columns_of_interest=['HOSPITALADMIT.Sex', 'CAREPROCESSES.MechVent', 'PHYSIOSTATUS.LowpH'])

In [None]:
# Filter spec keyed by column name: a 'list' filter for MechVent, a 'range'
# filter (min / max / bins) for LowpH, and None for Sex.
# presumably None means "no filtering on this column" — confirm in aggregate_df.
filters = {
    'MechVent': {'type': 'list', 'filter': ['Yes']},
    'LowpH': {'type': 'range', 'filter': {'min': 6.8, 'max': 6.9, 'bins': 4}},
    'Sex': None
}

# The result is only displayed as the cell output; it is not bound to a name.
z.aggregate_df(df, groupby_columns=['MechVent', 'LowpH'], filters=filters, aggregate_column='Sex')

In [None]:
# NOTE(review): `f` is not assigned in any cell visible in this notebook, so
# this raises NameError on a fresh kernel — confirm what `f` was meant to be.
len(f)

In [None]:
# Sort `f` by its 'LowpH' column (so `f` is expected to be a DataFrame).
# NOTE(review): `f` is not assigned in any cell visible in this notebook.
f.sort_values(by='LowpH')

In [None]:
# Customizer handle for the TOPICC dataset. This notebook imports only
# `db_structure` (there is no `db_structure_v2` import, so that name raised
# NameError); use the same module as every other DB* helper here.
# NOTE(review): assumes DBCustomizer is exposed by db_structure — confirm.
w = db_structure.DBCustomizer(dataset_name='TOPICC')

In [None]:
# Presumably gives column PudID of table HOSPITALADMIT the display name
# 'Patient ID' — confirm the argument order against DBCustomizer.rename_column.
w.rename_column('HOSPITALADMIT', 'PudID', 'Patient ID')

In [None]:
# Presumably persists the customizations made through `w` — confirm where
# DBCustomizer.dump_customization writes them.
w.dump_customization()

In [None]:
# Scratch check of tuple unpacking in a for loop: prints 1, 2, 3, 4, one value
# per line. NOTE: rebinds x, which other cells use for the DBMaker object.
x = [(1, 2), (3, 4)]
for i, j in x:
    print(i, j, sep='\n')

In [17]:
# Create a DBMaker for the SCA_AcuteCare dataset, this time configured with a
# SQL server / database / schema instead of a directory path, and build its
# metadata while passing three table-name substrings to ignore.
# NOTE(review): the server name is hard-coded; consider reading it from
# configuration or an environment variable.
x = db_structure.DBMaker(dataset_name='SCA_AcuteCare', sql_server='CYKPANADBSQL', sql_db='SCA_AcuteCare', schema_name='dbo')
x.create_db_metadata(ignore_tables_with_substrings=['MOTempRecsGrp', 'MO_MeasureInfo', 'MO_MedicationDim_55_'])

In [21]:
# For every name returned by get_common_column_names, print it and add it as a
# global FK when it ends in "ID" (case-insensitive) and is not exactly "GUID".
# Names that merely end in "GUID" (e.g. ClientGUID) still match, because
# "GUID" itself ends in "ID".
y = db_structure.DBLinker(dataset_name='SCA_AcuteCare')
for i in y.get_common_column_names():
    if not i.upper().endswith('ID') or i.upper() == 'GUID':
        continue
    print(i)
    y.add_global_fk(i)

APMInsuranceCarrierDimID
AbnormalityDimID
AccommodationDimID
AccountDimID
AccountTypeDimID
ActionCodedReasonTypeDimID
ActionReasonDimID
ActionTypeDimID
AdmitDateDimID
AdmitReasonDimID
AdmitShiftDimID
AdmitSourceDimID
AdmitTypeDimID
AgeDimID
AgeRangeItemID
AgeRangeSetID
AlertEventTypeDimID
AlertNameDimID
AlertTypeDimID
AllergenDimID
AllergyCategoryTypeDimID
AllergyTypeDimID
AmbulStatusDimID
AncillaryFacilityDimID
ApplicSourceDimID
AppointmentCategoryDimID
ArrivalDateDimID
AuthoredProviderDimID
BillingAddressDimID
BlockDefinitionDimID
CareLevelDimID
CatalogItemTaskGUID
CategoryCodeDimID
ChartGUID
ClientDocumentGUID
ClientGUID
ClientUserDataGUID
ClientVisitGUID
ClinDecSupportDescDimID
ClinDecSupportPriorityDimID
CommentDeclarationTypeDimID
ConfidenceDimID
CostDimID
CurrentLocationGUID
DemographicDimID
DestinationTypeDimID
DiagDimID
DiagTypeDimID
DiagnosisGroupID
DiagnosisID
DictionaryID
DischargeDateDimID
DischargeDispositionDimID
DischargeLocationDimID
DischargeServiceDimID
DischargeShif

In [22]:
# DBExtractor handle for the SCA_AcuteCare dataset; rebinds z, which earlier
# cells bound to the TOPICC extractor.
z = db_structure.DBExtractor('SCA_AcuteCare')

In [25]:
# Collect the paths from SCAObservation to SCAPatientDim, with
# recursion_depth=5. Per the output of the next cell, each path is a list of
# table names.
paths = z.find_paths_between_tables('SCAObservation', 'SCAPatientDim', recursion_depth=5)

In [27]:
# Show a longest path: sort by length and take the last entry.
sorted(paths, key=len)[-1]

['SCAObservation',
 'SCQualifiedVisit',
 'SCAQualityMeasure',
 'SCAPatientStudyNewborn2015',
 'SCAPatientGuarantor',
 'SCARacePatientDim',
 'SCAPatientDim']

In [None]:
# Build a frame along SCAObservation -> SCAVisit -> SCAPatientDim for two
# (table, column) pairs, passing limit_rows=100. Rebinds df.
df = z.get_df_from_path(
    ['SCAObservation', 'SCAVisit', 'SCAPatientDim'],
    table_columns_of_interest=[
        ('SCAVisit', 'AdmitShiftDimID'),
        ('SCAPatientDim', 'HasHxAsthma'),
    ],
    limit_rows=100,
)

In [None]:
# Display the frame built in the previous cell.
df

In [15]:
# NOTE(review): this import sits in the last cell of the notebook, although an
# earlier cell already reads a name `db` (`draw_db = db`); consider moving it
# to the import cell at the top.
from web import db
# Call rollback() on db.session (presumably to discard a failed or pending
# transaction — confirm).
db.session.rollback()