In [1]:
import pandas as pd
import csv
import os
import time
from collections import defaultdict
import numpy as np
import itertools
import db_structure
import networkx as nx
import matplotlib.pyplot as plt
import utilities as u
from flask import jsonify
import logging
# import exceptions
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from web.models import *
from web import db

# Set the root logger's message format (timestamp - level - message).
# basicConfig only takes effect if the root logger has no handlers yet.
logging.basicConfig(format=' %(asctime)s - %(levelname)s - %(message)s')
# getLogger() without a name returns the root logger, so the DEBUG level below
# applies to everything that logs through the standard logging module.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Autoreload mode 2 re-imports edited modules before each cell executes;
# `%aimport -web` excludes the `web` package from that reloading.
%load_ext autoreload
%aimport -web
%autoreload 2

In [None]:
# Draw a directed graph of the relationships between the larger tables of one
# dataset. Nodes are the tables with at least `min_records` rows. Tables
# returned by find_table_siblings get an edge in both directions; tables
# returned by find_table_children get an edge from the current table to the
# child. Edges are only added when the other table also passed the row filter.
dataset_name = 'SCA_AcuteCare'
min_records = 10000
draw_db = db_structure.DBExtractor(dataset_name)

G = nx.DiGraph()
large_tables = db.session.query(TableMetadata).filter(
    TableMetadata.dataset_name == dataset_name,
    TableMetadata.num_records >= min_records,
).all()
table_names = [table.table_name for table in large_tables]
G.add_nodes_from(table_names)

# Set copy of the node names for fast membership tests in the loops below.
drawn_tables = set(table_names)
for table_name in table_names:
    for sibling in draw_db.find_table_siblings(table_name):
        # Test membership in the filtered table list (as the child check does),
        # not a substring match against the current table's name.
        if sibling in drawn_tables:
            G.add_edge(table_name, sibling)
            G.add_edge(sibling, table_name)
    for child in draw_db.find_table_children(table_name):
        if child in drawn_tables:
            G.add_edge(table_name, child)
plt.figure(figsize=(10,10))

#draw with networkx built-ins
nx.draw_networkx(G, node_shape="None", width=0.2)

#OR

#draw using pygraphviz
#A = nx.nx_agraph.to_agraph(G)
#H = nx.nx_agraph.from_agraph(A)
#nx.draw_spring(H, node_shape="None", with_labels=True, width=0.2)

In [4]:
# Build the metadata for the TOPICC dataset from the files under datasets/TOPICC.
# os.path.join yields 'datasets\\TOPICC' on Windows (same as before) and a valid
# path on other platforms, instead of a hard-coded backslash separator.
x = db_structure.DBMaker(
    dataset_name='TOPICC',
    directory_path=os.path.join('datasets', 'TOPICC'),
)
x.create_db_metadata(dump_to_data_db=True)

In [3]:
# Calls remove_db() on a DBDestroyer for 'TOPICC'. Presumably this deletes the
# stored data/metadata for that dataset -- confirm in db_structure before re-running.
db_structure.DBDestroyer('TOPICC').remove_db()

In [None]:
# Linker object for the TOPICC dataset; used by the next cell to register a global key.
y = db_structure.DBLinker(dataset_name='TOPICC')

In [None]:
# Register 'PudID' through add_global_fk. Presumably this links every table that
# shares the column as a foreign key -- TODO confirm against DBLinker.
y.add_global_fk('PudID')

In [None]:
# Extractor for the TOPICC dataset; the following cell uses it to build a DataFrame.
z = db_structure.DBExtractor(dataset_name='TOPICC')

In [None]:
# Time how long it takes to pull selected columns along a three-table path.
# perf_counter() is a monotonic, high-resolution clock, so the measured duration
# cannot be skewed by system clock adjustments the way time.time() can.
path = ['HOSPITALADMIT', 'CAREPROCESSES', 'DEATH']
start = time.perf_counter()
df = z.get_df_from_path(
    path,
    table_columns_of_interest=[
        ('HOSPITALADMIT', 'PudID'),
        ('HOSPITALADMIT', 'Sex'),
        ('CAREPROCESSES', 'MechVent'),
        ('DEATH', 'DeathMode'),
    ],
)
end = time.perf_counter()
# Elapsed seconds.
print(end-start)

In [None]:
# Display the DataFrame assigned to `df` by the most recently run extraction cell.
df

In [51]:
# Build metadata for the SCA_AcuteCare dataset from a SQL Server source,
# leaving out any table whose name contains one of the listed substrings.
x = db_structure.DBMaker(
    dataset_name='SCA_AcuteCare',
    sql_server='CYKPANADBSQL',
    sql_db='SCA_AcuteCare',
    schema_name='dbo',
)
x.create_db_metadata(
    ignore_tables_with_substrings=[
        'MOTempRecsGrp',
        'MO_MeasureInfo',
        'MO_MedicationDim_55_',
    ],
)



In [75]:
# Register every common column whose name ends in "ID" (case-insensitive) as a
# global key. A column named exactly "GUID" is skipped; names that merely end in
# "GUID" (e.g. ClientGUID) still qualify, as in the printed output.
y = db_structure.DBLinker(dataset_name='SCA_AcuteCare')
for column_name in y.get_common_column_names():
    upper_name = column_name.upper()
    if upper_name == 'GUID' or not upper_name.endswith('ID'):
        continue
    print(column_name)
    y.add_global_fk(column_name)

APMInsuranceCarrierDimID
AbnormalityDimID
AccommodationDimID
AccountDimID
AccountTypeDimID
ActionCodedReasonTypeDimID
ActionReasonDimID
ActionTypeDimID
AdmitDateDimID
AdmitReasonDimID
AdmitShiftDimID
AdmitSourceDimID
AdmitTypeDimID
AgeDimID
AgeRangeItemID
AgeRangeSetID
AlertEventTypeDimID
AlertNameDimID
AlertTypeDimID
AllergenDimID
AllergyCategoryTypeDimID
AllergyTypeDimID
AmbulStatusDimID
AncillaryFacilityDimID
ApplicSourceDimID
AppointmentCategoryDimID
ArrivalDateDimID
AuthoredProviderDimID
BillingAddressDimID
BlockDefinitionDimID
CareLevelDimID
CatalogItemTaskGUID
CategoryCodeDimID
ChartGUID
ClientDocumentGUID
ClientGUID
ClientUserDataGUID
ClientVisitGUID
ClinDecSupportDescDimID
ClinDecSupportPriorityDimID
CommentDeclarationTypeDimID
ConfidenceDimID
CostDimID
CurrentLocationGUID
DemographicDimID
DestinationTypeDimID
DiagDimID
DiagTypeDimID
DiagnosisGroupID
DiagnosisID
DictionaryID
DischargeDateDimID
DischargeDispositionDimID
DischargeLocationDimID
DischargeServiceDimID
DischargeShif

In [50]:
# Calls remove_db() on a DBDestroyer for 'SCA_AcuteCare'. Presumably this deletes the
# stored data/metadata for that dataset -- confirm in db_structure before re-running.
db_structure.DBDestroyer('SCA_AcuteCare').remove_db()

In [None]:
# Extractor for the SCA_AcuteCare dataset. This rebinds `z`, which earlier cells
# pointed at TOPICC.
z = db_structure.DBExtractor('SCA_AcuteCare')

In [None]:
# Collect the paths find_paths_between_tables reports from SCAObservation to
# SCAPatientDim, with recursion_depth=5 (presumably a cap on search depth -- confirm).
paths = z.find_paths_between_tables('SCAObservation', 'SCAPatientDim', recursion_depth=5)

In [None]:
# Show a longest path: the last element after a stable ascending sort by length.
paths_by_length = sorted(paths, key=len)
paths_by_length[-1]

In [None]:
# Pull two columns of interest along the Observation -> Visit -> PatientDim
# path, passing limit_rows=100 to the extractor.
df = z.get_df_from_path(
    ['SCAObservation', 'SCAVisit', 'SCAPatientDim'],
    table_columns_of_interest=[
        ('SCAVisit', 'AdmitShiftDimID'),
        ('SCAPatientDim', 'HasHxAsthma'),
    ],
    limit_rows=100,
)

In [None]:
# Display the DataFrame assigned to `df` by the most recently run extraction cell.
df

In [30]:
# Roll back the current SQLAlchemy session. Presumably used to recover after a
# failed statement left the session unusable -- confirm.
# `db` is re-imported here so this cell can be run on its own.
from web import db
db.session.rollback()

In [60]:
# Scratch check: the product of three one-character strings is a single 3-tuple.
for i in itertools.product('1', '2', '3'):
    print(i)

('1', '2', '3')


In [65]:
# Build one label per combination of filter values (one value taken from each
# inner list), joining the stringified values with underscores.
filter_filters = [['1', '2', '3'], ['a', 'b']]
groupby_label_options = []

for filter_combo in itertools.product(*filter_filters):
    label = '_'.join(str(value) for value in filter_combo)
    groupby_label_options.append(label)

In [66]:
# Display the labels built in the previous cell.
groupby_label_options

['1_a', '1_b', '2_a', '2_b', '3_a', '3_b']

In [70]:
# Scratch check: return the first value whose upper-cased string form is in the
# list, or None when nothing matches.
# NOTE(review): str(x).upper() can never equal the lowercase literal 'asdf', so
# only the empty string can match here -- confirm whether 'ASDF' was intended.
column_values = [True, False]
next((x for x in column_values if str(x).upper() in ['', 'asdf']), None)

In [73]:
# Keep only the rows where vVisGUID is present.
# NOTE(review): `df` needs a 'vVisGUID' column here; the extraction cells above do
# not request one, so this may rely on a frame loaded in a cell no longer shown.
df[df['vVisGUID'].notnull()]

Unnamed: 0,VisitID,ClientVisitGUID,AdmitDtm,AdmitDateDimID,AdmitShiftDimID,DischargeDtm,DischargeDateDimID,DischargeShiftDimID,DischargeLocationDimID,FirstLocationDimID,...,OrganizationalUnitDimID,vVisGUID,DischargeToLocationDimID,LOSAltDays,GroupPracticeDimID,EDDepartureDtm,EDDepartureDateDimID,PatientReadmittedWithin30Days,DaysToReadmission,ReadmitVisitID


In [74]:
# Scratch check: str(None) yields the string 'None' (see the output below).
str(None)

[autoreload of db_structure failed: Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\extensions\autoreload.py", line 368, in superreload
    module = reload(module)
  File "C:\ProgramData\Anaconda3\lib\imp.py", line 315, in reload
    return importlib.reload(module)
  File "C:\ProgramData\Anaconda3\lib\importlib\__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 618, in _exec
  File "<frozen importlib._bootstrap_external>", line 674, in exec_module
  File "<frozen importlib._bootstrap_external>", line 781, in get_code
  File "<frozen importlib._bootstrap_external>", line 741, in source_to_code
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "C:\Users\SShah50\Documents\Programming\cohort-visualiz

'None'