In [26]:
import csv
import itertools
import logging
import math
import os
import time
from collections import defaultdict

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from flask import jsonify
from pandas.api.types import is_numeric_dtype
from pandas.api.types import is_string_dtype

# import exceptions
import db_structure
import utilities as u
from web.models import *
from web import db
import graph

# Configure logging output: every record shows timestamp, level and message.
logging.basicConfig(format=' %(asctime)s - %(levelname)s - %(message)s')
# getLogger() without a name returns the root logger; DEBUG lets every level through.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Load IPython's autoreload extension so edited modules are re-imported automatically.
%load_ext autoreload
# The leading "-" excludes the `web` package from automatic reloading.
%aimport -web
# Mode 2: reload all modules (except those excluded with %aimport) before running each cell.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Build a directed graph of the large tables in one dataset and draw it.
dataset_name = 'SCA_AcuteCare'
draw_db = db_structure.DBExtractor(dataset_name)

G = nx.DiGraph()
# Only tables of this dataset with at least 10,000 recorded rows become nodes.
table_names = [
    meta.table_name
    for meta in db.session.query(TableMetadata).filter(
        TableMetadata.dataset_name == dataset_name,
        TableMetadata.num_records >= 10000,
    ).all()
]
G.add_nodes_from(table_names)
for table_name in table_names:
    for sibling in draw_db.find_table_siblings(table_name):
        # Membership in the filtered table list, same rule as for children below.
        # (Testing against the single string `table_name` would be a substring
        # check and would drop nearly every sibling relationship.)
        if sibling in table_names:
            # Siblings are linked in both directions.
            G.add_edge(table_name, sibling)
            G.add_edge(sibling, table_name)
    for child in draw_db.find_table_children(table_name):
        if child in table_names:
            # Parent -> child is a one-way edge.
            G.add_edge(table_name, child)
plt.figure(figsize=(10,10))

#draw with networkx built-ins
nx.draw_networkx(G, node_shape="None", width=0.2)

#OR

#draw using pygraphviz
#A = nx.nx_agraph.to_agraph(G)
#H = nx.nx_agraph.from_agraph(A)
#nx.draw_spring(H, node_shape="None", with_labels=True, width=0.2)

In [None]:
# Create a DBMaker for the TOPICC dataset pointed at the datasets/TOPICC directory.
# os.path.join keeps the path portable; on Windows it yields the same 'datasets\\TOPICC'.
x = db_structure.DBMaker(
    dataset_name='TOPICC',
    directory_path=os.path.join('datasets', 'TOPICC'),
)
# NOTE(review): dump_to_data_db=True presumably also loads the data into the data DB — confirm.
x.create_db_metadata(dump_to_data_db=True)

In [None]:
# Calls remove_db() on a DBDestroyer for 'TOPICC'.
# NOTE(review): presumably irreversible — confirm before running this cell.
db_structure.DBDestroyer('TOPICC').remove_db()

In [None]:
# Linker object for the TOPICC dataset; used by the next cell to add a global foreign key.
y = db_structure.DBLinker(dataset_name='TOPICC')

In [None]:
# Register the 'PudID' column through add_global_fk.
# NOTE(review): presumably links every table that has this column — confirm in db_structure.
y.add_global_fk('PudID')

In [None]:
# Extractor object for the TOPICC dataset; `z` is used by the following cells to pull data.
z = db_structure.DBExtractor(dataset_name='TOPICC')

In [None]:
# Time how long it takes to pull the selected columns along one table path.
path = ['HOSPITALADMIT', 'CAREPROCESSES', 'DEATH']
# perf_counter is monotonic and high-resolution, so the measured interval is
# not affected by system clock adjustments.
start = time.perf_counter()
df = z.get_df_from_path(
    path,
    table_columns_of_interest=[
        ('HOSPITALADMIT', 'PudID'),
        ('HOSPITALADMIT', 'Sex'),
        ('CAREPROCESSES', 'MechVent'),
        ('DEATH', 'DeathMode'),
    ],
)
end = time.perf_counter()
# Elapsed seconds for the extraction.
print(end-start)

In [None]:
# Display the DataFrame returned by get_df_from_path in the previous cell.
df

In [22]:
# Substrings passed as ignore_tables_with_substrings.
# NOTE(review): presumably any table whose name contains one of them is skipped — confirm.
skipped_table_substrings = ['MOTempRecsGrp', 'MO_MeasureInfo', 'MO_MedicationDim_55_']

# DBMaker for the SCA_AcuteCare database on SQL Server (schema dbo), then build its metadata.
x = db_structure.DBMaker(
    dataset_name='SCA_AcuteCare',
    sql_server='CYKPANADBSQL',
    sql_db='SCA_AcuteCare',
    schema_name='dbo',
)
x.create_db_metadata(ignore_tables_with_substrings=skipped_table_substrings)



In [None]:
# For SCA_AcuteCare, pass every common column whose name ends in "ID"
# (case-insensitive) to add_global_fk, except the column named exactly GUID.
y = db_structure.DBLinker(dataset_name='SCA_AcuteCare')
for column_name in y.get_common_column_names():
    upper_name = column_name.upper()
    if not upper_name.endswith('ID') or upper_name == 'GUID':
        continue
    # Echo the column so the notebook output records which ones were linked.
    print(column_name)
    y.add_global_fk(column_name)

In [21]:
# Calls remove_db() on a DBDestroyer for 'SCA_AcuteCare'.
# NOTE(review): presumably irreversible — confirm before running this cell.
db_structure.DBDestroyer('SCA_AcuteCare').remove_db()

In [None]:
# Extractor object for the SCA_AcuteCare dataset; rebinds `z` for the cells below.
z = db_structure.DBExtractor('SCA_AcuteCare')

In [None]:
# Candidate table paths and the (table, column) pairs to pull from them.
paths = [
    ['BF_FoleyTimes', 'SCAVisit'],
]
table_columns_of_interest = [
    ('BF_FoleyTimes', 'FoleyDays'),
    ('SCAVisit', 'IsCHF'),
]

# Fetch the frame first; the aggregation settings below are only consumed later.
df = z.get_biggest_df_from_paths(
    paths,
    table_columns_of_interest,
    limit_rows=10000,
)

# Aggregation settings for the follow-up aggregate_df call.
# NOTE(review): names look like "<table>_<column>" — confirm against get_biggest_df_from_paths.
aggregate_fxn = 'Mean'
aggregate_column = 'BF_FoleyTimes_FoleyDays'
groupby_columns = ['SCAVisit_IsCHF']
filters_with_name_keys = {
    'SCAVisit_IsCHF': {
        'filter': ['0', '1'],
        'type': 'bool',
    },
}

In [None]:
# Aggregate `df` using the groupby columns, filters, aggregate column and
# aggregate function name defined in the previous cell.
agg_df = z.aggregate_df(df, groupby_columns, filters_with_name_keys, aggregate_column, aggregate_fxn)

In [None]:
# Display the aggregated frame produced by aggregate_df.
agg_df

In [None]:
# Scratch check of the percentage rounding: 50% of 25 is 12.5, which rounds up to 13.
# `math` is imported in the notebook's first cell. Multiplying before dividing keeps
# the intermediate value exact.
math.ceil(50 * 25 / 100)

In [18]:
# Roll back the current SQLAlchemy session's pending transaction.
# `db` is also imported in the first cell; the import is repeated here so
# this cell can be run on its own.
from web import db
db.session.rollback()

In [5]:
# Row count per table in the dbo schema, read from SQL Server catalog views.
# Plain string (no f-prefix): the query has no placeholders. The adjacent
# literals concatenate to exactly the same single-line statement as before.
query = (
    "SELECT tbl.name, MAX(CAST(p.rows AS int)) AS rows "
    "FROM sys.tables AS tbl "
    "INNER JOIN sys.indexes AS idx ON idx.object_id = tbl.object_id and idx.index_id < 2 "
    "INNER JOIN sys.partitions AS p ON p.object_id=CAST(tbl.object_id AS int) AND p.index_id=idx.index_id "
    "WHERE (SCHEMA_NAME(tbl.schema_id)='dbo') "
    "GROUP BY tbl.name"
)
# Result is a frame with `name` and `rows` columns (as selected above).
num_rows_df = db_structure.execute_sql_query(query=query, sql_server='CYKPANADBSQL', sql_db='SCA_AcuteCare')

In [14]:
# Choose how many rows of one table to analyze: a percentage of the table's
# row count, bounded below by min_rows and above by max_rows.
# `math` is imported in the notebook's first cell.
min_rows = 1000
max_rows = 10000
analyze_percentage = 50
# Row count of the table of interest, looked up in the frame returned by the row-count query.
num_rows_in_db = num_rows_df[num_rows_df['name'] == 'CV3ClientVisit_Test'].iloc[0]['rows']
# Multiply before dividing, on a plain Python int: `pct / 100 * n` can land just
# above an exact integer (7 / 100 * 100 == 7.000000000000001), which would make
# ceil() return one row too many.
by_percentage = math.ceil(analyze_percentage * int(num_rows_in_db) / 100)
if by_percentage < min_rows:
    num_rows = min_rows
elif by_percentage > max_rows:
    num_rows = max_rows
else:
    num_rows = by_percentage

In [16]:
# Display the bounded sample size computed above.
num_rows

4915

In [15]:
# Display the raw row count looked up for the table.
num_rows_in_db

9829

In [85]:
# Build a small directed graph and exercise the relationship helpers of graph.Graph.
g = graph.Graph()
for node_name in 'ABCDEF':
    g.add_node(node_name)

# Edges are inserted in this exact order; A<->B and C<->D are the only mutual pairs.
directed_edges = [
    ('A', 'C'), ('A', 'D'), ('A', 'B'),
    ('B', 'A'), ('B', 'E'),
    ('C', 'D'), ('C', 'F'),
    ('D', 'C'),
    ('E', 'F'),
]
for source, target in directed_edges:
    g.add_edge(source, target)

# Trailing comments list the values recorded in this cell's saved output,
# for nodes A, B and C in that order.
for node_name in 'ABC':
    print(g.get_node_parents(node_name))   # [], [], ['A']
for node_name in 'ABC':
    print(g.get_node_children(node_name))  # ['C', 'D'], ['E'], ['F']
for node_name in 'ABC':
    print(g.get_node_siblings(node_name))  # ['B'], ['A'], ['D']

# Recorded output: [['A', 'C', 'F'], ['A', 'D', 'C', 'F'], ['A', 'B', 'E', 'F']]
print(g.find_paths_between_nodes('A', 'F'))

[]
[]
['A']
['C', 'D']
['E']
['F']
['B']
['A']
['D']
[['A', 'C', 'F'], ['A', 'D', 'C', 'F'], ['A', 'B', 'E', 'F']]


In [71]:
# Inspect the edges stored on node 'A'; the saved output shows a dict of
# graph.Edge objects keyed by the neighbouring node's name.
g.nodes['A'].edges

[autoreload of graph failed: Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\extensions\autoreload.py", line 368, in superreload
    module = reload(module)
  File "C:\ProgramData\Anaconda3\lib\imp.py", line 315, in reload
    return importlib.reload(module)
  File "C:\ProgramData\Anaconda3\lib\importlib\__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 618, in _exec
  File "<frozen importlib._bootstrap_external>", line 674, in exec_module
  File "<frozen importlib._bootstrap_external>", line 781, in get_code
  File "<frozen importlib._bootstrap_external>", line 741, in source_to_code
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "C:\Users\SShah50\Documents\Programming\cohort-visualization\g

{'B': <graph.Edge at 0x1438c208>, 'C': <graph.Edge at 0x1438c630>}