In [1]:
from itertools import combinations

import numpy as np
import pandas as pd

In [2]:
# Load the melted medical-claims extract, discard the leftover 'Unnamed: 0'
# column that came along with the CSV, and display the resulting frame.
med_claims = pd.read_csv('Data/med_full_final_melted.csv').drop(columns='Unnamed: 0')
med_claims

Unnamed: 0,Member Life ID,Biological Gender,Subscriber Zip Code,Current Procedural Terminology,CPT Modifier 1,CPT Modifier 2,Line Service From Date,Line Service Thru Date,Header Service From Date,Header Service Thru Date,Billed Amount,Diagnosis Type,Diagnosis,Base Code
0,109514,F,20678.0,A7035,NU,,2017-08-10,2017-08-10,2017-08-10,2017-08-10,40.00,Primary Diagnosis Code-ICD10,G4733,G47
1,109514,F,20678.0,99213,,,2017-10-17,2017-10-17,2017-10-17,2017-10-17,121.00,Primary Diagnosis Code-ICD10,J0191,J01
2,109514,F,20678.0,A7034,NU,,2017-11-15,2017-11-15,2017-11-15,2017-11-15,125.00,Primary Diagnosis Code-ICD10,G4733,G47
3,109514,F,20678.0,A7032,NU,,2017-11-15,2017-11-15,2017-11-15,2017-11-15,90.00,Primary Diagnosis Code-ICD10,G4733,G47
4,109514,F,20678.0,A7035,NU,,2017-11-15,2017-11-15,2017-11-15,2017-11-15,40.00,Primary Diagnosis Code-ICD10,G4733,G47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15711868,25236738,F,21401.0,84439,,,2017-11-20,2017-11-20,2017-11-20,2017-11-20,24.74,Tertiary Diagnosis Code-ICD10,E782,E78
15711869,25236738,F,21401.0,84443,,,2018-11-26,2018-11-26,2018-11-26,2018-11-26,41.50,Tertiary Diagnosis Code-ICD10,E039,E03
15711870,25236738,F,21401.0,36415,,,2018-11-26,2018-11-26,2018-11-26,2018-11-26,6.00,Tertiary Diagnosis Code-ICD10,E039,E03
15711871,2797724,F,22301.0,82728,,,2017-12-02,2017-12-02,2017-12-02,2017-12-02,85.48,Tertiary Diagnosis Code-ICD10,E063,E06


In [3]:
def count_member_code_pairs(claims, id_column='Member Life ID', code_column='Base Code'):
    """Count how many members share each unordered pair of base codes.

    Parameters
    ----------
    claims : pandas.DataFrame
        Claim lines; must contain ``id_column`` and ``code_column``.
    id_column : str
        Column identifying the member each claim line belongs to.
    code_column : str
        Column holding the code used as a graph node.

    Returns
    -------
    dict
        Maps ``(code_a, code_b)`` tuples, with ``code_a`` sorting before
        ``code_b``, to the number of distinct ``id_column`` values whose rows
        contain both codes. Insertion order is ids ascending, then pairs in
        lexicographic order.
    """
    pair_counts = {}

    # One groupby pass visits each member's rows once, instead of building a
    # full-length boolean mask over the whole frame for every member id.
    for _, member_codes in claims.groupby(id_column)[code_column]:
        # Missing codes cannot be graph nodes, and would break the sort below.
        distinct_codes = sorted(member_codes.dropna().unique())

        # Because the codes are sorted and unique, combinations() yields every
        # unordered pair exactly once, smaller code first.
        for edge in combinations(distinct_codes, 2):
            pair_counts[edge] = pair_counts.get(edge, 0) + 1

    return pair_counts


# Create a list of the unique Member Life IDs
unique_ids = np.unique(med_claims['Member Life ID'])

# Edge -> weight, where the weight is the number of members having both codes.
edges_dict = count_member_code_pairs(med_claims)


In [4]:
# Display the edge -> weight dictionary: keys are (Base Code, Base Code)
# tuples and values are the number of members that have both codes.
edges_dict

{('B35', 'D22'): 670,
 ('B35', 'D48'): 696,
 ('B35', 'D53'): 40,
 ('B35', 'D61'): 33,
 ('B35', 'D64'): 566,
 ('B35', 'D69'): 162,
 ('B35', 'E04'): 236,
 ('B35', 'E86'): 270,
 ('B35', 'E87'): 469,
 ('B35', 'E88'): 67,
 ('B35', 'H11'): 124,
 ('B35', 'H25'): 928,
 ('B35', 'H35'): 555,
 ('B35', 'H52'): 463,
 ('B35', 'H61'): 449,
 ('B35', 'H90'): 380,
 ('B35', 'I10'): 2590,
 ('B35', 'I11'): 352,
 ('B35', 'I12'): 280,
 ('B35', 'I13'): 123,
 ('B35', 'I20'): 108,
 ('B35', 'I21'): 108,
 ('B35', 'I25'): 794,
 ('B35', 'I42'): 148,
 ('B35', 'I44'): 166,
 ('B35', 'I48'): 460,
 ('B35', 'I50'): 397,
 ('B35', 'I51'): 388,
 ('B35', 'I95'): 235,
 ('B35', 'J18'): 384,
 ('B35', 'J81'): 107,
 ('B35', 'J90'): 230,
 ('B35', 'J96'): 192,
 ('B35', 'J98'): 404,
 ('B35', 'K21'): 985,
 ('B35', 'K57'): 468,
 ('B35', 'L21'): 247,
 ('B35', 'L30'): 634,
 ('B35', 'L53'): 114,
 ('B35', 'L57'): 763,
 ('B35', 'L82'): 887,
 ('B35', 'N17'): 293,
 ('B35', 'N18'): 493,
 ('B35', 'N28'): 376,
 ('B35', 'N40'): 462,
 ('B35', 'R0

In [5]:
# Turn the edge -> weight mapping into a dataframe with one row per edge, then
# split each (source, target) tuple into its own 'Source' and 'Target' column.
final_edges_df = pd.DataFrame(
    {'Edge': edges_dict.keys(), 'Weight': edges_dict.values()}
).assign(
    Source=lambda edges: edges['Edge'].str[0],
    Target=lambda edges: edges['Edge'].str[1],
)

In [6]:
# Read the headerless code/description lookup file and give its first two
# positional columns meaningful names.
code_description_pairs = pd.read_table(
    'Data/code_description_pairs.txt', header=None
).rename(columns={0: 'Code', 1: 'Description'})

In [7]:
# Attach the description of each edge's Source code. The left join keeps every
# edge even when the lookup has no matching code; the helper 'Code' key is
# dropped once the description has been renamed.
final_edges_df = (
    pd.merge(
        final_edges_df,
        code_description_pairs,
        how='left',
        left_on='Source',
        right_on='Code',
    )
    .rename(columns={'Description': 'Source Description'})
    .drop(columns='Code')
)

In [8]:
# Attach the description of each edge's Target code. The left join keeps every
# edge even when the lookup has no matching code; the helper 'Code' key is
# dropped once the description has been renamed.
final_edges_df = (
    pd.merge(
        final_edges_df,
        code_description_pairs,
        how='left',
        left_on='Target',
        right_on='Code',
    )
    .rename(columns={'Description': 'Target Description'})
    .drop(columns='Code')
)

In [9]:
# Order the edges from heaviest to lightest, breaking ties alphabetically by
# Source and then Target, and renumber the rows from zero.
final_edges_df = (
    final_edges_df
    .sort_values(
        by=['Weight', 'Source', 'Target'],
        ascending=[False, True, True],
    )
    .reset_index(drop=True)
)
final_edges_df

Unnamed: 0,Edge,Weight,Source,Target,Source Description,Target Description
0,"(Z00, Z23)",29990,Z00,Z23,General examination and investigation of perso...,Need for immunization against single bacterial...
1,"(Z00, Z01)",17114,Z00,Z01,General examination and investigation of perso...,Other special examinations and investigations ...
2,"(E78, I10)",16020,E78,I10,Disorders of lipoprotein metabolism and other ...,Essential (primary) hypertension
3,"(Z00, Z12)",14834,Z00,Z12,General examination and investigation of perso...,Special screening examination for neoplasms
4,"(Z01, Z23)",14621,Z01,Z23,Other special examinations and investigations ...,Need for immunization against single bacterial...
...,...,...,...,...,...,...
544653,"(Z81, Z96)",1,Z81,Z96,Family history of mental and behavioural disor...,Presence of other functional implants
544654,"(Z82, Z89)",1,Z82,Z89,Family history of certain disabilities and chr...,Acquired absence of limb
544655,"(Z84, Z93)",1,Z84,Z93,Family history of other conditions,Artificial opening status
544656,"(Z84, Z95)",1,Z84,Z95,Family history of other conditions,Presence of cardiac and vascular implants and ...


In [10]:
# Persist the sorted edge list to 'edges.csv' in the working directory.
# The row index is written too (spelled out here; it is the pandas default),
# so the file's first column has an empty header.
final_edges_df.to_csv('edges.csv', index=True)