In [None]:
import os
import sys
from google.colab import drive
import dill
import numpy as np

# Mount Google Drive so the project files are reachable from the Colab VM.
drive.mount('/content/gdrive')

# Work from the data directory: the file paths used in the cells below are
# all relative ('./...').
path = "/content/gdrive/My Drive/GAMENet/code_/data"
os.chdir(path)

print("Current working directory:", os.getcwd())

# Use pre-installed packages from the Drive-hosted environment.
# NOTE(review): this runs after the imports at the top of this cell, so it cannot
# help resolve those imports (e.g. `dill`); the path setup would have to move above
# them if they are meant to be loaded from this directory.
site_packages = "/content/gdrive/MyDrive/GAMENet/colab_env/lib/python3.10/site-packages"
if site_packages not in sys.path:  # re-running the cell must not pile up duplicate entries
    sys.path.append(site_packages)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Current working directory: /content/gdrive/.shortcut-targets-by-id/1HvUJwbm1gmi_iRV21oB-A5JZClBwM9Eb/GAMENet/code_/data


In [None]:
# we try to use pre-installed pacakages using virtualenv, but still struggle debugging now.
!pip install dill

Collecting dill
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m61.4/116.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.8


# EHR

In [None]:
# ------------------------------- INPUT -------------------------------
# MIMIC-III (https://mimic.physionet.org/gettingstarted/dbsetup/)
MED_FILE        = './PRESCRIPTIONS.csv'
DIAG_FILE       = './DIAGNOSES_ICD.csv'
PROCEDURE_FILE  = './PROCEDURES_ICD.csv'
# drug code mapping files
ndc2atc_file    = './ndc2atc_level4.csv'
ndc2rxnorm_file = './ndc2rxnorm_mapping.txt'

# ------------------------------- OUTPUT ------------------------------
# All paths are relative to the current working directory.
EHR_FINAL     = './ehr_final.pkl'    # EHR ([patient_id, admission_id, list<diag>, list<med>, list<proc>, len(med)])
RECORD_FINAL  = './records_final.pkl'  # vectorized EHR
VOC_FINAL     = './voc_final.pkl'     # {'diag_voc':diag_voc, 'med_voc':med_voc,'pro_voc':pro_voc}

In [None]:
# Load the EHR adjacency matrix from the working directory and the copy kept under
# ./bak_20240418 so the two can be compared in the following cells.
# The `with` blocks close each file handle as soon as its load finishes.
# NOTE: dill, like pickle, can execute arbitrary code while loading; only open trusted files.
with open('./ehr_adj_final.pkl', 'rb') as current_file:
    ehr_1 = dill.load(current_file)
with open('./bak_20240418/ehr_adj_final.pkl', 'rb') as backup_file:
    ehr_2 = dill.load(backup_file)

In [None]:
# Display the matrix loaded from ./ehr_adj_final.pkl (NumPy abbreviates large arrays with "...").
ehr_1

array([[0., 1., 1., ..., 1., 1., 1.],
       [1., 0., 1., ..., 1., 1., 1.],
       [1., 1., 0., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.]])

In [None]:
# Display the matrix loaded from ./bak_20240418/ehr_adj_final.pkl for a visual comparison.
ehr_2

array([[0., 1., 1., ..., 1., 1., 1.],
       [1., 0., 1., ..., 1., 1., 1.],
       [1., 1., 0., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.]])

In [None]:
# Element-wise equality mask of the two EHR matrices. The displayed repr is
# abbreviated, so it only gives a first impression, not a full verdict.
np.equal(ehr_1, ehr_2)

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [None]:
# (row indices, column indices) of every entry where the two EHR matrices differ;
# a pair of empty arrays means the matrices are identical element-wise.
non_true_indices = np.nonzero(ehr_1 != ehr_2)
non_true_indices

(array([], dtype=int64), array([], dtype=int64))

In [33]:
# Number of entries in ehr_1 that are exactly 1.0.
int((ehr_1 == 1.0).sum())

14078

# DDI

In [None]:
# ------------------------------- INPUT -------------------------------
# EHR
VOC_FINAL     = './voc_final.pkl'
RECORD_FINAL  = './records_final.pkl'
# drug-drug interactions (https://www.dropbox.com/s/8os4pd2zmp2jemd/drug-DDI.csv?dl=0)
DDI_FILE      = './drug-DDI.csv'
CID_ATC       = './drug-atc.csv' # atc -> cid
TOPK          = 40 # topk drug-drug interaction

# ------------------------------- OUTPUT ------------------------------
# All paths are relative to the current working directory.
EHR_ADJ_FINAL = './ehr_adj_final.pkl' # EHR adjacent matrix
DDI_ADJ_FINAL = './ddi_A_final.pkl'   # DDI adjacent matrix

In [None]:
# Load the DDI adjacency matrix from the working directory and the copy kept under
# ./bak_20240418 so the two can be compared in the following cells.
# The `with` blocks close each file handle as soon as its load finishes.
# NOTE: dill, like pickle, can execute arbitrary code while loading; only open trusted files.
with open('./ddi_A_final.pkl', 'rb') as current_file:
    ddi_1 = dill.load(current_file)
with open('./bak_20240418/ddi_A_final.pkl', 'rb') as backup_file:
    ddi_2 = dill.load(backup_file)

In [None]:
# Display the matrix loaded from ./ddi_A_final.pkl (NumPy abbreviates large arrays with "...").
ddi_1

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
# Number of entries in ddi_1 that are exactly 1.0.
int((ddi_1 == 1.0).sum())

372

In [34]:
# Dimensions of the DDI matrix as a (rows, columns) tuple.
np.shape(ddi_1)

(145, 145)

In [None]:
# Display the matrix loaded from ./bak_20240418/ddi_A_final.pkl for a visual comparison.
ddi_2

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
# Number of entries in ddi_2 that are exactly 1.0 (compare with the same count for ddi_1).
int((ddi_2 == 1.0).sum())

404

In [None]:
# Element-wise equality mask of the two DDI matrices. The displayed repr is
# abbreviated, so the visible values alone cannot show whether every entry matches.
np.equal(ddi_1, ddi_2)

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [None]:
# (row indices, column indices) of every entry where the two DDI matrices differ;
# position k of both arrays together identifies one mismatching cell.
non_true_indices_ = np.nonzero(ddi_1 != ddi_2)
non_true_indices_

(array([  2,  11,  11,  11,  23,  25,  26,  26,  26,  27,  30,  30,  35,
         38,  38,  41,  41,  41,  43,  43,  43,  56,  56,  58,  58,  58,
         58,  58,  60,  60,  60,  60,  60,  64,  64,  64,  67,  69,  69,
         70,  70,  85,  85,  85,  85,  85,  93,  96,  96,  98, 101, 101,
        114, 124, 133, 133]),
 array([ 67,  26,  69,  96, 114,  35,  11,  56,  96,  56, 101, 133,  25,
         98, 124,  58,  60,  85,  58,  60,  85,  26,  27,  41,  43,  60,
         64,  85,  41,  43,  58,  64,  85,  58,  60,  85,   2,  11,  93,
        101, 133,  41,  43,  58,  60,  64,  69,  11,  26,  38,  30,  70,
         23,  38,  30,  70]))

In [None]:
from scipy import sparse

In [None]:
# Convert both dense DDI matrices to CSR form so that only their stored (non-zero)
# entries are listed when printed.
s_ddi_adj_1, s_ddi_adj_2 = (sparse.csr_matrix(dense) for dense in (ddi_1, ddi_2))

In [None]:
# Print the stored entries of the CSR version of ddi_1, one "(row, col) value" line each;
# long listings are shortened in the middle.
print(s_ddi_adj_1)

  (0, 80)	1.0
  (1, 7)	1.0
  (1, 29)	1.0
  (1, 30)	1.0
  (1, 31)	1.0
  (1, 32)	1.0
  (1, 42)	1.0
  (1, 43)	1.0
  (1, 44)	1.0
  (1, 49)	1.0
  (1, 64)	1.0
  (1, 69)	1.0
  (1, 76)	1.0
  (1, 80)	1.0
  (1, 98)	1.0
  (1, 100)	1.0
  (1, 127)	1.0
  (2, 25)	1.0
  (2, 34)	1.0
  (2, 67)	1.0
  (2, 114)	1.0
  (7, 1)	1.0
  (7, 31)	1.0
  (7, 49)	1.0
  (7, 64)	1.0
  :	:
  (127, 7)	1.0
  (127, 29)	1.0
  (127, 42)	1.0
  (127, 43)	1.0
  (127, 44)	1.0
  (127, 69)	1.0
  (129, 26)	1.0
  (129, 30)	1.0
  (130, 12)	1.0
  (131, 32)	1.0
  (131, 52)	1.0
  (131, 55)	1.0
  (131, 83)	1.0
  (131, 142)	1.0
  (134, 26)	1.0
  (134, 27)	1.0
  (134, 41)	1.0
  (134, 85)	1.0
  (142, 8)	1.0
  (142, 32)	1.0
  (142, 55)	1.0
  (142, 60)	1.0
  (142, 71)	1.0
  (142, 84)	1.0
  (142, 131)	1.0


In [None]:
# Print the stored entries of the CSR version of ddi_2, one "(row, col) value" line each,
# for a side-by-side read against the listing of s_ddi_adj_1.
print(s_ddi_adj_2)

  (0, 80)	1.0
  (1, 7)	1.0
  (1, 29)	1.0
  (1, 30)	1.0
  (1, 31)	1.0
  (1, 32)	1.0
  (1, 42)	1.0
  (1, 43)	1.0
  (1, 44)	1.0
  (1, 49)	1.0
  (1, 64)	1.0
  (1, 69)	1.0
  (1, 76)	1.0
  (1, 80)	1.0
  (1, 98)	1.0
  (1, 100)	1.0
  (1, 127)	1.0
  (2, 25)	1.0
  (2, 34)	1.0
  (2, 114)	1.0
  (7, 1)	1.0
  (7, 31)	1.0
  (7, 49)	1.0
  (7, 64)	1.0
  (7, 100)	1.0
  :	:
  (127, 42)	1.0
  (127, 43)	1.0
  (127, 44)	1.0
  (127, 69)	1.0
  (129, 26)	1.0
  (129, 30)	1.0
  (130, 12)	1.0
  (131, 32)	1.0
  (131, 52)	1.0
  (131, 55)	1.0
  (131, 83)	1.0
  (131, 142)	1.0
  (133, 30)	1.0
  (133, 70)	1.0
  (134, 26)	1.0
  (134, 27)	1.0
  (134, 41)	1.0
  (134, 85)	1.0
  (142, 8)	1.0
  (142, 32)	1.0
  (142, 55)	1.0
  (142, 60)	1.0
  (142, 71)	1.0
  (142, 84)	1.0
  (142, 131)	1.0
