In [46]:
import csv
import os
import sys
import sqlite3


# the files to read
mimic_files = ('DIAGNOSES_ICD_DATA_TABLE.csv',
               'D_ICD_PROCEDURES_DATA_TABLE.csv',
               'PATIENTS_DATA_TABLE.csv',
               'D_ICD_DIAGNOSES_DATA_TABLE.csv', 
               'NOTEEVENTS_DATA_TABLE.csv')

# Load every CSV into the SQLite table named after the file (the
# '_DATA_TABLE.csv' suffix is stripped to get the table name).
# NOTE: this cell relies on `sqlitedb` being defined and on the target tables
# already existing; run the database-path and CREATE TABLE cells first.
for mf in mimic_files:
    file = os.path.join(os.path.expanduser('~'), 'Box Sync', 'GradSchoolStuff', 'MastersProject', 'mimic3', mf)

    if not os.path.exists(file):
        print("Specified file does not exist")
        sys.exit()

    table_name = mf.replace('_DATA_TABLE.csv', '')

    # Open the CSV in a `with` block so the file handle is released even if
    # an insert fails part-way through.
    with open(file, newline='') as csv_file:
        csvReader = csv.reader(csv_file)
        header = next(csvReader)
        print('Columns read from ', mf, ':', header)
        print('Loading to ', table_name)

        # One '?' placeholder per CSV column, e.g. '?,?,?' for three columns.
        value_placeholder = ','.join('?' * len(header))
        insert_sql = 'insert into ' + table_name + ' values (' + value_placeholder + ')'

        # load each line from CSV into appropriate table
        # Fields are passed through exactly as csv.reader yields them (strings;
        # an empty field is inserted as '' rather than NULL).
        connection = sqlite3.connect(sqlitedb)
        try:
            # `with connection:` commits on success and rolls back on error,
            # but does not close the connection; that is done in `finally`.
            with connection:
                cursor = connection.cursor()
                # executemany consumes the reader row by row using a single
                # cursor instead of allocating a new cursor per row.
                cursor.executemany(insert_sql, csvReader)
        finally:
            connection.close()

Columns read from  DIAGNOSES_ICD_DATA_TABLE.csv : ['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'SEQ_NUM', 'ICD9_CODE']
Loading to  DIAGNOSES_ICD
Columns read from  D_ICD_PROCEDURES_DATA_TABLE.csv : ['ROW_ID', 'ICD9_CODE', 'SHORT_TITLE', 'LONG_TITLE']
Loading to  D_ICD_PROCEDURES
Columns read from  PATIENTS_DATA_TABLE.csv : ['ROW_ID', 'SUBJECT_ID', 'GENDER', 'DOB', 'DOD', 'DOD_HOSP', 'DOD_SSN', 'EXPIRE_FLAG']
Loading to  PATIENTS
Columns read from  D_ICD_DIAGNOSES_DATA_TABLE.csv : ['ROW_ID', 'ICD9_CODE', 'SHORT_TITLE', 'LONG_TITLE']
Loading to  D_ICD_DIAGNOSES
Columns read from  NOTEEVENTS_DATA_TABLE.csv : ['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'CHARTDATE', 'CHARTTIME', 'STORETIME', 'CATEGORY', 'DESCRIPTION', 'CGID', 'ISERROR', 'TEXT']
Loading to  NOTEEVENTS


In [12]:
# Absolute path of the SQLite database file under the user's home directory.
home_dir = os.path.expanduser('~')
sqlitedb = os.path.join(home_dir, 'Box Sync', 'GradSchoolStuff', 'MastersProject', 'mimic3', 'mimic3.sqlite')

# Stop here when the database file is already on disk.
db_already_present = os.path.exists(sqlitedb)
if db_already_present:
    print("Database already exists")
    sys.exit()

In [14]:
# using sql from https://github.com/MIT-LCP/mimic-code/blob/master/buildmimic/postgres/postgres_create_tables.sql
# Create the DIAGNOSES_ICD table.
# `with connection:` only commits (or rolls back) the transaction; it does not
# close the connection, so it is closed explicitly in `finally`.
connection = sqlite3.connect(sqlitedb)
try:
    with connection:
        cursor = connection.cursor()
        cursor.execute('''CREATE TABLE DIAGNOSES_ICD
   (	ROW_ID INT NOT NULL,
	SUBJECT_ID INT NOT NULL,
	HADM_ID INT NOT NULL,
	SEQ_NUM INT,
	ICD9_CODE VARCHAR(20),
	CONSTRAINT diagnosesicd_rowid_pk PRIMARY KEY (ROW_ID)
   )''')
finally:
    connection.close()


In [17]:
# Create the D_ICD_PROCEDURES table.
# `with connection:` only commits (or rolls back) the transaction; it does not
# close the connection, so it is closed explicitly in `finally`.
connection = sqlite3.connect(sqlitedb)
try:
    with connection:
        cursor = connection.cursor()
        cursor.execute('''CREATE TABLE D_ICD_PROCEDURES
   (	ROW_ID INT NOT NULL,
	ICD9_CODE VARCHAR(10) NOT NULL,
	SHORT_TITLE VARCHAR(50) NOT NULL,
	LONG_TITLE VARCHAR(255) NOT NULL,
    	CONSTRAINT d_icd_proc_code_unique UNIQUE (ICD9_CODE),
    	CONSTRAINT d_icd_proc_rowid_pk PRIMARY KEY (ROW_ID)
   )''')
finally:
    connection.close()

In [19]:
# Create the PATIENTS table.
# `with connection:` only commits (or rolls back) the transaction; it does not
# close the connection, so it is closed explicitly in `finally`.
connection = sqlite3.connect(sqlitedb)
try:
    with connection:
        cursor = connection.cursor()
        cursor.execute('''CREATE TABLE PATIENTS
   (	ROW_ID INT NOT NULL,
	SUBJECT_ID INT NOT NULL,
	GENDER VARCHAR(5) NOT NULL,
	DOB TIMESTAMP(0) NOT NULL,
	DOD TIMESTAMP(0),
	DOD_HOSP TIMESTAMP(0),
	DOD_SSN TIMESTAMP(0),
	EXPIRE_FLAG INT NOT NULL,
    	CONSTRAINT pat_subid_unique UNIQUE (SUBJECT_ID),
    	CONSTRAINT pat_rowid_pk PRIMARY KEY (ROW_ID)
   )''')
finally:
    connection.close()

In [20]:
# Create the D_ICD_DIAGNOSES table.
# `with connection:` only commits (or rolls back) the transaction; it does not
# close the connection, so it is closed explicitly in `finally`.
connection = sqlite3.connect(sqlitedb)
try:
    with connection:
        cursor = connection.cursor()
        cursor.execute('''CREATE TABLE D_ICD_DIAGNOSES
   (	ROW_ID INT NOT NULL,
	ICD9_CODE VARCHAR(10) NOT NULL,
	SHORT_TITLE VARCHAR(50) NOT NULL,
	LONG_TITLE VARCHAR(255) NOT NULL,
    	CONSTRAINT d_icd_diag_code_unique UNIQUE (ICD9_CODE),
    	CONSTRAINT d_icd_diag_rowid_pk PRIMARY KEY (ROW_ID)
   )''')
finally:
    connection.close()

In [21]:
# Create the NOTEEVENTS table.
# `with connection:` only commits (or rolls back) the transaction; it does not
# close the connection, so it is closed explicitly in `finally`.
connection = sqlite3.connect(sqlitedb)
try:
    with connection:
        cursor = connection.cursor()
        cursor.execute('''CREATE TABLE NOTEEVENTS
   (	ROW_ID INT NOT NULL,
	SUBJECT_ID INT NOT NULL,
	HADM_ID INT,
	CHARTDATE TIMESTAMP(0),
	CHARTTIME TIMESTAMP(0),
	STORETIME TIMESTAMP(0),
	CATEGORY VARCHAR(50),
	DESCRIPTION VARCHAR(255),
	CGID INT,
	ISERROR CHAR(1),
	TEXT TEXT,
	CONSTRAINT noteevents_rowid_pk PRIMARY KEY (ROW_ID)
   )''')
finally:
    connection.close()

In [22]:
# Print every row of sqlite_master to confirm which schema objects exist.
# `with connection:` does not close the connection, so it is closed explicitly
# in `finally` once all rows have been printed.
connection = sqlite3.connect(sqlitedb)
try:
    with connection:
        cursor = connection.cursor()
        cursor.execute('select * from sqlite_master')
        # A cursor is iterable and yields one result row at a time, which
        # replaces the manual fetchone() loop.
        for row in cursor:
            print(row)
finally:
    connection.close()

('table', 'DIAGNOSES_ICD', 'DIAGNOSES_ICD', 2, 'CREATE TABLE DIAGNOSES_ICD\n   (\tROW_ID INT NOT NULL,\n\tSUBJECT_ID INT NOT NULL,\n\tHADM_ID INT NOT NULL,\n\tSEQ_NUM INT,\n\tICD9_CODE VARCHAR(20),\n\tCONSTRAINT diagnosesicd_rowid_pk PRIMARY KEY (ROW_ID)\n   )')
('index', 'sqlite_autoindex_DIAGNOSES_ICD_1', 'DIAGNOSES_ICD', 3, None)
('table', 'D_ICD_PROCEDURES', 'D_ICD_PROCEDURES', 4, 'CREATE TABLE D_ICD_PROCEDURES\n   (\tROW_ID INT NOT NULL,\n\tICD9_CODE VARCHAR(10) NOT NULL,\n\tSHORT_TITLE VARCHAR(50) NOT NULL,\n\tLONG_TITLE VARCHAR(255) NOT NULL,\n    \tCONSTRAINT d_icd_proc_code_unique UNIQUE (ICD9_CODE),\n    \tCONSTRAINT d_icd_proc_rowid_pk PRIMARY KEY (ROW_ID)\n   )')
('index', 'sqlite_autoindex_D_ICD_PROCEDURES_1', 'D_ICD_PROCEDURES', 5, None)
('index', 'sqlite_autoindex_D_ICD_PROCEDURES_2', 'D_ICD_PROCEDURES', 6, None)
('table', 'PATIENTS', 'PATIENTS', 7, 'CREATE TABLE PATIENTS\n   (\tROW_ID INT NOT NULL,\n\tSUBJECT_ID INT NOT NULL,\n\tGENDER VARCHAR(5) NOT NULL,\n\tDOB TIMEST