In [2]:
import glob
import pathlib
import shutil
import subprocess
from pathlib import Path

import pandas as pd

# Setup

[MDBTools](https://sourceforge.net/projects/mdbtools/) must be installed and the paths set below.

In [None]:
#!sudo apt install -y mdbtools

In [2]:
# Locations of the MDBTools executables. Prefer whatever is found on PATH and
# fall back to the conventional apt install location when nothing is found.
mdb_tables = shutil.which('mdb-tables') or '/usr/bin/mdb-tables'
mdb_export = shutil.which('mdb-export') or '/usr/bin/mdb-export'

# Clean previous runs

In [6]:
# Delete every regular file below the processed-data folder (directories are
# kept), then drop the SQLite database produced by an earlier run, if any.
stale_files = (entry for entry in Path('../data/processed/').rglob('*') if entry.is_file())
for stale in stale_files:
    stale.unlink(missing_ok=True)

Path('../data/rambam.sqlite').unlink(missing_ok=True)

# Convert to multiple CSV files

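Convert each table in the `.mdb` files to individual CSV files (one CSV per table per `.mdb` file). The list of tables is read from the first `.mdb` file found, so every file is assumed to contain the same tables.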

In [4]:
# glob returns matches in arbitrary order; sort so that the processing order
# (and the file used to discover the table names) is the same on every run.
files = sorted(pathlib.Path(x) for x in glob.glob('../data/raw/HomeHospital/database/*/*.mdb'))
if not files:
    # Fail with an explicit message instead of an IndexError on files[0].
    raise FileNotFoundError('No .mdb files found in ../data/raw/HomeHospital/database/*/')

# Table names are listed one per line (-1) from the first file only; the other
# files are assumed to share the same tables.
tables = subprocess.check_output([mdb_tables, '-1', files[0]], text=True).splitlines()

In [5]:
# Template path: only its parent directory matters, the final component is
# swapped out for every generated CSV name.
output_fp = pathlib.Path('../data/processed/multicsv/filename')


def csv_filepath(table, filepath):
    """Return the CSV path for one table exported from one database file.

    The file name is ``<table>_<database file name with a .csv suffix>`` and
    it is placed in the same directory as ``output_fp``.
    """
    csv_name = filepath.with_suffix('.csv').name
    return output_fp.with_name('_'.join((table, csv_name)))

In [6]:
%%time 
# Export every table of every .mdb file to its own CSV file.
for table in tables:
    for filepath in files:
        target = csv_filepath(table, filepath)
        # The output folder does not exist on a fresh checkout.
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, "w") as outfile:
            # check=True: stop on a failed export instead of silently
            # leaving an empty or truncated CSV behind.
            subprocess.run([mdb_export, filepath, table], stdout=outfile, check=True)

CPU times: user 126 ms, sys: 469 ms, total: 595 ms
Wall time: 14.9 s


# Join CSV files into one file per table

In [7]:
%%time

# Concatenate the per-database CSV files of each table into one CSV per table.
for table in tables:
    # Use the exact paths written by the export step rather than a
    # '{table}_*.csv' glob: the glob would also pick up files of any other
    # table whose name starts with '{table}_'. Sorting keeps the row order
    # of the combined file reproducible.
    table_files = sorted(csv_filepath(table, f) for f in files)
    if not table_files:
        raise FileNotFoundError(f'No exported CSV files to combine for table {table!r}')
    combined_csv = pd.concat([pd.read_csv(f) for f in table_files])
    combined_csv.to_csv(f'../data/processed/{table}.csv', index=False)

CPU times: user 20.1 s, sys: 1.62 s, total: 21.7 s
Wall time: 24.8 s


# Load the CSV files into a SQLite database

In [7]:
from sqlalchemy import create_engine
# SQLAlchemy engine for the SQLite file ../data/rambam.sqlite; later cells
# pass it as `con` to DataFrame.to_sql. echo=False keeps SQL logging off.
engine = create_engine('sqlite:///../data/rambam.sqlite', echo=False)

In [26]:
# Columns that are parsed as datetimes before loading; a table only needs to
# contain a subset of them.
date_columns = ('entry_date', 'exit_date', 'hospitalization_date', 'first_procedure_date')

# Load each combined CSV into a SQLite table of the same name, replacing any
# existing table.
for table in tables:
    df = pd.read_csv(f'../data/processed/{table}.csv')

    for column in date_columns:
        # Guard every date column the same way, and assign through item
        # access so a column is always what gets written.
        if column in df.columns:
            df[column] = pd.to_datetime(df[column])

    df.to_sql(table, con=engine, if_exists='replace', index=False)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.


KeyboardInterrupt



## Create tables based on data dictionaries

In [8]:
def to_table(table, data, con=None):
    """Load a ``code name`` listing into a SQL lookup table.

    Parameters
    ----------
    table : str
        Name of the destination table. An existing table with that name is
        replaced.
    data : str
        Multi-line text with one ``<code> <name>`` entry per line. After
        trimming surrounding whitespace, only lines whose first character is
        a digit are loaded, so blank lines and the ``code name`` header are
        skipped.
    con : optional
        Connection or engine handed to ``DataFrame.to_sql``. Defaults to the
        notebook-level ``engine``.

    Notes
    -----
    Codes are stored as the text tokens found in ``data``. Names are trimmed,
    so trailing spaces in the listing do not end up in the table. A line that
    holds only a code is stored with an empty name.
    """
    if con is None:
        con = engine

    values = {'code': [], 'name': []}
    for line in data.splitlines():
        line = line.strip()
        if not line or not line[0].isdigit():
            continue
        # Split on the first run of whitespace: code first, the rest is the name.
        parts = line.split(None, 1)
        values['code'].append(parts[0])
        values['name'].append(parts[1] if len(parts) > 1 else '')

    df = pd.DataFrame.from_dict(values)
    df.to_sql(table, con=con, if_exists='replace', index=False)

In [9]:
# Code/name listing loaded into the `gender` table by to_table; the
# `code name` header is not loaded because it does not start with a digit.
table = 'gender'
data = '''
code name
0 Total
1 Male
2 Female
3 Unknown
'''
to_table(table, data)

In [10]:
# Code/name listing loaded into the `entry_exit` table by to_table.
# Each code appears exactly once (a repeated row for code 10 was removed so
# that joins on `code` do not duplicate rows).
table = 'entry_exit'
data = '''
code name
0 Total
1 Emergency Internal Medicine Unit
2 Emergency Surgery Unit
3 Emergency Traumatology Unit
4 Emergency Orthopedic Unit
5 Emergency Otorhinolaryngology Unit
6 Emergency Ophthalmology Unit
7 Emergency Psychiatry Unit
8 Emergency Gynecology Unit
9 Pediatric Emergency Unit
10 Pediatric Surgery Emergency Unit
11 Emergency Maternity Unit
12 Hospitalization
13 Internal Medicine Division
14 Division of Surgery
15 Division of Obstetrics and Gynecology
16 Department of Anesthesiology
17 Pediatrics Department
18 Department of Pediatric Surgery
19 Division of Laboratory Services
20 Oncology Institute
22 Delay
23 Other DRG
24 Clinic
25 Trauma Unit (road accidents)
26 Trauma Unit (industrial accident)
30 Unknown
'''
to_table(table, data)

In [11]:
# Code/name listing loaded into the `entry_group` table by to_table.
# Lines carry no trailing spaces, so the stored names are clean.
table = 'entry_group'
data = '''
code name
0 Total
1 Emergency Room
2 Hospital
'''
to_table(table, data)

In [12]:
# Code/name listing loaded into the `hospital_unit` table by to_table.
# One entry per line and one line per code: a repeated row for code 11 was
# removed, codes 162 and 163 (previously fused on one line) are separate
# entries, and trailing spaces were dropped from the names.
table = 'hospital_unit'
data = '''
code name
1 Emergency Internal Medicine Unit
2 Emergency Surgery Unit
3 Emergency Traumatology Unit
4 Emergency Orthopedic Unit
5 Emergency Otorhinolaryngology Unit
6 Emergency Ophthalmology Unit
7 Emergency Psychiatry Unit
8 Emergency Gynecology Unit
9 Pediatric Emergency Unit
10 Pediatric Emergency Surgery Unit
11 Emergency Maternity Unit
12 Chemotherapy Outpatient Center
13 Institute of Oncology
14 Chemotherapy Inpatient Oncology Unit
15 Radiotherapy Inpatient Oncology Unit
16 Institute for Immunology, Allergy and AIDS
17 Institute of Gastroenterology
18 Intensive Care Unit Gastroenterology
19 Institute of Hematology and Bone Marrow Transplant
20 Pediatric Hematology and Oncology Department2
21 Institute of Hematology and Bone Marrow Transplant1
22 Thrombosis and Hemostasis Unit
23 Department of Intensive and Critical Medicine
24 Department of Neurology
25 Cognitive Neurology Unit
26 EEG Institute
27 Department of Nephrology
28 Hypertension Clinic
29 Hemodialysis Unit
30 Kidney Transplant Unit
31 Institute of Endocrinology, Diabetes and Metabolism
32 Bone and Calcium Metabolism Unit
33 Department of Dermatology
34 Phototherapy Unit
35 Internal Medicine A
36 Intensive Care Unit A
37 Internal Medicine B
38 Intensive Care Unit B
39 Internal Medicine C
40 Intensive Care Unit C
41 Internal Medicine D
42 Intensive Care Unit D
43 Internal Medicine E
44 Intensive Care Unit
45 Internal Medicine Military
46 Department of Cardiology
47 Intensive Cardiac Care Unit
48 Arrhythmia and Electrophysiology (Pacemakers)
49 Congestive Heart Failure Clinic
50 Interventional (Angioplasty) Cardiology
51 Department of Rheumatology
52 Institute of Pulmonology
53 Pain Relief Unit
54 Department of Urology
55 Neuro-Urology Unit
56 Department of Otorhinolaryngology
57 Orthopedics A Department
58 Orthopedics A - Department1
59 Orthopedic Oncology
60 Orthopedics B - Department
61 Orthopedics B - Department1
62 Department of General Surgery A
63 Department of General Surgery B
64 Department of Plastic Surgery
65 Intensive Care Burn Unit
66 Department of Thoracic Surgery
67 Hand Surgery Unit
68 Department of Vascular Surgery and Transplantation
69 Department of Cardiac Surgery
70 Intensive Care of Cardiac Surgery Unit
71 Department of Neurosurgery
72 Intensive Care of Neurosurgery Unit
73 Neuro-Oncology
74 Department of Maternity A
75 Department of Maternity B
76 Department of Newborn Care
77 Labor and Delivery Rooms
78 Department of Gynecology
79 Neonatal Intensive Care Unit (NICU)
80 Onco-Gynecology Unit
81 Endo-Gynecology Unit
82 High Risk Pregnancy Unit
83 Department of Psychiatry and Mental Health
84 Psychopathic Ward 5
85 Psychopathic Ward 6
86 Day Care Psychiatry Unit
87 Psychogeriatric Service
88 Department of Ophthalmology
89 Department of Oral and Maxillofacial Surgery
90 Institute of Endocrinology, Diabetes and Metabolism1
91 Proctology
92 Pathology Laboratory
93 Radiology Laboratory
94 Post-Anesthesia Recovery Unit
95 Ambulatory Operating Room
96 Pre-Operating Room
97 Delay
98 Hospitalization
99 Unit High Risk Pregnancy Unit
100 Department of Plastic Surgery (suture)
101 Emergency Medicine Unit1
102 Department of Pediatrics A
103 Department of Pediatrics B
104 Department of Pediatric Surgery
105 Pediatric Plastic Surgery Unit
106 Pediatric Oncology and Bone Marrow Transplant Unit
107 Pediatric Intensive Care Unit
108 Pediatric Hematology and Oncology Department
109 Pediatric Urology Clinic
110 Pediatric Orthopedics Unit
111 Pediatric Immunology and Allergy
112 Pediatric Endocrinology Clinic
113 Pediatric Otorhinolaryngology Unit
114 Pediatric Gastroenterology and Nutrition Unit
115 Pediatric Hemodialysis Unit
116 Pediatric Metabolism Unit
117 Pediatric Hematology and Oncology Department1
118 Department of Infectious Diseases
119 Pediatric Neurosurgery Unit
120 Department of Pediatric Cardiology
121 Pediatric Rheumatology Clinic
122 Pediatric Pulmonary Unit
123 Child and Adolescent Psychiatry Service
124 Pediatric Nephrology Unit
125 Pediatric Diabetes and Obesity Clinic
126 Pediatric Oral and Maxillofacial Surgery
127 Institute of Breast Health
128 Mammography Unit
129 Trauma Unit (road accidents)
130 Institute for Occupational Health
131 School of Radiology and Imaging
132 Institute of Nuclear Medicine
133 Computed Tomography Unit
134 Ultrasound Unit
135 Ultrasound Unit1
136 Ultrasound Unit2
137 Magnetic Resonance Imaging Unit
138 Oncology Clinic
139 Orthopedic Clinic
140 Orthopedic Clinic6
141 Ilizarov External Fixation Clinic
142 Eating Disorders Clinic
143 Institute of Breast Health (image-guided biopsies)
144 Orthopedic Clinic1
145 Orthopedic Clinic5
146 Orthopedic Clinic4
147 Non-Invasive Cardiology Unit
148 Hemodialysis Clinic
149 Hypertension Clinic1
150 Plastic Surgery Clinic
151 Plastic Surgery Clinic (Burn)
152 Lipid Disorders Clinic
153 Liver Clinic
154 Orthopedic Clinic7
155 Orthopedic Clinic3
156 Pain Relief Clinic
157 Neuro-Urology Clinic
158 Gynecology Clinic
159 Fertility and Sperm Bank Clinic
160 Periodontics and Oral Medicine Clinic
161 Medicinal Plant Clinic (violent patients unit)
162 Dental Clinic
163 Rehabilitation Heart Clinic
164 Institute for Occupational Health1
165 Pediatric Hematology and Oncology Clinic
166 Pediatric Hematology and Oncology Clinic1
167 Pediatric Neurosurgery Clinic
168 Immunology and Tissue Classification Laboratory
169 Endocrinology Laboratory
170 Biochemistry Laboratory
171 Blood Bank Laboratory
172 Sperm Bank Laboratory
173 Urgent Testing Laboratory
174 Toxicology Laboratory
175 Biochemistry Laboratory1
176 Microbiology Laboratory
177 Neurosurgery Laboratory
178 Cytology Laboratory
179 Ministry of Defense (rehabilitation)
180 Institute for Periodic Examinations
181 Invasive Imaging Tests
182 Hydrotherapy Pool (rehabilitative)
183 Orthopedic Clinic2
184 Department of Vascular Surgery and Transplantation1
185 Gynecology Clinic2
186 Department of Clinical Nutrition
187 X-ray Screening
188 Unknown2
189 Neurosurgery
190 Unknown1
191 Bone and Calcium Metabolism Unit1
192 Hearing and Speech Unit
193 Nutrition and Dietary Unit
194 Department of Physical Therapy
195 Bone Marrow Transplant Unit
196 Hemodialysis Clinic1
197 Endodontics and Dental Trauma Unit
198 Gynecology Clinic1
200 Unknown
'''
to_table(table, data)

In [14]:
# Code/name listing loaded into the `outcome_id` table by to_table; the
# `code name` header is not loaded because it does not start with a digit.
table = 'outcome_id'
data = '''
code name
1 Emergency Room
2 Hospitalization
3 Removing
4 Beginning Removing
5 Completion Removing
6 Receiving
7 Released (Home)
8 Left on His Own
9 Left with Medical File
10 Deceased
11 Renunciation of Treatments
12 Cancellation
13 Medical Clinic
14 Other Institution
15 Other
20 Unknown
'''
to_table(table, data)

In [15]:
# Code/name listing loaded into the `outcome` table by to_table; the
# `code name` header is not loaded because it does not start with a digit.
table = 'outcome'
data = '''
code name
0 Total
1 Emergency Room
2 Hospitalization
3 Transfer
4 Released (Home)
5 Left on His Own
6 Deceased
7 Renunciation of Treatments
8 Medical Clinic
9 Other Institution
10 Other
11 Unknown
'''
to_table(table, data)

In [16]:
# Code/name listing loaded into the `department` table by to_table; the
# `code name` header is not loaded because it does not start with a digit.
table = 'department'
data = '''
code name
0 Total
1 Emergency Internal Medicine Unit
2 Emergency Surgery Unit
3 Emergency Traumatology Unit
4 Emergency Orthopedic Unit
5 Emergency Otorhinolaryngology Unit
6 Emergency Ophthalmology Unit
7 Emergency Psychiatry Unit
8 Emergency Gynecology Unit
9 Pediatric Emergency Unit
10 Pediatric Emergency Surgery Unit
11 Emergency Maternity Unit
12 Institute of Oncology
13 Institute for Immunology, Allergy and AIDS
14 Institute of Gastroenterology
15 Institute of Hematology and Bone Marrow Transplant
16 Department of Intensive and Critical Medicine
17 Department of Neurology
18 Department of Nephrology
19 Institute of Endocrinology, Diabetes and Metabolism
20 Department of Dermatology
21 Department of Internal Medicine
22 Intensive Care Unit
23 Department of Cardiology
24 Department of Rheumatology
25 Institute of Pulmonology
26 Pain Relief Unit
27 Department of Urology
28 Department of Otorhinolaryngology
29 Department of Orthopedics
30 Department of General Surgery
31 Department of Plastic Surgery
32 Department of Vascular Surgery and Transplantation
33 Department of Cardiac Surgery
34 Department of Neurosurgery
35 Department of Maternity
36 Department of Newborn Care
37 Department of Gynecology
38 Department of Psychiatry and Mental Health
39 Department of Ophthalmology
40 Department of Oral and Maxillofacial Surgery
41 Pathology Laboratory
42 Department of Anesthesiology
43 Delay
44 Hospitalization
45 Pediatrics Department
46 Pediatric Surgery Department
47 Pediatric Plastic Surgery Unit
48 Pediatric Intensive Care Unit
49 Pediatric Hematology and Oncology Department
50 Pediatric Urology Clinic
51 Pediatric Orthopedics Unit
52 Pediatric Immunology and Allergy
53 Pediatric Endocrinology Clinic
54 Pediatric Otorhinolaryngology Unit
55 Pediatric Gastroenterology and Nutrition Unit
56 Pediatric Hemodialysis Unit
57 Pediatric Metabolism Unit
58 Department of Infectious Diseases
59 Pediatric Neurosurgery Unit
60 Pediatric Cardiology Department
61 Pediatric Rheumatology Clinic
62 Pediatric Pulmonary Unit
63 Pediatric Nephrology Unit
64 Pediatric Diabetes and Obesity Clinic
65 Pediatric Oral and Maxillofacial Surgery
66 Institute of Breast Health
67 Trauma Unit (road accidents)
68 Institute for Occupational Health
69 School of Radiology and Imaging
70 Institute of Nuclear Medicine
71 Oncology Clinic
72 Orthopedic Clinic
73 Eating Disorders Clinic
74 Hemodialysis Clinic
75 Hypertension Clinic
76 Plastic Surgery Clinic
77 Lipid Disorders Clinic
78 Liver Clinic
79 Neuro-urology Clinic
80 Gynecology Clinic
81 Periodontics and Oral Medicine Clinic
82 Rehabilitation Heart Clinic
83 Pediatric Hematology and Oncology Clinic
84 Immunology and Tissue Classification Laboratory
85 Endocrinology Laboratory
86 Biochemistry Laboratory
87 Blood Bank Laboratory
88 Sperm Bank Laboratory
89 Urgent Testing Laboratory
90 Toxicology Laboratory
91 Microbiology Laboratory
92 Cytology Laboratory
93 Institute for Periodic Examinations
94 Department of Clinical Nutrition
95 Hearing and Speech Unit
96 Department of Physical Therapy
99 Unknown
'''
to_table(table, data)

In [17]:
# Code/name listing loaded into the `ward_department` table by to_table.
# One entry per line: codes 7, 8 and 9 were previously fused on a single
# line, which stored them as one row under code 7.
table = 'ward_department'
data = '''
code name
0 Total
1 Internal Medicine A
2 Internal Medicine D
3 Internal Medicine C
4 Internal Medicine E
5 Internal Medicine B
6 Department of General Surgery B
7 Department of Pediatrics A
8 Labor and Delivery Rooms
9 Orthopedics A - Department
10 Department of Pediatrics B
11 Orthopedics B - Department
12 Department of General Surgery A
13 Department of Neurology
14 Department of Intensive and Critical Medicine
15 Intensive Cardiac Care Unit
16 Department of Maternity A
17 Chemotherapy Inpatient Oncology Unit
18 Department of Neurosurgery
19 Psychopathic Ward 5
20 Department of Maternity B
21 Department of Urology
22 Department of Gynecology
23 Pediatric Intensive Care Unit
24 Intensive Care of Neurosurgery Unit
25 Department of Ophthalmology
26 Radiotherapy Inpatient Oncology Unit
27 Department of Nephrology
28 Department of Cardiology
29 Department of Thoracic Surgery
30 Department of Dermatology
31 Department of Vascular Surgery and Transplantation
32 Department of Otorhinolaryngology
33 Pediatric Hematology
34 Department of Plastic Surgery
35 Psychopathic Ward 6
36 Department of Pediatric Surgery
37 Department of Cardiac Surgery
38 Intensive Care Unit A
39 Institute of Hematology and Bone Marrow Transplant
40 Intensive Care Unit D
41 Department of Rheumatology
42 Department of Oral and Maxillofacial Surgery
43 Intensive Care Unit B
44 Pediatric Neurosurgery Unit
45 Intensive Care Unit C
46 Pediatric Oncology
47 Hand Surgery Unit
48 Onco-Gynecology Unit
49 Pediatric Orthopedics Unit
50 Neonatal Intensive Care Unit (NICU)
51 Pediatric Plastic Surgery Unit
52 Intensive Care Burn Unit
53 Pediatric Oral and Maxillofacial Surgery
54 Intensive Care of Cardiac Surgery Unit
55 Department of Newborn Care
56 Pediatric Urology Clinic
57 Pediatric Otorhinolaryngology Unit
58 Pain Relief Unit
59 Pediatric Oncology and Bone Marrow Transplant Unit
60 Department of Pediatric Cardiology
61 Interventional (Angioplasty) Cardiology
62 Intensive Care Unit
63 Day Care Psychiatry Unit
65 Other
'''
to_table(table, data)

In [18]:
# Code/name listing loaded into the `intensive_care_unit` table by to_table;
# the `code name` header is not loaded because it does not start with a digit.
table = 'intensive_care_unit'
data = '''
code name
0 Total
1 Intensive Care Unit Gastroenterology
2 Department of Intensive and Critical Medicine
3 Intensive Care Unit A
4 Intensive Care Unit B
5 Intensive Care Unit C
6 Intensive Care Unit D
7 Intensive Care Unit
8 Intensive Cardiac Care Unit
9 Intensive Care Burn Unit
10 Intensive Care of Cardiac Surgery Unit
11 Intensive Care of Neurosurgery Unit
12 Neonatal Intensive Care Unit (NICU)
13 Pediatric Intensive Care Unit
14 Other
'''
to_table(table, data)

In [19]:
# Code/name listing loaded into the `pediatric_unit` table by to_table; the
# `code name` header is not loaded because it does not start with a digit.
table = 'pediatric_unit'
data = '''
code name
0 Total
1 Pediatric Hematology and Oncology Department2
2 Department of Pediatrics A
3 Department of Pediatrics B
4 Department of Pediatric Surgery
5 Pediatric Plastic Surgery Unit
6 Pediatric Oncology and Bone Marrow Transplant Unit
7 Pediatric Intensive Care Unit
8 Pediatric Hematology and Oncology Department
9 Pediatric Urology Clinic
10 Pediatric Orthopedics Unit
11 Pediatric Immunology and Allergy
12 Pediatric Endocrinology Clinic
13 Pediatric Otorhinolaryngology Unit
14 Pediatric Gastroenterology and Nutrition Unit
15 Pediatric Hemodialysis Unit
16 Pediatric Metabolism Unit
17 Pediatric Hematology and Oncology Department1
18 Pediatric Neurosurgery Unit
19 Department of Pediatric Cardiology
20 Pediatric Rheumatology Clinic
21 Pediatric Pulmonary Unit
22 Pediatric Nephrology Unit
23 Pediatric Diabetes and Obesity Clinic
24 Pediatric Oral and Maxillofacial Surgery
25 Pediatric Hematology and Oncology Clinic
26 Pediatric Hematology and Oncology Clinic1
27 Pediatric Neurosurgery Clinic
28 Other
'''
to_table(table, data)

In [20]:
# Code/name listing loaded into the `seg_type` table by to_table; the
# `code name` header is not loaded because it does not start with a digit.
table = 'seg_type'
data = '''
code name
1 segment start
2 segment start and end
3 segment end
4 middle segment
'''
to_table(table, data)