# Data extraction

### Find diagnosis codes with diabetes diagnosis

In [1]:
url <- "https://physionet.org/static/published-projects/mimiciii-demo/mimic-iii-clinical-database-demo-1.4.zip"

download.file(url, dest = "dataset.zip", mode="wb") 
unzip ("dataset.zip", exdir = "./")

file.remove("dataset.zip")

In [2]:
library(dplyr)
library(tidyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [3]:
diagnoses_codes <- read.csv("mimic-iii-clinical-database-demo-1.4/D_ICD_DIAGNOSES.csv", stringsAsFactors = FALSE)
diagnoses_codes <- diagnoses_codes[startsWith(diagnoses_codes$short_title, "DMI"), ]
diabetes_codes <- diagnoses_codes$icd9_code

rm(diagnoses_codes)

diabetes_codes

### Find all subject_ids (patient id), who have diabetes

In [4]:
diagnoses <- read.csv("mimic-iii-clinical-database-demo-1.4/DIAGNOSES_ICD.csv", stringsAsFactors = FALSE)
diagnoses <- diagnoses[diagnoses$icd9_code %in% diabetes_codes, ]
diagnosed <- diagnoses$subject_id

rm(diagnoses)

diagnosed

### Find patients records, who have diabetes

In [5]:
patients <- read.csv("mimic-iii-clinical-database-demo-1.4/PATIENTS.csv", stringsAsFactors = FALSE)

patients$diabetes <- ifelse(patients$subject_id %in% diagnosed, 1, 0)
head(patients)

Unnamed: 0_level_0,row_id,subject_id,gender,dob,dod,dod_hosp,dod_ssn,expire_flag,diabetes
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<dbl>
1,9467,10006,F,2094-03-05 00:00:00,2165-08-12 00:00:00,2165-08-12 00:00:00,2165-08-12 00:00:00,1,1
2,9472,10011,F,2090-06-05 00:00:00,2126-08-28 00:00:00,2126-08-28 00:00:00,,1,0
3,9474,10013,F,2038-09-03 00:00:00,2125-10-07 00:00:00,2125-10-07 00:00:00,2125-10-07 00:00:00,1,0
4,9478,10017,F,2075-09-21 00:00:00,2152-09-12 00:00:00,,2152-09-12 00:00:00,1,1
5,9479,10019,M,2114-06-20 00:00:00,2163-05-15 00:00:00,2163-05-15 00:00:00,2163-05-15 00:00:00,1,0
6,9486,10026,F,1895-05-17 00:00:00,2195-11-24 00:00:00,,2195-11-24 00:00:00,1,0


In [6]:
# patients$death_in_hospital <- ifelse(patients$dod_hosp == "", 0, 1)
# head(patients)

In [7]:
# all patients died

any(patients$expire_flag == 0)

In [8]:
# remove redundant columns
patients <- patients[ , c(2, 3, 8, 9)]
#patients$gender <- ifelse(patients$gender == 'M', 1, 0)
head(patients)

Unnamed: 0_level_0,subject_id,gender,expire_flag,diabetes
Unnamed: 0_level_1,<int>,<chr>,<int>,<dbl>
1,10006,F,1,1
2,10011,F,1,0
3,10013,F,1,0
4,10017,F,1,1
5,10019,M,1,0
6,10026,F,1,0


### Admissions table

In [9]:
admissions <- read.csv("mimic-iii-clinical-database-demo-1.4/ADMISSIONS.csv", stringsAsFactors = FALSE)
head(admissions)

Unnamed: 0_level_0,row_id,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,religion,marital_status,ethnicity,edregtime,edouttime,diagnosis,hospital_expire_flag,has_chartevents_data
Unnamed: 0_level_1,<int>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<int>
1,12258,10006,142345,2164-10-23 21:09:00,2164-11-01 17:15:00,,EMERGENCY,EMERGENCY ROOM ADMIT,HOME HEALTH CARE,Medicare,,CATHOLIC,SEPARATED,BLACK/AFRICAN AMERICAN,2164-10-23 16:43:00,2164-10-23 23:00:00,SEPSIS,0,1
2,12263,10011,105331,2126-08-14 22:32:00,2126-08-28 18:59:00,2126-08-28 18:59:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Private,,CATHOLIC,SINGLE,UNKNOWN/NOT SPECIFIED,,,HEPATITIS B,1,1
3,12265,10013,165520,2125-10-04 23:36:00,2125-10-07 15:13:00,2125-10-07 15:13:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,,CATHOLIC,,UNKNOWN/NOT SPECIFIED,,,SEPSIS,1,1
4,12269,10017,199207,2149-05-26 17:19:00,2149-06-03 18:42:00,,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,,CATHOLIC,DIVORCED,WHITE,2149-05-26 12:08:00,2149-05-26 19:45:00,HUMERAL FRACTURE,0,1
5,12270,10019,177759,2163-05-14 20:43:00,2163-05-15 12:00:00,2163-05-15 12:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,,CATHOLIC,DIVORCED,WHITE,,,ALCOHOLIC HEPATITIS,1,1
6,12277,10026,103770,2195-05-17 07:39:00,2195-05-24 11:45:00,,EMERGENCY,EMERGENCY ROOM ADMIT,REHAB/DISTINCT PART HOSP,Medicare,,OTHER,,WHITE,2195-05-17 01:49:00,2195-05-17 08:29:00,STROKE/TIA,0,1


In [10]:
data <- admissions[, c('subject_id', 'religion', 'ethnicity', 'insurance')]
head(data)

Unnamed: 0_level_0,subject_id,religion,ethnicity,insurance
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>
1,10006,CATHOLIC,BLACK/AFRICAN AMERICAN,Medicare
2,10011,CATHOLIC,UNKNOWN/NOT SPECIFIED,Private
3,10013,CATHOLIC,UNKNOWN/NOT SPECIFIED,Medicare
4,10017,CATHOLIC,WHITE,Medicare
5,10019,CATHOLIC,WHITE,Medicare
6,10026,OTHER,WHITE,Medicare


In [11]:
# join to patients table
patients <- merge(patients, data, by = c('subject_id'))
rm(data)

head(patients)

Unnamed: 0_level_0,subject_id,gender,expire_flag,diabetes,religion,ethnicity,insurance
Unnamed: 0_level_1,<int>,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>
1,10006,F,1,1,CATHOLIC,BLACK/AFRICAN AMERICAN,Medicare
2,10011,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Private
3,10013,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Medicare
4,10017,F,1,1,CATHOLIC,WHITE,Medicare
5,10019,M,1,0,CATHOLIC,WHITE,Medicare
6,10026,F,1,0,OTHER,WHITE,Medicare


### Number of admission of each type

In [12]:
x <- admissions %>% 
    group_by(subject_id, admission_type) %>%
    summarise(n = n())

x <- x %>%
  pivot_wider(names_from = admission_type, values_from = n)

x[is.na(x)] <- 0

colnames(x) <- c('subject_id', 'n_EMERGENCY_stay', 'n_ELECTIVE_stay', 'n_URGENT_stay')

head(x)

`summarise()` has grouped output by 'subject_id'. You can override using the `.groups` argument.



subject_id,n_EMERGENCY_stay,n_ELECTIVE_stay,n_URGENT_stay
<int>,<int>,<int>,<int>
10006,1,0,0
10011,1,0,0
10013,1,0,0
10017,1,0,0
10019,1,0,0
10026,1,0,0


In [13]:
# join to patients table
patients <- merge(patients, x, by = c('subject_id'))
head(patients)

Unnamed: 0_level_0,subject_id,gender,expire_flag,diabetes,religion,ethnicity,insurance,n_EMERGENCY_stay,n_ELECTIVE_stay,n_URGENT_stay
Unnamed: 0_level_1,<int>,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>,<int>,<int>,<int>
1,10006,F,1,1,CATHOLIC,BLACK/AFRICAN AMERICAN,Medicare,1,0,0
2,10011,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Private,1,0,0
3,10013,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Medicare,1,0,0
4,10017,F,1,1,CATHOLIC,WHITE,Medicare,1,0,0
5,10019,M,1,0,CATHOLIC,WHITE,Medicare,1,0,0
6,10026,F,1,0,OTHER,WHITE,Medicare,1,0,0


In [14]:
rm(admissions)

### Laboratory reuslts

According to https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5961793/ following medical examinations are connected with diabetes:
* HbA1c - (Hemoglobina glikowana); code: 4548-4
* Blood Glucose - (Glukoza); code: 2345-7
* Serum Creatinine - (Kreatynina); code: 2160-0

In [15]:
lab_codes <- c("4548-4", "2345-7", "2160-0")

labitems <- read.csv("mimic-iii-clinical-database-demo-1.4/D_LABITEMS.csv", stringsAsFactors = FALSE)
labitems <- labitems[labitems$loinc_code %in% lab_codes, ]
labitems

Unnamed: 0_level_0,row_id,itemid,label,fluid,category,loinc_code
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<chr>,<chr>
53,53,50852,% Hemoglobin A1c,Blood,Chemistry,4548-4
55,55,50854,Absolute A1c,Blood,Chemistry,4548-4
113,113,50912,Creatinine,Blood,Chemistry,2160-0
132,132,50931,Glucose,Blood,Chemistry,2345-7


Codes are correct

In [16]:
lab_codes <- labitems$itemid

In [17]:
rm(labitems)

In [18]:
lab_codes

In [19]:
labevents <- read.csv("mimic-iii-clinical-database-demo-1.4/LABEVENTS.csv", stringsAsFactors = FALSE)
colnames(labevents)

In [20]:
labevents <- labevents[labevents$itemid %in% lab_codes, c('subject_id', 'itemid', 'valuenum', 'valueuom', 'flag')]

labevents$name <- ifelse(labevents$itemid == 50931, "Glucose",
                        ifelse(labevents$itemid == 50912, "Creatinine", 
                              ifelse(labevents$itemid == 50854, "Absolute A1c", "% Hemoglobin A1c")))

head(labevents, 20)

Unnamed: 0_level_0,subject_id,itemid,valuenum,valueuom,flag,name
Unnamed: 0_level_1,<int>,<int>,<dbl>,<chr>,<chr>,<chr>
5,10006,50912,7.0,mg/dL,abnormal,Creatinine
6,10006,50931,126.0,mg/dL,abnormal,Glucose
43,10006,50912,7.4,mg/dL,abnormal,Creatinine
44,10006,50931,106.0,mg/dL,abnormal,Glucose
78,10006,50912,8.3,mg/dL,abnormal,Creatinine
80,10006,50931,79.0,mg/dL,,Glucose
132,10006,50912,6.2,mg/dL,abnormal,Creatinine
133,10006,50931,100.0,mg/dL,,Glucose
149,10006,50912,5.9,mg/dL,abnormal,Creatinine
150,10006,50931,90.0,mg/dL,,Glucose


In [21]:
# units
unique(labevents[c(4, 6)])

Unnamed: 0_level_0,valueuom,name
Unnamed: 0_level_1,<chr>,<chr>
5,mg/dL,Creatinine
6,mg/dL,Glucose
339,%,% Hemoglobin A1c
2813,,Absolute A1c


#### Mean values

In [22]:
labevents_tmp <- 
    labevents %>%
        group_by(subject_id, name) %>%
        summarise(mean = round(mean(valuenum), 3))

head(labevents_tmp)

`summarise()` has grouped output by 'subject_id'. You can override using the `.groups` argument.



subject_id,name,mean
<int>,<chr>,<dbl>
10006,% Hemoglobin A1c,5.5
10006,Creatinine,5.418
10006,Glucose,126.82
10011,Creatinine,0.563
10011,Glucose,115.526
10013,Creatinine,1.7


In [23]:
labevents_tmp <-
    labevents_tmp %>%
        pivot_wider(names_from = name, values_from = mean)

head(labevents_tmp)

subject_id,% Hemoglobin A1c,Creatinine,Glucose,Absolute A1c
<int>,<dbl>,<dbl>,<dbl>,<dbl>
10006,5.5,5.418,126.82,
10011,,0.563,115.526,
10013,,1.7,149.5,
10017,8.4,0.44,204.8,
10019,,5.8,194.8,
10026,,0.545,107.545,


In [24]:
# join to patients table
patients <- merge(patients, labevents_tmp, by = c('subject_id'))
head(patients)

Unnamed: 0_level_0,subject_id,gender,expire_flag,diabetes,religion,ethnicity,insurance,n_EMERGENCY_stay,n_ELECTIVE_stay,n_URGENT_stay,% Hemoglobin A1c,Creatinine,Glucose,Absolute A1c
Unnamed: 0_level_1,<int>,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
1,10006,F,1,1,CATHOLIC,BLACK/AFRICAN AMERICAN,Medicare,1,0,0,5.5,5.418,126.82,
2,10011,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Private,1,0,0,,0.563,115.526,
3,10013,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Medicare,1,0,0,,1.7,149.5,
4,10017,F,1,1,CATHOLIC,WHITE,Medicare,1,0,0,8.4,0.44,204.8,
5,10019,M,1,0,CATHOLIC,WHITE,Medicare,1,0,0,,5.8,194.8,
6,10026,F,1,0,OTHER,WHITE,Medicare,1,0,0,,0.545,107.545,


#### Count abnormal laboratory results

In [25]:
head(labevents)

Unnamed: 0_level_0,subject_id,itemid,valuenum,valueuom,flag,name
Unnamed: 0_level_1,<int>,<int>,<dbl>,<chr>,<chr>,<chr>
5,10006,50912,7.0,mg/dL,abnormal,Creatinine
6,10006,50931,126.0,mg/dL,abnormal,Glucose
43,10006,50912,7.4,mg/dL,abnormal,Creatinine
44,10006,50931,106.0,mg/dL,abnormal,Glucose
78,10006,50912,8.3,mg/dL,abnormal,Creatinine
80,10006,50931,79.0,mg/dL,,Glucose


In [26]:
labevents_tmp <- labevents
labevents_tmp$abnormal <- ifelse(labevents_tmp$flag == "", 0, 1)

labevents_tmp <- 
    labevents_tmp %>%
        group_by(subject_id, name) %>%
        summarise(n_abnormal_flags = sum(abnormal))

head(labevents_tmp)

`summarise()` has grouped output by 'subject_id'. You can override using the `.groups` argument.



subject_id,name,n_abnormal_flags
<int>,<chr>,<dbl>
10006,% Hemoglobin A1c,0
10006,Creatinine,62
10006,Glucose,41
10011,Creatinine,0
10011,Glucose,11
10013,Creatinine,2


In [27]:
labevents_tmp <-
    labevents_tmp %>%
        pivot_wider(names_from = name, values_from = n_abnormal_flags)

head(labevents_tmp)

subject_id,% Hemoglobin A1c,Creatinine,Glucose,Absolute A1c
<int>,<dbl>,<dbl>,<dbl>,<dbl>
10006,0.0,62,41,
10011,,0,11,
10013,,2,2,
10017,2.0,2,13,0.0
10019,,4,3,
10026,,0,5,


In [28]:
colnames(labevents_tmp) <- c('subject_id', 'n_abnormal_flags_% Hemoglobin A1c', 'n_abnormal_flags_Creatinine', 'n_abnormal_flags_Glucose', 'n_abnormal_flags_Absolute A1c')
head(labevents_tmp)

subject_id,n_abnormal_flags_% Hemoglobin A1c,n_abnormal_flags_Creatinine,n_abnormal_flags_Glucose,n_abnormal_flags_Absolute A1c
<int>,<dbl>,<dbl>,<dbl>,<dbl>
10006,0.0,62,41,
10011,,0,11,
10013,,2,2,
10017,2.0,2,13,0.0
10019,,4,3,
10026,,0,5,


In [29]:
# join to patients table
patients <- merge(patients, labevents_tmp, by = c('subject_id'))
head(patients)

Unnamed: 0_level_0,subject_id,gender,expire_flag,diabetes,religion,ethnicity,insurance,n_EMERGENCY_stay,n_ELECTIVE_stay,n_URGENT_stay,% Hemoglobin A1c,Creatinine,Glucose,Absolute A1c,n_abnormal_flags_% Hemoglobin A1c,n_abnormal_flags_Creatinine,n_abnormal_flags_Glucose,n_abnormal_flags_Absolute A1c
Unnamed: 0_level_1,<int>,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,10006,F,1,1,CATHOLIC,BLACK/AFRICAN AMERICAN,Medicare,1,0,0,5.5,5.418,126.82,,0.0,62,41,
2,10011,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Private,1,0,0,,0.563,115.526,,,0,11,
3,10013,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Medicare,1,0,0,,1.7,149.5,,,2,2,
4,10017,F,1,1,CATHOLIC,WHITE,Medicare,1,0,0,8.4,0.44,204.8,,2.0,2,13,0.0
5,10019,M,1,0,CATHOLIC,WHITE,Medicare,1,0,0,,5.8,194.8,,,4,3,
6,10026,F,1,0,OTHER,WHITE,Medicare,1,0,0,,0.545,107.545,,,0,5,


#### Count number of medical examinations

In [30]:
labevents_tmp <- 
    labevents %>%
        group_by(subject_id, name) %>%
        summarise(count = n())

head(labevents_tmp)

`summarise()` has grouped output by 'subject_id'. You can override using the `.groups` argument.



subject_id,name,count
<int>,<chr>,<int>
10006,% Hemoglobin A1c,2
10006,Creatinine,62
10006,Glucose,61
10011,Creatinine,19
10011,Glucose,19
10013,Creatinine,2


In [31]:
labevents_tmp <-
    labevents_tmp %>%
        pivot_wider(names_from = name, values_from = count)

head(labevents_tmp)

subject_id,% Hemoglobin A1c,Creatinine,Glucose,Absolute A1c
<int>,<int>,<int>,<int>,<int>
10006,2.0,62,61,
10011,,19,19,
10013,,2,2,
10017,2.0,15,15,2.0
10019,,4,5,
10026,,11,11,


In [32]:
colnames(labevents_tmp) <- c('subject_id', 'number_of_% Hemoglobin A1c', 'number_of_Creatinine', 'number_of_Glucose', 'number_of_Absolute A1c')

In [33]:
# join to patients table
patients <- merge(patients, labevents_tmp, by = c('subject_id'))

options(repr.matrix.max.cols=30)

head(patients)

Unnamed: 0_level_0,subject_id,gender,expire_flag,diabetes,religion,ethnicity,insurance,n_EMERGENCY_stay,n_ELECTIVE_stay,n_URGENT_stay,% Hemoglobin A1c,Creatinine,Glucose,Absolute A1c,n_abnormal_flags_% Hemoglobin A1c,n_abnormal_flags_Creatinine,n_abnormal_flags_Glucose,n_abnormal_flags_Absolute A1c,number_of_% Hemoglobin A1c,number_of_Creatinine,number_of_Glucose,number_of_Absolute A1c
Unnamed: 0_level_1,<int>,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<int>,<int>,<int>
1,10006,F,1,1,CATHOLIC,BLACK/AFRICAN AMERICAN,Medicare,1,0,0,5.5,5.418,126.82,,0.0,62,41,,2.0,62,61,
2,10011,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Private,1,0,0,,0.563,115.526,,,0,11,,,19,19,
3,10013,F,1,0,CATHOLIC,UNKNOWN/NOT SPECIFIED,Medicare,1,0,0,,1.7,149.5,,,2,2,,,2,2,
4,10017,F,1,1,CATHOLIC,WHITE,Medicare,1,0,0,8.4,0.44,204.8,,2.0,2,13,0.0,2.0,15,15,2.0
5,10019,M,1,0,CATHOLIC,WHITE,Medicare,1,0,0,,5.8,194.8,,,4,3,,,4,5,
6,10026,F,1,0,OTHER,WHITE,Medicare,1,0,0,,0.545,107.545,,,0,5,,,11,11,


### Finish and save table

In [34]:
rm(labevents)
rm(labevents_tmp)

In [35]:
colnames(patients) <- c("id", "gender", "dead", "diabetes", "religion", "ethnicity", "insurance", "number_of_emergency_stays", "number_of_elective_stays", "number_of_urgent_stays",
                       "A1c_hemoglobin_%_mean", "creatinine_mg/dL_mean", "glucose_mg/dL_mean", "A1c_absolute_mean", "number_of_abnormal_results_A1c_hemoglobin_%", 
                       "number_of_abnormal_results_creatinine", "number_of_abnormal_results_glucose", "number_of_abnormal_results_A1c_absolute",
                       "number_of_A1c_hemoglobin_%_tests", "number_of_creatinine_tests", "number_of_glucose_tests", "number_of_A1c_absolute_tests")

In [36]:
# column diabetes as last
patients <- patients[c("id", "gender", "dead", "religion", "ethnicity", "insurance", "number_of_emergency_stays", "number_of_elective_stays", "number_of_urgent_stays",
                       "A1c_hemoglobin_%_mean", "creatinine_mg/dL_mean", "glucose_mg/dL_mean", "A1c_absolute_mean", "number_of_abnormal_results_A1c_hemoglobin_%", 
                       "number_of_abnormal_results_creatinine", "number_of_abnormal_results_glucose", "number_of_abnormal_results_A1c_absolute",
                       "number_of_A1c_hemoglobin_%_tests", "number_of_creatinine_tests", "number_of_glucose_tests", "number_of_A1c_absolute_tests", "diabetes")]

In [37]:
head(patients)

Unnamed: 0_level_0,id,gender,dead,religion,ethnicity,insurance,number_of_emergency_stays,number_of_elective_stays,number_of_urgent_stays,A1c_hemoglobin_%_mean,creatinine_mg/dL_mean,glucose_mg/dL_mean,A1c_absolute_mean,number_of_abnormal_results_A1c_hemoglobin_%,number_of_abnormal_results_creatinine,number_of_abnormal_results_glucose,number_of_abnormal_results_A1c_absolute,number_of_A1c_hemoglobin_%_tests,number_of_creatinine_tests,number_of_glucose_tests,number_of_A1c_absolute_tests,diabetes
Unnamed: 0_level_1,<int>,<chr>,<int>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<dbl>
1,10006,F,1,CATHOLIC,BLACK/AFRICAN AMERICAN,Medicare,1,0,0,5.5,5.418,126.82,,0.0,62,41,,2.0,62,61,,1
2,10011,F,1,CATHOLIC,UNKNOWN/NOT SPECIFIED,Private,1,0,0,,0.563,115.526,,,0,11,,,19,19,,0
3,10013,F,1,CATHOLIC,UNKNOWN/NOT SPECIFIED,Medicare,1,0,0,,1.7,149.5,,,2,2,,,2,2,,0
4,10017,F,1,CATHOLIC,WHITE,Medicare,1,0,0,8.4,0.44,204.8,,2.0,2,13,0.0,2.0,15,15,2.0,1
5,10019,M,1,CATHOLIC,WHITE,Medicare,1,0,0,,5.8,194.8,,,4,3,,,4,5,,0
6,10026,F,1,OTHER,WHITE,Medicare,1,0,0,,0.545,107.545,,,0,5,,,11,11,,0


In [38]:
write.csv(patients, "patients_diabetes_data.csv", row.names = FALSE)