# Project: ICD-AIS conversion using Deep Learning utilizing ICD10

This script translated the ICD codes using ICDPIC-R

Note: This notebook uses an R 3.6 kernal.  Not Python.

## Setup

In [2]:
# load tidyverse and ICDPIC-R
library(tidyverse)
library(icdpicr)
library(stringr)

# clear memory
rm(list=ls())

# display full numbers
options(scipen=999)

In [3]:
install.packages("icdpicr", dependencies = TRUE)

Installing package into ‘/sfs/qumulo/qhome/trh6u/R/goolf/3.6’
(as ‘lib’ is unspecified)



In [4]:
install.packages("tidyverse", dependencies = TRUE)

Installing package into ‘/sfs/qumulo/qhome/trh6u/R/goolf/3.6’
(as ‘lib’ is unspecified)



#### Files

In [5]:
# test inputs
test_icd_file <- "../Data/NTDB_combine/ntdb_test_icd.csv"

# output file for translation
results_ais_file <- "../Results/Translations/test_ais_icdpic.csv"

## Load ICD codes

Only use the first 10,000 patients of the testing data.

In [6]:
# load data
icd <- read_csv(test_icd_file, col_names = TRUE, col_types=cols(inc_key=col_double(), ICDDIAGNOSISCODE=col_character()))

In [7]:
head(icd)

inc_key,ICDDIAGNOSISCODE
<dbl>,<chr>
190026915434,S00.83XA
190026915434,S42.413A
190026915434,S71.039A
190026915434,S71.039A
190026915434,S71.139A
190026915434,S81.839A


## Prepare data

### Add diagnosis codes

In [8]:
# sort data by key
icd <- icd %>% arrange(inc_key)

In [9]:
# add diagnosis indications
icd$diag <- "Pending"

In [10]:
icd$inc_key[1]

In [11]:
# keep track of current ID and number of diagnoses
id <- icd$inc_key[1] 
diag_num <- 0

# loop through each row
for (row in 1:nrow(icd)) {
    
    # check if this is the same patient
    if(icd[row,]$inc_key==id){
        
        # increment diagnosis code
        diag_num <- diag_num + 1
        
    } else {
        
        # reset id and diagnosis number
        id = icd[row,]$inc_key
        diag_num <- 1
    }
    
    # make diagnosis number string
    diag_char <- paste("dx", as.character(diag_num), sep="")
    
    # replace diagnosis nubmer
    icd[row,'diag'] <- diag_char    
}

In [12]:
head(icd)

inc_key,ICDDIAGNOSISCODE,diag
<dbl>,<chr>,<chr>
190026915434,S00.83XA,dx1
190026915434,S42.413A,dx2
190026915434,S71.039A,dx3
190026915434,S71.039A,dx4
190026915434,S71.139A,dx5
190026915434,S81.839A,dx6


## Convert from long to wide data

In [13]:
icd <- icd %>% 
    spread(diag, ICDDIAGNOSISCODE)

In [14]:
# create list of columns names
dx_cols = c("inc_key")

for(i in 1:(ncol(icd)-1)){
    dx_cols <- append(dx_cols, paste("dx", as.character(i), sep=""))
}

In [15]:
# put diagnosis codes in ascending order
icd <- icd[,dx_cols]

In [16]:
head(icd)

inc_key,dx1,dx2,dx3,dx4,dx5,dx6,dx7,dx8,dx9,⋯,dx41,dx42,dx43,dx44,dx45,dx46,dx47,dx48,dx49,dx50
<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
190026915434,S00.83XA,S42.413A,S71.039A,S71.039A,S71.139A,S81.839A,S89.80XA,,,⋯,,,,,,,,,,
190026952586,S00.03XA,S05.72XA,S20.20XA,,,,,,,⋯,,,,,,,,,,
190026952733,S14.103A,,,,,,,,,⋯,,,,,,,,,,
190026952851,S00.81XA,S80.01XA,S81.011A,S82.842A,,,,,,⋯,,,,,,,,,,
190026953094,S00.03XA,S00.81XA,S00.81XA,S00.83XA,S08.0XXA,S82.844A,S90.01XA,,,⋯,,,,,,,,,,
190026953139,S00.81XA,S06.319A,S06.329A,S06.5X9A,S06.5X9A,S09.93XA,,,,⋯,,,,,,,,,,


## Get ISS with ICDPIC-R

In [17]:
#icdpic <- cat_trauma(as.data.frame(icd[0:10]), "dx", icd10 = TRUE, i10_iss_method="roc_max_TQIP")
icdpic <- cat_trauma(icd, "dx", icd10 = TRUE, i10_iss_method="roc_max_TQIP")

In [18]:
icdpic_res <- icdpic %>%
    select("inc_key", "maxais","riss")

## View results

In [19]:
head(cbind(icdpic_res, icd[,c("dx1","dx2","dx3","dx4","dx5","dx6","dx7","dx8","dx9")]),10)

Unnamed: 0_level_0,inc_key,maxais,riss,dx1,dx2,dx3,dx4,dx5,dx6,dx7,dx8,dx9
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,190026915434,4,17,S00.83XA,S42.413A,S71.039A,S71.039A,S71.139A,S81.839A,S89.80XA,,
2,190026952586,1,3,S00.03XA,S05.72XA,S20.20XA,,,,,,
3,190026952733,3,9,S14.103A,,,,,,,,
4,190026952851,1,2,S00.81XA,S80.01XA,S81.011A,S82.842A,,,,,
5,190026953094,2,5,S00.03XA,S00.81XA,S00.81XA,S00.83XA,S08.0XXA,S82.844A,S90.01XA,,
6,190026953139,4,16,S00.81XA,S06.319A,S06.329A,S06.5X9A,S06.5X9A,S09.93XA,,,
7,190026963751,1,1,S81.812A,,,,,,,,
8,190026963856,1,1,S82.192A,,,,,,,,
9,190026964242,2,4,S22.20XA,,,,,,,,
10,190026965799,1,3,S00.03XA,S01.01XA,S06.0X1A,S22.42XA,S30.1XXA,,,,


In [20]:
head(icdpic[,c("inc_key", "maxais","riss","sev_1","issbr_1","sev_2","issbr_2","sev_3","issbr_3",
          "sev_4","issbr_4","sev_5","issbr_5","sev_6","issbr_6","sev_7","issbr_7")],10)

Unnamed: 0_level_0,inc_key,maxais,riss,sev_1,issbr_1,sev_2,issbr_2,sev_3,issbr_3,sev_4,issbr_4,sev_5,issbr_5,sev_6,issbr_6,sev_7,issbr_7
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<int>,<chr>,<int>,<chr>,<int>,<chr>,<int>,<chr>,<int>,<chr>,<int>,<chr>,<int>,<chr>
1,190026915434,4,17,1,Head/Neck,1.0,Extremities,4.0,Extremities,4.0,Extremities,3.0,Extremities,1.0,Extremities,1.0,Extremities
2,190026952586,1,3,1,Head/Neck,1.0,Face,1.0,Chest,,,,,,,,
3,190026952733,3,9,3,Head/Neck,,,,,,,,,,,,
4,190026952851,1,2,1,Head/Neck,1.0,Extremities,1.0,Extremities,1.0,Extremities,,,,,,
5,190026953094,2,5,1,Head/Neck,1.0,Head/Neck,1.0,Head/Neck,1.0,Head/Neck,2.0,Head/Neck,1.0,Extremities,1.0,Extremities
6,190026953139,4,16,1,Head/Neck,1.0,Head/Neck,1.0,Head/Neck,4.0,Head/Neck,4.0,Head/Neck,1.0,Head/Neck,,
7,190026963751,1,1,1,Extremities,,,,,,,,,,,,
8,190026963856,1,1,1,Extremities,,,,,,,,,,,,
9,190026964242,2,4,2,Chest,,,,,,,,,,,,
10,190026965799,1,3,1,Head/Neck,1.0,Head/Neck,1.0,Head/Neck,1.0,Chest,1.0,Abdomen,,,,


In [21]:
unique(icdpic$issbr_1)

In [22]:
icdpic %>%
    filter(inc_key==190043769658) %>%
    select(contains('sev'))

“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”


sev_1,sev_2,sev_3,sev_4,sev_5,sev_6,sev_7,sev_8,sev_9,sev_10,⋯,sev_41,sev_42,sev_43,sev_44,sev_45,sev_46,sev_47,sev_48,sev_49,sev_50
<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>


In [23]:
colnames(icdpic)

## Convert back to long format

In [24]:
icdpic_l <-icdpic %>% 
            select(inc_key, dx1:issbr_50) %>%
            gather(v, value, dx1:issbr_50) %>% 
            separate(v, c("var", "col")) %>% 
            drop_na()  %>% 
            spread(var, value)

“Expected 2 pieces. Missing pieces filled with `NA` in 500000 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].”


In [25]:
icdpic_l %>% head()

Unnamed: 0_level_0,inc_key,col,issbr,sev
Unnamed: 0_level_1,<dbl>,<chr>,<chr>,<chr>
1,190026915434,1,Head/Neck,1
2,190026915434,2,Extremities,1
3,190026915434,3,Extremities,4
4,190026915434,4,Extremities,4
5,190026915434,5,Extremities,3
6,190026915434,6,Extremities,1


## Convert regions to numbers

In [26]:
# function to map body region descriptions to numbers
regions <- function(reg){  
    if (reg == 'Head/Neck') {
        return(1)
    } else if (reg=='Chest'){
        return(2)
    } else if (reg=='Abdomen'){
        return(3)
    } else if (reg=='Extremities'){
        return(4)
    } else if (reg=='Face'){
        return(5)
    } else if (reg=='General'){
        return(6)
    }  else {
        return(0)
    }         
}

In [27]:
# convert body region description to numbers
icdpic_l$region <- sapply(icdpic_l$issbr, FUN=regions)

## Format output

In [28]:
# change severity to float
icdpic_l$severity <- as.double(icdpic_l$sev)

In [29]:
# make dummy predot code
icdpic_l$ais_predot <- as.integer(icdpic_l$region * 10000)

In [30]:
# make dummy ais code
icdpic_l$aiscode <- icdpic_l$ais_predot + (icdpic_l$severity/10)

In [31]:
# indicate the chapter is not known
icdpic_l$chapter <- 0

In [32]:
# arrange columns
icdpic_l <- icdpic_l %>%
                select(inc_key, aiscode, ais_predot, severity, chapter, region)

In [33]:
icdpic_l %>% head()

Unnamed: 0_level_0,inc_key,aiscode,ais_predot,severity,chapter,region
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>
1,190026915434,10000.1,10000,1,0,1
2,190026915434,40000.1,40000,1,0,4
3,190026915434,40000.4,40000,4,0,4
4,190026915434,40000.4,40000,4,0,4
5,190026915434,40000.3,40000,3,0,4
6,190026915434,40000.1,40000,1,0,4


## Output ISS values

In [34]:
write_csv(icdpic_l, results_ais_file, col_names=TRUE)

In [35]:
.libPaths()