In [None]:
library(RPostgreSQL)
library(memoise)
library(xgboost)
library(caret)
library(GA)
library(tidyverse)

In [None]:
# Open a connection to the "mimic" PostgreSQL database.
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "mimic")
# Put the echo, public and mimiciii schemas on the search path so that later
# queries can use unqualified table names. dbSendQuery() returns a result
# object that has to be released explicitly; clear it right away so no pending
# result is left on the connection for the queries that follow.
invisible(dbClearResult(
    dbSendQuery(con, "set search_path=echo,public,mimiciii;")
))

In [None]:
# Load every row and column of merged_data into an in-memory data frame.
full_data <- dbGetQuery(con, "select * from merged_data")

In [None]:
# The data is now held in full_data, so close the connection and then
# unload the driver.
dbDisconnect(con)
dbUnloadDriver(drv)

In [None]:
# Quick look at the loaded table: first rows, column names, and dimensions.
head(full_data)
names(full_data)
dim(full_data)

In [None]:
# Pick the candidate feature columns of full_data purely by name pattern.
# Every rule is an elementwise test on the column names, so the rules are
# combined into one logical mask; column order is preserved.
feature_names <- local({
    columns <- names(full_data)
    matches <- function(pattern) grepl(pattern, columns)

    is_candidate <- matches("vs|lab|icd|age|gender|weight|saps|sofa|elix_score|vent|vaso|los")
    is_vs_or_lab <- matches("vs|lab")
    is_flag <- matches("flag")
    is_marker <- matches("bnp|troponin|kinase")
    is_min_or_max <- matches("min|max")

    # Among vs/lab columns, "flag" variants are dropped unless they belong to
    # bnp/troponin/kinase ...
    unwanted_flag <- is_vs_or_lab & is_flag & !is_marker
    # ... while bnp/troponin/kinase columns are kept only as "flag" variants.
    unflagged_marker <- is_marker & !is_flag

    columns[is_candidate & !unwanted_flag & !unflagged_marker & !is_min_or_max]
})
feature_names
length(feature_names)

In [None]:
# Build the feature table: keep only the chosen columns (in feature_names
# order) and recode gender as the integer code of its factor level.
features <- full_data[feature_names] %>%
    mutate(gender = as.integer(as.factor(gender)))
head(features)

In [None]:
# Outcome vector: the echo column of full_data, aligned row-for-row with it.
label <- pull(full_data, echo)
head(label)

In [None]:
# Fitness function for feature selection: mean 5-fold cross-validated AUC of
# an xgboost binary classifier fitted on the columns of `x` selected by
# `string`.
#
# string: 0/1 vector with one entry per column of `x`; 1 selects the column.
# x:      data frame or matrix of candidate features, one row per observation.
# y:      label vector aligned with the rows of `x`.
#
# Returns a single number: the mean held-out AUC across the folds, or 0 when
# `string` selects no column at all (nothing can be fitted, so that
# chromosome gets the lowest possible AUC instead of aborting the search).
cost <- function(string, x, y) {
    selected <- which(string == 1)
    if (length(selected) == 0) {
        return(0)
    }

    # drop = FALSE keeps a single selected column as a named one-column
    # matrix instead of collapsing it to a bare vector.
    features_mtx <- data.matrix(x[, selected, drop = FALSE])
    label <- y

    # createFolds() returns the held-out rows by default; returnTrain = TRUE
    # yields the training rows, so each model is fitted on four folds and
    # scored on the remaining one.
    train_folds <- createFolds(seq_len(nrow(features_mtx)), k = 5,
                               returnTrain = TRUE)

    fold_auc <- map_dbl(train_folds, function(index) {
        model <- xgboost(features_mtx[index, , drop = FALSE], label[index],
                         params = list(objective = "binary:logistic"),
                         nrounds = 100, verbose = 0)
        pred <- predict(model, features_mtx[-index, , drop = FALSE])
        auc <- ROCR::performance(ROCR::prediction(pred, label[-index]), "auc")
        auc@y.values[[1]]
    })
    mean(fold_auc)
}

In [None]:
# Smoke test: evaluate cost() once on a random 0/1 mask with one entry per
# feature column.
cost(base::sample(0:1, ncol(features), replace = TRUE), features, label)

In [None]:
# Wrap cost() in a memoise cache so that repeated calls with identical
# arguments reuse the stored value instead of refitting the models.
# NOTE(review): cost() draws random folds, so a cached value pins one random
# realisation for a given feature mask; confirm that is intended.
mcost <- memoise(cost)
# Check that the wrapper is recognised as memoised.
is.memoised(mcost)

In [None]:
# Generate a sparse random starting population for a binary GA.
#
# object: object exposing the slots @popSize (number of chromosomes) and
#         @nBits (bits per chromosome).
# ...:    accepted but unused.
#
# Returns a popSize x nBits matrix of 0/1 values in which each bit is drawn
# independently and equals 1 with probability 0.1.
initialPop <- function(object, ...) {
    n_chromosomes <- object@popSize
    n_bits <- object@nBits
    bits <- sample(0:1,
                   size = n_bits * n_chromosomes,
                   replace = TRUE,
                   prob = c(0.9, 0.1))
    matrix(bits, nrow = n_chromosomes, ncol = n_bits)
}

In [None]:
# Run a binary genetic algorithm over feature masks: one bit per column of
# `features`, scored by the memoised cost function (x and y match cost()'s
# extra arguments). The population is seeded by initialPop. With maxiter = 1
# and popSize = 10 this is a single generation of ten chromosomes.
# NOTE(review): min/max look irrelevant for type = "binary"; confirm against
# the installed GA version before relying on or removing them.
# NOTE(review): parallel = 4 presumably requests four workers; a memoise cache
# is unlikely to be shared between them. TODO confirm.
ga_results <- ga(type = "binary",
                 fitness = mcost,
                 x = features,
                 y = label,
                 min = 0, max = 1,
                 maxiter = 1,
                 popSize = 10,
                 population = initialPop,
                 nBits = ncol(features),
                 names = feature_names,
                 keepBest = TRUE,
                 parallel = 4)

In [None]:
# Display the fitted GA object.
ga_results