---
title: "Exploratory Analysis"
output: html_notebook
---

In [None]:
### Execution environment setup ###
# One-time initialisation: skipped entirely when `.SETUP` already exists in
# the global environment (position 1 of the search path).
if (!exists('.SETUP', where = 1)) {

# Make the git repository root the working directory, so the relative paths
# used below ('lib/R/utils.R', 'env/lib/R', 'cache', 'config.yml') resolve.
root_dir <- rprojroot::is_git_root$find_file()
if (getwd() != root_dir) {
    setwd(root_dir)
}

# Project helpers. `retry` is not defined in this notebook -- presumably it
# comes from this file; confirm there what `times` and `delay` mean.
source('lib/R/utils.R')

# Shell test that exits with status 0 once the last line of the setup log
# contains the completion message.
finished_setup <- 'tail -1 ~/.nb.setup.log | grep -q "Done initial azure notebooks environment setup"'

# Wait for the library setup to finish, but only when a setup log is present.
# `system()` returns the exit status, so `!status` is TRUE on success.
if (file.exists('~/.nb.setup.log')) {
    retry(stopifnot(!system(finished_setup)), times = 100, delay = 5)
}

# Put the project-local library first on the search list, then attach the
# packages this notebook uses without their startup chatter.
.libPaths(c('env/lib/R', .libPaths()))
suppressPackageStartupMessages({
    library(magrittr)
    library(simpleCache)
    library(GEOquery)

    # simpleCache stores and looks up its caches in this directory
    setCacheDir('cache')
})

# Global parameters shared across the analysis
config <- yaml::read_yaml('config.yml')

# Mark setup as done so re-running this cell is a no-op
.SETUP <- TRUE
}

## Choose dataset

In [None]:
# Show every configured GEO dataset as "<position>: <id>", comma-separated,
# so one can be picked by position in the next cell.
cat(
    paste(seq_along(config$geo_datasets), config$geo_datasets, sep = ': '),
    sep = ', '
)

In [None]:
# Select the dataset to analyse by its position in `config$geo_datasets`.
# Indexing past the end of a vector does not fail in R (it yields NA), which
# would only surface later as a confusing cache name / missing-file error.
# Fail here instead if the configured list is too short.
stopifnot(length(config$geo_datasets) >= 2)
gse_id <- config$geo_datasets[2]
cat('Selected dataset:', gse_id)

## Load data

In [None]:
# Load the selected series through simpleCache under the cache name
# 'processed.<gse_id>'. The instruction block parses the series matrix file
# 'data/processed/<gse_id>_series_matrix.txt.gz' with GEOquery's internal
# (non-exported, hence `:::`) parser and keeps the `eset` element of its
# result. Messages emitted along the way are silenced.
gse <- suppressMessages(simpleCache(sprintf('processed.%s', gse_id), {
    with(
        GEOquery:::parseGSEMatrix(
            sprintf('data/processed/%s_series_matrix.txt.gz', gse_id),
            destdir = 'data/processed'
        ),
        eset
    )
}))
print(gse)

In [None]:
# Build the sample annotation table from the phenotype data of `gse`.
# NOTE(review): `parse_annotation`, `as.data.table`, `filter` and `select` are
# not attached in the visible setup cell -- presumably provided via
# lib/R/utils.R (data.table / dplyr); confirm.
annot <- as.data.table(parse_annotation(gse_id, pData(gse)))

# Cross-tabulate treatment against outcome for the HER2+ samples only.
table(
    select(
        filter(annot, her2 == 'HER2+'),
        treatment, outcome
    )
)