---
title: "Datasets Information"
output: html_notebook
---

In [None]:
### Execution environment setup ###
# One-time session setup. The whole block is skipped when an object named
# `.SETUP` already exists at search position 1; the flag is set to TRUE on the
# last line of the block, so re-running this cell is a no-op.
if (!exists('.SETUP', 1)) {

# find the project's root directory (located through rprojroot's git-root
# criterion) and make it the working directory, so that the relative paths
# used below ('lib/R/utils.R', 'env/lib/R', 'cache', 'config.yml') resolve
root_dir <- rprojroot::is_git_root$find_file()
if (getwd() != root_dir) setwd(root_dir)

# wait for the library setup to finish
# `finished_setup` is a shell command that exits with status 0 only when the
# last line of ~/.nb.setup.log contains the "Done ..." marker. system() returns
# that exit status, so `!system(finished_setup)` is TRUE once setup is done.
# The wait is only attempted when the log file exists.
# NOTE(review): retry() is presumably provided by lib/R/utils.R and re-evaluates
# the expression up to `times` times with `delay` seconds in between -- confirm.
source('lib/R/utils.R')
finished_setup <- 'tail -1 ~/.nb.setup.log | grep -q "Done initial azure notebooks environment setup"'
if (file.exists('~/.nb.setup.log')) retry(stopifnot(!system(finished_setup)), times=100, delay=5)

# load packages used here
# 'env/lib/R' is put first on the library search path so it is searched before
# the libraries that were already configured
.libPaths(c('env/lib/R', .libPaths()))
suppressPackageStartupMessages({
    library(magrittr)
    library(simpleCache)
    # point simpleCache at the 'cache' directory (relative to the project root)
    setCacheDir('cache')
})

# load global parameters
# `config` is read by later cells (e.g. config$geo_datasets)
config <- yaml::read_yaml('config.yml')

# prevent re-execution
.SETUP <- TRUE
}

## Experimental design

In [None]:
# Display the datasets annotation table (tab-delimited file with a header
# row); empty fields are read as NA.
read.delim(file='annot/datasets.tsv', na.strings='')

## Choose dataset

In [None]:
# Print the configured GEO datasets on one line as "index: id" pairs,
# separated by ", ".
cat(
    paste(seq_along(config$geo_datasets), config$geo_datasets, sep=': '),
    sep=', '
)

In [None]:
# Pick the dataset to inspect by its position in config$geo_datasets
# (positions are shown by the numbered listing printed by the previous cell).
gse_id <- config$geo_datasets[8]
# Fail fast on an out-of-range index: `[` would otherwise silently yield NA
# (or NULL when no datasets are configured) and the cells below would try to
# open files named after it.
if (length(gse_id) != 1 || is.na(gse_id)) {
    stop('No dataset at the selected index of config$geo_datasets', call.=FALSE)
}
cat('Selected dataset:', gse_id)

## Description
Information retrieved from the NCBI database, the GEO website and `GEOmetadb`.

In [None]:
# Print the main descriptive fields of the selected dataset.
# The fields are read from the R object stored in output/info/<gse_id>.txt,
# each one is collapsed to a single string, prefixed with its upper-cased
# name, wrapped to 90 columns and separated from the next by a blank line.
# local() keeps the intermediate values out of the global environment.
local({
    fields <- c(
        'title', 'summary', 'Overall design', 'geo_url',
        'article_url', 'article_title', 'article_abstract'
    )
    info <- dget(sprintf('output/info/%s.txt', gse_id))[fields]
    collapsed <- lapply(info, paste, collapse=' ')
    labelled <- paste(toupper(names(collapsed)), collapsed, sep=': ')
    cat(stringr::str_wrap(labelled, width=90), sep='\n\n')
})

## Article highlights
Relevant excerpts from the associated publication.

In [None]:
# Print the excerpts stored in annot/article/<gse_id>.txt: each
# newline-separated entry is wrapped to 90 columns and entries are separated
# by a blank line.
gse_id %>%
    sprintf(fmt='annot/article/%s.txt') %>%
    # NOTE(review): comment.char='(' makes scan() discard everything from the
    # first unquoted '(' to the end of a line -- presumably intended to drop
    # parenthetical remarks; confirm against the annotation files.
    # `quiet=TRUE` (not `T`, which is an ordinary variable that can be
    # reassigned) suppresses scan()'s "Read N items" message.
    scan(what=character(), sep='\n', quote='"', comment.char='(', quiet=TRUE) %>%
    stringr::str_wrap(width=90) %>%
    cat(sep='\n\n')