# ICSS data processing for cohort01
insert cohort information here. E.g., PM ICSS group
 
**Group information:**
 
|Sex    | ICSS+ShA | ICSS+LgA   |
|:-----:|:--------:|:----------:|
|Males  |  SG17    | SG15       |
|Females|  SG14    | SG24, SG20 |

- surgery break dates : 
- IVSA/ShA start date : 
- IVSA/LgA start date :
- IVSA/LgA end date   :

**OUTLINE:**
 
1. Setting variables
2. Preprocessing data
    1. Running `preprocessICSSFiles -f $filename` script 
    2. Collating all the \.csv files that the script generates into one data table
    3. Assign datatype to columns
    4. Get an overview of the Dates and the Subjects
3. Plotting data
---

In [None]:
%load_ext rpy2.ipython

In [None]:
%%capture
%%R
# loading required libraries for R
library(tidyverse)
library(lubridate)

---
## 1. Setting variables


In [None]:
%%R
# directory where all the animals in the cohort are stored
# (list.files() searches it recursively when the data are collated in 2.2)
dataDir <- "~/Dropbox (Partners HealthCare)/Projects/R01_2017_OxycSA-NASh-Glutamate/_data_R01_2017/_data_R01_2017_ICSS/_ana_files/Cohort01"

# landmark dates for the experiment/cohort
# Each value is handed to as.Date() without an explicit format by the plotting
# cells, so write dates as "YYYY-MM-DD" (or "YYYY/MM/DD"), e.g. "2019-03-15".
# The empty strings are placeholders: fill them in so that the vertical
# landmark lines can be drawn on the plots.
preSurgeryEndDate <- "" 
postSurgeryStartDate <- ""
shaStartDate <- ""
lgaStartDate <- ""
absStartDate <- ""

# # output files for R
# outputFilename_longform_ <- "_icssData_pm_cohort01__longform.csv"
# outputFilename_longform_ <- "_icssData_pm_cohort01__longform.csv"
# outputFilename_longform_ <- "_icssData_pm_cohort01__longform.csv"
# 
# # output files for Prism
# outputFilename_wideform_ <- "_icssData_pm_cohort01__forPrism.csv"
# outputFilename_wideform_ <- "_icssData_pm_cohort01__forPrism.csv"
# outputFilename_wideform_ <- "_icssData_pm_cohort01__forPrism.csv"

---
## 2. Preprocessing data

In [None]:
%%capture
%%bash
## NOTE: every line of this cell is commented out, so running the cell
## executes nothing. Uncomment the lines below to run preprocessICSSFiles on
## each ANA*.CSV file of each subject directory.
## NOTE(review): the loops iterate over unquoted `ls` output, so they presumably
## rely on directory and file names that contain no whitespace -- confirm.
## # 2.1. Preprocessing ANA files that are generated by Steve Cabilio's program
## # this line makes the script machine agnostic by assigning the username automatically
## userName=$(echo $USER)
## 
## # We move to the folder where the cohort data is stored
## # cd </path/to/cohort/data>
## cd /Users/$userName/Dropbox\ \(Partners\ HealthCare\)/Projects/R01_2017_OxycSA-NASh-Glutamate/_data_R01_2017/_data_R01_2017_ICSS/_ana_files/Cohort01
## 
## # entering each subject/animal directory to list all the CSV files and to run the program on each
## for dirName in $(ls -d */)
## do
##     cd $dirName
##     for fileName in $(ls ANA*.CSV)
##     do
##         preprocessICSSFiles --file $fileName;
##     done
##     cd ..
## done

In [None]:
%%capture
%%R
# 2.2. Collating all the files into one tibble
# List every preprocessed CSV below dataDir, sub-folders included.
# `pattern` is a regular expression: the dot is escaped and the end anchored so
# that only file names ending in "preprocessed.csv" are picked up.
fileList <- list.files(
  path = dataDir,
  pattern = "preprocessed\\.csv$",
  recursive = TRUE
)

# Stop with a readable message instead of an opaque reduce() error when no
# file was found (e.g. wrong dataDir, or the preprocessing step was not run).
if (length(fileList) == 0) {
  stop("No preprocessed CSV files found under: ", dataDir, call. = FALSE)
}

# generating combined data table: read each file, then stack the rows.
# rbind() is kept on purpose: dplyr::bind_rows() refuses to combine columns
# whose types differ between files.
# NOTE(review): presumably that can happen here when a file has missing
# values -- confirm before switching to bind_rows().
data <- fileList %>%
  map(~ read_csv(file.path(dataDir, .x))) %>%
  reduce(rbind)

In [None]:
%%R
# Show the collated table
print(data)

In [None]:
%%R
# 2.3. Assign data type to columns
# This gets broken where there are missing values
# NOTE(review): presumably such columns are read in as character; as.numeric()
# then yields NA (with a warning) for every entry that is not a number.
# Ideal data types
# Date   | Subject  | Experiment |   Pass   |  T0   |  M50  | MaxRate
# <date> | <factor> |  <factor>  | <factor> | <num> | <num> |  <num>
# Date is not converted here and keeps the type read_csv() gave it.

data$Subject <- as.factor(data$Subject)
data$Pass <- as.factor(data$Pass)
data$T0 <- as.numeric(data$T0)
data$M50 <- as.numeric(data$M50)

# Experiment and MaxRate are part of the ideal layout above and MaxRate is
# plotted on a continuous axis, so they are converted as well. The guards keep
# the cell working if a column is absent from the preprocessed files.
if ("Experiment" %in% names(data)) {
  data$Experiment <- as.factor(data$Experiment)
}
if ("MaxRate" %in% names(data)) {
  data$MaxRate <- as.numeric(data$MaxRate)
}

print(data)

In [None]:
%%R
# 2.4. Overview
# Dates over which the experiment has run:
# compact structure first, then the distinct values
str(data$Date)
unique(data$Date)

In [None]:
%%R
# 2.4. Overview
# Subjects in the experiment over which the experiment has run:
# compact structure first, then the distinct values
str(data$Subject)
unique(data$Subject)

---
## 3. Plotting data
### 3.1. Pass 1-4

In [None]:
%%R -w 640 -h 640
# Plotting raw values for T0, Pass 1 - 4
# Fixed y-axis limits; only used by the commented-out ylim() alternative below
lower_limit <- 0
upper_limit <- 250

# Landmark dates come from section 1. Entries that are still blank are skipped
# so that an unset date does not break the plot.
landmark_dates <- c(
  preSurgeryEndDate, postSurgeryStartDate,
  shaStartDate, lgaStartDate, absStartDate
)
# Each date is parsed on its own (so every entry may use either format that
# as.Date() accepts) and unclass()ed to its day number for xintercept.
landmark_days <- vapply(
  landmark_dates[nzchar(landmark_dates)],
  function(d) unclass(as.Date(d)),
  numeric(1),
  USE.NAMES = FALSE
)
# One dot-dash vertical line per landmark; adding NULL leaves the plot as is
landmark_lines <- if (length(landmark_days) > 0) {
  geom_vline(xintercept = landmark_days, linetype = 4)
} else {
  NULL
}

data %>%
  ggplot(aes(x = Date, y = T0)) +
  geom_jitter(aes(color = Pass), size = 2) +
  scale_color_manual(values = c("#1a1a1a", "#d73027", "#1a9850", "#313695")) +
  # ylim(lower_limit, upper_limit) +
  ylim(c(0, NA)) +
  landmark_lines +
  facet_grid(rows = vars(Subject)) +
  ggtitle("Raw Theta0 values of Pass 1 - 4") +
  theme_minimal()

In [None]:
%%R -w 640 -h 640
# Plotting raw values for M50, Pass 1 - 4
# Fixed y-axis limits; only used by the commented-out ylim() alternative below
lower_limit <- 0
upper_limit <- 250

# Landmark dates come from section 1. Entries that are still blank are skipped
# so that an unset date does not break the plot.
landmark_dates <- c(
  preSurgeryEndDate, postSurgeryStartDate,
  shaStartDate, lgaStartDate, absStartDate
)
# Each date is parsed on its own (so every entry may use either format that
# as.Date() accepts) and unclass()ed to its day number for xintercept.
landmark_days <- vapply(
  landmark_dates[nzchar(landmark_dates)],
  function(d) unclass(as.Date(d)),
  numeric(1),
  USE.NAMES = FALSE
)
# One dot-dash vertical line per landmark; adding NULL leaves the plot as is
landmark_lines <- if (length(landmark_days) > 0) {
  geom_vline(xintercept = landmark_days, linetype = 4)
} else {
  NULL
}

data %>%
  ggplot(aes(x = Date, y = M50)) +
  geom_jitter(aes(color = Pass), size = 2) +
  scale_color_manual(values = c("#1a1a1a", "#d73027", "#1a9850", "#313695")) +
  # ylim(lower_limit, upper_limit) +
  ylim(c(0, NA)) +
  landmark_lines +
  facet_grid(rows = vars(Subject)) +
  ggtitle("Raw M50 values of Pass 1 - 4") +
  theme_minimal()

In [None]:
%%R -w 640 -h 640
# Plotting raw values for MaxRate, Pass 1 - 4
# Fixed y-axis limits; only used by the commented-out ylim() alternative below
lower_limit <- 0
upper_limit <- 300

# Landmark dates come from section 1. Entries that are still blank are skipped
# so that an unset date does not break the plot.
landmark_dates <- c(
  preSurgeryEndDate, postSurgeryStartDate,
  shaStartDate, lgaStartDate, absStartDate
)
# Each date is parsed on its own (so every entry may use either format that
# as.Date() accepts) and unclass()ed to its day number for xintercept.
landmark_days <- vapply(
  landmark_dates[nzchar(landmark_dates)],
  function(d) unclass(as.Date(d)),
  numeric(1),
  USE.NAMES = FALSE
)
# One dot-dash vertical line per landmark; adding NULL leaves the plot as is
landmark_lines <- if (length(landmark_days) > 0) {
  geom_vline(xintercept = landmark_days, linetype = 4)
} else {
  NULL
}

data %>%
  ggplot(aes(x = Date, y = MaxRate)) +
  geom_jitter(aes(color = Pass), size = 2) +
  scale_color_manual(values = c("#1a1a1a", "#d73027", "#1a9850", "#313695")) +
  # ylim(lower_limit, upper_limit) +
  ylim(c(0, NA)) +
  landmark_lines +
  facet_grid(rows = vars(Subject)) +
  ggtitle("Raw MaxRate values of Pass 1 - 4") +
  theme_minimal()

### 3.2. Pass 2-4

In [None]:
%%R -w 640 -h 640
# Plotting raw values for T0, Pass 2 - 4
# Fixed y-axis limits; only used by the commented-out ylim() alternative below
lower_limit <- 0
upper_limit <- 150

# Landmark dates come from section 1. Entries that are still blank are skipped
# so that an unset date does not break the plot.
landmark_dates <- c(
  preSurgeryEndDate, postSurgeryStartDate,
  shaStartDate, lgaStartDate, absStartDate
)
# Each date is parsed on its own (so every entry may use either format that
# as.Date() accepts) and unclass()ed to its day number for xintercept.
landmark_days <- vapply(
  landmark_dates[nzchar(landmark_dates)],
  function(d) unclass(as.Date(d)),
  numeric(1),
  USE.NAMES = FALSE
)
# One dot-dash vertical line per landmark; adding NULL leaves the plot as is
landmark_lines <- if (length(landmark_days) > 0) {
  geom_vline(xintercept = landmark_days, linetype = 4)
} else {
  NULL
}

# Pass 1 is dropped, so only three colours are needed
data %>%
  filter(Pass != 1) %>%
  ggplot(aes(x = Date, y = T0)) +
  # ylim(lower_limit, upper_limit) +
  ylim(c(0, NA)) +
  geom_jitter(aes(color = Pass), size = 2) +
  scale_color_manual(values = c("#d73027", "#1a9850", "#313695")) +
  landmark_lines +
  facet_grid(rows = vars(Subject)) +
  ggtitle("Raw Theta0 values of Pass 2 - 4") +
  theme_minimal()

In [None]:
%%R -w 640 -h 640
# Plotting raw values for M50, Pass 2 - 4
# Fixed y-axis limits; only used by the commented-out ylim() alternative below
lower_limit <- 0
upper_limit <- 150

# Landmark dates come from section 1. Entries that are still blank are skipped
# so that an unset date does not break the plot.
landmark_dates <- c(
  preSurgeryEndDate, postSurgeryStartDate,
  shaStartDate, lgaStartDate, absStartDate
)
# Each date is parsed on its own (so every entry may use either format that
# as.Date() accepts) and unclass()ed to its day number for xintercept.
landmark_days <- vapply(
  landmark_dates[nzchar(landmark_dates)],
  function(d) unclass(as.Date(d)),
  numeric(1),
  USE.NAMES = FALSE
)
# One dot-dash vertical line per landmark; adding NULL leaves the plot as is
landmark_lines <- if (length(landmark_days) > 0) {
  geom_vline(xintercept = landmark_days, linetype = 4)
} else {
  NULL
}

# Pass 1 is dropped, so only three colours are needed
data %>%
  filter(Pass != 1) %>%
  ggplot(aes(x = Date, y = M50)) +
  geom_jitter(aes(color = Pass), size = 2) +
  scale_color_manual(values = c("#d73027", "#1a9850", "#313695")) +
  # ylim(lower_limit, upper_limit) +
  ylim(c(0, NA)) +
  landmark_lines +
  facet_grid(rows = vars(Subject)) +
  ggtitle("Raw M50 values of Pass 2 - 4") +
  theme_minimal()

In [None]:
%%R -w 640 -h 640
# Plotting raw values for MaxRate, Pass 2 - 4
# Fixed y-axis limits; only used by the commented-out ylim() alternative below
lower_limit <- 0
upper_limit <- 300

# Landmark dates come from section 1. Entries that are still blank are skipped
# so that an unset date does not break the plot.
landmark_dates <- c(
  preSurgeryEndDate, postSurgeryStartDate,
  shaStartDate, lgaStartDate, absStartDate
)
# Each date is parsed on its own (so every entry may use either format that
# as.Date() accepts) and unclass()ed to its day number for xintercept.
landmark_days <- vapply(
  landmark_dates[nzchar(landmark_dates)],
  function(d) unclass(as.Date(d)),
  numeric(1),
  USE.NAMES = FALSE
)
# One dot-dash vertical line per landmark; adding NULL leaves the plot as is
landmark_lines <- if (length(landmark_days) > 0) {
  geom_vline(xintercept = landmark_days, linetype = 4)
} else {
  NULL
}

# Pass 1 is dropped, so only three colours are needed
data %>%
  filter(Pass != 1) %>%
  ggplot(aes(x = Date, y = MaxRate)) +
  geom_jitter(aes(color = Pass), size = 2) +
  scale_color_manual(values = c("#d73027", "#1a9850", "#313695")) +
  # ylim(lower_limit, upper_limit) +
  ylim(c(0, NA)) +
  landmark_lines +
  facet_grid(rows = vars(Subject)) +
  ggtitle("Raw MaxRate values of Pass 2 - 4") +
  theme_minimal()