# IVSA analysis of cohort02 (AM-ICSS group)

**Note:**
Edits were made to the original IVSA file `!2019-04-05` to remove the Cohort02 data.

**METADATA:**
 
|Sex    | ICSS+ShA | ICSS+LgA      |
|:-----:|:--------:|:-------------:|
|Males  | NULL     | NC1, NC3, NC7 |
|Females| NULL     | NC4, NC6, NC8 |

**OUTLINE:**

1. Preprocessing data
    1. Running `preprocessIVSAFiles` to generate `.csv` files from MedPC text files
    2. Running `collateIVSAData` to create data frame
    3. Collating all animals across all days into single dataframe
2. Cleaning data
3. Saving collated data
---

In [1]:
%load_ext rpy2.ipython

In [2]:
%%capture
%%R
library(tidyverse)
library(lubridate)

---

## 1. Preprocessing data

In [37]:
%%bash
# # 1.1. Use the preprocessIVSAFiles script to parse the MedPC files and generate csv files for the different event types
# # NOTE(review): this whole cell is commented out, presumably because the csv files already exist — confirm before re-enabling.
# # Define userName so that the cell can be run in a machine-agnostic manner
# userName=$(echo $USER)
# 
# # Move to the folder where the cohort's rig files are stored
# # cd </path/to/cohort/data>
# cd /Users/$userName/Dropbox\ \(Partners\ HealthCare\)/Projects/R01_2017_OxycSA-NASh-Glutamate/_data_R01_2017/_data_R01_2017_IVSA/Male-Female/ShA+LgA/AM_ICSS/Cohort02/_rigFiles
# 
# # Run the preprocessing script over every file in the directory whose name starts with "!2019-0"
# # NOTE(review): iterating over $(ls ...) word-splits file names that contain whitespace.
# for fileName in $(ls \!2019-0* | sort)
# do
#     preprocessIVSAFiles --file $fileName;
# done

In [None]:
%%capture
%%bash
# # 1.2. Use collateIVSAData to collate all the data into a single data frame of the format shown below
# # NOTE(review): this whole cell is commented out, presumably because the collated files already exist — confirm before re-enabling.
# # DATA TABLE DESCRIPTION:
# # 
# #   date       |  cohort      | regimen   | group | subjectID | eventType   | eventTime
# #   -----------|--------------|-----------|-------|-----------|-------------|-----------
# #   2018-10-30 |  ICSS+IVSA   | 6H        | MALES | SG7       | rewards     | ...
# #   -----------|--------------|-----------|-------|-----------|-------------|-----------
# #   2018-10-30 |  ICSS+IVSA   | 6H        | MALES | SG7       | corrLever   | ...
# #   -----------|--------------|-----------|-------|-----------|-------------|-----------
# #   2018-10-30 |  ICSS+IVSA   | 6H        | MALES | SG7       | incorrLever | ...
# #   ...
# #   ...
# #   ...
# 
# userName=$(echo $USER)
# 
# # Move to the folder where the cohort's csv files are stored
# # cd </path/to/cohort/data>
# # NOTE(review): the path below points at PM_ICSS/Cohort01, while the other cells of this notebook use AM_ICSS/Cohort02 — looks like a copy-paste leftover; verify before running.
# cd /Users/$userName/Dropbox\ \(Partners\ HealthCare\)/Projects/R01_2017_OxycSA-NASh-Glutamate/_data_R01_2017/_data_R01_2017_IVSA/Male-Female/ShA+LgA/PM_ICSS/Cohort01/_csvFiles
# 
# # Run the collation script once per "*rewards.csv" file in the directory
# for fileName in $(ls *rewards.csv | sort)
# do
#     collateIVSAData $fileName;
# done

In [3]:
%%capture
%%R
# 1.3. Collating all animals across days into a single data frame
# Loading data: set IVSA_dataDir to the folder that holds the "*collated.csv"
# files (presumably the output of collateIVSAData in step 1.2).
IVSA_dataDir <- "~/Dropbox (Partners HealthCare)/Projects/R01_2017_OxycSA-NASh-Glutamate/_data_R01_2017/_data_R01_2017_IVSA/Male-Female/ShA+LgA/AM_ICSS/Cohort02/_csvFiles"

# `pattern` is a regular expression: escape the dot and anchor the end of the
# name so that only files ending in "collated.csv" are picked up.
IVSA_fileList <- list.files(path = IVSA_dataDir, pattern = "collated\\.csv$")

# Fail early with a clear message instead of an opaque error further down.
if (length(IVSA_fileList) == 0) {
  stop("No '*collated.csv' files found in: ", IVSA_dataDir, call. = FALSE)
}

# Pin the column types so that every file is parsed identically; letting
# read_csv guess per file can yield mismatched types when a file has missing
# values. The factor conversion is done later, in the cleaning section.
IVSA_colTypes <- cols(
  date = col_date(),
  cohort = col_character(),
  regimen = col_character(),
  group = col_character(),
  subjectID = col_character(),
  eventType = col_character(),
  eventTime = col_double()
)

## generating combined data table
# Read every file, then stack all of them in one step with bind_rows()
# rather than growing the table pairwise with reduce(rbind).
IVSA_data <- IVSA_fileList %>%
  map(~ read_csv(file.path(IVSA_dataDir, .x), col_types = IVSA_colTypes)) %>%
  bind_rows()

---
## 2. Cleaning data

In [4]:
%%R
# Preview the freshly collated table (dimensions, column types, first rows).
print(IVSA_data)

# A tibble: 24,643 x 7
   date       cohort    regimen group   subjectID eventType eventTime
   <date>     <chr>     <chr>   <chr>   <chr>     <chr>         <dbl>
 1 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards           4
 2 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          16
 3 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          50
 4 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          69
 5 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         456
 6 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         467
 7 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         479
 8 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards        1569
 9 2018-11-01 ICSS+IVSA 1H      FEM

In [5]:
%%R
# Re-assign the data types of individual columns; the types picked up at
# import time get broken when there are missing values.
# Ideal data types
# date   |  cohort  |  regimen   |  group   |  subjectID   |  eventType  | eventTime
# <date> | <factor> |  <factor>  | <factor> |  <factor>    |  <factor>   |  <int> 
# Only the five categorical columns are converted here; date and eventTime
# are left exactly as they were read.
IVSA_data <- IVSA_data %>%
  mutate(
    cohort = as.factor(cohort),
    regimen = as.factor(regimen),
    group = as.factor(group),
    subjectID = as.factor(subjectID),
    eventType = as.factor(eventType)
  )

In [6]:
%%R
# Preview the table again to confirm that the categorical columns are now factors.
print(IVSA_data)

# A tibble: 24,643 x 7
   date       cohort    regimen group   subjectID eventType eventTime
   <date>     <fct>     <fct>   <fct>   <fct>     <fct>         <dbl>
 1 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards           4
 2 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          16
 3 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          50
 4 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          69
 5 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         456
 6 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         467
 7 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         479
 8 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards        1569
 9 2018-11-01 ICSS+IVSA 1H      FEM

In [7]:
%%R
# List the distinct values of the date column.
unique(IVSA_data$date)

 [1] "2018-11-01" "2018-11-02" "2018-11-05" "2018-11-06" "2018-11-07"
 [6] "2018-11-08" "2018-11-09" "2018-11-12" "2018-11-13" "2018-11-14"
[11] "2018-11-15" "2018-11-16" "2018-11-19" "2018-11-20" "2018-11-21"
[16] "2018-11-22" "2018-11-23" "2018-11-26" "2018-11-27" "2018-11-28"
[21] "2018-11-29" "2018-11-30" "2018-12-14"


In [8]:
%%R
# List the distinct cohort labels so that mistyped labels stand out.
unique(IVSA_data$cohort)

[1] ICSS+IVSA  ICVSS+IVSA
Levels: ICSS+IVSA ICVSS+IVSA


In [9]:
%%R
# Cleaning up the cohort labels
# Replace the mistyped label "ICVSS+IVSA" with "ICSS+IVSA".
# Only the values are rewritten: the factor keeps "ICVSS+IVSA" as an
# (now unused) level.
IVSA_data$cohort <- replace(
  IVSA_data$cohort,
  IVSA_data$cohort == "ICVSS+IVSA",
  "ICSS+IVSA"
)
# Analogous fix for "NO-ICSSS+IVSA" -> "NO-ICSS+IVSA", kept for reference (disabled):
#IVSA_data$cohort[IVSA_data$cohort == "NO-ICSSS+IVSA"] <- "NO-ICSS+IVSA"

In [10]:
%%R
# Re-check the cohort labels after the clean-up.
unique(IVSA_data$cohort)

[1] ICSS+IVSA
Levels: ICSS+IVSA ICVSS+IVSA


In [11]:
%%R
# List the distinct values of the regimen column.
unique(IVSA_data$regimen)

[1] 1H 6H 3H
Levels: 1H 3H 6H


In [12]:
%%R
# List the distinct values of the group column.
unique(IVSA_data$group)

[1] FEMALES MALES  
Levels: FEMALES MALES


In [13]:
%%R
# List the distinct subject IDs present in the collated data.
unique(IVSA_data$subjectID)

[1] NC4  NC6  NC8  SG10 NC1  NC3  NC7  SG9 
Levels: NC1 NC3 NC4 NC6 NC7 NC8 SG10 SG9


In [15]:
%%R
# Keep only the rows of the six NC animals (NC1, NC3, NC7, NC4, NC6, NC8);
# rows of any other subject ID are dropped. The filtered table is printed
# and stored back into IVSA_data.
IVSA_data <- IVSA_data %>%
  filter(subjectID %in% c("NC1", "NC3", "NC7", "NC4", "NC6", "NC8")) %>%
  print()

# A tibble: 17,882 x 7
   date       cohort    regimen group   subjectID eventType eventTime
   <date>     <fct>     <fct>   <fct>   <fct>     <fct>         <dbl>
 1 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards           4
 2 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          16
 3 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          50
 4 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards          69
 5 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         456
 6 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         467
 7 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards         479
 8 2018-11-01 ICSS+IVSA 1H      FEMALES NC4       rewards        1569
 9 2018-11-01 ICSS+IVSA 1H      FEM

In [16]:
%%R
# Re-check which subject IDs remain after filtering.
unique(IVSA_data$subjectID)

[1] NC4 NC6 NC8 NC1 NC3 NC7
Levels: NC1 NC3 NC4 NC6 NC7 NC8 SG10 SG9


In [17]:
%%R
# List the distinct values of the eventType column.
unique(IVSA_data$eventType)

[1] rewards     corrLever   incorrLever
Levels: corrLever incorrLever rewards


---
## 3. Saving data

In [18]:
%%R
# Write the cleaned table to "_ivsaData_am_cohort02.csv" inside IVSA_dataDir.
IVSA_data %>%
  write_csv(file.path(IVSA_dataDir, "_ivsaData_am_cohort02.csv"))

---
Author: Suman K. Guha <br/>
Date: April 19, 2019