# Preparations

In [1]:
library(tidyverse)

── [1mAttaching packages[22m ─────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.2     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
setwd('data')

# Extract sample information from file name

Get all file names in directory.

In [3]:
file_list <- list.files()

Get information from file names.

In [4]:
df_info <- as.data.frame(file_list) %>%
    mutate(sample = file_list) %>%
    separate(file_list,c('date_measurement','exp_number','analysis_type','mode',NA,NA), sep='_') %>%
    mutate(date_measurement = strptime(date_measurement, "%Y%m%d"))

“Expected 6 pieces. Missing pieces filled with `NA` in 2 rows [5, 6].”


# Combine files into single dataframe

In [5]:
i <- 1
df_RT <- setNames(data.frame(matrix(ncol = 3, nrow = 0)), c("compound", "compoundId", "medRt"))

for (files in file_list){
    df <- read.csv(files) %>%
    select(10,11,6)# %>%
    df$sample <- file_list[i]
    df_RT <- rbind(df_RT, df)
    i <- i+1
}

# Combine data and data info

In [6]:
data <- merge(df_RT,df_info,by='sample',all.x=TRUE) %>%
    rename(RT = medRt)

In [7]:
head(data)

Unnamed: 0_level_0,sample,compound,compoundId,RT,date_measurement,exp_number,analysis_type,mode
Unnamed: 0_level_1,<chr>,<fct>,<fct>,<dbl>,<dttm>,<chr>,<chr>,<chr>
1,20201104_LS20-027_Lipids_Neg_Xin_6MLiver.csv,PE(C34:1) M-H,PE(C34:1) M-H,9.308,2020-11-04,LS20-027,Lipids,Neg
2,20201104_LS20-027_Lipids_Neg_Xin_6MLiver.csv,PE(C34:2) M-H,PE(C34:2) M-H,9.02,2020-11-04,LS20-027,Lipids,Neg
3,20201104_LS20-027_Lipids_Neg_Xin_6MLiver.csv,PE(C34:3) M-H,PE(C34:3) M-H,8.754,2020-11-04,LS20-027,Lipids,Neg
4,20201104_LS20-027_Lipids_Neg_Xin_6MLiver.csv,PE(O-C36:1) M-H,PE(O-C36:1) M-H,10.14,2020-11-04,LS20-027,Lipids,Neg
5,20201104_LS20-027_Lipids_Neg_Xin_6MLiver.csv,PE(O-C36:2) M-H,PE(O-C36:2) M-H,10.026,2020-11-04,LS20-027,Lipids,Neg
6,20201104_LS20-027_Lipids_Neg_Xin_6MLiver.csv,PE(O-C36:5) M-H,PE(O-C36:5) M-H,9.132,2020-11-04,LS20-027,Lipids,Neg


# Put RT into context

## Calculate average RT

In [8]:
data_sum <- data %>%
    group_by(compound) %>%
    summarize(mean = mean(RT),
              n = n(),
              sd = sd(RT)
             )

## Add statistics to main dataframe

In [9]:
data <- merge(data,data_sum,by='compound',all.x=TRUE)

## Calculate difference between RT and meanRT

In [10]:
data <- data %>%
    mutate(cov = sd/mean*100)

In [11]:
names(data)

## Reorder columns

In [12]:
data <- data[,c(2,6,5,8,7,3,1,4,9,12,10,11)]

# Print samples with deviating RT

In [13]:
data_outliers <- data %>%
    filter(cov > 15)

In [14]:
data_mean <- data %>%
    group_by(compound) %>%
    select(analysis_type, compound, mean, n, sd)

In [17]:
data_mean

analysis_type,compound,mean,n,sd
<chr>,<fct>,<dbl>,<int>,<dbl>
Lipids,Cer(d18:1/22:0),10.76500,1,
Lipids,Cholesterol[-H2O],8.60750,2,0.05444722
Lipids,Cholesterol[-H2O],8.60750,2,0.05444722
Lipids,FA(16:0) M-H,7.71400,1,
Lipids,FA(16:1) M-H,7.37400,1,
Lipids,FA(18:1) M-H,7.79500,1,
Lipids,FA(18:2) M-H,7.48700,1,
Lipids,FA(20:3) M-H,7.62500,1,
Lipids,FA(20:4) M-H,7.43800,1,
Lipids,FA(20:5) M-H,7.16100,1,


In [19]:
data_mean %>%
    distinct(compound, .keep_all=TRUE)

analysis_type,compound,mean,n,sd
<chr>,<fct>,<dbl>,<int>,<dbl>
Lipids,Cer(d18:1/22:0),10.76500,1,
Lipids,Cholesterol[-H2O],8.60750,2,0.054447222
Lipids,FA(16:0) M-H,7.71400,1,
Lipids,FA(16:1) M-H,7.37400,1,
Lipids,FA(18:1) M-H,7.79500,1,
Lipids,FA(18:2) M-H,7.48700,1,
Lipids,FA(20:3) M-H,7.62500,1,
Lipids,FA(20:4) M-H,7.43800,1,
Lipids,FA(20:5) M-H,7.16100,1,
Lipids,FA(22:4) M-H,7.76400,1,


# Export

In [15]:
setwd('../')

In [16]:
write.csv(data, 'RT_tidy.csv', row.names=FALSE)
write.csv(data_mean, 'RT_mean.csv', row.names=FALSE)
write.csv(data_outliers, 'RT_outliers.csv', row.names=FALSE)