In [3]:
library(tidyverse)
library(compositions)

#' Centered log-ratio (CLR) transform of per-sample cell-type frequencies
#'
#' Averages `freq_col` within each (sample, cell type) pair, spreads the result
#' into a sample x cell-type matrix, applies `compositions::clr()` row-wise and
#' joins the transformed values back onto a per-(sample, cell type) summary of
#' `freq_table`.
#'
#' @param freq_table Data frame with one row per measurement.
#' @param sample_col Name (string) of the column identifying the sample.
#' @param freq_col Name (string) of the numeric column to transform.
#' @param celltype_col Name (string) of the column identifying the cell type.
#' @return A tibble with one row per (sample, cell type): numeric columns of
#'   `freq_table` averaged (`na.rm = TRUE`), all other columns reduced to their
#'   first value, plus a new column named `paste0(freq_col, '_clr')`.
freq_clr <- function(freq_table, sample_col, freq_col, celltype_col){
    key_cols <- c(sample_col, celltype_col)
    clr_col <- paste0(freq_col, '_clr')
    mean_na_rm <- function(x) mean(x, na.rm = TRUE)

    # Collapse duplicate (sample, cell type) rows to a single mean frequency.
    freq_selected <- select(freq_table, all_of(c(freq_col, key_cols)))
    freq_grouped <- group_by(freq_selected, across(all_of(key_cols)))
    freq_clean <- summarise(freq_grouped, across(all_of(freq_col), mean_na_rm), .groups = 'drop')

    # One row per sample, one column per cell type. Combinations absent from
    # the input become NA here.
    freq <- pivot_wider(freq_clean,
                        id_cols = all_of(sample_col),
                        names_from = all_of(celltype_col),
                        values_from = all_of(freq_col))

    freq_mx <- as.matrix(select(freq, -all_of(sample_col)))
    rownames(freq_mx) <- freq[[sample_col]]

    # NOTE(review): compositions::clr() is believed to treat zero/NA parts as
    # missing and to return 0 for them rather than NA -- confirm against the
    # package documentation before interpreting CLR values of exactly 0.
    freq_clr_matrix <- compositions::clr(freq_mx)
    freq_clr_tibble <- as_tibble(freq_clr_matrix, rownames = sample_col)

    # Row names are always character, so the round trip through the matrix
    # loses the original type of the sample IDs (integer, factor, ...). Put
    # the original column back (row order is unchanged) so the join below does
    # not fail on incompatible key types.
    freq_clr_tibble[[sample_col]] <- freq[[sample_col]]

    freq_clr_long <- pivot_longer(freq_clr_tibble,
                                  cols = -all_of(sample_col),
                                  names_to = celltype_col,
                                  values_to = clr_col)

    # Per-(sample, cell type) summary of the full input table. Every
    # non-numeric column (character, logical, factor, ...) is carried along via
    # its first value; restricting this to character columns silently dropped
    # the others.
    freq_table_grouped <- group_by(freq_table, across(all_of(key_cols)))
    freq_table_clean <- summarise(freq_table_grouped,
                                  across(where(is.numeric), mean_na_rm),
                                  across(!where(is.numeric), first),
                                  .groups = 'drop')

    full_join(freq_table_clean, freq_clr_long, by = key_cols)
}

Welcome to compositions, a package for compositional data analysis.
Find an intro with "? compositions"



Attaching package: ‘compositions’


The following objects are masked from ‘package:stats’:

    anova, cor, cov, dist, var


The following object is masked from ‘package:graphics’:

    segments


The following objects are masked from ‘package:base’:

    %*%, norm, scale, scale.default




### CLR transform of the l1 cell types

In [5]:
# Load the raw export and give every row a "<Subject>_<Visit>" sample key.
data <- read.csv('/home/workspace/pbmc_flow/data/T Cell PBMC data_final_counts, % viable cells(in).csv') %>%
    mutate(Sample_ID = paste(Subject, Visit, sep = "_"))

# Population names making up each level of the analysis.
celltypes <- list(
    l1 = c("CD4+ T cells", "CD8+ T cells", "DN T cells", "DP T cells"),
    l2 = c("Treg", "CD4 rm", "CM CD4", "EM CD4", "Naive CD4", "TEMRA CD4", "CD8 rm", "CM CD8", "EM CD8", "Naive CD8", "TEMRA CD8", "DN T cells", "DP T cells")
)

# Keep the l1 populations reported as a percentage of viable cells, then add
# each population's share of the retained events within a Subject/Visit.
data <- data %>%
    filter(population %in% celltypes$l1, percentOf == "Viable Cells") %>%
    group_by(Subject, Visit, percentOf) %>%
    mutate(total_events = sum(eventCount, na.rm = TRUE)) %>%
    mutate(frequency = eventCount / total_events) %>%
    ungroup()

# The CLR is computed on the "percent" column, not on `frequency`.
result <- freq_clr(freq_table = data,
                   sample_col = "Sample_ID",
                   freq_col = "percent",
                   celltype_col = "population")

write.csv(result, "/home/workspace/pbmc_flow/data/l1_frequency_clr_results.csv", row.names = FALSE)

### CLR transform of the l2 cell types

In [94]:
# Reload the raw export and give every row a "<Subject>_<Visit>" sample key.
data <- read.csv('/home/workspace/pbmc_flow/data/T Cell PBMC data_final_counts, % viable cells(in).csv') %>%
    mutate(Sample_ID = paste(Subject, Visit, sep = "_"))

# Population names making up each level of the analysis.
celltypes <- list(
    l1 = c("CD4+ T cells", "CD8+ T cells", "DN T cells", "DP T cells"),
    l2 = c("Treg", "CD4 rm", "CM CD4", "EM CD4", "Naive CD4", "TEMRA CD4", "CD8 rm", "CM CD8", "EM CD8", "Naive CD8", "TEMRA CD8", "DN T cells", "DP T cells")
)

# Keep the l2 populations reported as a percentage of viable cells, then add
# each population's share of the retained events within a Subject/Visit.
data <- data %>%
    filter(population %in% celltypes$l2, percentOf == "Viable Cells") %>%
    group_by(Subject, Visit, percentOf) %>%
    mutate(total_events = sum(eventCount, na.rm = TRUE)) %>%
    mutate(frequency = eventCount / total_events) %>%
    ungroup()

# The CLR is computed on the "percent" column, not on `frequency`.
result <- freq_clr(freq_table = data,
                   sample_col = "Sample_ID",
                   freq_col = "percent",
                   celltype_col = "population")

write.csv(result, "/home/workspace/pbmc_flow/data/l2_frequency_clr_results.csv", row.names = FALSE)

### Checking the results for one sample

In [97]:
# Narrow `result` to a single subject/visit and the l2 populations.
# Note that this overwrites `result` with the subset.
mask <- (
    result$Subject == "FH1002" &
        result$Visit == "Flu Year 1 Stand-Alone" &
        result$population %in% celltypes$l2
)
result <- result[mask, ]

# Show the subset, its row count, and the total of the `frequency` column.
result
nrow(result)
sum(result$frequency)

Sample_ID,population,bc.lymphocyte.count,plate.column,eventCount,percent,total_events,frequency,filename,uniquePopulationName,⋯,Subject,Visit,Sex,Response,plate,plate.row,plate.well,percentOf,percentOfUniqueName,percent_clr
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
FH1002_Flu Year 1 Stand-Alone,CD4 rm,2170,11,359,0.1309063,159283,0.00225385,B064_PT1_PB00250-01.fcs,CD4 rm,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,-2.70294
FH1002_Flu Year 1 Stand-Alone,CD8 rm,2170,11,486,0.1772157,159283,0.003051173,B064_PT1_PB00250-01.fcs,CD8 rm,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,-2.4000538
FH1002_Flu Year 1 Stand-Alone,CM CD4,2170,11,17246,6.2886064,159283,0.108272697,B064_PT1_PB00250-01.fcs,CM CD4,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,1.1690731
FH1002_Flu Year 1 Stand-Alone,CM CD8,2170,11,1807,0.6589071,159283,0.011344588,B064_PT1_PB00250-01.fcs,CM CD8,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,-1.0868391
FH1002_Flu Year 1 Stand-Alone,DN T cells,2170,11,7139,2.6031753,159283,0.044819598,B064_PT1_PB00250-01.fcs,DN T cells,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,0.2870656
FH1002_Flu Year 1 Stand-Alone,DP T cells,2170,11,461,0.1680997,159283,0.00289422,B064_PT1_PB00250-01.fcs,DP T cells,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,-2.4528644
FH1002_Flu Year 1 Stand-Alone,EM CD4,2170,11,14111,5.1454555,159283,0.088590747,B064_PT1_PB00250-01.fcs,EM CD4,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,0.9684475
FH1002_Flu Year 1 Stand-Alone,EM CD8,2170,11,14999,5.4692571,159283,0.09416573,B064_PT1_PB00250-01.fcs,EM CD8,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,1.0294764
FH1002_Flu Year 1 Stand-Alone,Naive CD4,2170,11,32682,11.9172118,159283,0.205181972,B064_PT1_PB00250-01.fcs,Naive CD4,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,1.8083173
FH1002_Flu Year 1 Stand-Alone,Naive CD8,2170,11,6890,2.5123796,159283,0.043256342,B064_PT1_PB00250-01.fcs,Naive CD8,⋯,FH1002,Flu Year 1 Stand-Alone,Female,Responder,B064,D,D11,Viable Cells,Viable Cells,0.251564
