<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [15]:
library(ggplot2)
library(gridExtra)
library(iNEXT)
library(rowr)

In [16]:
# Shared ggplot2 theme for the notebook's figures: large axis text with
# slanted x labels, no grid lines, transparent panel, legend on the right.
mytheme <- theme(
  # axis
  axis.title = element_text(size = 25),
  axis.line = element_line(colour = "black"),
  axis.text.x = element_text(size = 25, angle = 45, hjust = 1),
  axis.text.y = element_text(size = 25),
  # plot
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_rect(fill = "transparent", colour = NA),
  plot.margin = unit(c(0.2, 0, 0, 0), "cm"),
  # legend
  legend.key = element_rect(fill = "white", colour = "white"),
  legend.position = "right"
)

In [17]:
# Read every clone table in `indir` whose file name matches
# "CLONES_<chain>" and collect its `cloneCount` column.
#
# Args:
#   indir: directory holding the tab-separated clone tables. A trailing
#          path separator is optional.
#   chain: chain label used in the file names (e.g. "TRB").
#
# Returns:
#   A named list with one vector of clone counts per retained file. Each
#   name is the file name with the leading "CLONES_<chain>" prefix and the
#   last four characters (the extension) removed. Files with at most one
#   clonotype, or whose mean clone count is exactly 1, are skipped.
immunelistfx <- function(indir, chain){
  prefix <- paste("CLONES", chain, sep = "_")
  file_list <- list.files(indir, pattern = prefix)
  readlist <- list()
  for (f in file_list) {
    # file.path() supplies the separator, so `indir` works with or without
    # a trailing slash.
    mixcrfle <- read.table(file.path(indir, f),
                           header = TRUE, sep = "\t",
                           stringsAsFactors = FALSE,
                           na.strings = c("", "NA"))
    if (nrow(mixcrfle) <= 1) {
      next
    }
    counts <- mixcrfle$cloneCount
    if (is.null(counts)) {
      stop("File '", f, "' has no 'cloneCount' column", call. = FALSE)
    }
    # A mean of exactly 1 means the table carries no abundance information
    # (for positive counts: every clonotype was seen once).
    if (mean(counts) == 1) {
      next
    }
    # Strip the "CLONES_<chain>" prefix (offset derived from the prefix
    # length rather than hard-coded) and the 4-character extension.
    sample_name <- substr(f, nchar(prefix) + 1, nchar(f) - 4)
    slot <- length(readlist) + 1
    readlist[[slot]] <- counts
    names(readlist)[slot] <- sample_name
  }
  readlist
}

In [18]:
# Compute per-sample clonal diversity statistics and write them to
# "<datapath>/divstats_<chain>CHP<batchno>.csv".
#
# Args:
#   lst:      named list of clone-count vectors, one element per sample
#             (the shape returned by immunelistfx).
#   chain:    chain label; becomes the header of the clonotype-count column
#             and part of the output file name.
#   datapath: directory the CSV is written to. A trailing separator is
#             optional; an empty string writes to the working directory.
#   batchno:  batch identifier appended to the output file name.
#
# Returns:
#   Invisibly, the data frame that was written: one row per sample with the
#   observed summary statistics, the iNEXT size-based estimate `qD`, and the
#   estimated/observed richness, Shannon and Simpson diversities.
Divscorefx <- function(lst, chain, datapath, batchno){
  if (length(lst) == 0) {
    stop("`lst` contains no samples", call. = FALSE)
  }
  if (is.null(names(lst))) {
    stop("`lst` must be a named list (names are used as sample ids)",
         call. = FALSE)
  }

  # Observed statistics, computed as numeric columns directly instead of
  # being stored in a character matrix and converted back afterwards.
  per_sample <- function(f) vapply(lst, f, numeric(1), USE.NAMES = FALSE)
  observed <- data.frame(
    filename = names(lst),
    clonotypes = per_sample(function(j) length(j)),
    Reads = per_sample(function(j) sum(j)),
    # clonotypes per thousand reads
    CPKR = per_sample(function(j) (length(j) / sum(j)) * 1000),
    Average_reads = per_sample(function(j) mean(j)),
    Max_reads = per_sample(function(j) max(j)),
    # coefficient of variation of the clone counts
    IDis = per_sample(function(j) sd(j) / mean(j)),
    Singletons = per_sample(function(j) length(j[j == 1])),
    Doubletons = per_sample(function(j) length(j[j == 2])),
    stringsAsFactors = FALSE
  )
  # The clonotype-count column is labelled with the chain name.
  names(observed)[2] <- chain

  out <- iNEXT(lst, q = 0, datatype = "abundance")

  # NOTE(review): assumes out$iNextEst is a per-sample list of tables whose
  # 4th column is qD, as in the original code -- confirm against the
  # installed iNEXT version.
  est <- out$iNextEst
  qDest <- data.frame(
    filename = names(est),
    # qD is the estimate at double the size of sample (last row)
    qD = vapply(est, function(tbl) {
      tbl <- as.data.frame(tbl)
      tbl[nrow(tbl), 4]
    }, numeric(1), USE.NAMES = FALSE),
    stringsAsFactors = FALSE
  )

  # Asymptotic estimates: column 1 = sample, 3 = observed, 4 = estimator
  # (same positions the original code indexed).
  AsyEst <- out$AsyEst
  asy_measure <- function(measure, est_name, obs_name) {
    rows <- AsyEst[AsyEst$Diversity == measure, ]
    res <- data.frame(as.character(rows[[1]]), rows[[4]], rows[[3]],
                      stringsAsFactors = FALSE)
    names(res) <- c("filename", est_name, obs_name)
    res
  }
  pieces <- list(
    asy_measure("Species richness",
                "estimated_Richness", "observed_Richness"),
    asy_measure("Shannon diversity",
                "estimated_Shannon", "observed_Shannon"),
    asy_measure("Simpson diversity",
                "estimated_Simpson", "observed_Simpson")
  )
  for (piece in pieces) {
    qDest <- merge(qDest, piece, by = "filename")
  }

  final <- merge(observed, qDest, by = "filename")
  # Make sure every statistic column is numeric (guards against factor or
  # character columns coming back from iNEXT).
  cols <- seq_len(ncol(final))[-1]
  final[cols] <- lapply(final[cols],
                        function(x) as.numeric(as.character(x)))

  outname <- paste0("divstats_", chain, "CHP", batchno, ".csv")
  outfile <- if (nzchar(datapath)) file.path(datapath, outname) else outname
  write.csv(final, file = outfile, row.names = FALSE)

  # Hand the table back so callers can keep it without re-reading the CSV.
  invisible(final)
}

In [19]:
# Project-level output directories (both end with a trailing slash because
# file names are appended with paste(..., sep = "")).
# `datapath` receives the combined divstats_TRB_CHP.csv written further down;
# `plotpath` is not referenced in the visible part of this notebook.
plotpath <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Plots/"
datapath <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/"

In [20]:
# Per-batch directories (batches 1-4). Each is used both as the input
# directory for immunelistfx() and as the output directory for Divscorefx().
# NOTE(review): "ds_" presumably stands for downsampled data -- confirm.
datapath_ds1 <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/Nextseq/batch1/ds_batch1/"
datapath_ds2 <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/Nextseq/batch2/ds_batch2/"
datapath_ds3 <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/Nextseq/batch3/ds_batch3/"
datapath_ds4 <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/Nextseq/batch4/ds_batch4/"

In [21]:
# Directory for batch 7; input and output location for the batch 7 run.
datapath_ds7 <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/Nextseq/batch7/ds_batch7/"

In [22]:
# Directory for batch 6a; input and output location for the batch 6a run.
datapath_ds6a <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/Nextseq/batch6a/ds_batch6a/"

In [25]:
# Batch 1, TRB: load the clone counts, then write divstats_TRBCHP1.csv
# into the batch directory.
batch1TRB <- immunelistfx(indir = datapath_ds1, chain = "TRB")
Divscorefx(lst = batch1TRB, chain = "TRB",
           datapath = datapath_ds1, batchno = 1)

In [26]:
# Batch 2, TRB: load the clone counts, then write divstats_TRBCHP2.csv
# into the batch directory.
batch2TRB <- immunelistfx(indir = datapath_ds2, chain = "TRB")
Divscorefx(lst = batch2TRB, chain = "TRB",
           datapath = datapath_ds2, batchno = 2)

In [27]:
# Batch 3, TRB: load the clone counts, then write divstats_TRBCHP3.csv
# into the batch directory.
batch3TRB <- immunelistfx(indir = datapath_ds3, chain = "TRB")
Divscorefx(lst = batch3TRB, chain = "TRB",
           datapath = datapath_ds3, batchno = 3)

In [28]:
# Batch 4, TRB: load the clone counts, then write divstats_TRBCHP4.csv
# into the batch directory.
batch4TRB <- immunelistfx(indir = datapath_ds4, chain = "TRB")
Divscorefx(lst = batch4TRB, chain = "TRB",
           datapath = datapath_ds4, batchno = 4)

In [29]:
# Batch 7, TRB: load the clone counts, then write divstats_TRBCHP7.csv
# into the batch directory.
batch7TRB <- immunelistfx(indir = datapath_ds7, chain = "TRB")
Divscorefx(lst = batch7TRB, chain = "TRB",
           datapath = datapath_ds7, batchno = 7)

In [30]:
# Batch 6a, TRB: load the clone counts, then write divstats_TRBCHP6a.csv
# into the batch directory (the batch id is a string here, not a number).
batch6aTRB <- immunelistfx(indir = datapath_ds6a, chain = "TRB")
Divscorefx(lst = batch6aTRB, chain = "TRB",
           datapath = datapath_ds6a, batchno = "6a")

In [46]:
# Stack the per-batch TRB diversity tables for batches 1-4 row-wise.
# NOTE(review): batches 7 and 6a are processed earlier in the notebook but
# are not stacked here -- confirm that this is intentional.
div_trb <- do.call(
  rbind,
  list(Div_batch1TRB, Div_batch2TRB, Div_batch3TRB, Div_batch4TRB)
)

In [47]:
# Keep only the samples whose file name contains "CHP".
div_trb_CHP <- div_trb[grep("CHP", div_trb$filename), ]

In [48]:
# Patient id: drop the "-PBMC-DNA_2000000" suffix, then everything from the
# first "-" onwards, then the "CHP_" prefix
# (e.g. "CHP_346-01-PBMC-DNA_2000000" -> "346").
div_trb_CHP$Patient <- gsub(
  "CHP_", "",
  gsub("-.*", "",
       gsub("-PBMC-DNA_2000000", "", div_trb_CHP$filename))
)

# Cycle: characters 9-10 of the file name (e.g. "01" in the example above).
# NOTE(review): the fixed offsets assume a "CHP_" prefix followed by a
# three-character patient id -- confirm for all samples.
div_trb_CHP[["cycle"]] <- substr(div_trb_CHP$filename, start = 9, stop = 10)

In [50]:
# Save the annotated CHP table next to the other project data.
write.csv(
  div_trb_CHP,
  file = paste0(datapath, "divstats_TRB_CHP.csv"),
  row.names = FALSE
)

In [49]:
# Display the annotated CHP diversity table as the cell output.
div_trb_CHP

Unnamed: 0,filename,TRB,Reads,CPKR,Average_reads,Max_reads,IDis,Singletons,Doubletons,qD,estimated_Richness,observed_Richness,estimated_Shannon,observed_Shannon,estimated_Simpson,observed_Simpson,Patient,cycle
1,CHP_346-01-PBMC-DNA_2000000,55,1322,41.603631,24.036364,148,1.0474291,2,1,56.264,56.998,55,38.740,37.908,26.999,26.478,346,01
2,CHP_348-03-PBMC-DNA_2000000,153,1766,86.636467,11.542484,127,1.2982159,19,3,169.290,213.133,153,100.498,94.881,59.092,57.210,348,03
3,CHP_348-04-PBMC-DNA_2000000,56,859,65.192084,15.339286,79,1.1623604,9,0,63.961,91.958,56,36.320,34.642,24.731,24.066,348,04
4,CHP_349-01-PBMC-DNA_2000000,26,228,114.035088,8.769231,38,1.0571773,3,3,27.291,27.493,26,18.226,17.128,13.203,12.532,349,01
5,CHP_349-02-PBMC-DNA_2000000,105,1193,88.013412,11.361905,94,1.2934197,12,5,113.138,119.388,105,65.638,62.304,40.838,39.518,349,02
6,CHP_350-03-PBMC-DNA_2000000,211,1193,176.865046,5.654028,78,1.2994434,39,25,232.964,241.395,211,154.001,138.005,84.205,78.715,350,03
7,CHP_352-01-PBMC-DNA_2000000,34,9703,3.504071,285.382353,5964,4.0776110,4,0,36.919,39.999,34,2.383,2.378,1.984,1.984,352,01
8,CHP_352-02-PBMC-DNA_2000000,150,1795,83.565460,11.966667,62,0.7324472,12,5,158.139,164.392,150,124.152,118.421,103.438,97.854,352,02
9,CHP_356-05-PBMC-DNA_2000000,6,435,13.793103,72.500000,131,0.6537844,0,0,6.000,6.000,6,4.879,4.851,4.459,4.424,356,05
10,CHP_358-01-PBMC-DNA_2000000,25,508,49.212598,20.320000,108,1.0378220,1,0,25.000,25.000,25,17.790,17.365,12.571,12.291,358,01
