<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Dependencies" data-toc-modified-id="Dependencies-1">Dependencies</a></span></li><li><span><a href="#FUNCTIONS" data-toc-modified-id="FUNCTIONS-2">FUNCTIONS</a></span></li><li><span><a href="#Plotting-function-for-clone-tracking" data-toc-modified-id="Plotting-function-for-clone-tracking-3">Plotting function for clone tracking</a></span></li><li><span><a href="#Paths" data-toc-modified-id="Paths-4">Paths</a></span></li><li><span><a href="#Make-a-list-of-files" data-toc-modified-id="Make-a-list-of-files-5">Make a list of files</a></span></li><li><span><a href="#Patient-clone-tracking" data-toc-modified-id="Patient-clone-tracking-6">Patient clone tracking</a></span></li><li><span><a href="#This-version-tracks-all-recovered-clonotypes-regardless-of-outofframe-stop-codon-or-productive" data-toc-modified-id="This-version-tracks-all-recovered-clonotypes-regardless-of-outofframe-stop-codon-or-productive-7">This version tracks all recovered clonotypes regardless of outofframe stop codon or productive</a></span><ul class="toc-item"><li><span><a href="#PLTK-version" data-toc-modified-id="PLTK-version-7.1">PLTK version</a></span></li></ul></li></ul></div>

# Dependencies

In [1]:
library(ggalluvial)
library(randomcoloR)

“package ‘ggalluvial’ was built under R version 3.5.2”Loading required package: ggplot2
“package ‘randomcoloR’ was built under R version 3.5.2”

# FUNCTIONS

In [2]:
patient_cdr3.fx <- function(datapath, chain, filelist, totalinframe){
    
      if (!(totalinframe %in% c("total", "inframe"))) {
    stop("Error: unknown argument ", totalinframe, ". Please provide either total (for all clonotypes) or inframe (for in-frame clonotypes only)")
  }  

# Ensure only one chain is included
    filelist <- filelist[grepl(chain, filelist)]
    
#Compile a big file with patient's mixcr files loaded in
  i <- 1
  for (f in filelist){
    mixcrfle <- read.table(paste(datapath, f, sep = ""), 
                           header = TRUE, sep = "\t",
                           stringsAsFactors = FALSE,
                           na.strings = c("", "NA"))
    if(i == 1){
# Remove duplicated CDR3s
      compldfle <- mixcrfle[!duplicated(mixcrfle$aaSeqCDR3),]
      compldfle <- cbind(cloneno = row.names(compldfle), 
                         filename = f, 
                         compldfle)
      i <- i + 1   
    }
    else{
      compldfle1 <- mixcrfle[!duplicated(mixcrfle$aaSeqCDR3),]
      compldfle1 <- cbind(cloneno = row.names(compldfle1), 
                          filename = f, 
                          compldfle1)
      compldfle <- rbind(compldfle, compldfle1)
      rm(compldfle1)
    }
  }
    myfiles <- unique(as.character(compldfle$filename))
    message("my files:")
    print(myfiles)
    
    message("Total recovered clonotypes:")
    print(length(compldfle$aaSeqCDR3))    
     
    message("Total out-of-frame clonotypes:")    
    print(length(compldfle$aaSeqCDR3[grepl("_", compldfle$aaSeqCDR3)]))     
    message("Total clonotypes with stop codon:")    
    print(length(compldfle$aaSeqCDR3[grepl("[*]", compldfle$aaSeqCDR3) &
                                    !grepl("_", compldfle$aaSeqCDR3)]))     
    
    #make samplename column
    compldfle$filename <- as.character(compldfle$filename)
    compldfle$samplename <- gsub(".*.CLONES_","", compldfle$filename) 

# remove outof frame clonotypes and those with stop codon    
    compldfle_clean <- compldfle[!grepl("_", compldfle$aaSeqCDR3) &
                                !grepl("[*]", compldfle$aaSeqCDR3),]
    
    message("Total productive clonotypes:")
    print(length(compldfle_clean$aaSeqCDR3))      

    if(totalinframe == "inframe"){
       message("Output contains in_frame clonotypes only")
        return(compldfle_clean)}
    if(totalinframe == "total"){
       message("Output contains all clonotypes")
        return(compldfle)}
}

# Plotting function for clone tracking

In [3]:
plotclontrack.fx <- function(compldfle, plotpath, chain, countfrac, clnefrc){
    
  if (!(countfrac %in% c("cloneFraction", "cloneCount"))) {
    stop("Error: unknown argument ", countfrac, ". Please provide either cloneFraction or cloneCount.")
  }  

    message("list of samples to track clones: ")
    mysamples <- unique(compldfle$samplename)
    print(mysamples)


    # Subset df
    CDR3_fraction <- compldfle[, c("samplename","aaSeqCDR3","cloneFraction", "cloneCount")]
# Subset to include only clonotypes with more than specified clonal fraction    
    CDR3_fraction <- CDR3_fraction[CDR3_fraction$cloneFraction > clnefrc,] 
     
#Assign colors to recurring clonotypes
    recurring <- unique(CDR3_fraction$aaSeqCDR3[duplicated(CDR3_fraction$aaSeqCDR3)])
    notrecurring <- CDR3_fraction$aaSeqCDR3[!CDR3_fraction$aaSeqCDR3 %in% recurring]

    message("Total number of recurring clonotypes: ")     
    print(length(recurring))
    
    if(length(recurring) == 0){
      #Introduce a dummy common cdr3 dataframe for alluvium to work  
        mydummy_df <- as.data.frame(matrix(ncol = 4, nrow = length(mysamples)))
        colnames(mydummy_df) <- colnames(CDR3_fraction)
  
        mydummy_df$samplename <- mysamples
        mydummy_df$aaSeqCDR3 <-  "XXXXX"
        mydummy_df$cloneFraction <- 0
        mydummy_df$cloneCount <- 0     
       CDR3_fraction <- rbind(CDR3_fraction, mydummy_df)
        
        recurring <- "XXXXX"
        
    }
    if(length(recurring) > 50){
        recurring_df <- CDR3_fraction[CDR3_fraction$aaSeqCDR3 %in% recurring,]
        recurringcdr3_ordered <- unique(recurring_df$aaSeqCDR3[order(recurring_df$cloneCount, decreasing = TRUE)])
        message("Total number of recurring clonotypes > 50 ")   
        message("Tracking top 10 recurring clonotypes ")  
        myColors <- distinctColorPalette(10)
        
        myColors <- c(myColors, rep("white",length(recurring)-10),
                      rep("white",length(notrecurring)))
        names(myColors) <- c(recurringcdr3_ordered, notrecurring)

        message("these are what we color: ")  
        print(myColors[myColors != "white"])         
    }
    else{
        myColors <- distinctColorPalette(length(recurring))
        myColors <- c(myColors, rep("white",length(notrecurring)))
        names(myColors) <- c(recurring, notrecurring)
        
        myColors[names(myColors) == "XXXXX"] <- "white"
        
        message("these are what we color: ")  
        print(myColors[myColors != "white"]) 
    }

# Generate a row for each sample that doesnot have recurring clonotype
## This ensures alluvia are colored
    
    for(c in recurring){
    tmp <- CDR3_fraction[CDR3_fraction$aaSeqCDR3 == c,]
    nonexsiting <- mysamples[!mysamples %in% tmp$samplename]
    if(length(nonexsiting) > 0){
    newentries <- data.frame("samplename" = nonexsiting, "aaSeqCDR3" = c, 
                             "cloneFraction" = 0, "cloneCount" = 0)
    CDR3_fraction <- rbind(CDR3_fraction, newentries)
    }
}

    
    p <-  ggplot(CDR3_fraction, aes(x = samplename, 
                                y = eval(as.name(countfrac)),
                                fill = aaSeqCDR3,
                                stratum = aaSeqCDR3,
                                alluvium = aaSeqCDR3,
                                label = aaSeqCDR3))

    myp <- p + geom_alluvium(decreasing = FALSE) + 
    geom_stratum(decreasing = FALSE, stat = "alluvium") + 
    scale_fill_manual(breaks = names(myColors[myColors != "white"]),
                      values = myColors) +
    theme(axis.title.y = element_text(size = 50),
          axis.title.x = element_blank(),
          axis.line = element_line(color = "black"),
          axis.text = element_text(size = 50),
          axis.text.x = element_text(angle = 45, hjust = 1)) +
    theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        legend.key = element_rect(fill = "white", colour = "white"),
        legend.position = "none",
        plot.margin = unit(c(0.2,0,0,0),"cm")) + 
    labs(y = countfrac) 
    
    pdf(paste0(plotpath, "clonetracking", mysamples[1],
              chain, countfrac, ".pdf"),
        width = 15, 
        height = 20,
        useDingbats = FALSE,
        onefile = FALSE)       
    print(myp)  
    dev.off()      

}

# Paths

In [4]:
datapath <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/"
plotpath <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Plots/"

# Make a list of files

In [16]:
flelst <- list.files(datapath,
                     recursive = TRUE,
                     pattern = paste("CLONES", "TRB", sep = "_"))

# subset to include only downsampled files
ds_flelst <- flelst[grep("2000000", flelst)]

# subset to include only CHP
ds_flelst_chp <- ds_flelst[grep("CHP", ds_flelst)]

# Patient clone tracking

In [14]:
pt_df <- patient_cdr3.fx(datapath, "TRB", ds_flelst_chp[grepl("353", ds_flelst_chp)],
                            "total")

pt_df <- pt_df[ pt_df$filename != "Nextseq/batch_reseq/ds_batch_reseq/CLONES_TRBCHP_353-02-PBMC-DNA_2000000.txt",]
#samplename cleanup
    pt_df$samplename <- gsub("-PBMC-DNA_2000000.txt", "", pt_df$samplename)
    pt_df$samplename <- gsub("TRB", "", pt_df$samplename)
    
plotclontrack.fx(pt_df, plotpath, "TRB", "cloneCount", 0)

ERROR: Error in unique(as.character(compldfle$filename)): object 'compldfle' not found


In [11]:
patient_ids <- c(338,345,347,348,357,359,360,361,364,368,370,373,374,378,380, 353, 358, 343, 362)

In [17]:
for(pt in as.character(patient_ids)){
    pt_df <- patient_cdr3.fx(datapath, "TRB", ds_flelst_chp[grepl(pt, ds_flelst_chp)], "total")
#samplename cleanup
    pt_df$samplename <- gsub("-PBMC-DNA_2000000.txt", "", pt_df$samplename)
    pt_df$samplename <- gsub("TRB", "", pt_df$samplename)
    
    plotclontrack.fx(pt_df, plotpath, "TRB",
                          "cloneFraction", 0) 
}

my files:


[1] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_338-04-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_338-05-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_338-03-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 1012


Total out-of-frame clonotypes:


[1] 202


Total clonotypes with stop codon:


[1] 16


Total productive clonotypes:


[1] 794


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_338-04" "CHP_338-05" "CHP_338-03"


Total number of recurring clonotypes: 


[1] 1


these are what we color: 


CASSFNTEAFF 
  "#BEB5B1" 


my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_345-04-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_345-05-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_345-02-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_345-03-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 717


Total out-of-frame clonotypes:


[1] 179


Total clonotypes with stop codon:


[1] 11


Total productive clonotypes:


[1] 527


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_345-04" "CHP_345-05" "CHP_345-02" "CHP_345-03"


Total number of recurring clonotypes: 


[1] 14


these are what we color: 


                                                                CASSPDGPRGTQYF 
                                                                     "#DC4ECB" 
                                                                  CASTYNDNEQFF 
                                                                     "#D3D9A2" 
                                                               CAISESGGSHNEQFF 
                                                                     "#88B9DB" 
                                                               CASSPDRGGRDGYTF 
                                                                     "#DC6380" 
                                                               CASSQGLAGSYEQYF 
                                                                     "#82E2A1" 
                                                             CASSHISDLPNPYEQYF 
                                                                     "#AF70CC" 
                                        

my files:


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_347-01-PBMC-DNA_2000000.txt"  
[2] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_347-02-PBMC-DNA_2000000.txt"  
[3] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_347-04-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_347-03-PBMC-DNA_2000000.txt"  
[5] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_347-05-PBMC-DNA_2000000.txt"  


Total recovered clonotypes:


[1] 863


Total out-of-frame clonotypes:


[1] 155


Total clonotypes with stop codon:


[1] 13


Total productive clonotypes:


[1] 695


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_347-01" "CHP_347-02" "CHP_347-04" "CHP_347-03" "CHP_347-05"


Total number of recurring clonotypes: 


[1] 2


these are what we color: 


CASNFAGGMSTGELFF    CSARTGGVYEQYF 
       "#B7D6A0"        "#BF85C5" 


my files:


[1] "Nextseq/batch1/ds_batch1/CLONES_TRBCHP_348-03-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch1/ds_batch1/CLONES_TRBCHP_348-04-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_348-01-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_348-02-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 313


Total out-of-frame clonotypes:


[1] 56


Total clonotypes with stop codon:


[1] 7


Total productive clonotypes:


[1] 250


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_348-03" "CHP_348-04" "CHP_348-01" "CHP_348-02"


Total number of recurring clonotypes: 


[1] 1


these are what we color: 


CASRPIAGGGADTQYF 
       "#BEB5B1" 


my files:


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_357-01-PBMC-DNA_2000000.txt"  
[2] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_357-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_357-04-PBMC-DNA_2000000.txt"  
[4] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_357-02-PBMC-DNA_2000000.txt"  


Total recovered clonotypes:


[1] 1042


Total out-of-frame clonotypes:


[1] 270


Total clonotypes with stop codon:


[1] 31


Total productive clonotypes:


[1] 741


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_357-01" "CHP_357-03" "CHP_357-04" "CHP_357-02"


Total number of recurring clonotypes: 


[1] 3


these are what we color: 


                  CASRLGWIV_TGGHEQYF                      CASRKLDREGNTIYF 
                           "#B7C4C8"                            "#B7DA7C" 
CASSESTVLHSCLLSMPS_CDFHLDPCHCVNTEAFF 
                           "#BF6CC4" 


my files:


[1] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_359-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_359-04-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_359-01-PBMC-DNA_2000000.txt"  


Total recovered clonotypes:


[1] 828


Total out-of-frame clonotypes:


[1] 143


Total clonotypes with stop codon:


[1] 12


Total productive clonotypes:


[1] 673


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_359-02" "CHP_359-04" "CHP_359-01"


Total number of recurring clonotypes: 


[1] 1


these are what we color: 


CASSEAGGNQPQHF 
     "#BEB5B1" 


my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_360-03-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_360-02-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_360-05-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 795


Total out-of-frame clonotypes:


[1] 154


Total clonotypes with stop codon:


[1] 14


Total productive clonotypes:


[1] 627


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_360-03" "CHP_360-02" "CHP_360-05"


Total number of recurring clonotypes: 


[1] 4


these are what we color: 


    CASSPLYEQYF CASSLGQGIVAEAFF   CAISESVHNEQFF CASSETSGSTDTQYF 
      "#A7D2D2"       "#D891A2"       "#B7DB77"       "#B066D5" 


my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_361-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_361-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_361-01-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_361-04-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 333


Total out-of-frame clonotypes:


[1] 74


Total clonotypes with stop codon:


[1] 5


Total productive clonotypes:


[1] 254


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_361-02" "CHP_361-03" "CHP_361-01" "CHP_361-04"


Total number of recurring clonotypes: 


[1] 0


these are what we color: 


named character(0)


my files:


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_364-01-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_364-02-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_364-04-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 360


Total out-of-frame clonotypes:


[1] 72


Total clonotypes with stop codon:


[1] 8


Total productive clonotypes:


[1] 280


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_364-01" "CHP_364-02" "CHP_364-04"


Total number of recurring clonotypes: 


[1] 2


these are what we color: 


CAISGPGTSGRAQDTQYF     CASSGLAGADTQYF 
         "#B7D6A0"          "#BF85C5" 


my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_368-05-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_368-01-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_368-04-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 924


Total out-of-frame clonotypes:


[1] 238


Total clonotypes with stop codon:


[1] 20


Total productive clonotypes:


[1] 666


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_368-05" "CHP_368-01" "CHP_368-04"


Total number of recurring clonotypes: 


[1] 15


these are what we color: 


                                              CASSQESSPIGTEAFF 
                                                     "#D656C0" 
                                               CASSGSGTVWETQYF 
                                                     "#9447E3" 
                                         CASSQDLGETLLFMNTGELFF 
                                                     "#8BAEDC" 
                                               CASSAGGTANTEAFF 
                                                     "#9681D8" 
                                              CASSKGAC_TNEKLFF 
                                                     "#DADD84" 
                                                CASTLPN_GKKLFF 
                                                     "#72E588" 
                                                 CASSQSYYLEQFF 
                                                     "#6BE4C7" 
     CASSGSGTVWETQYFGPGTRLLVLGERG_AGARARAAWVWFLRGVPGLCSGANVLTF 
                                        

my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_370-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_370-01-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_370-05-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_370-03-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 1123


Total out-of-frame clonotypes:


[1] 194


Total clonotypes with stop codon:


[1] 25


Total productive clonotypes:


[1] 904


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_370-02" "CHP_370-01" "CHP_370-05" "CHP_370-03"


Total number of recurring clonotypes: 


[1] 4


these are what we color: 


CASSIGGTLWAGGTEAFF    CASSLGTLPNQPQHF   CASRSGGYSYNSPLHF     CASSDSSGANVLTF 
         "#A7D2D2"          "#B7DB77"          "#B066D6"          "#D890A2" 


my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_373-01-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_373-02-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_373-03-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 452


Total out-of-frame clonotypes:


[1] 102


Total clonotypes with stop codon:


[1] 9


Total productive clonotypes:


[1] 341


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_373-01" "CHP_373-02" "CHP_373-03"


Total number of recurring clonotypes: 


[1] 0


these are what we color: 


named character(0)


my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_374-03-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_374-04-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_374-05-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 265


Total out-of-frame clonotypes:


[1] 66


Total clonotypes with stop codon:


[1] 2


Total productive clonotypes:


[1] 197


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_374-03" "CHP_374-04" "CHP_374-05"


Total number of recurring clonotypes: 


[1] 0


these are what we color: 


named character(0)


my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_378-01-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_378-02-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_378-04-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 1161


Total out-of-frame clonotypes:


[1] 244


Total clonotypes with stop codon:


[1] 20


Total productive clonotypes:


[1] 897


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_378-01" "CHP_378-02" "CHP_378-04"


Total number of recurring clonotypes: 


[1] 13


these are what we color: 


             CASSQTLA_GNQETQYF                 CASSHSTVNTEAFF 
                     "#7ADADA"                      "#87A9D5" 
                   CAWSPNTEAFF             CASS*PLTG_QNDSPLHF 
                     "#8C4BE4"                      "#7CE24E" 
              CASSGDRQVYPNEQFF                CASSGIELAKNIQYF 
                     "#DA9D45"                      "#DCAFBD" 
                CASSHDWDTNTQYF               CASSQGAGFVNTEAFF 
                     "#D856C6"                      "#D1DED5" 
                   CAWSPATEAFF CASSLDTV*QRLGGF_SGPGRLCSGNTIYF 
                     "#D2E161"                      "#AE88D2" 
                CASSPGDLNTEAFF               CASSGLPI_PDYEQYF 
                     "#D3D499"                      "#86E3A6" 
               CASSDLGGARETQYF 
                     "#DB6578" 


my files:


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_380-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_380-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_380-05-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 196


Total out-of-frame clonotypes:


[1] 36


Total clonotypes with stop codon:


[1] 1


Total productive clonotypes:


[1] 159


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_380-02" "CHP_380-03" "CHP_380-05"


Total number of recurring clonotypes: 


[1] 10


these are what we color: 


     CASRSGSTEAFF CASSFKRGFSGANVLTF CASSEPPGANSYNEQFF   CASSPRSGDQETQYF 
        "#AD51DD"         "#D9D974"         "#D4DABF"         "#8ACEDE" 
      CASGFGGELFF CASSDDLTGGYTDTQYF   CASSHQGNTYNEQFF       CAKDG_REGHF 
        "#92E352"         "#9B92D7"         "#D8B1C3"         "#82E2AF" 
 CSARDLIR_QSYEQYF     CASSFSLPYEQYF 
        "#DB629C"         "#D6894F" 


my files:


[1] "Nextseq/batch_reseq/ds_batch_reseq/CLONES_TRBCHP_353-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_353-01-PBMC-DNA_2000000.txt"          
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_353-03-PBMC-DNA_2000000.txt"          
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_353-05-PBMC-DNA_2000000.txt"          


Total recovered clonotypes:


[1] 864


Total out-of-frame clonotypes:


[1] 148


Total clonotypes with stop codon:


[1] 15


Total productive clonotypes:


[1] 701


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_353-02" "CHP_353-01" "CHP_353-03" "CHP_353-05"


Total number of recurring clonotypes: 


[1] 0


these are what we color: 


named character(0)


my files:


[1] "Nextseq/batch_reseq/ds_batch_reseq/CLONES_TRBCHP_358-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch1/ds_batch1/CLONES_TRBCHP_358-01-PBMC-DNA_2000000.txt"          
[3] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_358-03-PBMC-DNA_2000000.txt"          
[4] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_358-05-PBMC-DNA_2000000.txt"          


Total recovered clonotypes:


[1] 1501


Total out-of-frame clonotypes:


[1] 288


Total clonotypes with stop codon:


[1] 23


Total productive clonotypes:


[1] 1190


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_358-02" "CHP_358-01" "CHP_358-03" "CHP_358-05"


Total number of recurring clonotypes: 


[1] 1


these are what we color: 


CASSHSSYNSPLHF 
     "#BEB5B1" 


my files:


[1] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_343-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_343-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_343-04-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_343-05-PBMC-DNA_2000000.txt"


Total recovered clonotypes:


[1] 119


Total out-of-frame clonotypes:


[1] 21


Total clonotypes with stop codon:


[1] 1


Total productive clonotypes:


[1] 97


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_343-02" "CHP_343-03" "CHP_343-04" "CHP_343-05"


Total number of recurring clonotypes: 


[1] 0


these are what we color: 


named character(0)


my files:


[1] "Nextseq/batch_reseq/ds_batch_reseq/CLONES_TRBCHP_362-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_362-01-PBMC-DNA_2000000.txt"          
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_362-03-PBMC-DNA_2000000.txt"          
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_362-04-PBMC-DNA_2000000.txt"          


Total recovered clonotypes:


[1] 691


Total out-of-frame clonotypes:


[1] 151


Total clonotypes with stop codon:


[1] 13


Total productive clonotypes:


[1] 527


Output contains all clonotypes
list of samples to track clones: 


[1] "CHP_362-02" "CHP_362-01" "CHP_362-03" "CHP_362-04"


Total number of recurring clonotypes: 


[1] 3


these are what we color: 


 CASSLGLA_SSYNEQFF CASRQYRLGRGPNQPQHF    CSARDGQAGDTEAFF 
         "#BF6CC4"          "#B6C5C8"          "#B8D979" 


# This version tracks all recovered clonotypes regardless of outofframe stop codon or productive

## PLTK version

In [14]:
clontrack.fx <- function(datapath, plotpath, chain, filelist, countfrac, clnefrc){
  
  if (!(countfrac %in% c("cloneFraction", "cloneCount"))) {
    stop("Error: unknown argument ", countfrac, ". Please provide either cloneFraction or cloneCount.")
  }  
  if (!(chain %in% c("TRA", "TRB", "TRD", "TRG"))) {
    stop("Error: unknown argument ", chain, ". Please provide one of the following: TRA, TRB, TRD, TRG.")
  }   
  
  message("list of files to track clones: ")
  print(filelist)
  
  #Compile a big file with patient's mixcr files loaded in
  i <- 1
  for (f in filelist){
    mixcrfle <- read.table(paste(datapath, f, sep = ""), 
                           header = TRUE, sep = "\t",
                           stringsAsFactors = FALSE,
                           na.strings = c("", "NA"))
    if(i == 1){
      compldfle <- mixcrfle[!duplicated(mixcrfle$aaSeqCDR3),]
      compldfle <- cbind(cloneno = row.names(compldfle), 
                         filename = f, 
                         compldfle)
      i <- i + 1   
    }
    else{
      compldfle1 <- mixcrfle[!duplicated(mixcrfle$aaSeqCDR3),]
      compldfle1 <- cbind(cloneno = row.names(compldfle1), 
                          filename = f, 
                          compldfle1)
      compldfle <- rbind(compldfle, compldfle1)
      rm(compldfle1)
    }
  }
  
  #Clean the sample name. It should be in this format: CHP_XXX-0X
  compldfle$samplename <- gsub(paste(".*",chain, sep = ""), "", compldfle$filename)
  compldfle$samplename <- gsub("-PBMC-DNA_2000000.txt", "", compldfle$samplename)  

    
  # Subset df
  CDR3_fraction <- compldfle[, c("samplename","aaSeqCDR3","cloneFraction", "cloneCount")]
  # Subset to include only clonotypes with more than specified clonal fraction    
  CDR3_fraction <- CDR3_fraction[CDR3_fraction$cloneFraction > clnefrc,] 
  ## append the empty clonotypes after here.   
  
  
  # Number of samples
  mysamples <- unique(CDR3_fraction$samplename)
  
  #Assign colors to recurring clonotypes
  recurring <- unique(CDR3_fraction$aaSeqCDR3[duplicated(CDR3_fraction$aaSeqCDR3)])
  notrecurring <- CDR3_fraction$aaSeqCDR3[!CDR3_fraction$aaSeqCDR3 %in% recurring]
  
  message("Total number of recurring clonotypes: ")     
  print(length(recurring))
  
  if(length(recurring) > 50){
    recurring_df <- CDR3_fraction[CDR3_fraction$aaSeqCDR3 %in% recurring,]
    recurringcdr3_ordered <- unique(recurring_df$aaSeqCDR3[order(recurring_df$cloneCount, decreasing = TRUE)])
    message("Total number of recurring clonotypes > 50 ")   
    message("Tracking top 10 recurring clonotypes ")  
    myColors <- distinctColorPalette(10)
    
    myColors <- c(myColors, rep("white",length(recurring)-10),
                  rep("white",length(notrecurring)))
    names(myColors) <- c(recurringcdr3_ordered, notrecurring)
    
    message("these are what we color: ")  
    print(myColors[myColors != "white"])         
  }
  else{
    myColors <- distinctColorPalette(length(recurring))
    myColors <- c(myColors, rep("white",length(notrecurring)))
    names(myColors) <- c(recurring, notrecurring)
    
    message("these are what we color: ")  
    print(myColors[myColors != "white"]) 
  }
  
  
  # Generate a row for each sample that doesnot have recurring clonotype
  ## This ensures alluvia are colored
  
  for(c in recurring){
    tmp <- CDR3_fraction[CDR3_fraction$aaSeqCDR3 == c,]
    nonexsiting <- mysamples[!mysamples %in% tmp$samplename]
    if(length(nonexsiting) > 0){
      newentries <- data.frame("samplename" = nonexsiting, "aaSeqCDR3" = c, 
                               "cloneFraction" = 0, "cloneCount" = 0)
      CDR3_fraction <- rbind(CDR3_fraction, newentries)
    }
  }
  
  p <-  ggplot(CDR3_fraction, aes(x = samplename, 
                                  y = eval(as.name(countfrac)),
                                  fill = aaSeqCDR3,
                                  stratum = aaSeqCDR3,
                                  alluvium = aaSeqCDR3,
                                  label = aaSeqCDR3))
  
  myp <- p + geom_alluvium(decreasing = FALSE) + 
    geom_stratum(decreasing = FALSE, stat = "alluvium") + 
    scale_fill_manual(breaks = names(myColors[myColors != "white"]),
                      values = myColors) +
    theme(axis.title.y = element_text(size = 50),
          axis.title.x = element_blank(),
          axis.line = element_line(color = "black"),
          axis.text = element_text(size = 50),
          axis.text.x = element_text(angle = 45, hjust = 1)) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_rect(fill = "transparent",colour = NA),
          legend.key = element_rect(fill = "white", colour = "white"),
          legend.position = "bottom",
          plot.margin = unit(c(0.2,0,0,0),"cm")) + 
    labs(y = countfrac) 
  
  pdf(paste(plotpath, "clonetracking_",compldfle$samplename, 
            chain, countfrac, ".pdf", sep = ""),
      width = 15, 
      height = 20,
      useDingbats = FALSE,
      onefile = FALSE)       
  print(myp)  
  dev.off()      
  
}

In [15]:
datapath <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Data/"
plotpath <- "/Users/anabbi/OneDrive - UHN/Documents/INTERCEPT/Plots/"

In [16]:
patient_ids <- c(323, 338, 340, 343, 344, 345, 347, 
                 348, 353, 357, 358, 359, 360, 361, 362,
                 364, 368, 370, 374, 378, 380)

In [17]:
flelst <- list.files(datapath,
                     recursive = TRUE,
                     pattern = paste("CLONES", "TRB", sep = "_"))

# subset to include only downsampled files
ds_flelst <- flelst[grep("2000000", flelst)]

# subset to include only CHP
ds_flelst_chp <- ds_flelst[grep("CHP", ds_flelst)]

In [18]:
for(pt in as.character(patient_ids)){
   clontrack.fx(datapath, plotpath, 
                "TRB", 
                ds_flelst_chp[grepl(pt,ds_flelst_chp)], 
                "cloneCount", 0) 
}

list of files to track clones: 


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_323-03-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_323-04-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_323-05-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 5


these are what we color: 


         CAASRGC_AKNIQYF    YAFNVIFTLT_CHCVNTEAFF         CARVPRAV_NTGELFF 
               "#A5D9C0"                "#BB63D3"                "#B7E166" 
CAQQRLGGFEVA_GRLCSGNTIYF                 CANYGYTF 
               "#DC9178"                "#B3ABD6" 


list of files to track clones: 


[1] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_338-04-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_338-05-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_338-03-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 9


these are what we color: 


                                CAASRGC_AKNIQYF 
                                      "#8EB5D4" 
                          YAFNVIFTLT_CHCVNTEAFF 
                                      "#8383D9" 
                       CAQQRLGGFEVA_GRLCSGNTIYF 
                                      "#8BDEBE" 
CVIPIKHRRRFLSSLSSILARAL_PSLCLQCDFHLDPCHCVNTEAFF 
                                      "#DBCDC4" 
                               CARVPRAV_NTGELFF 
                                      "#B549DD" 
     GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                      "#D7776C" 
                                    CASSFNTEAFF 
                                      "#DC83CA" 
                                   CSCAPI_SPLHF 
                                      "#D6CE70" 
            CAGRPGLGPQGGQELGEP*RFACGIAPPCSYEQYF 
                                      "#96EA5E" 


list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_340-05-PBMC-DNA_2000000.txt"  
[2] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_340-03-PBMC-DNA_2000000.txt"  
[3] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_340-04-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 6


these are what we color: 


                           CAASRGC_AKNIQYF 
                                 "#A7E862" 
                          CARVPRAV_NTGELFF 
                                 "#DD7B8B" 
                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#D8C37F" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#AFAED8" 
                                     TP_HF 
                                 "#9DDBC5" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#B762D7" 


list of files to track clones: 


[1] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_343-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_343-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_343-04-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_343-05-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 6


these are what we color: 


                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#94E964" 
                           CAASRGC_AKNIQYF 
                                 "#B862D6" 
                          CARVPRAV_NTGELFF 
                                 "#A2D9C5" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#AFADD9" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#DC8287" 
                            CASYRGPNTGELFF 
                                 "#D8CC75" 


list of files to track clones: 


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_344-05-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_344-02-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_344-03-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_344-04-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 4


these are what we color: 


   YAFNVIFTLT_CHCVNTEAFF          CAASRGC_AKNIQYF CAQQRLGGFEVA_GRLCSGNTIYF 
               "#B7DD72"                "#DA9DA0"                "#9FD2CE" 
        CARVPRAV_NTGELFF 
               "#B470D6" 


list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_345-04-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_345-05-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_345-02-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_345-03-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 22


these are what we color: 


                                CAASRGC_AKNIQYF 
                                      "#7AE6D2" 
                               CARVPRAV_NTGELFF 
                                      "#E1B191" 
                          YAFNVIFTLT_CHCVNTEAFF 
                                      "#D0E682" 
     GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                      "#D1AD4F" 
                       CAQQRLGGFEVA_GRLCSGNTIYF 
                                      "#6C9572" 
                                 CASSPDGPRGTQYF 
                                      "#A6539D" 
               CVSSFWKEGPTPAPLG_CHTHDALCSNQPQHF 
                                      "#E656CE" 
CVIPIKHRRRFLSSLSSILARAL_PSLCLQCDFHLDPCHCVNTEAFF 
                                      "#73E498" 
                                      CATNEKLFF 
                                      "#6B7FD8" 
                                   CASTYNDNEQFF 
                                      "#AB36E8" 
                    

list of files to track clones: 


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_347-01-PBMC-DNA_2000000.txt"  
[2] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_347-02-PBMC-DNA_2000000.txt"  
[3] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_347-04-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_347-03-PBMC-DNA_2000000.txt"  
[5] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_347-05-PBMC-DNA_2000000.txt"  


Total number of recurring clonotypes: 


[1] 13


these are what we color: 


                                CAASRGC_AKNIQYF 
                                      "#7FD4D4" 
                               CARVPRAV_NTGELFF 
                                      "#D3C3D6" 
     GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                      "#D77796" 
                          YAFNVIFTLT_CHCVNTEAFF 
                                      "#7293D4" 
                       CAQQRLGGFEVA_GRLCSGNTIYF 
                                      "#814EE1" 
                               CASNFAGGMSTGELFF 
                                      "#DBDB5B" 
                                  CSARVRGDYGYTF 
                                      "#D1E2B5" 
                                 CAGLGGRDQETQYF 
                                      "#EC6D46" 
                                  CSARTGGVYEQYF 
                                      "#89EA54" 
CVIPIKHRRRFLSSLSSILARAL_PSLCLQCDFHLDPCHCVNTEAFF 
                                      "#DC4FCF" 
                    

list of files to track clones: 


[1] "Nextseq/batch1/ds_batch1/CLONES_TRBCHP_348-03-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch1/ds_batch1/CLONES_TRBCHP_348-04-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_348-01-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch2/ds_batch2/CLONES_TRBCHP_348-02-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 6


these are what we color: 


   YAFNVIFTLT_CHCVNTEAFF          CAASRGC_AKNIQYF         CARVPRAV_NTGELFF 
               "#BB5FD6"                "#DD848C"                "#AAA8DA" 
CAQQRLGGFEVA_GRLCSGNTIYF         CASRPIAGGGADTQYF             CSCAPI_SPLHF 
               "#94E867"                "#A4D9C6"                "#D9CB75" 


list of files to track clones: 


[1] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_353-01-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_353-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_353-05-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 5


these are what we color: 


                           CAASRGC_AKNIQYF 
                                 "#B6E067" 
                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#B2ACD8" 
                          CARVPRAV_NTGELFF 
                                 "#DC8E79" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#A7D9C1" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#BB63D4" 


list of files to track clones: 


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_357-01-PBMC-DNA_2000000.txt"  
[2] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_357-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_357-04-PBMC-DNA_2000000.txt"  
[4] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_357-02-PBMC-DNA_2000000.txt"  


Total number of recurring clonotypes: 


[1] 11


these are what we color: 


                                CAASRGC_AKNIQYF 
                                      "#7EE0BE" 
                          YAFNVIFTLT_CHCVNTEAFF 
                                      "#AE49E1" 
                               CARVPRAV_NTGELFF 
                                      "#8AEA62" 
                       CAQQRLGGFEVA_GRLCSGNTIYF 
                                      "#D87C6A" 
     GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                      "#D6DCCB" 
                                   CSCAPI_SPLHF 
                                      "#DA73C3" 
            CAGRPGLGPQGGQELGEP*RFACGGAPPCSYEQYF 
                                      "#7D83D7" 
                                      CATNEKLFF 
                                      "#D4B5D6" 
                             CASRLGWIV_TGGHEQYF 
                                      "#C6CD8A" 
                                CASRKLDREGNTIYF 
                                      "#78BBD1" 
CVIPIKHRRRFLSSLSSILA

list of files to track clones: 


[1] "Nextseq/batch1/ds_batch1/CLONES_TRBCHP_358-01-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_358-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_358-05-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 8


these are what we color: 


                                 CAASRGC_AKNIQYF 
                                       "#CED4A6" 
                                CARVPRAV_NTGELFF 
                                       "#BC4ED8" 
                        CAQQRLGGFEVA_GRLCSGNTIYF 
                                       "#B085D5" 
      GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                       "#B7BAD7" 
                           YAFNVIFTLT_CHCVNTEAFF 
                                       "#DAD055" 
CPANSRQEKIGSGKRGRVCPLSVL_GGGEGEGILGSPFPLCSYNEQFF 
                                       "#DC7C7D" 
                                  CASSHSSYNSPLHF 
                                       "#89E96D" 
 CVIPIKHRRRFLSSLSSILARAL_PSLCLQCDFHLDPCHCVNTEAFF 
                                       "#7CD8CA" 


list of files to track clones: 


[1] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_359-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch6a/ds_batch6a/CLONES_TRBCHP_359-04-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_359-01-PBMC-DNA_2000000.txt"  


Total number of recurring clonotypes: 


[1] 7


these are what we color: 


                           CAASRGC_AKNIQYF 
                                 "#A691DA" 
                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#A0E95C" 
                          CARVPRAV_NTGELFF 
                                 "#8DDEBE" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#DC7D88" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#BEC7D0" 
                           CAASRGC_SQKHSVH 
                                 "#D8C876" 
                            CASSEAGGNQPQHF 
                                 "#BD52D6" 


list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_360-03-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_360-02-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_360-05-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 13


these are what we color: 


                           CAASRGC_AKNIQYF 
                                 "#7DD7D2" 
                          CARVPRAV_NTGELFF 
                                 "#DBDFC1" 
                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#E16A73" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#D1B8CD" 
                               CASSPLYEQYF 
                                 "#96DD9B" 
                           CASSLGQGIVAEAFF 
                                 "#DB53C9" 
                           CASSFIAGEADTQYF 
                                 "#7A6FD9" 
                CAKGGGEGEGILGSPFPLCSYNEQFF 
                                 "#DDDE5B" 
                             CAISESVHNEQFF 
                                 "#D48BD2" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#7AA3D6" 
                           CASSETSGSTDTQYF 
                                 "#CCA167" 
                           CAASR

list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_361-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_361-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_361-01-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_361-04-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 11


these are what we color: 


                                CAASRGC_AKNIQYF 
                                      "#DE84C5" 
                          YAFNVIFTLT_CHCVNTEAFF 
                                      "#9DE19E" 
                       CAQQRLGGFEVA_GRLCSGNTIYF 
                                      "#8775DC" 
                               CARVPRAV_NTGELFF 
                                      "#98ADDE" 
     GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                      "#DDCC68" 
CVIPIKHRRRFLSSLSSILARAL_PSLCLQCDFHLDPCHCVNTEAFF 
                                      "#7FDCD5" 
       GAGTRLSVLGKLGPPGDRGR_LRSGFCAGLGGRDQETQYF 
                                      "#C245DB" 
                                CAASRGC_SQKHSVH 
                                      "#DDD2C9" 
                     CAKGGGEGEGILGSPFPLCSYNEQFF 
                                      "#E1796C" 
               CVSSFWKEGPTPAPLG_CHTHDALCSNQPQHF 
                                      "#9BEB52" 
                    

list of files to track clones: 


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_362-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_362-01-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_362-03-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_362-04-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 9


these are what we color: 


                           CAASRGC_AKNIQYF 
                                 "#7DD8CE" 
                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#84E86B" 
                          CARVPRAV_NTGELFF 
                                 "#CFC0D1" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#D879C8" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#DCD558" 
                                 CATNEKLFF 
                                 "#C8D5A2" 
                        CASRQYRLGRGPNQPQHF 
                                 "#AD4CE0" 
                           CSARDGQAGDTEAFF 
                                 "#D97C6B" 
  GAGTRLSVLGKLGPPGDRGR_LRSGFCAGLGGRDQETQYF 
                                 "#7E90D7" 


list of files to track clones: 


[1] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_364-01-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_364-02-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch4/ds_batch4/CLONES_TRBCHP_364-04-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 12


these are what we color: 


                                CAASRGC_AKNIQYF 
                                      "#76DEAE" 
                               CARVPRAV_NTGELFF 
                                      "#DCDA52" 
                          YAFNVIFTLT_CHCVNTEAFF 
                                      "#C6DA95" 
     GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                      "#CF9D69" 
                       CAQQRLGGFEVA_GRLCSGNTIYF 
                                      "#87EA5B" 
                                   CSCAPI_SPLHF 
                                      "#D0B1CF" 
                                      CATNEKLFF 
                                      "#E16770" 
                                       CANYGYTF 
                                      "#79CCD7" 
                             CAISGPGTSGRAQDTQYF 
                                      "#D573CA" 
            CAGRPGLGPQGGQELGEP*RFACGGAPPCSYEQYF 
                                      "#A847E0" 
                    

list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_368-05-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_368-01-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_368-04-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 18


these are what we color: 


                           CAASRGC_AKNIQYF 
                                 "#E06569" 
                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#CADD97" 
                          CASSQESSPIGTEAFF 
                                 "#7AED4D" 
                          CARVPRAV_NTGELFF 
                                 "#E5E64B" 
                           CASSGSGTVWETQYF 
                                 "#D886CB" 
                           CASSAGGTANTEAFF 
                                 "#D3E2CF" 
                     CASSQDLGETLLFMNTGELFF 
                                 "#806DDA" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#8697DA" 
                          CASSKGAC_TNEKLFF 
                                 "#DFAE8B" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#78E1D3" 
                            CASTLPN_GKKLFF 
                                 "#AFE362" 
                             CAS

list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_370-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_370-01-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_370-05-PBMC-DNA_2000000.txt"
[4] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_370-03-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 10


these are what we color: 


                           CAASRGC_AKNIQYF 
                                 "#A748E0" 
                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#7BD7CF" 
                          CARVPRAV_NTGELFF 
                                 "#D49E66" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#D6DE5A" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#C6DAA9" 
                        CASSIGGTLWAGGTEAFF 
                                 "#D1C0D2" 
                           CASSLGTLPNQPQHF 
                                 "#D376CE" 
                          CASRSGGYSYNSPLHF 
                                 "#DF6D7D" 
                            CASSDSSGANVLTF 
                                 "#7AE86F" 
         CATSPSTALKCQFLLA_TNLPQTQLRKQVMY*V 
                                 "#7D91D7" 


list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_374-03-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_374-04-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_374-05-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 10


these are what we color: 


                                CAASRGC_AKNIQYF 
                                      "#838BD9" 
                               CARVPRAV_NTGELFF 
                                      "#80E1C1" 
                          YAFNVIFTLT_CHCVNTEAFF 
                                      "#DBD358" 
     GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                      "#8AEA64" 
                       CAQQRLGGFEVA_GRLCSGNTIYF 
                                      "#D7BED2" 
   CTRLTVVGKTFFRFFCRSVTGK_WVHSVPFRVAIFLCANYGYTF 
                                      "#D87C6A" 
                CALCLC*GRWGRRRN_LGSPFPLCSYNEQFF 
                                      "#80C0D2" 
CVIPIKHRRRFLSSLSSILARAL_PSLCLQCDFHLDPCHCVNTEAFF 
                                      "#AE4BDF" 
               CVSSFWKEGPTPAPLG_CHTHDALCSNQPQHF 
                                      "#CDD4A4" 
       GAGTRLSVLGKLGPPGDRGR_LRSGFCAGLGGRDQETQYF 
                                      "#D976C5" 


list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_378-01-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch7/ds_batch7/CLONES_TRBCHP_378-02-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_378-04-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 17


these are what we color: 


           YAFNVIFTLT_CHCVNTEAFF                  CAASRGC_AKNIQYF 
                       "#75E599"                        "#7B7B85" 
               CASSQTLA_GNQETQYF                   CASSHSTVNTEAFF 
                       "#7AE0D0"                        "#DCBBDC" 
        CAQQRLGGFEVA_GRLCSGNTIYF               CASS*PLTG_QNDSPLHF 
                       "#E1AE8C"                        "#D86692" 
                     CAWSPNTEAFF                 CASSGDRQVYPNEQFF 
                       "#D0E6A3"                        "#7F5AD8" 
                CARVPRAV_NTGELFF                  CASSGIELAKNIQYF 
                       "#7E8FDC"                        "#EB7C48" 
                  CASSHDWDTNTQYF                 CASSQGAGFVNTEAFF 
                       "#8BEB52"                        "#7DC3E0" 
CVSSFWKEGPTPAPLG_CHTHDALCSNQPQHF                      CAWSPATEAFF 
                       "#8E9D5C"                        "#D67FD8" 
                  CASSPGDLNTEAFF                 CASSGLPI_PDYE

list of files to track clones: 


[1] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_380-02-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_380-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch8/ds_batch8/CLONES_TRBCHP_380-05-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 16


these are what we color: 


                           CAASRGC_AKNIQYF 
                                 "#73E3A1" 
                     YAFNVIFTLT_CHCVNTEAFF 
                                 "#E0AE8D" 
                          CARVPRAV_NTGELFF 
                                 "#D9E658" 
                              CASRSGSTEAFF 
                                 "#B63FE3" 
                         CASSFKRGFSGANVLTF 
                                 "#D4D6D5" 
                         CASSEPPGANSYNEQFF 
                                 "#CCE1A4" 
                  CAQQRLGGFEVA_GRLCSGNTIYF 
                                 "#71A5D8" 
                           CASSPRSGDQETQYF 
                                 "#82E952" 
                               CASGFGGELFF 
                                 "#7C73DB" 
GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                 "#D66FCA" 
          CVSSFWKEGPTPAPLG_CHTHDALCSNQPQHF 
                                 "#D2B04E" 
                         CASSDDL

In [25]:
clontrack.fx(datapath, plotpath, 
             "TRB", ds_flelst_chp[grepl("358",ds_flelst_chp)], 
             "cloneCount", 0)

list of files to track clones: 


[1] "batch1/ds_batch1/CLONES_TRBCHP_358-01-PBMC-DNA_2000000.txt"
[2] "batch3/ds_batch3/CLONES_TRBCHP_358-03-PBMC-DNA_2000000.txt"
[3] "batch3/ds_batch3/CLONES_TRBCHP_358-05-PBMC-DNA_2000000.txt"


my sample names:


[1] "CHP_358-01" "CHP_358-03" "CHP_358-05"


Total number of recurring clonotypes: 


[1] 8


these are what we color: 


                                 CAASRGC_AKNIQYF 
                                       "#86E181" 
                                CARVPRAV_NTGELFF 
                                       "#DA79AF" 
                        CAQQRLGGFEVA_GRLCSGNTIYF 
                                       "#DA845B" 
      GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                       "#B95ADC" 
                           YAFNVIFTLT_CHCVNTEAFF 
                                       "#DBDC62" 
CPANSRQEKIGSGKRGRVCPLSVL_GGGEGEGILGSPFPLCSYNEQFF 
                                       "#A1A9D7" 
                                  CASSHSSYNSPLHF 
                                       "#84DCD2" 
 CVIPIKHRRRFLSSLSSILARAL_PSLCLQCDFHLDPCHCVNTEAFF 
                                       "#D4D0BA" 


In [13]:
clontrack.fx(datapath, plotpath, 
             "TRB", ds_flelst_chp[grepl("358",ds_flelst_chp)], 
             "cloneCount", 0)

list of files to track clones: 


[1] "Nextseq/batch1/ds_batch1/CLONES_TRBCHP_358-01-PBMC-DNA_2000000.txt"
[2] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_358-03-PBMC-DNA_2000000.txt"
[3] "Nextseq/batch3/ds_batch3/CLONES_TRBCHP_358-05-PBMC-DNA_2000000.txt"


Total number of recurring clonotypes: 


[1] 8


these are what we color: 


                                 CAASRGC_AKNIQYF 
                                       "#85E96C" 
                                CARVPRAV_NTGELFF 
                                       "#DAD358" 
                        CAQQRLGGFEVA_GRLCSGNTIYF 
                                       "#DB7E7A" 
      GPGTRLLVLGERGLLGRGRGR_WVWFLRGVPGLCSGANVLTF 
                                       "#B184D4" 
                           YAFNVIFTLT_CHCVNTEAFF 
                                       "#CCD4A6" 
CPANSRQEKIGSGKRGRVCPLSVL_GGGEGEGILGSPFPLCSYNEQFF 
                                       "#B7BAD7" 
                                  CASSHSSYNSPLHF 
                                       "#BC4ED8" 
 CVIPIKHRRRFLSSLSSILARAL_PSLCLQCDFHLDPCHCVNTEAFF 
                                       "#7CD9CC" 


In [30]:
flelst <- list.files("/Users/anabbi/OneDrive - UHN/Documents/Bioinformatics/Data/INSPIRE/", 
                     recursive = TRUE,
                       pattern = paste("CLONES", "TRB", sep = "_"))

In [31]:
# subset to include only downsampled files
  ds_flelst <- flelst[grep("C-018", flelst)]


In [32]:
ds_flelst

In [34]:
pt_df <- patient_cdr3.fx("/Users/anabbi/OneDrive - UHN/Documents/Bioinformatics/Data/INSPIRE/", 
                         "TRB",
                   ds_flelst)



my files:


[1] "CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt"
[2] "CLONES_TRBINSPIRE_INS-C-018-C6B_TCR-VJ_cDNA_60_CGAACTTA.txt"
[3] "CLONES_TRBINSPIRE_INS-C-018-C9B_TCR-VJ_cDNA_30_GTCGTAGA.txt"
[4] "CLONES_TRBINSPIRE_INS-C-018-SB_TCR-VJ_cDNA_19_ACCTCCAA.txt" 


Total recovered clonotypes:


[1] 498767


Total out-of-frame clonotypes:


[1] 22065


Total clonotypes with stop codon:


[1] 3253


Total productive clonotypes:


[1] 473449


In [36]:
head(pt_df)

cloneno,filename,cloneId,cloneCount,cloneFraction,clonalSequence,clonalSequenceQuality,allVHitsWithScore,allDHitsWithScore,allJHitsWithScore,⋯,minQualFR4,aaSeqFR1,aaSeqCDR1,aaSeqFR2,aaSeqCDR2,aaSeqFR3,aaSeqCDR3,aaSeqFR4,refPoints,samplename
<fct>,<fct>,<int>,<int>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<chr>,<lgl>,<chr>,<chr>
1,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt,0,143643,0.08038666,TGTGCCAGCAGCTTAGGGAGCACCGGGGAGCTGTTTTTT,EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE,TRBV7-8*00(478.4),,TRBJ2-2*00(218.4),⋯,,,,,,,CASSLGSTGELFF,,:::::::::0:-1:16:::::17:-1:39:::,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt
2,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt,1,120163,0.06724659,TGCAGCTCCTTTTCTCTAAATGAGCAGTTCTTC,EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE,TRBV29-1*00(440.4),,TRBJ2-1*00(205.6),⋯,,,,,,,CSSFSLNEQFF,,:::::::::0:-8:6:::::15:-3:33:::,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt
3,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt,2,94131,0.05267835,TGTGCCAGCAGTTCCCCAGCGGGAGGGCCCTCCGATGAGCAGTTCTTC,EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE,"TRBV12-4*00(464.2),TRBV12-3*00(460.1)",TRBD2*00(30),TRBJ2-1*00(199.3),⋯,,,,,,,CASSSPAGGPSDEQFF,,:::::::::0:-4:13:17:-6:-4:23:23:5:48:::,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt
4,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt,3,69599,0.03894956,TGTGCCAGCAGCCTGAGTGGGGGCAGTCATCAGCCCCAGCATTTT,EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE,TRBV5-6*00(437.7),TRBD1*00(30),TRBJ1-5*00(211.4),⋯,,,,,,,CASSLSGGSHQPQHF,,:::::::::0:-1:15:18:-6:0:24:28:-5:45:::,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt
5,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt,4,36605,0.02048519,TGTGCCAGCAGTTTTCAGGGAGGCTTACAAGAGACCCAGTACTTC,EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE,TRBV28*00(445.1),"TRBD2*00(30),TRBD1*00(28)",TRBJ2-5*00(212.6),⋯,,,,,,,CASSFQGGLQETQYF,,:::::::::0:-3:14:17:-9:-1:23:27:-2:45:::,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt
6,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt,5,32179,0.01800827,TGTGCCACCAGCGAAAGGTCCGAGCAGTACTTC,EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE,TRBV15*00(405.5),,TRBJ2-7*00(196.1),⋯,,,,,,,CATSERSEQYF,,:::::::::0:-5:12:::::20:-6:33:::,CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt


In [37]:
#samplename cleanup
    pt_df$samplename <- gsub("CLONES_TRBINSPIRE_", "", pt_df$samplename)
    pt_df$samplename <- gsub("_cDNA.*", "", pt_df$samplename)
    

In [41]:
    clontrack_inframes.fx(pt_df, plotpath, "TRB",
                          "cloneCount", 0.001) 

list of samples to track clones: 


[1] "INS-C-018-C3B_TCR-VJ" "INS-C-018-C6B_TCR-VJ" "INS-C-018-C9B_TCR-VJ"
[4] "INS-C-018-SB_TCR-VJ" 


Total number of recurring clonotypes: 


[1] 29


these are what we color: 


       CSSFSLNEQFF      CASSLGSTGELFF   CASSSPAGGPSDEQFF    CASSLSGGSHQPQHF 
         "#B2C7E5"          "#B5E987"          "#DB9ADD"          "#E245CC" 
   CASSFQGGLQETQYF        CATSERSEQYF    CASSQSGLEYTEQFF    CASSLEQAGRGKLFF 
         "#52E56D"          "#DA5E51"          "#6FA860"          "#8099E7" 
     CASSQGQRYEQYF    CASRLALEGIQPQHF      CASSLGTSNEQFF   CASSWGLSSYNSPLHF 
         "#687E75"          "#61B6DA"          "#E3A92C"          "#AFB297" 
   CASRSRREHGETQYF    CASSLAVRGNTEAFF     CASGMARGNTEAFF   CASSDSGGRGDNEQFF 
         "#DFE3DE"          "#D7CF6C"          "#6D689F"          "#A6E5BC" 
    CASSLAGVLSEQFF    CASSDRQGPNYGYTF       CASSTQGSEAFF   CASSYKGSPGANVLTF 
         "#7BDFDD"          "#E3BEDF"          "#C479E4"          "#B654A0" 
     CAWNLIAAGEQYF       CASINPGGGYTF    CASSIDGGEADTQYF     CASSLSRGKEKLFF 
         "#5FE5B3"          "#5C5CDB"          "#AF3CE6"          "#A0E949" 
CASSSKFGLAGLTGELFF    CASSVSRTGENEAFF   CASSQDGDRGRFEQYF   CASSSTLGDPTYEQYF 

In [42]:
clontrack.fx("/Users/anabbi/OneDrive - UHN/Documents/Bioinformatics/Data/INSPIRE/", 
             plotpath, "TRB", ds_flelst, "cloneCount", 0.001)

list of files to track clones: 


[1] "CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt"
[2] "CLONES_TRBINSPIRE_INS-C-018-C6B_TCR-VJ_cDNA_60_CGAACTTA.txt"
[3] "CLONES_TRBINSPIRE_INS-C-018-C9B_TCR-VJ_cDNA_30_GTCGTAGA.txt"
[4] "CLONES_TRBINSPIRE_INS-C-018-SB_TCR-VJ_cDNA_19_ACCTCCAA.txt" 


Total recovered clonotypes
my sample names:


[1] "CLONES_TRBINSPIRE_INS-C-018-C3B_TCR-VJ_cDNA_48_CCTCTATC.txt"
[2] "CLONES_TRBINSPIRE_INS-C-018-C6B_TCR-VJ_cDNA_60_CGAACTTA.txt"
[3] "CLONES_TRBINSPIRE_INS-C-018-C9B_TCR-VJ_cDNA_30_GTCGTAGA.txt"
[4] "CLONES_TRBINSPIRE_INS-C-018-SB_TCR-VJ_cDNA_19_ACCTCCAA.txt" 


Total number of recurring clonotypes: 


[1] 32


these are what we color: 


       CSSFSLNEQFF      CASSLGSTGELFF   CASSSPAGGPSDEQFF    CASSLSGGSHQPQHF 
         "#7DC5A9"          "#E66845"          "#E3B632"          "#E245CA" 
   CASSFQGGLQETQYF        CATSERSEQYF    CASSQSGLEYTEQFF    CASSLEQAGRGKLFF 
         "#E1E84F"          "#D3ECE5"          "#B0E7AA"          "#51C67A" 
     CASSQGQRYEQYF    CASRLALEGIQPQHF       CS**RSTDTQYF      CASSLGTSNEQFF 
         "#5C5AD9"          "#77E8E3"          "#7B994A"          "#E7A9B4" 
  CASSWGLSSYNSPLHF    CASRSRREHGETQYF    CASSLAVRGNTEAFF     CASGMARGNTEAFF 
         "#54EC6F"          "#728876"          "#D8D671"          "#D2CBE7" 
  CASSDSGGRGDNEQFF     CASSLAGVLSEQFF    CASSDRQGPNYGYTF       CASSTQGSEAFF 
         "#9EE943"          "#DDDDA8"          "#D7C7B7"          "#708CE0" 
  CASSYKGSPGANVLTF      CASSLG_SGDREF    CAASRGC_AKNIQYF      CAWNLIAAGEQYF 
         "#AF3CE6"          "#DBA7E0"          "#D36BC2"          "#DC5A94" 
      CASINPGGGYTF    CASSIDGGEADTQYF     CASSLSRGKEKLFF CASSSKFGLAGLTGELFF 

In [43]:
plotpath