## Compile taxonomy assignments
After assigning taxonomy to the reference sequences, compile the assignments with the existing ASV or OTU count table.

In [5]:
library(Biostrings)

Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, union

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    union, unique, unsplit, which, which.max, which.min

Loading required p

In [14]:
# Import reference sequences

# Convert a set of reference sequences (as returned by readDNAStringSet) into
# a data frame with one row per sequence:
#   Feature.ID        - the FASTA record name
#   ReferenceSequence - the sequence itself as plain text
# Using one helper for both ASVs and OTUs avoids shared scratch variables
# that the second import would otherwise overwrite.
ref_seqs_to_df <- function(ref_seqs) {
    data.frame(
        Feature.ID = names(ref_seqs),
        # use.names = FALSE drops the record names from the vector so they
        # cannot also end up as the data frame's row names
        ReferenceSequence = as.character(ref_seqs, use.names = FALSE)
    )
}

## ASVs
ref_asv <- readDNAStringSet("/vortexfs1/omics/huber/shu/slo-pier-weekly/qiime2/asv/slo-pier-ref-seqs-asv.fna")
fna_df <- ref_seqs_to_df(ref_asv)
# head(fna_df)

## OTUs
ref_otu <- readDNAStringSet("/vortexfs1/omics/huber/shu/slo-pier-weekly/qiime2/otu/slo-pier-ref-seqs-otu.fna")
fna_df_otu <- ref_seqs_to_df(ref_otu)
# head(fna_df_otu)

In [None]:
# library(dada2)
# seqs <- as.character(fna_df$ReferenceSequence) #extract sequences
# taxa_pr2 <- assignTaxonomy(seqs, "/vortexfs1/omics/huber/shu/db/pr2-db/pr2_version_4.12.0_18S_dada2.fasta.gz",
#         taxLevels = c("Kingdom","Supergroup","Division","Class","Order","Family","Genus","Species"), multithread = TRUE, minBoot = 0)

In [1]:
library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [19]:
# Import pr2 assigned taxonomy output (minBoot = 0) and original count tables.
# load() restores the saved objects into the workspace; with verbose = TRUE it
# prints the name of each object it restores (here: taxa_pr2, taxa_pr2_otu).
load("Pier-assigned-refseqs.RData", verbose = TRUE)
# Tab-delimited count tables for the ASV and OTU runs; the downstream joins use
# their Feature.ID, Taxon and Confidence columns.
asv_table <- read.delim("/vortexfs1/omics/huber/shu/slo-pier-weekly/qiime2/asv/CountTable-wtax-2020-04-22.txt")
otu_table <- read.delim("/vortexfs1/omics/huber/shu/slo-pier-weekly/qiime2/otu/CountTable-wtax-2020-07-02.txt")

Loading objects:
  taxa_pr2
  taxa_pr2_otu


In [31]:
# Compile ASV results with reference sequences
#   1. turn taxa_pr2 into a data frame and move its row names into a
#      ReferenceSequence column
#   2. right-join onto fna_df so every reference sequence is kept and gains
#      its Feature.ID
#   3. collapse the tax.* columns and the boot.* columns into one
#      ";"-separated string each
#   4. left-join the original count table by Feature.ID
#   5. put the ID and the taxonomy/confidence columns first
# Join keys are stated explicitly so an unexpected shared column name cannot
# silently change what the tables are matched on.
asv_wtax <- data.frame(taxa_pr2) %>%
    rownames_to_column(var = "ReferenceSequence") %>%
    right_join(fna_df, by = "ReferenceSequence") %>%
    unite(Taxon_dada2_boot0, starts_with("tax."), sep = ";") %>%
    unite(Confidence_dada2, starts_with("boot."), sep = ";") %>%
    left_join(asv_table, by = "Feature.ID") %>%
    select(
        Feature.ID,
        Taxon_qiime2 = Taxon,
        Conf_qiime2 = Confidence,
        Taxon_dada2_boot0,
        Conf_dada2_boot0 = Confidence_dada2,
        everything()
    ) %>%
    data.frame()
# head(asv_wtax)

Joining, by = "ReferenceSequence"
Joining, by = "Feature.ID"


In [32]:
# Compile OTU results with reference sequences
#   1. turn taxa_pr2_otu into a data frame and move its row names into a
#      ReferenceSequence column
#   2. right-join onto fna_df_otu so every reference sequence is kept and
#      gains its Feature.ID
#   3. collapse the tax.* columns and the boot.* columns into one
#      ";"-separated string each
#   4. left-join the original count table by Feature.ID
#   5. put the ID and the taxonomy/confidence columns first
# Join keys are stated explicitly so an unexpected shared column name cannot
# silently change what the tables are matched on.
otu_wtax <- data.frame(taxa_pr2_otu) %>%
    rownames_to_column(var = "ReferenceSequence") %>%
    right_join(fna_df_otu, by = "ReferenceSequence") %>%
    unite(Taxon_dada2_boot0, starts_with("tax."), sep = ";") %>%
    unite(Confidence_dada2, starts_with("boot."), sep = ";") %>%
    left_join(otu_table, by = "Feature.ID") %>%
    select(
        Feature.ID,
        Taxon_qiime2 = Taxon,
        Conf_qiime2 = Confidence,
        Taxon_dada2_boot0,
        Conf_dada2_boot0 = Confidence_dada2,
        everything()
    ) %>%
    data.frame()
# head(otu_wtax)

Joining, by = "ReferenceSequence"
Joining, by = "Feature.ID"


In [29]:
# Write the compiled ASV and OTU tables as tab-delimited text files.
# The output location is passed positionally (second argument): readr 1.4.0
# renamed that argument from `path` to `file`, and the positional form works
# under either name.
write_delim(asv_wtax, "/vortexfs1/omics/huber/shu/slo-pier-weekly/slo-pier-ASV-counttable-wtax-14-08-2020.txt", delim = "\t")
write_delim(otu_wtax, "/vortexfs1/omics/huber/shu/slo-pier-weekly/slo-pier-OTU-counttable-wtax-14-08-2020.txt", delim = "\t")

In [None]:
# 14-08-2020

In [34]:
# Preview the first six rows of the compiled OTU table
head(otu_wtax, n = 6L)

Feature.ID,Taxon_qiime2,Conf_qiime2,Taxon_dada2_boot0,Conf_dada2_boot0,ReferenceSequence,AP.AV17.AV180216.18S,AP.AV23.AV180316.18S,AV12.18S,AV13.18S,...,AV78.18S,AV81.18S,AV84.18S,AV90.18S,AP.AV29.AV180420.18S,AP.AV41.AV180518.18S,AV32.18S,AV35.18S,AV56.18S,NEGPCR18SControl.18S
c145e8099f5a2f17debf82f9bbda6db5a220543e,Eukaryota;Alveolata;Dinoflagellata;Dinophyceae;Gymnodiniales;Gymnodiniaceae;Margalefidinium;Margalefidinium_fulvescens;,0.8200643,Eukaryota;Alveolata;Dinoflagellata;Dinophyceae;Gymnodiniales;Gymnodiniaceae;Margalefidinium;Margalefidinium_fulvescens,100;100;100;100;100;100;100;100,AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCCGTTCGAGTTCGTACCTCCCCTTGGGGGTTGGTGTCGAGCTCGAGCCTTTCTGGGTGTATACGTGCGTACTTCATTGTATGACGTATTCAACCCGGACTTTTACTTTGAGGAAATTAGAGTGTTTCAGGCAGGCAAACGCCTTGAATACATTAGCATGGAATAATAAGATAAGACTTTGGTCTTGTTTGTTGGTTCATAGACCGAAGTAATGATTAATAGGGATAGTTGGGGGCATTCGTATTTAACTGTCAGAGGTGAAATTCTTGGATTTGTTAAAGACGAACTACTGCGAAAGCATTTGCCAAGGATGTTTTCA,584,284,1172,1619,...,29811,8140,902,5656,2,0,2,0,3,0
2b17741c943bfa5057c2ba8ddced03eacd595b41,Eukaryota;Alveolata;Dinoflagellata;Dinophyceae,0.999963,Eukaryota;Alveolata;Dinoflagellata;Dinophyceae;Gymnodiniales;Gymnodiniaceae;Gyrodinium;Gyrodinium_spirale,100;100;100;100;100;100;100;88,AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGCTGAGGACGACCGGTCCGCCCTCTGGGTGAGTATCTGGCTTGGCCTTGGCATCTTCTTGGAGAACGTATCTGCACTTGACTGTGTGGTGCGGTATCCAGGACTTTTACTTTGAGGAAATTAGAGTGTTTCAAGCAGGCACACGCCTTGAATACATTAGCATGGAATAATAAGATAGGACCTTGGTTCTATTTTGTTGGTTTCTAGAGCTGAGGTAATGATTAATAGGGATAGTTGGGGGCATTCGTATTTAACTGTCAGAGGTGAAATTCTTGGATTTGTTAAAGACGGACTACTGCGAAAGCATTTGCCAAGGATGTTTTCA,2000,1246,1475,13380,...,462,2181,3063,15725,9449,5928,50249,2318,21,0
a940c0f810f1168ddf166d4f9e17e069a1036719,Eukaryota;Archaeplastida,0.9964566,Eukaryota;Archaeplastida;Chlorophyta;Mamiellophyceae;Mamiellales;Mamiellaceae;Micromonas;Micromonas_bravo_B1,100;100;100;100;100;100;100;71,AGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCGGTTGAGAACGGCCGGTCCGCCGTTTGGTGTGCACTGGCTGGTTTCAACTTCCTGTAGAGGACGCGCTCTGGCTTCATCGCTGGACGCGGAGTCTACGTGGTTACTTTGAAAAAATTAGAGTGTTCAAAGCGGGCTTACGCTTGAATATTTCAGCATGGAATAACACTATAGGACTCCTGTCCTATTTCGTTGGTCTCGGGACGGGAGTAATGATTAAGAGGAACAGTTGGGGGCATTCGTATTTCATTGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACTTCTGCGAAAGCATTTGCCAAGGATGTTTTCA,117,17,1028,98,...,4,7,17,6476,2,510,628,214,1,0
59f6fc378a02095eb9a6c5fa614439f511ce9ccb,Eukaryota;Archaeplastida,0.9970485,Eukaryota;Archaeplastida;Chlorophyta;Mamiellophyceae;Mamiellales;Bathycoccaceae;Ostreococcus;Ostreococcus_lucimarinus,100;100;100;100;100;100;100;67,AGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTCGGATTTTGGCTGAGAACGGTCGGTCCGCCGTTAGGTGTGCACTGACTGGTCTCAGCTTCCTGGTGAGGAGGTGTGCTTCATCGCCACTTAGTCACCGTGGTTACTTTGAAAAAATTAGAGTGTTCAAAGCGGGCTTACGCTTGAATATATTAGCATGGAATAACACCATAGGACTCCTGTCCTATTTCGTTGGTCTCGGGACGGGAGTAATGATTAAGAGGAACAGTTGGGGGCATTCGTATTTCATTGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACTTCTGCGAAAGCATTTGCCAAGGATGTTTTCA,193,87,3622,628,...,13,2,144,15365,6,95,346,168,1,0
cefd772c8ba271fc75492d41ff67d76dbd18e4dc,Eukaryota;Stramenopiles;Ochrophyta;Bacillariophyta;Bacillariophyta_X;Polar-centric-Mediophyceae,0.9999457,Eukaryota;Stramenopiles;Ochrophyta;Bacillariophyta;Bacillariophyta_X;Polar-centric-Mediophyceae;Thalassiosira;Thalassiosira_sp.,100;100;100;100;100;100;48;40,AGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGGCAGGAGCGACCGGTCACACACTCTGTGTGTGAACTTGTGTTGTCTCTGGCCATCCTTGGGGAGATCCTGTTTGGCATTAAGTTGTCGGGCAGGGGATACCCATCGTTTACTGTGAAAAAATTAGAGTGTTTAAAGCAGGCTTATGCCGTTGAATATATTAGCATGGAATAATAAGATAGGACTTCGGAACTATTTTGTTGGTTTGCGTTACGAAGTAATGATTAATAGGGACAGTTGGGGGTATTCGTATTTCGTTGTCAGAGGTGAAATTCTTGGATTTCCGAAAGACGAACTACTGCGAAAGCATTTACCAAGGATGTTTTCA,89,206,468,27999,...,29,0,0,2979,3991,207,680,19895,0,5
ae9f200e11d4c6035213bf838782d6d106acd19c,Eukaryota;Alveolata;Dinoflagellata;Dinophyceae;Gonyaulacales;Ceratiaceae;Tripos,0.9998879,Eukaryota;Alveolata;Dinoflagellata;Dinophyceae;Gonyaulacales;Ceratiaceae;Tripos;Tripos_fusus,100;100;100;100;100;100;100;96,AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGCTGAAGCAAACCGGTCCGCCCTCTGGGTGAGCATCTGGCTTTATTTTGGCATATGCTTAGACTTTGCAGCTGCACTTGACTGTGTGGTGTGAAGGCTAAGCCATTTACTTTGAGGAAATCAGAGTGTTTCAAGCAGGCAATTGCCTTGAATACACTAGCATGGAATAATATGATATGACTGTGGTTTTATTTTGTTGGCTTCTAGAATTAGAGTAATGGTTAATAGGGATAGTTGGGGGCATTCATATTTAACTGTCAGAGGTGAAATTCTTGGATTTGTTAAGGATGAACGACTGCGAAAGCATTTGCCAAGGATGCTTTCA,550,141,45,62,...,11,187,81,210,5,18,24,3,18302,0
