### Re-calculate DNA and RNA sequence abundances for diel dataset - for Coesel et al.
SKH - 21-07-2020

In [1]:
library(tidyverse)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.1     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.1     [32m✔[39m [34mdplyr  [39m 1.0.0
[32m✔[39m [34mtidyr  [39m 1.1.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
# Load the reference list of higher/lower taxonomic groups to match against,
# then display it.
matchto <- read.delim("coesel-taxa-match.txt")
matchto

higher,lower
<fct>,<fct>
Amoebozoa,Amoebozoa
Opisthokonta,"Animalia, Fungi, Choanozoa"
Rhizaria,"Chlorarachneae, Foraminifera"
Alveolata,Ciliophora
Alveolata,Apicomplexa
Alveolata,Dinophyceae
Cryptophyta,Cryptophyta
Haptophyta,Haptophyta
Stramenopiles,Bigyra
Stramenopiles,Bacillariophyceae


In [3]:
# Read the tab-delimited OTU table, skipping the first line of the file.
otu_table <- read.delim(
  "OTUtable_Diel18S_ALOHA.txt",
  sep = "\t",
  skip = 1
)
# Preview the first two rows.
otu_table[1:2, ]

Unnamed: 0_level_0,OTU.ID,Diel.DNA.19,Diel.DNA.4,Diel.RNA.6,Diel.DNA.10,Diel.DNA.11,Diel.DNA.12,Diel.DNA.13,Diel.DNA.14,Diel.DNA.15,⋯,Diel.RNA.11,Diel.RNA.13,Diel.RNA.2,Diel.RNA.3,Diel.RNA.4,Diel.RNA.5,Diel.RNA.7,Diel.RNA.8,taxonomy,rep.seq
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<fct>
1,AB353770.1.1740_U,1,1,1,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,Eukaryota; Alveolata; Dinophyta; Dinophyceae; Dinophyceae_X; Dinophyceae_XX; Dinophyceae_XXX; Dinophyceae_XXX+sp.,AGCTCCAATAGCGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGGCATCTTCTTGGAGAACGTAACTGCACTTGACTGTGTGGTGCGGTACTCAAGACTTTTACTTTGAGGAAATTAGAGTGTTTCAAGCAGGCGCATGCCTTGAATACATTAGCATGGAATAATGAGATAGGACCTTGGTTCTATTTTGTTGGTTTCTAGAGCTGAGGTAATGATTAATAGGGATAGTTGGGGGCATTCGTATTTAACTGTCAGAGGTGAAATTCTTGGATTTGTTAAAGACGGACTACTGCGAAAGCATTTGCCAAGGATGTTTTCATTGATCAAGAACGAAAGT
2,KF130109.1.1755_U,9,14,2,12,4,41,1,15,15,⋯,0,0,0,0,0,0,0,0,Eukaryota; Alveolata; Dinophyta; Syndiniales; Dino-Group-II; Dino-Group-II-Clade-14; Dino-Group-II-Clade-14_X; Dino-Group-II-Clade-14_X+sp.,AGCTCCAATAACGTATATTAAAGTTGTTGCGGTTAAAAAGCTCGTAGTTGGATTTCTGTTGAAAGCGATCGGTCCCCTCTCTGAGGGCGTATCTGACTCGCTTTTGGCATCCTCCTGTAGCACGTTTCTGTCCTTCACTGGATGGAGCGGGATGCAGGACTTTTACTTTGAGGAAATTCGAGTGTTCACAGCAGGCAATCGCCCTGAAGATATTAGCATGGAATAATAATATAGGACCTTGGTTCTATTTTGTTGGTTTCTAGAGCTGAGGTAATGATTAATAGGGATATTTGGGGGCATTCGTACTAACCGGTCAGAGGTGGAATTCTTGGATTCGGTTATGACGAACGACTGCGAAAGCATTTGTCAAGAATGTTTTCATTGATCAAGAACGAAAGT


In [4]:
# Reshape the OTU table to long format (one row per OTU x sample) and label
# each row with a "higher" and a "lower" taxonomic group.
# Within each case_when() the first matching pattern wins; taxonomy strings
# that match none of the patterns are labelled "OTHER".
diel_modified <- otu_table %>%
  select(Taxa = taxonomy, starts_with("Diel")) %>%
  pivot_longer(
    cols = starts_with("Diel"),
    names_to = "SAMPLE",
    values_to = "COUNT"
  ) %>%
  # Higher-level group
  mutate(
    higher = case_when(
      grepl("Alveolata", Taxa) ~ "Alveolata",
      grepl("Rhizaria", Taxa) ~ "Rhizaria",
      grepl("Opisthoko", Taxa) ~ "Opisthokonta",
      grepl("Archaeplastid", Taxa) ~ "Archaeplastida",
      grepl("Haptophyt", Taxa) ~ "Haptophyta",
      grepl("Stramenopil", Taxa) ~ "Stramenopiles",
      grepl("Cryptophyt", Taxa) ~ "Cryptophyta",
      TRUE ~ "OTHER"
    )
  ) %>%
  # Lower-level group; patterns sharing a label are combined with "|"
  mutate(
    lower = case_when(
      grepl("Ciliophora", Taxa) ~ "Ciliophora",
      # Syndiniales are reported under the Dinophyceae label
      grepl("Dinophyceae|Syndinial", Taxa) ~ "Dinophyceae",
      grepl("Cryptophyt", Taxa) ~ "Cryptophyta",
      grepl("Haptophyt", Taxa) ~ "Haptophyta",
      grepl("Apicomplexa", Taxa) ~ "Apicomplexa",
      grepl("Chlorarachne|Foraminife", Taxa) ~ "Chlorarachneae, Foraminifera",
      grepl("Chlorophyt", Taxa) ~ "Chlorophyta",
      grepl("Rhodophyta", Taxa) ~ "Rhodophyta",
      grepl("Bicosoe|Labyrinth|Blastocystis", Taxa) ~ "Bigyra",
      grepl("Bacillariophyt", Taxa) ~ "Bacillariophyceae",
      grepl("Chrysophyce", Taxa) ~ "Chrysophyceae",
      grepl("Dictyochophyc", Taxa) ~ "Dictyochophyceae",
      grepl("Pelagophy", Taxa) ~ "Pelagophyceae",
      grepl("Pinguioph", Taxa) ~ "Pinguiophyceae",
      grepl("Fungi|Metazoa|Choanoflagella", Taxa) ~ "Animalia, Fungi, Choanozoa",
      TRUE ~ "OTHER"
    )
  ) %>%
  data.frame()

In [5]:
# Show the higher/lower combinations assigned in the data, followed by the
# reference list they are meant to be matched to.
diel_modified %>%
  select(higher, lower) %>%
  distinct()
matchto

higher,lower
<chr>,<chr>
Alveolata,Dinophyceae
Alveolata,Ciliophora
Stramenopiles,Chrysophyceae
Opisthokonta,"Animalia, Fungi, Choanozoa"
Rhizaria,OTHER
Stramenopiles,OTHER
Haptophyta,Haptophyta
OTHER,OTHER
Stramenopiles,Pelagophyceae
Stramenopiles,Bacillariophyceae


higher,lower
<fct>,<fct>
Amoebozoa,Amoebozoa
Opisthokonta,"Animalia, Fungi, Choanozoa"
Rhizaria,"Chlorarachneae, Foraminifera"
Alveolata,Ciliophora
Alveolata,Apicomplexa
Alveolata,Dinophyceae
Cryptophyta,Cryptophyta
Haptophyta,Haptophyta
Stramenopiles,Bigyra
Stramenopiles,Bacillariophyceae


In [6]:
# Summarise counts per taxonomic group, separately for each MATERIAL value:
#   1. split the sample name on "." into DIEL / MATERIAL / SAMPLEID,
#   2. sum counts per (higher, lower) group within each sample,
#   3. take the mean and standard deviation of those totals across samples,
#   4. express each group's mean as a percentage of the summed means for its
#      MATERIAL.
diel_modified_summary <- diel_modified %>%
  separate(SAMPLE, c("DIEL", "MATERIAL", "SAMPLEID"), sep = "\\.") %>%
  select(-DIEL) %>%
  # Sum by taxonomic designation within samples
  group_by(higher, lower, MATERIAL, SAMPLEID) %>%
  # .groups = "drop" returns an ungrouped tibble directly, replacing the
  # earlier `.groups = "rowwise"` + ungroup() combination
  summarise(total = sum(COUNT), .groups = "drop") %>%
  # Mean and standard deviation across all samples
  group_by(higher, lower, MATERIAL) %>%
  summarise(
    MEAN = mean(total),
    STD = sd(total),
    .groups = "drop"
  ) %>%
  # Percentage of the summed group means within each MATERIAL, to 3 decimals
  group_by(MATERIAL) %>%
  mutate(PERC = round(100 * (MEAN / sum(MEAN)), 3)) %>%
  # Drop the grouping explicitly before converting to a plain data frame
  ungroup() %>%
  data.frame()
head(diel_modified_summary)

Unnamed: 0_level_0,higher,lower,MATERIAL,MEAN,STD,PERC
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
1,Alveolata,Ciliophora,DNA,2751.632,1833.729994,1.412
2,Alveolata,Ciliophora,RNA,25851.89,16315.878023,16.416
3,Alveolata,Dinophyceae,DNA,128822.8,30053.418028,66.082
4,Alveolata,Dinophyceae,RNA,47884.58,18669.39337,30.407
5,Alveolata,OTHER,DNA,140.2632,81.077358,0.072
6,Alveolata,OTHER,RNA,8.789474,9.009741,0.006


In [7]:
# Export the summary as two CSV files, one per MATERIAL value (DNA, RNA).
diel_modified_summary %>%
  filter(MATERIAL == "DNA") %>%
  write.csv(file = "DNA-taxa-summary-DIEL.csv")
diel_modified_summary %>%
  filter(MATERIAL == "RNA") %>%
  write.csv(file = "RNA-taxa-summary-DIEL.csv")

In [8]:
## END