In [39]:
library(tidyverse)
library(gtsummary)
library(pivottabler)
library(openxlsx)
library(phyloseq)
library(qiime2R)

In [40]:
# Read in the 16S qza files and clean them up for phyloseq
ASVtable_16S <- read_qza("/Users/stephanie.rosales/Documents/Tissueloss/SCTLD_MetaAnalysis/QiimeOutPut/2022_Process/tableV_BacArc_99_SCTLD.qza")
ASVtable_16S <- ASVtable_16S$data # Extract the count data from list
ASVtaxa_16S <- read_qza("/Users/stephanie.rosales/Documents/Tissueloss/SCTLD_MetaAnalysis/QiimeOutPut/2022_Process/taxaVsearch_rep-seqs-dn-99_SCTLD.qza")
taxtable_16S <- ASVtaxa_16S$data %>% as_tibble() %>% separate(Taxon, sep=";",
c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) 
# Convert taxonomy info to data frame with correct taxonomy labels
taxtable_16S <- taxtable_16S[-1,]
#asv_tab$asv_id <- rownames(asv_tab) # add a new column for ids


“Expected 7 pieces. Missing pieces filled with `NA` in 70468 rows [2, 7, 11, 13, 15, 17, 21, 23, 27, 29, 34, 38, 39, 41, 43, 46, 47, 50, 54, 56, ...].”


In [27]:
sample_info_tab_16S <- read.csv("/Users/stephanie.rosales/Documents/Tissueloss/SCTLD_MetaAnalysis/metadata_for_qiime/SCTLD_meta_analysis_metadata.csv", header = T, row.names = 1,
na.strings = c("", "NA"))  

In [43]:
physeq_16S <- phyloseq(otu_table(ASVtable_16S, taxa_are_rows= T),
tax_table(as.data.frame(taxtable_16S) %>% column_to_rownames("Feature.ID") %>%
as.matrix()), sample_data(sample_info_tab_16S))
physeq_16S
#taxa_names(physeq_16S) <- paste0("ASV", seq(ntaxa(physeq_16S)))

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 119456 taxa and 2425 samples ]
sample_data() Sample Data:       [ 2425 samples by 61 sample variables ]
tax_table()   Taxonomy Table:    [ 119456 taxa by 8 taxonomic ranks ]

In [50]:
sub_samples = c("TissueSlurry", "Mucus", "TissueSlurry_Skeleton", "Seawater", "Sediment")
ps.coral =subset_samples(physeq_16S, sample_type %in% sub_samples)
ps.coral

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 119456 taxa and 2362 samples ]
sample_data() Sample Data:       [ 2362 samples by 61 sample variables ]
tax_table()   Taxonomy Table:    [ 119456 taxa by 8 taxonomic ranks ]

In [48]:
df = sample_data(ps.coral) %>% as_tibble()

In [31]:
df = sample_info_tab_16S %>% 
filter(sample_type %in% sub_samples)

In [57]:
pt <- PivotTable$new(tableStyle=list("border-color"="black"),
                     headingStyle=list("color"="cornsilk", "background-color"="grey", 
                                       "font-style"="bold", "border-color"="lightgrey"), 
                     cellStyle=list("color"="black", "background-color"="white", 
                                    "border-color"="lightgrey"),
                     totalStyle=list("color"="black", "background-color"="whte", 
                                     "border-color"="black", "font-weight"="bold"))
pt$addData(df)
pt$addColumnDataGroups("species_code")
pt$addRowDataGroups("Biome")
pt$addRowDataGroups("collection_year")
pt$addRowDataGroups("tissue_type")
pt$defineCalculation(calculationName="sample_type", summariseExpression="n()")
pt$evaluatePivot()
pt


                            ACCE  ACPA  CNAT  DLAB  DSTO  MCAV  MMEA  OANN  OFAV  OFRA  PAST  PCLI  PSTR  SINT  SSID  NA   Total  
CoralReef  2016   DL                       2     1                                                                             3  
                  DU                       3     1                                                                             4  
                  Total                    5     2                                                                             7  
           2017   AH          23    17     3     7     4    16                 4           2                       8          84  
                  DL          18    12           8     8    19                 2                                   4          71  
                  DU          19     5                                                                             4          28  
                  Total       60    34     3    15    12    35                 6   

In [58]:
wb <- createWorkbook(creator = Sys.getenv("stephanie.rosales"))
addWorksheet(wb, "Data")
pt$writeToExcelWorksheet(wb=wb, wsName="Data", 
                         topRowNumber=2, leftMostColumnNumber=2, applyStyles=TRUE)
saveWorkbook(wb, file="/Users/stephanie.rosales/Documents/Tissueloss/SCTLD_MetaAnalysis/figs/test.xlsx", overwrite = TRUE)

In [59]:
df %>% count(sample_type)

sample_type,n
<chr>,<int>
Mucus,393
Seawater,198
Sediment,133
TissueSlurry,1585
TissueSlurry_Skeleton,53


In [63]:
df %>% 
filter(Alias!="AcroporaDisease")  %>% 
count(tissue_type)

tissue_type,n
<chr>,<int>
AH,981
DL,631
DU,325
Seawater,198
Sediment,133


In [61]:
df %>% count(primer_names, forwardPrimer)

primer_names,forwardPrimer,n
<chr>,<chr>,<int>
515F_archaea806R,GTGCCAGCMGCCGCGGTAA,984
515FCaporaso_806RCaporaso,GTGCCAGCMGCCGCGGTAA,49
515FPr_806RAp,GTGYCAGCMGCCGCGGTAA,1219
CS1-515F_CS2-806R,ACACTGACGACATGGTTCTACAGTGCCAGCMGCCGCGGTAA,79
S-D-Bact-0341-b-S-17_S-D-Bact-0785-a-A-21,CCTACGGGNGGCWGCAG,31


In [64]:
df %>% count(country)

country,n
<chr>,<int>
USA:FLKeys,1235
USA:Florida,49
USA:FTL,256
USA:Miami,365
USA:VirginIslands,457
