In [1]:
library(dplyr)
library(ggforce) # for 'geom_arc_bar'

"package 'dplyr' was built under R version 3.6.3"

Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


"package 'ggforce' was built under R version 3.6.3"
Loading required package: ggplot2

"package 'ggplot2' was built under R version 3.6.3"


In [2]:
# Taxonomy table; later cells read its "Genus" column and one column per
# control ID. The path is relative to the current working directory.
CONTdf <- read.csv("../taxonomy/Taxonomy_Complete.csv")

# Project locations. cgr_proj_dir is not referenced by the visible cells.
cgr_proj_dir <- "/DCEG/Projects/Microbiome/Analysis/NP0084_MB/20200410_2019.1"
proj_dir <- "/Users/slsevilla/Google Drive/MyDocuments_Current/Education/George Mason University/Dissertation/Data/Aim1/"

# Output folders; each keeps its trailing slash because file names are
# appended to them by plain string concatenation.
data_dir <- paste0(proj_dir, "output/data/")
img_dir <- paste0(proj_dir, "output/analysis/img/")
stats_dir <- paste0(proj_dir, "output/analysis/stats/")

<h2> Code </h2>

In [3]:
# Collapse one control column of a taxonomy table to per-genus totals.
#
# Args:
#   df_in: data frame containing a "Genus" column and a numeric column
#          named by `cont`.
#   cont:  name (string) of the control column to summarise.
#
# Returns:
#   A data frame with columns Genus, <cont> (summed within each genus) and
#   perc (<cont> * 100 rounded to 2 significant figures), restricted to rows
#   with perc > 0. Also prints the number of genera kept.
sub_df <- function(df_in, cont) {
    missing_cols <- setdiff(c("Genus", cont), colnames(df_in))
    if (length(missing_cols) > 0) {
        stop("sub_df: column(s) not found in input: ",
             paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    # Combine genera: sum the control column within each genus
    df_out <- aggregate(df_in[[cont]], list(df_in$Genus), sum)
    colnames(df_out) <- c("Genus", cont)

    # calculate percent with 2 sig figs
    df_out$perc <- signif(df_out[[cont]] * 100, 2)

    # Keep only genera that are present. Written in base R so the function
    # does not silently fall back to stats::filter() when dplyr is not
    # attached; which() drops NA comparisons and the row names are reset,
    # which is what dplyr::filter() returned here.
    df_out <- df_out[which(df_out$perc > 0), , drop = FALSE]
    rownames(df_out) <- NULL

    print(paste(cont, nrow(df_out), sep = " has # of genera "))
    df_out
}

In [4]:
# Build a donut chart of per-genus values for one control.
#
# Args:
#   df_in:   data frame with a "Genus" column and the column named by
#            `cont_in`. Defaults to the global CONTdf_sub, which is what the
#            zero-argument form of this function always used.
#   cont_in: name (string) of the value column. Defaults to "DZ35316", the
#            column that used to be hard-coded.
#
# Returns:
#   A ggplot object (nothing is written to disk).
donut <- function(df_in = CONTdf_sub, cont_in = "DZ35316") {
    if (!cont_in %in% colnames(df_in)) {
        stop("donut: column not found in input: ", cont_in, call. = FALSE)
    }

    # Copy the requested column to a fixed name so aes() needs no
    # string-to-symbol evaluation.
    plot_df <- data.frame(Genus = df_in$Genus, value = df_in[[cont_in]])

    ggplot(plot_df, aes(x = 2, y = value, fill = Genus)) +
        geom_bar(stat = "identity", color = "white") +
        coord_polar(theta = "y", start = 0) +
        # The bars are stacked, so the labels must be stacked as well;
        # vjust = 0.5 centres each label inside its own slice.
        geom_text(aes(label = value),
                  position = position_stack(vjust = 0.5), color = "white") +
        theme_void() +
        # x runs from 0.5 while the bar sits at x = 2, which leaves the hole
        xlim(0.5, 2.5)
}

In [5]:
# Draw a pie chart for one control with "Genus-perc" labels placed around
# the rim, and save it as a PNG under img_dir.
#
# Args:
#   df_in:   data frame with "Genus", "perc" and the column named by
#            `cont_in` (the shape produced by sub_df()).
#   cont_in: name (string) of the value column; also used in the file name.
#
# Returns:
#   The value of ggsave(), invisibly. Side effect: writes
#   <img_dir>taxplot_nl_<cont_in>.png.
plot_label <- function(df_in, cont_in) {
    if (!cont_in %in% colnames(df_in)) {
        stop("plot_label: column not found in input: ", cont_in, call. = FALSE)
    }

    # Use the data that was passed in, not the global CONTdf_sub / DZ35316
    values <- df_in[[cont_in]]
    arc_end <- 2 * pi * cumsum(values) / sum(values)

    df <- df_in
    df$end <- arc_end
    # Each slice starts where the previous one ended; the first starts at 0
    df$start <- c(0, arc_end)[seq_along(arc_end)]
    df$middle <- 0.5 * (df$start + df$end)
    # Justify labels away from the pie depending on which side they sit on
    df$hjust <- ifelse(df$middle > pi, 1, 0)
    df$vjust <- ifelse(df$middle < pi / 2 | df$middle > 3 * pi / 2, 0, 1.5)

    p <- ggplot(df) +
        geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = 0, r = 1,
                         start = start, end = end, fill = Genus)) +
        geom_text(aes(x = 1 * sin(middle), y = 1.01 * cos(middle),
                      label = paste(Genus, perc, sep = "-"),
                      hjust = hjust, vjust = vjust)) +
        coord_fixed() +
        scale_x_continuous(limits = c(-1.5, 1.4),  # Adjust so labels are not cut off
                           name = "", breaks = NULL, labels = NULL) +
        scale_y_continuous(limits = c(-1, 1),      # Adjust so labels are not cut off
                           name = "", breaks = NULL, labels = NULL)

    aspect_ratio <- 2.5
    height_in <- 5
    ggsave(filename = paste0(img_dir, "taxplot_nl_", cont_in, ".png"),
           plot = p, device = "png",
           height = height_in, width = height_in * aspect_ratio,
           units = "in", dpi = 500)
}

In [6]:
# Build the unlabelled pie for one control, extract its legend, and save
# only the legend as a PNG under img_dir.
#
# Args:
#   df_in:   data frame with "Genus", "perc" and the column named by
#            `cont_in` (the shape produced by sub_df()).
#   cont_in: name (string) of the value column; also used for the legend
#            title and the file name.
#
# Returns:
#   The value of ggsave(), invisibly. Side effect: writes
#   <img_dir>taxplot_legend_<cont_in>.png.
plot_nolabel <- function(df_in, cont_in) {
    if (!cont_in %in% colnames(df_in)) {
        stop("plot_nolabel: column not found in input: ", cont_in,
             call. = FALSE)
    }

    values <- df_in[[cont_in]]
    arc_end <- 2 * pi * cumsum(values) / sum(values)

    df <- df_in
    df$end <- arc_end
    # Each slice starts where the previous one ended; the first starts at 0
    df$start <- c(0, arc_end)[seq_along(arc_end)]

    p <- ggplot(df) +
        geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = 0, r = 1,
                         start = start, end = end, fill = Genus)) +
        coord_fixed() +
        theme(legend.title = element_text(size = 16, face = "bold"),
              panel.background = element_rect(fill = "white"),
              legend.text = element_text(size = 14)) +
        scale_x_continuous(limits = c(-1.5, 1.4),  # Adjust so labels are not cut off
                           name = "", breaks = NULL, labels = NULL) +
        scale_y_continuous(limits = c(-1, 1),      # Adjust so labels are not cut off
                           name = "", breaks = NULL, labels = NULL)

    # Title the legend from the argument rather than a global `cont`.
    # Labels are given in row order of df. NOTE(review): this assumes the
    # rows are in the same order as the fill scale's levels - confirm if the
    # input is ever not sorted by Genus.
    p <- p + scale_fill_discrete(
        name = paste0(cont_in, " Mock Community"),
        labels = paste0(df$Genus, " ", df$perc, "%")
    )

    aspect_ratio <- 2.5
    height_in <- 5

    # Only the legend is written here.
    legend_save <- cowplot::get_legend(p)
    ggsave(filename = paste0(img_dir, "taxplot_legend_", cont_in, ".png"),
           plot = legend_save, device = "png",
           height = height_in, width = height_in * aspect_ratio,
           units = "in", dpi = 500)
}

In [7]:
# Save the pie for one control without a legend, then save the legend
# separately by calling plot_nolabel().
#
# Args:
#   df_in:   data frame with "Genus", "perc" and the column named by
#            `cont_in` (the shape produced by sub_df()).
#   cont_in: name (string) of the value column; also used in the file names.
#
# Returns:
#   The value of plot_nolabel(), invisibly. Side effects: writes
#   <img_dir>taxplot_<cont_in>.png and whatever plot_nolabel() writes.
plot_nolabel_legsep <- function(df_in, cont_in) {
    if (!cont_in %in% colnames(df_in)) {
        stop("plot_nolabel_legsep: column not found in input: ", cont_in,
             call. = FALSE)
    }

    values <- df_in[[cont_in]]
    arc_end <- 2 * pi * cumsum(values) / sum(values)

    df <- df_in
    df$end <- arc_end
    # Each slice starts where the previous one ended; the first starts at 0
    df$start <- c(0, arc_end)[seq_along(arc_end)]

    p <- ggplot(df) +
        geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = 0, r = 1,
                         start = start, end = end, fill = Genus)) +
        coord_fixed() +
        theme(panel.background = element_rect(fill = "white"),
              legend.position = "none") +
        scale_x_continuous(limits = c(-1.5, 1.4),  # Adjust so labels are not cut off
                           name = "", breaks = NULL, labels = NULL) +
        scale_y_continuous(limits = c(-1, 1),      # Adjust so labels are not cut off
                           name = "", breaks = NULL, labels = NULL)

    # Same fill scale as the legend plot; the name comes from the argument
    # rather than a global `cont`.
    p <- p + scale_fill_discrete(
        name = paste0(cont_in, " Mock Community"),
        labels = paste0(df$Genus, " ", df$perc, "%")
    )

    aspect_ratio <- 2.5
    height_in <- 5
    ggsave(filename = paste0(img_dir, "taxplot_", cont_in, ".png"),
           plot = p, device = "png",
           height = height_in, width = height_in * aspect_ratio,
           units = "in", dpi = 500)

    # Legend goes to its own file
    invisible(plot_nolabel(df_in, cont_in))
}

<h2> Create Taxonomy Plots </h2>

In [8]:
# Control IDs; each must be a column of CONTdf
cont_list <- c(
    "DZ35322", "DZ35316",
    "MSA1000", "MSA1001", "MSA1002", "MSA1003",
    "D6300", "D6305", "D6306", "D6310", "D6311",
    "D6300_Updated", "D6310_Updated"
)

# Keep the names `cont` and `CONTdf_sub`: helpers in this notebook have read
# them as globals, so renaming either can silently change the plots.
for (cont in cont_list) {
    CONTdf_sub <- sub_df(CONTdf, cont)
    plot_nolabel_legsep(CONTdf_sub, cont)
}

[1] "DZ35322 has # of genera 19"
[1] "DZ35316 has # of genera 22"
[1] "MSA1000 has # of genera 10"
[1] "MSA1001 has # of genera 10"
[1] "MSA1002 has # of genera 18"
[1] "MSA1003 has # of genera 18"
[1] "D6300 has # of genera 8"
[1] "D6305 has # of genera 8"
[1] "D6306 has # of genera 8"
[1] "D6310 has # of genera 8"
[1] "D6311 has # of genera 8"
[1] "D6300_Updated has # of genera 7"
[1] "D6310_Updated has # of genera 7"
