Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
543 lines (388 sloc) 12.6 KB
title subtitle date author job framework highlighter hitheme widgets mode assets
Intro to R
data manipluation, vizualization, communities, networks
2013-07-31
Scott Chamberlain
io2012
highlight.js
tomorrow
selfcontained

Where to find this presentation




The presentation = http://recology.info/talks/sfu/workshop2




The code (written in R Markdown) = https://github.com/sckott/talks/blob/gh-pages/sfu/workshop2/index.Rmd


The plan

  • Data manipulation
  • General visualizations
  • Networks/graphs: analyses and viz
  • Community ecology: analyses and viz

Data manipulation





plyr + reshape2 =


Intro to subsetting





See here https://github.com/hadley/devtools/wiki/Subsetting


A few quick examples

library(reshape2)
head(iris)[1:3,]
iris_m <- melt(iris, id.vars=5)
head(iris_m)[1:3,1:3]
dcast(iris_m, Species ~ variable, fun.aggregate=sd)

What can we do with this?

manipulate, then plot

iris_m <- melt(iris, id.vars=5)
ggplot(iris_m, aes(Species, value)) +
  geom_boxplot() +
  facet_wrap(~ variable) +
  theme_grey(base_size=20)

Your turn





Read in a data file, melt the data, then cast the data, applying a function to that data.


plyr - split, apply, combine

Function naming scheme: first letter of source R object + first letter of output R object + ply

  • data.frame = ddply, dlply, daply
  • list = llply, ldply, laply
  • array/vector: aaply, alply, adply
  • matrix: maply , mlply, mdply

data <- data.frame(x = c("a", "a", "b", "b", "c", "c"), y = c(2, 4, 0, 5, 5, 10))
ddply(data, .(x), summarise, mean(y))
data <- data.frame(x = c("a", "a", "b", "b", "c", "c"), y = c(2, 4, 0, 5, 5, 10))
data_list <- list(data, data, data)
ldply(data_list, function(z) ddply(z, .(x), summarise, mean(y)))

DIY plyr





Perform split-apply-combine for an R object of your choice.


ggplot2 terminology

  • ggplot - The main function where you specify the dataset and variables to plot
  • geom - geometric objects
    • geom_point(), geom_bar(), geom_density(), geom_line(), geom_area()
  • aes - aesthetics
    • shape, alpha (transparency), color, fill, linetype
  • scale - Define how your data will be plotted
    • continuous, discrete, log

install.packages("ggplot2")
library(ggplot2)
ggplot(data=iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point() + 
  theme_grey(base_size=20)

Building blocks, mix and match

ggplot(data=iris, aes(Sepal.Length, Sepal.Width, colour=Species)) +
  geom_point() + 
  theme_grey(base_size=20)

OR, just do

p <- ggplot(data=iris, aes(Sepal.Length, Sepal.Width, colour=Species))
p <- p + geom_point()
p + theme_grey(base_size=20)

Color by species

ggplot(data=iris, aes(Sepal.Length, Sepal.Width, colour=Species)) +
  geom_point() +
  theme_grey(base_size=20)

--- &twocol

Adjust the size (and color) of points

*** =left

ggplot(data=iris, aes(Sepal.Length, Sepal.Width, colour=Species)) +
  geom_point(size = 6) +
  theme_grey(base_size=20)

*** =right

ggplot(data=iris, aes(Sepal.Length, Sepal.Width, color=Species, size=Species)) +
  geom_point() +
  theme_grey(base_size=20)

Facet by species

ggplot(data=iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point() +
  facet_wrap(~ Species) +
  theme_grey(base_size=20)

--- &twocol

Combine geoms

*** =left

Combine geom_boxplot and geom_point

ggplot(data=iris, aes(Species, Petal.Width)) +
  geom_boxplot() +
  geom_point()

*** =right

Order matters!

ggplot(data=iris, aes(Species, Petal.Width)) +
  geom_point() +
  geom_boxplot()

Make this plot

ggplot(data=iris, aes(Petal.Width, fill=Species)) +
  geom_histogram() +
  facet_wrap(~ Species) +
  theme_grey(base_size=20)

Saving plots using ggplot2

If the plot is on your screen

ggsave('~/path/to/figure/filename.png')

If your plot is assigned to an object

ggsave(plot1, file = "~/path/to/figure/filename.png")

Specify a size

ggsave(file = "/path/to/figure/filename.png", width = 6,
height =4)

Or any format (pdf, png, eps, svg, jpg)

ggsave(file = "/path/to/figure/filename.jpg")
ggsave(file = "/path/to/figure/filename.pdf")

Networks

  • Visualizations
  • Analyses
    • Network level metrics
    • Species level metrics

Network vizualizations

library(bipartite)
plotweb(small1976)

Another way to visualize networks

visweb(small1976, labsize=2)

Variety of other plotting options

# Eg 1
plotweb(small1976, high.lablength=3, low.lablength=0, arrow="down")

# Eg 2
plotweb(small1976, text.rot=90, arrow="down.center", col.interaction="wheat2",
  y.lim=c(-1,2.5))

# Eg 3
low.abun = round(runif(dim(small1976)[1],1,40))
names(low.abun) <- rownames(small1976)
plotweb(small1976, text.rot=90, low.abun=low.abun, col.interaction="purple", 
  y.width.low=0.05, y.width.high=0.05)

Network metrics - of the whole community

Start with the bipartite package. Others include X, Y, and Z.

res <- networklevel(small1976, index="binary")
res

Let's compare some metrics among networks!!!

library(reshape2)
networks <- list(Safariland,barrett1987,bezerra2009,elberling1999,inouye1988,kato1990,kevan1970)
res <- llply(networks, function(x) networklevel(x, index=c('connectance','links per species','nestedness')))
names(res) <- c('Safariland','barrett1987','bezerra2009','elberling1999','inouye1988','kato1990','kevan1970')
df <- melt(ldply(res), id.vars='.id')
ggplot(df, aes(.id, value)) +
  geom_point(size=4, alpha=0.6) + 
  facet_wrap(~variable, scales="free") +
  theme_bw(base_size=20) +
  theme(axis.text.x = element_blank())

Make this plot

networks <- list(Safariland,barrett1987,bezerra2009,elberling1999,inouye1988,kato1990,kevan1970)
res <- llply(networks, function(x) networklevel(x, index=c('web asymmetry','H2')))
names(res) <- c('Safariland','barrett1987','bezerra2009','elberling1999','inouye1988','kato1990','kevan1970')
df <- melt(ldply(res), id.vars='.id')
ggplot(df, aes(.id, value, color=.id)) +
  geom_point(size=4, alpha=0.6) + 
  facet_wrap(~variable, scales="free") +
  theme_bw(base_size=20) +
  theme(axis.text.x = element_blank())

Species level metrics

splevel <- specieslevel(small1976, index="degree")
head(splevel[[1]], n=3)
head(splevel[[2]], n=3)

Species level metrics - roll your own

  • Pick a network in the bipartite package
  • Calculate one species level metric for all species in that network
  • Plot the metrics for each species, both plants and pollinators

Community structure

  • Diversity indices
  • Rarefaction - comparing diverity in different samples
  • Ordination

--- &twocol align1:left

Diversity indices

*** =left

Shannon-Weaver

library(vegan)
data(BCI)
bci_subset <- BCI[1:3,]
head(bci_subset)[,10:11]
(H <- diversity(bci_subset))

*** =right

Simpson

diversity(bci_subset, index="simpson")

Evenness

H <- diversity(bci_subset)
H/log(specnumber(bci_subset))

Rarefaction

data(BCI)
nosp <- specnumber(BCI)
raremax <- min(rowSums(BCI))
nosp_rare <- rarefy(BCI, raremax)
df <- data.frame(nosp, nosp_rare)
head(df)

Make this plot

ggplot(df, aes(nosp, nosp_rare)) +
  geom_point(size=4) +
  theme_grey(base_size=20) +
  scale_x_continuous(limits=c(70,110)) +
  scale_y_continuous(limits=c(70,110))

--- &twocol

Rarefaction curves

*** =left

raremax <- min(rowSums(BCI))
rarecurve(BCI, step=20, sample=raremax, col="blue", cex=0.6)

*** =right

data(BCI)
sp1 <- specaccum(BCI)
sp2 <- specaccum(BCI, "random")
plot(sp1, ci.type="poly", col="blue", lwd=2, ci.lty=0, ci.col="lightblue")
boxplot(sp2, col="yellow", add=TRUE, pch="+")

Ordination

data(dune)
ord <- metaMDS(dune, trace=0)
plot(ord)

In ggplot2!

ord_axes <- data.frame(scores(ord))
ord_spp <- data.frame(scores(ord, display="species"))
ggplot(ord_axes, aes(NMDS1, NMDS2)) +
  geom_point(shape=21) +
  geom_point(data=ord_spp, aes(NMDS1, NMDS2), shape=3, colour="red") +
  coord_fixed()

But I want species names

ord_spp$spp <- row.names(ord_spp)
ggplot(ord_axes, aes(NMDS1, NMDS2)) +
  geom_point(shape=21) +
  geom_text(data=ord_spp, aes(NMDS1, NMDS2, label=spp), colour="red") +
  coord_fixed()

--- &twocol

Comparison between treatments

*** =left

data(dune)
data(dune.env)
dune[1:3,1:3]
dune.env[1:3,1:3]
help(dune.env)

*** =right

data.frame(adonis(dune ~ Management*A1, data=dune.env, permutations=99)$aov.tab)

Do your own ordination

  • Take one of the datasets in the vegan pacage
  • Pick an ordination method
    • cca: constrained ordination
    • add environmental variables to ordination, see envfit
    • adonis: Permutational Multivariate Analysis of Variance
  • Try it out!

Resources

Jump to Line
Something went wrong with that request. Please try again.