Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: gh-pages
Fetching contributors…

Cannot retrieve contributors at this time

543 lines (388 sloc) 12.951 kb
title subtitle date author job framework highlighter hitheme widgets mode assets
Intro to R
data manipluation, vizualization, communities, networks
2013-07-31
Scott Chamberlain
io2012
highlight.js
tomorrow
selfcontained

Where to find this presentation




The presentation = http://recology.info/talks/sfu/workshop2




The code (written in R Markdown) = https://github.com/sckott/talks/blob/gh-pages/sfu/workshop2/index.Rmd


The plan

  • Data manipulation
  • General visualizations
  • Networks/graphs: analyses and viz
  • Community ecology: analyses and viz

Data manipulation





plyr + reshape2 =


Intro to subsetting





See here https://github.com/hadley/devtools/wiki/Subsetting


A few quick examples

library(reshape2)
head(iris)[1:3,]
iris_m <- melt(iris, id.vars=5)
head(iris_m)[1:3,1:3]
dcast(iris_m, Species ~ variable, fun.aggregate=sd)

What can we do with this?

manipulate, then plot

iris_m <- melt(iris, id.vars=5)
ggplot(iris_m, aes(Species, value)) +
  geom_boxplot() +
  facet_wrap(~ variable) +
  theme_grey(base_size=20)

Your turn





Read in a data file, melt the data, then cast the data, applying a function to that data.


plyr - split, apply, combine

Function naming scheme: first letter of source R object + first letter of output R object + ply

  • data.frame = ddply, dlply, daply
  • list = llply, ldply, laply
  • array/vector: aaply, alply, adply
  • matrix: maply , mlply, mdply

data <- data.frame(x = c("a", "a", "b", "b", "c", "c"), y = c(2, 4, 0, 5, 5, 10))
ddply(data, .(x), summarise, mean(y))
data <- data.frame(x = c("a", "a", "b", "b", "c", "c"), y = c(2, 4, 0, 5, 5, 10))
data_list <- list(data, data, data)
ldply(data_list, function(z) ddply(z, .(x), summarise, mean(y)))

DIY plyr





Perform split-apply-combine for an R object of your choice.


ggplot2 terminology

  • ggplot - The main function where you specify the dataset and variables to plot
  • geom - geometric objects
    • geom_point(), geom_bar(), geom_density(), geom_line(), geom_area()
  • aes - aesthetics
    • shape, alpha (transparency), color, fill, linetype
  • scale - Define how your data will be plotted
    • continuous, discrete, log

install.packages("ggplot2")
library(ggplot2)
ggplot(data=iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point() + 
  theme_grey(base_size=20)

Building blocks, mix and match

ggplot(data=iris, aes(Sepal.Length, Sepal.Width, colour=Species)) +
  geom_point() + 
  theme_grey(base_size=20)

OR, just do

p <- ggplot(data=iris, aes(Sepal.Length, Sepal.Width, colour=Species))
p <- p + geom_point()
p + theme_grey(base_size=20)

Color by species

ggplot(data=iris, aes(Sepal.Length, Sepal.Width, colour=Species)) +
  geom_point() +
  theme_grey(base_size=20)

--- &twocol

Adjust the size (and color) of points

*** =left

ggplot(data=iris, aes(Sepal.Length, Sepal.Width, colour=Species)) +
  geom_point(size = 6) +
  theme_grey(base_size=20)

*** =right

ggplot(data=iris, aes(Sepal.Length, Sepal.Width, color=Species, size=Species)) +
  geom_point() +
  theme_grey(base_size=20)

Facet by species

ggplot(data=iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point() +
  facet_wrap(~ Species) +
  theme_grey(base_size=20)

--- &twocol

Combine geoms

*** =left

Combine geom_boxplot and geom_point

ggplot(data=iris, aes(Species, Petal.Width)) +
  geom_boxplot() +
  geom_point()

*** =right

Order matters!

ggplot(data=iris, aes(Species, Petal.Width)) +
  geom_point() +
  geom_boxplot()

Make this plot

ggplot(data=iris, aes(Petal.Width, fill=Species)) +
  geom_histogram() +
  facet_wrap(~ Species) +
  theme_grey(base_size=20)

Saving plots using ggplot2

If the plot is on your screen

ggsave('~/path/to/figure/filename.png')

If your plot is assigned to an object

ggsave(plot1, file = "~/path/to/figure/filename.png")

Specify a size

ggsave(file = "/path/to/figure/filename.png", width = 6,
height =4)

Or any format (pdf, png, eps, svg, jpg)

ggsave(file = "/path/to/figure/filename.jpg")
ggsave(file = "/path/to/figure/filename.pdf")

Networks

  • Visualizations
  • Analyses
    • Network level metrics
    • Species level metrics

Network vizualizations

library(bipartite)
plotweb(small1976)

Another way to visualize networks

visweb(small1976, labsize=2)

Variety of other plotting options

# Eg 1
plotweb(small1976, high.lablength=3, low.lablength=0, arrow="down")

# Eg 2
plotweb(small1976, text.rot=90, arrow="down.center", col.interaction="wheat2",
  y.lim=c(-1,2.5))

# Eg 3
low.abun = round(runif(dim(small1976)[1],1,40))
names(low.abun) <- rownames(small1976)
plotweb(small1976, text.rot=90, low.abun=low.abun, col.interaction="purple", 
  y.width.low=0.05, y.width.high=0.05)

Network metrics - of the whole community

Start with the bipartite package. Others include X, Y, and Z.

res <- networklevel(small1976, index="binary")
res

Let's compare some metrics among networks!!!

library(reshape2)
networks <- list(Safariland,barrett1987,bezerra2009,elberling1999,inouye1988,kato1990,kevan1970)
res <- llply(networks, function(x) networklevel(x, index=c('connectance','links per species','nestedness')))
names(res) <- c('Safariland','barrett1987','bezerra2009','elberling1999','inouye1988','kato1990','kevan1970')
df <- melt(ldply(res), id.vars='.id')
ggplot(df, aes(.id, value)) +
  geom_point(size=4, alpha=0.6) + 
  facet_wrap(~variable, scales="free") +
  theme_bw(base_size=20) +
  theme(axis.text.x = element_blank())

Make this plot

networks <- list(Safariland,barrett1987,bezerra2009,elberling1999,inouye1988,kato1990,kevan1970)
res <- llply(networks, function(x) networklevel(x, index=c('web asymmetry','H2')))
names(res) <- c('Safariland','barrett1987','bezerra2009','elberling1999','inouye1988','kato1990','kevan1970')
df <- melt(ldply(res), id.vars='.id')
ggplot(df, aes(.id, value, color=.id)) +
  geom_point(size=4, alpha=0.6) + 
  facet_wrap(~variable, scales="free") +
  theme_bw(base_size=20) +
  theme(axis.text.x = element_blank())

Species level metrics

splevel <- specieslevel(small1976, index="degree")
head(splevel[[1]], n=3)
head(splevel[[2]], n=3)

Species level metrics - roll your own

  • Pick a network in the bipartite package
  • Calculate one species level metric for all species in that network
  • Plot the metrics for each species, both plants and pollinators

Community structure

  • Diversity indices
  • Rarefaction - comparing diverity in different samples
  • Ordination

--- &twocol align1:left

Diversity indices

*** =left

Shannon-Weaver

library(vegan)
data(BCI)
bci_subset <- BCI[1:3,]
head(bci_subset)[,10:11]
(H <- diversity(bci_subset))

*** =right

Simpson

diversity(bci_subset, index="simpson")

Evenness

H <- diversity(bci_subset)
H/log(specnumber(bci_subset))

Rarefaction

data(BCI)
nosp <- specnumber(BCI)
raremax <- min(rowSums(BCI))
nosp_rare <- rarefy(BCI, raremax)
df <- data.frame(nosp, nosp_rare)
head(df)

Make this plot

ggplot(df, aes(nosp, nosp_rare)) +
  geom_point(size=4) +
  theme_grey(base_size=20) +
  scale_x_continuous(limits=c(70,110)) +
  scale_y_continuous(limits=c(70,110))

--- &twocol

Rarefaction curves

*** =left

raremax <- min(rowSums(BCI))
rarecurve(BCI, step=20, sample=raremax, col="blue", cex=0.6)

*** =right

data(BCI)
sp1 <- specaccum(BCI)
sp2 <- specaccum(BCI, "random")
plot(sp1, ci.type="poly", col="blue", lwd=2, ci.lty=0, ci.col="lightblue")
boxplot(sp2, col="yellow", add=TRUE, pch="+")

Ordination

data(dune)
ord <- metaMDS(dune, trace=0)
plot(ord)

In ggplot2!

ord_axes <- data.frame(scores(ord))
ord_spp <- data.frame(scores(ord, display="species"))
ggplot(ord_axes, aes(NMDS1, NMDS2)) +
  geom_point(shape=21) +
  geom_point(data=ord_spp, aes(NMDS1, NMDS2), shape=3, colour="red") +
  coord_fixed()

But I want species names

ord_spp$spp <- row.names(ord_spp)
ggplot(ord_axes, aes(NMDS1, NMDS2)) +
  geom_point(shape=21) +
  geom_text(data=ord_spp, aes(NMDS1, NMDS2, label=spp), colour="red") +
  coord_fixed()

--- &twocol

Comparison between treatments

*** =left

data(dune)
data(dune.env)
dune[1:3,1:3]
dune.env[1:3,1:3]
help(dune.env)

*** =right

data.frame(adonis(dune ~ Management*A1, data=dune.env, permutations=99)$aov.tab)

Do your own ordination

  • Take one of the datasets in the vegan pacage
  • Pick an ordination method
    • cca: constrained ordination
    • add environmental variables to ordination, see envfit
    • adonis: Permutational Multivariate Analysis of Variance
  • Try it out!

Resources

Jump to Line
Something went wrong with that request. Please try again.