Bruce Swihart
November 5, 2014
[1] The original paper: Lasagna Plots: A Saucy Alternative to Spaghetti Plots
[2] The follow-up paper for SAS code: Lasagna Plots Made in Different (Statistical) Ovens
There is an appendix for [1] that has some R
code, but no one likes it. At the time the only option for supplemental materials was to put R code into a PDF, thus introducing a hurdle for easy usage. This repo on github has been long overdue. This repo is an R-package, and can be installed and ready to go with a few lines in R.
#Quick start!:
library(devtools)
install_github("swihart/lasagnar")
library(fields)
library(lasagnar)
library(ggplot2)
library(reshape2)
library(RColorBrewer)
library(colorspace)
## get me a matrix with rownames:
mat <- matrix(1:100, nrow=10, ncol=10, byrow=TRUE)[sample(1:10),]
rownames(mat) <- letters[1:nrow(mat)]
lasagna(mat)
Note: Windows machines will need to install Rtools. All machines will need to have run install.packages("devtools")
at the R prompt once.
#Base and ggplot2 implementation:
## the matrix containing data for Figure 02a
H.mat <- matrix(NA, nrow=4, ncol=6)
H.mat[1, 1:6] = 100*c(2, 1, 1, 1, 1, 2)
H.mat[2, 1:6] = 100*c(2, 2, 2, 3, 2, 1)
H.mat[3, 1:6] = 100*c(2, 2, 1, 1, 1, 3)
H.mat[4, 1:6] = 100*c(3, 3, 2, 1, 2, 3)
## set rownames with ids
rownames(H.mat)<-c("P1","T1","P2","T2")
## set colnames with time / location / column index:
colnames(H.mat)<-seq(ncol(H.mat))
## name your dimensions
names(dimnames(H.mat))<-c('Subject','Time')
## print out to see it!
H.mat
## Time
## Subject 1 2 3 4 5 6
## P1 200 100 100 100 100 200
## T1 200 200 200 300 200 100
## P2 200 200 100 100 100 300
## T2 300 300 200 100 200 300
## Fiddle with the margins:
par(mai = c(.34,.39,.34,.09))
## base:
lasagna(H.mat)
## ggplot:
gglasagna(H.mat)
## Choose a palette
palette <- brewer.pal(4, "PuOr")[-2]
lasagna(H.mat, col=palette)
gglasagna(H.mat, col=palette)
## Legends:
lasagna(H.mat, col=palette, legend=TRUE)
gglasagna(H.mat, col=palette, legend=TRUE)
## however, you may want lasagna_plain() and customize the plot how you wish
## with axes statements, etc:
## variables to specify the scalar CEX multiplier for title, labels, and axis:
fig02.main <- 1.75
fig02.lab <- 1.75
fig02.axis <- 1.75
##
## Base:
##
## initial lasagna_plain() call followed by may title() and axis() calls:
lasagna_plain(H.mat,
col=palette,
axes=F,
xlab = "",
ylab = "", cex.lab=fig02.lab, tck=0, mgp=c(0,0,0))
box()
title("(A) Initial Lasagna Plot", adj=0)
axis(1, seq(0,1,1/5), 1:6, cex.axis=fig02.axis, tck=0, mgp=c(0,.50,0))
axis(2, seq(0,1,1/3), rev(c("P1","T1","P2","T2")),las=1,
cex.axis=fig02.axis, tck=0, mgp=c(0,.2,0))
## next two axis() calls add grid-lines:
axis(1,
c(1/10,3/10,5/10,7/10,9/10),
lab=NA,
tck=1,
lty=1,
col="black")
axis(2,
c(1/6,3/6,5/6),
lab=NA,
tck=1,
lty=1,
col="black")
##
## ggplot2: There is currently no gglasagna_plain(), but below is the ggplot code
##
##library(ggplot2)
##library(reshape2)
H.df<-melt(H.mat)
## EDIT to nograpes answer:
## reorder the factor that is Subject by setting levels to the reverse order of rownames
H.df$Subject <- factor(H.df$Subject, levels = rev(rownames(H.mat)))
# If you want those exact colours the author used:
col<-palette[match(ordered(H.df$value),levels(ordered(H.df$value)))]
ggplot(H.df,aes(x=Time,y=Subject,fill=col)) +
geom_tile(colour='black') + scale_fill_identity() +
## add title:
ggtitle("(A) Initial Lasagna Plot")+
## adjust size and title position:
theme(axis.text=element_text(size=30),
axis.title=element_text(size=14,face="bold"),
plot.title = element_text(hjust = 0))+
## get all the breaks:
scale_x_discrete(breaks=1:6) +
## and for some reason need this line to zoom:
coord_cartesian(xlim=c(.49,6.51))
#Four sorting functions: wr()
, er()
, wc()
, ec()
.
wr()
sorts within-row: if the values being displayed are discrete, usewr.disc()
; continuous usewr.cont()
.lasagna_plain(wr.disc(H.mat))
generates a within-row sortedH.mat
where the smallest value (and corresponding color) is left-most; to control this use thecolorSeq
option:lasagna_plain(wr.disc(H.mat), colorSeq=c(300,100,200))
(see code chunk below for full code; axes, etc.):
Visualizing a within-row sort can be done with the following:
## quick and dirty within-row sort viz:
lasagna(wr.disc(H.mat),col=palette, main="(B) Within-row sorting of (A): Orange Left-most")
However, the axes might need more customization. In that case, consider using lasagna_plain()
(code is below):
And here is an example of sorting dark-purple first (code below):
Code for orange-first within-row sort (default call to wr.disc()
because orange is coded to lowest value):
## within-row
lasagna_plain(wr.disc(H.mat),
col=palette,
axes=F,
xlab = "",
ylab = "", cex.lab=fig02.lab, tck=0, mgp=c(0,.50,0))
box()
title("(B) Within-row sorting of (A): Orange Left-most", adj=0)
## axis(1, seq(0,1,1/5), 1:6, cex.axis=fig02.axis, tck=0, mgp=c(0,.1,0))
axis(1, c(1/10,3/10,5/10,7/10,9/10),c("1/6","1/3","1/2","2/3","5/6") , cex.axis=fig02.axis, tck=0, mgp=c(0,.50,0))
axis(2,
seq(0,1,1/3),
rev(c("P1",
"T1",
"P2",
"T2")),
las=1,
cex.axis=fig02.axis, tck=0, mgp=c(0,.2,0))
axis(1,
c(1/10,3/10,5/10,7/10,9/10),
lab=NA,
tck=1,
lty=1,
col="black") # grid lines
axis(2,
c(1/6,3/6,5/6),
lab=NA,
tck=1,
lty=1,
col="black") # grid lines
Code for dark-purple-first within-row sort (call to wr.disc()
with colorSeq
because dark-purple is coded to highest value):
## within-row with colorSeq order control:
lasagna_plain(wr.disc(H.mat, colorSeq=c(300,100,200)),
col=palette,
axes=F,
xlab = "",
ylab = "", cex.lab=fig02.lab, tck=0, mgp=c(0,.50,0))
box()
title("(B) Within-row sorting of (A): Dark Purple Left-most", adj=0)
## axis(1, seq(0,1,1/5), 1:6, cex.axis=fig02.axis, tck=0, mgp=c(0,.1,0))
axis(1, c(1/10,3/10,5/10,7/10,9/10),c("1/6","1/3","1/2","2/3","5/6") , cex.axis=fig02.axis, tck=0, mgp=c(0,.50,0))
axis(2,
seq(0,1,1/3),
rev(c("P1",
"T1",
"P2",
"T2")),
las=1,
cex.axis=fig02.axis, tck=0, mgp=c(0,.2,0))
axis(1,
c(1/10,3/10,5/10,7/10,9/10),
lab=NA,
tck=1,
lty=1,
col="black") # grid lines
axis(2,
c(1/6,3/6,5/6),
lab=NA,
tck=1,
lty=1,
col="black") # grid lines
er()
sorts entire rows, preserving the temporal (columns) information. The default is to sort based on the percentage of occurences of the lowest value, and then the subject with the highest percentage will be the bottom row; the lowest will be the top.
Visualizing an entire-row sort can be done with the following, where orange is the color coded to the lowest value:
## quick and dirty entire-row sort viz:
lasagna(er(H.mat),col=palette, main="(C) Entire-row sorting of (A)")
For dark-purple:
## quick and dirty entire-row sort viz:
lasagna(er(H.mat, orderVar=300),col=palette, main="(C) Entire-row sorting of (A)")
Also, do not need er()
whatsoever. Below we calculate the mean value for each subject and then entire-row sort where the order is based on the subject-specific mean:
## quick and dirty within-row sort viz:
rM = rowMeans(H.mat)
## see the ordering
sort(rM)
## P1 P2 T1 T2
## 133.3 166.7 200.0 233.3
## visualize lasagna plot with corresponding ordering
lasagna(H.mat[order(rM),],col=palette, main="(C) Entire-row sorting of (A)")
wc()
sorts within-columns, scrambling the subject-specific nature of the rows but revealing a group-level temporal patterns. As withwr()
there is a discrete and continuous implementationwc.disc()
andwc.cont()
.
Visualizing a within-column sort can be done with the following, where orange is the color coded to the lowest value:
## quick and dirty within-column sort viz:
lasagna(wc.disc(H.mat),col=palette, main="(D) Within-column sorting of (A)")
For orange (100) on top, dark-purple (300) middle, light-purple (200) on bottom:
## specify colorSeq top to bottom:
lasagna(wc.disc(H.mat, colorSeq=c(100,300,200)),col=palette, main="(D) Within-column sorting of (A)")
#Note: in progress below this line. ##Note: in progress below this line. ###Note: in progress below this line.
Something came to my attention while working on er(): no one wants to handle the axes comments themselves ---- rewrite lasagna to include these and do automatic sorting (for instance, if you don't include the axes comments in lasagna, you'll have to do the sorting of the subject ids outside of the er() sorting...I mean, what a mess. )
er()
sorts entire-row: if the values being displayed are discretelasagna_plain(er(H.mat))
generates an entire-row sortedH.mat
where the smallest value (and corresponding color) is left-most; to control this use theorderVar
option:lasagna_plain(wr.disc(H.mat), colorSeq=c(300,100,200))
(see code chunk below for full code; axes, etc.):
## within-row
lasagna_plain(wr.disc(H.mat),
col=palette,
axes=F,
xlab = "",
ylab = "", cex.lab=fig02.lab, tck=0, mgp=c(0,.50,0))
box()
title("(B) Within-row sorting of (A): Orange Left-most", adj=0)
## axis(1, seq(0,1,1/5), 1:6, cex.axis=fig02.axis, tck=0, mgp=c(0,.1,0))
axis(1, c(1/10,3/10,5/10,7/10,9/10),c("1/6","1/3","1/2","2/3","5/6") , cex.axis=fig02.axis, tck=0, mgp=c(0,.50,0))
axis(2,
seq(0,1,1/3),
rev(c("P1",
"T1",
"P2",
"T2")),
las=1,
cex.axis=fig02.axis, tck=0, mgp=c(0,.2,0))
axis(1,
c(1/10,3/10,5/10,7/10,9/10),
lab=NA,
tck=1,
lty=1,
col="black") # grid lines
axis(2,
c(1/6,3/6,5/6),
lab=NA,
tck=1,
lty=1,
col="black") # grid lines
## within-row with colorSeq order control:
lasagna_plain(wr.disc(H.mat, colorSeq=c(300,100,200)),
col=palette,
axes=F,
xlab = "",
ylab = "", cex.lab=fig02.lab, tck=0, mgp=c(0,.50,0))
box()
title("(B) Within-row sorting of (A): Dark Purple Left-most", adj=0)
## axis(1, seq(0,1,1/5), 1:6, cex.axis=fig02.axis, tck=0, mgp=c(0,.1,0))
axis(1, c(1/10,3/10,5/10,7/10,9/10),c("1/6","1/3","1/2","2/3","5/6") , cex.axis=fig02.axis, tck=0, mgp=c(0,.50,0))
axis(2,
seq(0,1,1/3),
rev(c("P1",
"T1",
"P2",
"T2")),
las=1,
cex.axis=fig02.axis, tck=0, mgp=c(0,.2,0))
axis(1,
c(1/10,3/10,5/10,7/10,9/10),
lab=NA,
tck=1,
lty=1,
col="black") # grid lines
axis(2,
c(1/6,3/6,5/6),
lab=NA,
tck=1,
lty=1,
col="black") # grid lines