# Classical package: H. Wickham’s ggplot2 Vignette

<h6>Working on Housing Prices</h6>

In [None]:
# Load the housing data set and preview the first six rows of its first five
# columns.
# NOTE(review): the path is relative, so the CSV is presumably expected in the
# working directory -- confirm.
housing <- read.csv(file = "landdata-states.csv")
head(housing[, 1:5])

<h6>Base graphics histogram example:</h6>

In [None]:
# Base-graphics histogram of the Home.Value column, using hist()'s default
# binning. The default title and x label are derived from the argument
# expression as written, so the call is left exactly as is.
hist(housing$Home.Value)

<h6>ggplot2 histogram example:</h6>

In [None]:
# ggplot2 supplies the plotting functions; scales supplies muted(), which the
# colour-scale and theme cells further down rely on.
library(ggplot2)
library(scales)

# Histogram of Home.Value: a data frame, an aesthetic mapping, and a geom.
ggplot(data = housing, mapping = aes(x = Home.Value)) +
  geom_histogram()

<h6>Base colored scatter plot example:</h6>

In [None]:
# Base-graphics scatter plot: MA is drawn first (default colour), TX is
# overlaid in red, and the legend is assembled by hand to match.
plot(
  Home.Value ~ Date,
  data = subset(housing, State == "MA")
)
points(
  Home.Value ~ Date,
  data = subset(housing, State == "TX"),
  col = "red"
)
legend(
  x = 1975, y = 400000,
  legend = c("MA", "TX"),
  title = "State",
  col = c("black", "red"),
  pch = c(1, 1)
)

<h6>ggplot2 colored scatter plot example:</h6>

In [None]:
# ggplot2 version of the MA/TX scatter plot: State is mapped to colour, so
# the legend comes from the mapping rather than being built by hand.
ggplot(
  data = subset(housing, State %in% c("MA", "TX")),
  mapping = aes(x = Date, y = Home.Value, color = State)
) +
  geom_point()

# Geometric Objects And Aesthetics

<h6>Points (Scatterplot)</h6>

In [None]:
# Keep only the rows whose Date equals 2001.25.
# NOTE(review): presumably Date encodes year plus a quarter fraction, making
# this the first quarter of 2001 -- confirm against the data description.
# 2001.25 is exactly representable as a double, so `==` is reliable here.
hp2001Q1 <- subset(housing, Date == 2001.25)

# Structure cost against land value for that single date.
ggplot(data = hp2001Q1, mapping = aes(x = Land.Value, y = Structure.Cost)) +
  geom_point()

In [None]:
# Same scatter plot with Land.Value log-transformed inside the mapping.
ggplot(
  data = hp2001Q1,
  mapping = aes(x = log(Land.Value), y = Structure.Cost)
) +
  geom_point()

<h6>Lines (Prediction Line)</h6>

In [None]:
# Regress Structure.Cost on log(Land.Value) and store the fitted values in a
# new column so they can be drawn as a line.
hp2001Q1$pred.SC <- predict(
  lm(Structure.Cost ~ log(Land.Value), data = hp2001Q1)
)

# Base plot reused by the following cells.
p1 <- ggplot(
  data = hp2001Q1,
  mapping = aes(x = log(Land.Value), y = Structure.Cost)
)

# Points coloured by Home.Value, with the fitted values overlaid as a line.
p1 +
  geom_point(mapping = aes(color = Home.Value)) +
  geom_line(mapping = aes(y = pred.SC))

<h6>Smoothers</h6>

In [None]:
# Points coloured by Home.Value plus geom_smooth()'s default smoother.
p1 +
  geom_point(mapping = aes(color = Home.Value)) +
  geom_smooth()

<h6>Text (Label Points)</h6>

In [None]:
# Draw each observation as its State label instead of a point.
p1 +
  geom_text(mapping = aes(label = State), size = 3)

In [None]:
# ggrepel provides geom_text_repel(), used here to label the points.
library(ggrepel)

p1 +
  geom_point() +
  geom_text_repel(mapping = aes(label = State), size = 3)

<h6>Aesthetic Mapping VS Assignment</h6>

In [None]:
# Deliberate counter-example contrasting mapping (inside aes()) with
# assignment (outside aes()).
p1 +
  geom_point(
    # incorrect! 2 is not a variable, so it does not belong inside aes()
    mapping = aes(size = 2),
    # this is fine -- a value set outside aes() makes all points red
    color = "red"
  )

<h6>Mapping Variables To Other Aesthetics</h6>

In [None]:
# Map two further variables onto the points: Home.Value to colour and region
# to shape.
p1 +
  geom_point(mapping = aes(color = Home.Value, shape = region))

In [None]:
# Load the Economist data set and preview it.
# NOTE(review): this path is relative to a different directory than the
# housing CSV loaded earlier -- confirm the file location.
dat <- read.csv(file = "Rgraphics/dataSets/EconomistData.csv")
head(dat)

# HDI against CPI, with point size mapped to HDI.Rank.
ggplot(data = dat, mapping = aes(x = CPI, y = HDI, size = HDI.Rank)) +
  geom_point()

# Statistical Transformations

In [None]:
# Show the argument lists of the histogram geom and of stat_bin. Both calls
# rely on top-level auto-printing, so they are left as bare expressions.
args(geom_histogram)
args(stat_bin)

<h6>Setting Statistical Transformation Arguments</h6>

In [None]:
# Base plot for the histogram examples; drawn here with default stat settings.
p2 <- ggplot(data = housing, mapping = aes(x = Home.Value))
p2 +
  geom_histogram()

In [None]:
# Same histogram, with the stat named explicitly and the bin width set to 4000.
p2 +
  geom_histogram(stat = "bin", binwidth = 4000)

<h6>Changing The Statistical Transformation</h6>

In [None]:
# Mean Home.Value per State, computed before plotting.
housing.sum <- aggregate(
  x = housing["Home.Value"],
  by = housing["State"],
  FUN = mean
)

# Preview the first and last rows of the summary together.
rbind(head(housing.sum), tail(housing.sum))

In [None]:
# The data are already summarised, so the bars use stat = "identity" to plot
# the Home.Value column as given.
ggplot(data = housing.sum, mapping = aes(x = State, y = Home.Value)) +
  geom_bar(stat = "identity")

# Scales

<h6>Scale Modification Examples</h6>

In [None]:
# Base plot: Home.Price.Index by State, legend on top, small axis text.
p3 <- ggplot(data = housing, mapping = aes(x = State, y = Home.Price.Index)) +
  theme(
    legend.position = "top",
    axis.text = element_text(size = 6)
  )

# Semi-transparent points coloured by Date, jittered horizontally only
# (height = 0).
p4 <- p3 +
  geom_point(
    mapping = aes(color = Date),
    alpha = 0.5,
    size = 1.5,
    position = position_jitter(width = 0.25, height = 0)
  )
p4

In [None]:
# Rename the x axis, drop the colour legend title, and relabel three chosen
# breaks on the colour scale.
p4 +
  scale_x_discrete(name = "State Abbreviation") +
  scale_color_continuous(
    name = "",
    breaks = c(1976, 1994, 2013),
    labels = c("'76", "'94", "'13")
  )

In [None]:
# Same relabelled scales, now with an explicit blue-to-red gradient.
# `low` and `high` are arguments of scale_color_gradient(), so that scale is
# called directly. scale_color_continuous() only forwards them when the
# configured continuous colour scale type is the gradient.
p4 +
  scale_x_discrete(name = "State Abbreviation") +
  scale_color_gradient(
    name = "",
    breaks = c(1976, 1994, 2013),
    labels = c("'76", "'94", "'13"),
    low = "blue",
    high = "red"
  )

In [None]:
# Gradient between muted blue and muted red (muted() comes from scales).
# `low` and `high` are arguments of scale_color_gradient(), so that scale is
# called directly. scale_color_continuous() only forwards them when the
# configured continuous colour scale type is the gradient.
p4 +
  scale_color_gradient(
    name = "",
    breaks = c(1976, 1994, 2013),
    labels = c("'76", "'94", "'13"),
    low = muted("blue"),
    high = muted("red")
  )

<h6>Using different color scales</h6>

In [None]:
# Diverging colour scale: muted blue to muted red through gray60, with the
# midpoint placed at 1994.
p4 +
  scale_color_gradient2(
    name = "",
    breaks = c(1976, 1994, 2013),
    labels = c("'76", "'94", "'13"),
    low = muted("blue"),
    high = muted("red"),
    mid = "gray60",
    midpoint = 1994
  )

# Faceting

In [None]:
# Base plot of Home.Value over Date; here every State is drawn as its own
# coloured line in a single panel.
p5 <- ggplot(data = housing, mapping = aes(x = Date, y = Home.Value))
p5 +
  geom_line(mapping = aes(color = State))

In [None]:
# Replace p5 with a faceted version (one panel per State, ten columns) and
# display it. Later cells build on this reassigned p5.
p5 <- p5 +
  geom_line() +
  facet_wrap(~State, ncol = 10)
p5

# Themes

In [None]:
# Render p5 with the built-in linedraw theme.
p5 +
  theme_linedraw()

In [None]:
# Render p5 with the built-in light theme.
p5 +
  theme_light()

<h6>Overriding theme defaults</h6>

In [None]:
# Start from the minimal theme, then override a single element: the `text`
# element is set to turquoise.
p5 +
  theme_minimal() +
  theme(text = element_text(color = "turquoise"))

<h6>Creating and saving new themes</h6>

In [None]:
# Build a reusable theme: start from theme_bw() and override selected elements.
theme_new <- theme_bw() +
  theme(
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for the
    # border width; `size` is kept so older installations keep working.
    plot.background = element_rect(size = 1, color = "blue", fill = "black"),
    # "serif" (lowercase) is R's device-independent serif family name; the
    # capitalised "Serif" is not recognised by every graphics device.
    text = element_text(size = 12, family = "serif", color = "ivory"),
    axis.text.y = element_text(color = "purple"),
    axis.text.x = element_text(color = "red"),
    panel.background = element_rect(fill = "pink"),
    strip.background = element_rect(fill = muted("orange"))
  )

# Apply the saved theme to the faceted plot.
p5 + theme_new

<h6>Map Aesthetic To Different Columns</h6>

In [None]:
library(tidyr)

# Mean Home.Value and Land.Value for each Date (one row per Date).
housing.byyear <- aggregate(
  cbind(Home.Value, Land.Value) ~ Date,
  data = housing,
  FUN = mean
)

# Reshape to long form so one aesthetic (colour) can distinguish the two
# measures: `type` holds the original column name, `value` holds the number.
# pivot_longer() replaces the superseded gather().
home.land.byyear <- pivot_longer(
  housing.byyear,
  cols = c(Home.Value, Land.Value),
  names_to = "type",
  values_to = "value"
)

# One line per measure, coloured by `type`.
ggplot(home.land.byyear, aes(x = Date, y = value, color = type)) +
  geom_line()