In [None]:
library(tidyverse)
library(dslabs)

In [None]:
# Load the gapminder data set into the workspace.
data("gapminder")

In [None]:
# Show the data as a tibble (compact printing).
as_tibble(gapminder)

In [None]:
# Infant mortality in 2015 for a hand-picked set of countries, highest first.
# The names must match the spelling in the `country` column exactly: `%in%`
# silently drops any name that does not match instead of raising an error.
gapminder %>%
  filter(
    year == 2015,
    country %in% c(
      "Sri Lanka", "Turkey", "Poland", "South Korea", "Malaysia",
      "Russia", "Pakistan", "Vietnam", "Thailand", "South Africa"
    )
  ) %>%
  select(country, infant_mortality) %>%
  arrange(desc(infant_mortality))

In [None]:
# Scatter plot: fertility against life expectancy, using the 1962 rows only.
p <- gapminder %>%
  filter(year == 1962) %>%
  ggplot(mapping = aes(x = fertility, y = life_expectancy)) +
  geom_point()
print(p)

In [None]:
# Add a second point layer on top of `p`, coloured by continent.
p <- p +
  geom_point(mapping = aes(colour = continent))
print(p)

In [None]:
# Same scatter plot for the 2012 rows, with colour mapped to continent
# at the plot level.
p1 <- gapminder %>%
  filter(year == 2012) %>%
  ggplot(
    mapping = aes(x = fertility, y = life_expectancy, colour = continent)
  ) +
  geom_point()
print(p1)


In [None]:
# Faceting: one panel per continent (rows) and year (columns),
# restricted to the 1962 and 2012 rows.
p <- gapminder %>%
  filter(year %in% c(1962, 2012)) %>%
  ggplot(
    mapping = aes(x = fertility, y = life_expectancy, colour = continent)
  ) +
  geom_point() +
  facet_grid(rows = vars(continent), cols = vars(year))
print(p)


In [None]:
# Facet wrap: one panel per selected year, limited to three continents.
# scales = "free" gives every panel its own x and y axis range.
years <- c(1962, 1980, 1990, 2000, 2012)
continents <- c("Europe", "Asia", "Americas")
p <- gapminder %>%
  filter(year %in% years, continent %in% continents) %>%
  ggplot(
    mapping = aes(x = fertility, y = life_expectancy, colour = continent)
  ) +
  geom_point() +
  facet_wrap(vars(year), scales = "free")
print(p)

In [None]:
# Time series: fertility by year for the United States.
p2 <- gapminder %>%
  filter(country == "United States") %>%
  ggplot(mapping = aes(x = year, y = fertility)) +
  geom_line()
print(p2)

In [None]:
# Fertility over time for two countries, one coloured line per country.
countries <- c("South Korea", "Germany")
p2 <- gapminder %>%
  filter(country %in% countries) %>%
  ggplot(mapping = aes(x = year, y = fertility, colour = country)) +
  geom_line()
print(p2)

In [None]:
# Replace the legend with text labels drawn directly on the plot.
# `labels` holds one (x, y) position per entry of `countries`.
labels <- data.frame(
  country = countries,
  x = c(1967, 1974),
  y = c(6, 2.5)
)
p4 <- p2 +
  geom_text(
    mapping = aes(x = x, y = y, label = country),
    data = labels,
    size = 4
  ) +
  theme(legend.position = "none")
print(p4)

In [None]:
# Life expectancy over time for the same countries, again labelled in place
# instead of with a legend.
labels2 <- data.frame(
  country = countries,
  x = c(1965, 1976),
  y = c(62, 71)
)
p3 <- gapminder %>%
  filter(country %in% countries) %>%
  ggplot(mapping = aes(x = year, y = life_expectancy, colour = country)) +
  geom_line() +
  geom_text(
    mapping = aes(x = x, y = y, label = country),
    data = labels2,
    size = 4
  ) +
  theme(legend.position = "none")
print(p3)

In [None]:
# Data transformation: add `dollars_per_day`, i.e. gdp divided by population
# and then by 365.
gapminder <- mutate(
  gapminder,
  dollars_per_day = gdp / population / 365
)

In [None]:
# Histogram of log2(dollars_per_day) for `past_year`, skipping rows
# that have no gdp value.
past_year <- 1970
gapminder %>%
  filter(year == past_year, !is.na(gdp)) %>%
  ggplot(mapping = aes(x = log2(dollars_per_day))) +
  geom_histogram(binwidth = 0.2, colour = "blue")

In [None]:
# Histogram of log10(population) for `past_year`.
gapminder %>%
  filter(year == past_year) %>%
  ggplot(mapping = aes(x = log10(population))) +
  geom_histogram(binwidth = 0.3, colour = "black")

In [None]:
# Smallest and largest population among the `past_year` rows.
# min()/max() are called without na.rm, so a missing population yields NA.
gapminder %>%
  filter(year == past_year) %>%
  summarise(
    min = min(population),
    max = max(population)
  )

In [None]:
# Comparing multiple distributions with boxplots and ridge plots.
# First view: one point per country, with regions ordered by their median
# dollars_per_day and a log2-scaled x axis.
gapminder %>%
  filter(year == past_year, !is.na(gdp)) %>%
  mutate(region = reorder(region, dollars_per_day, FUN = median)) %>%
  ggplot(mapping = aes(x = dollars_per_day, y = region)) +
  geom_point() +
  scale_x_continuous(trans = "log2")

In [None]:
# Collapse `region` into five coarse groups. case_when() uses the first
# branch whose condition is TRUE; rows matching none fall through to "others".
# Region names must match the data's spelling exactly, because `%in%`
# silently fails to match a misspelled name.
gapminder <- gapminder %>%
  mutate(
    group = case_when(
      region %in% c(
        "Western Europe", "Northern Europe", "Southern Europe",
        "Northern America", "Australia and New Zealand"
      ) ~ "West",
      region %in% c("Eastern Asia", "South-Eastern Asia") ~ "East Asia",
      region %in% c("Caribbean", "Central America", "South America") ~
        "Latin America",
      continent == "Africa" & region != "Northern Africa" ~ "Sub-Saharan",
      TRUE ~ "others"
    )
  )

# Convert to a factor with an explicit level order, which controls the order
# in which the groups appear on plot axes and legends.
gapminder <- gapminder %>%
  mutate(
    group = factor(
      group,
      levels = c("others", "Latin America", "East Asia", "Sub-Saharan", "West")
    )
  )

In [None]:
# Boxplot of dollars_per_day per group for `past_year` (rows without gdp
# removed), on a log2 y axis with vertical x tick labels.
p5 <- gapminder %>%
  filter(year == past_year, !is.na(gdp)) %>%
  ggplot(mapping = aes(x = group, y = dollars_per_day, fill = group)) +
  geom_boxplot() +
  scale_y_continuous(trans = "log2") +
  xlab("Country Zone") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(p5)

In [None]:
# Overlay the individual observations, semi-transparent, on the boxplot.
p5 +
  geom_point(alpha = 0.5)

In [None]:
# Ridge plot: one density ridge of dollars_per_day per group, log2 x axis.
library(ggridges)
p6 <- gapminder %>%
  filter(year == past_year, !is.na(dollars_per_day)) %>%
  ggplot(mapping = aes(x = dollars_per_day, y = group)) +
  scale_x_continuous(trans = "log2")
p6 +
  geom_density_ridges(mapping = aes(fill = group))