In [None]:
library(gmodels)
library(ggmosaic)
library(dplyr)
library(tidyverse)

## Make the black-and-white theme the default for every ggplot drawn after
## this point (theme_set() changes the session-wide default theme).
theme_set(theme_bw())

## Theme adjustments shared by the mosaic plots: x-axis labels are rotated
## by 90 degrees, and the y-axis text and tick marks are removed.
mosaic_theme <- theme(
  axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank()
)

In [None]:
# Path of the bank dataset, relative to the current working directory.
# NOTE(review): despite its name, `data_dir` holds a file path, not a
# directory.
data_dir <- "../data/bank-full.csv"

# Read the file with read.csv()'s default settings.
# NOTE(review): this assumes a comma-separated file - confirm the delimiter.
bank_data <- read.csv(file = data_dir)

In [None]:
# Report the number of rows and columns, then preview the first ten rows.
sprintf(
  "# rows: %d - # columns: %d",
  dim(bank_data)[1],
  dim(bank_data)[2]
)
head(bank_data, n = 10)

In [None]:
# Summarise every column of the dataset.
bank_data %>%
  summary()

In [None]:
# Tabulate the values of the `y` column of the dataset.
CrossTable(bank_data$y) # from gmodels

In [None]:
# Draw a base-graphics histogram of the `age` column.
hist(bank_data$age)

In [None]:
# Count, for every column, how many rows hold the literal string "unknown",
# and list the columns from the most to the fewest such values.
bank_data %>%
  # across() replaces the superseded summarise_all(). na.rm = TRUE makes a
  # column that contains NA report its count instead of NA.
  summarise(across(everything(), ~ sum(.x == "unknown", na.rm = TRUE))) %>%
  # pivot_longer() replaces the superseded gather(): one row per column.
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "nr_unknown"
  ) %>%
  arrange(desc(nr_unknown)) %>%
  # pivot_longer() returns a tibble; convert back so the result remains a
  # plain data frame, as it was with gather().
  as.data.frame()

In [None]:
# Bar chart of row counts per age, drawn as one panel per value of the 'y'
# column. Each panel has its own y-axis scale, dashed red vertical lines are
# drawn at ages 30 and 60, and the x-axis has a break every 5 years.
ggplot(data = bank_data, mapping = aes(x = age)) +
  geom_bar() +
  geom_vline(xintercept = c(30, 60), colour = "red", linetype = "dashed") +
  scale_x_continuous(breaks = seq(0, 100, by = 5)) +
  facet_grid(rows = vars(y), scales = "free_y")

In [None]:
# Mosaic plot of the 'y' column against 'job', filled by 'y'. It applies the
# shared mosaic theme, labels the x-axis "Job" and drops the y-axis label.
ggplot(data = bank_data) +
  geom_mosaic(aes(x = product(y, job), fill = y)) +
  mosaic_theme +
  labs(x = "Job", y = NULL)