Importing relevant libraries

In [None]:
# tidyverse supplies the data-manipulation verbs (filter, select, mutate,
# group_by, summarize, bind_cols) and the ggplot2 plotting functions used below.
library(tidyverse)
# readxl provides read_excel(), used to load the .xlsx survey files.
library(readxl)
# repr controls how objects are rendered in notebook output.
library(repr)
# Presumably caps displayed tables at 10 rows so full data frames are not
# dumped into the notebook output -- confirm against the repr documentation.
options(repr.matrix.max.rows = 10)

Loading data

In [None]:
# Load the 2020 and 2018 demographic workbooks from the data/ directory.
# read_excel() is called with its defaults, so the first sheet of each
# workbook is read.
cius_2020_dem <- read_excel(path = "data/CIUS_2020_DEM.xlsx")
cius_2018_dem <- read_excel(path = "data/CIUS_2018_DEM.xlsx")

# Show both raw tables for a quick visual check.
cius_2020_dem
cius_2018_dem

Cleaning and filtering data

In [None]:
# 2020: keep rows whose UI_060A value is at most 6, then retain only the two
# demographic columns used in the rest of the analysis.
# NOTE(review): the cutoff presumably drops skip / not-stated response codes --
# confirm against the survey codebook.
cius_2020_dem_clean <- cius_2020_dem |>
  filter(UI_060A <= 6) |>
  select(GENDER, AGE_GRP)

# 2018: AGE_GRP is coerced to numeric first, then rows are kept when UI_210A is
# at most 5, and the same two demographic columns are retained.
# NOTE(review): the 2018 filter uses a different variable and cutoff than 2020;
# verify that both select the intended respondents.
cius_2018_dem_clean <- cius_2018_dem |>
  mutate(AGE_GRP = as.numeric(AGE_GRP)) |>
  filter(UI_210A <= 5) |>
  select(GENDER, AGE_GRP)

# Show both cleaned tables.
cius_2020_dem_clean
cius_2018_dem_clean

Determining number of respondents by gender in 2018

In [None]:
# Tally 2018 respondents per GENDER code; count() groups by GENDER and stores
# the group sizes in a column named "count".
# The text labels are attached by row position, not by code.
# NOTE(review): this assumes exactly two GENDER codes, ordered male then
# female -- confirm against the survey codebook.
gender_2018 <- cius_2018_dem_clean |>
  count(GENDER, name = "count") |>
  bind_cols(GENDER_LABEL = c("male", "female"))
gender_2018


Determining number of respondents by gender in 2020

In [None]:
# Tally 2020 respondents per GENDER code; count() groups by GENDER and stores
# the group sizes in a column named "count".
# The text labels are attached by row position, not by code.
# NOTE(review): this assumes exactly two GENDER codes, ordered male then
# female -- confirm against the survey codebook.
gender_2020 <- cius_2020_dem_clean |>
  count(GENDER, name = "count") |>
  bind_cols(GENDER_LABEL = c("male", "female"))
gender_2020

Plotting gender demographics data

In [None]:
# Bar chart of the 2018 respondent counts, one bar per gender label, with the
# bar fill also mapped to the label. geom_col() draws bar heights directly
# from the precomputed `count` column.
gender_2018_plot <- ggplot(
  gender_2018,
  aes(x = GENDER_LABEL, y = count, fill = GENDER_LABEL)
) +
  geom_col() +
  labs(
    title = "Gender distribution of respondents in 2018",
    x = "Gender",
    y = "Number of respondents",
    fill = "Gender"
  ) +
  theme(text = element_text(size = 16))

# Display the chart, then write it to the images/ directory.
gender_2018_plot
ggsave(filename = "images/gender_2018_plot.png", plot = gender_2018_plot)

In [None]:
# Bar chart of the 2020 respondent counts, one bar per gender label, with the
# bar fill also mapped to the label. geom_col() draws bar heights directly
# from the precomputed `count` column.
gender_2020_plot <- ggplot(
  gender_2020,
  aes(x = GENDER_LABEL, y = count, fill = GENDER_LABEL)
) +
  geom_col() +
  labs(
    title = "Gender distribution of respondents in 2020",
    x = "Gender",
    y = "Number of respondents",
    fill = "Gender"
  ) +
  theme(text = element_text(size = 16))

# Display the chart, then write it to the images/ directory.
gender_2020_plot
ggsave(filename = "images/gender_2020_plot.png", plot = gender_2020_plot)

Determining number of respondents by age group in 2018

In [None]:
# Tally 2018 respondents per AGE_GRP code; count() groups by AGE_GRP and stores
# the group sizes in a column named "count".
# The age-range labels are attached by row position, not by code.
# NOTE(review): this assumes exactly six AGE_GRP codes whose sorted order
# matches the label order below -- confirm against the survey codebook.
age_2018 <- cius_2018_dem_clean |>
  count(AGE_GRP, name = "count") |>
  bind_cols(
    AGE_LABEL = c("15-24", "25-34", "35-44", "45-54", "55-64", "65+")
  )
age_2018

Determining number of respondents by age group in 2020

In [None]:
# Tally 2020 respondents per AGE_GRP code; count() groups by AGE_GRP and stores
# the group sizes in a column named "count".
# The age-range labels are attached by row position, not by code.
# NOTE(review): this assumes exactly six AGE_GRP codes whose sorted order
# matches the label order below -- confirm against the survey codebook.
age_2020 <- cius_2020_dem_clean |>
  count(AGE_GRP, name = "count") |>
  bind_cols(
    AGE_LABEL = c("15-24", "25-34", "35-44", "45-54", "55-64", "65+")
  )
age_2020

Plotting age group demographics data

In [None]:
# Bar chart of the 2018 respondent counts, one bar per age-group label, with
# the bar fill also mapped to the label. geom_col() draws bar heights directly
# from the precomputed `count` column.
age_2018_plot <- ggplot(
  age_2018,
  aes(x = AGE_LABEL, y = count, fill = AGE_LABEL)
) +
  geom_col() +
  labs(
    title = "Age distribution of respondents in 2018",
    x = "Age group",
    y = "Number of respondents",
    fill = "Age group"
  ) +
  theme(text = element_text(size = 16))

# Display the chart, then write it to the images/ directory.
age_2018_plot
ggsave(filename = "images/age_2018_plot.png", plot = age_2018_plot)

In [None]:
# Bar chart of the 2020 respondent counts, one bar per age-group label, with
# the bar fill also mapped to the label. geom_col() draws bar heights directly
# from the precomputed `count` column.
age_2020_plot <- ggplot(
  age_2020,
  aes(x = AGE_LABEL, y = count, fill = AGE_LABEL)
) +
  geom_col() +
  labs(
    title = "Age distribution of respondents in 2020",
    x = "Age group",
    y = "Number of respondents",
    fill = "Age group"
  ) +
  theme(text = element_text(size = 16))

# Display the chart, then write it to the images/ directory.
age_2020_plot
ggsave(filename = "images/age_2020_plot.png", plot = age_2020_plot)