In [None]:
# Packages this notebook depends on (consumed by package_prep below)
package_names_vec <- c("tidyverse", "readxl")

# Install (when missing) and attach every package named in `names_vec`.
# Parameters:
# - names_vec: character vector of package names
# Returns: `names_vec`, invisibly.
# Side effects: may call install.packages(); attaches each package to the
# search path. library() raises an error if a package still cannot be loaded
# after the installation attempt, so a failed install is not silently ignored.
package_prep <- function(names_vec) {
  for (name in names_vec) {
    # requireNamespace() only checks availability; it does not attach anything
    if (!requireNamespace(name, quietly = TRUE)) {
      install.packages(name)
    }
    # Attach unconditionally so a freshly installed package is loaded as well
    library(name, character.only = TRUE)
  }
  invisible(names_vec)
}

# Make sure every required package is installed and loaded
package_prep(package_names_vec)





# Resolve the directories used by the notebook and store them in `paths`.
# - If the working directory contains "kaggle", the Kaggle input/working
#   folders are used.
# - Otherwise paths are relative to the parent of the working directory, zip
#   archives found in the data folder are extracted in place, and the 2017
#   Excel file is moved out of its extracted sub-folder into the data folder.
if (grepl("kaggle", getwd())) {
  paths <- list(
    data = file.path("/kaggle", "input", "gfi-iran"),
    output = file.path("/kaggle", "working", "output")
  )
} else {
  paths <- list(
    source = file.path("..", "src"),
    data = file.path("..", "data"),
    output = file.path("..", "output")
  )
  # Only hand real zip archives to unzip(); the data folder also holds the
  # extracted CSV/XLS files and sub-folders after the first run.
  zip_files <- list.files(
    paths$data,
    pattern = "\\.zip$",
    full.names = TRUE,
    ignore.case = TRUE
  )
  for (zip_file in zip_files) {
    unzip(zip_file, exdir = paths$data)
  }
  # file.rename(
  #  paths$data |> file.path("IRN_2017_FINDEX_v02_M_EXCEL\\micro_irn_varlabel.xls"),
  #  paths$data |> file.path("micro_irn_varlabel.xls")
  # )
  # The extracted file may sit in a real sub-folder (portable separator) or,
  # as the original code assumed, under a name containing a literal backslash.
  # Try both, and skip the move when neither exists (e.g. on a re-run).
  varname_candidates <- c(
    file.path(paths$data, "IRN_2017_FINDEX_v02_M_EXCEL", "micro_irn_varname.xls"),
    file.path(paths$data, "IRN_2017_FINDEX_v02_M_EXCEL\\micro_irn_varname.xls")
  )
  varname_found <- varname_candidates[file.exists(varname_candidates)]
  if (length(varname_found) > 0) {
    file.rename(
      varname_found[1],
      file.path(paths$data, "micro_irn_varname.xls")
    )
  }
}
# Abort when the input data is missing; otherwise create any directory in
# `paths` that does not exist yet.
if (!dir.exists(paths$data)) {
  stop("INPUT DATA NOT FOUND\n DO NOT RUN THIS CODE.")
} else {
  for (dirs in paths) {
    if (!dir.exists(dirs)) {
      dir.create(dirs, recursive = TRUE)
    }
  }
}


# Load and clean the data

In [None]:
# Read a data file, tag it, and publish it in the global environment.
# Parameters:
# - name: name under which the table is assigned in the global environment
# - path: path of the file to read
# - argyear: value stored in the new 'year' column
# - reading_func: function used to read the file (e.g., read_csv, read_excel)
# Returns (invisibly, via assign): the processed table.
load_tbl <- function(name, path, argyear, reading_func) {
  raw_tbl <- reading_func(file.path(path))

  # 'year' and the sequential 'ID' are added before empty columns are removed
  tagged_tbl <- mutate(raw_tbl, year = argyear, ID = row_number())

  # Keep a column only if it holds at least one non-missing value
  has_any_value <- function(column) !all(is.na(column))
  result <- select(tagged_tbl, where(has_any_value))

  # Side effect: the table becomes available as a global variable called `name`
  assign(name, result, envir = .GlobalEnv)
}

# 2021 table: read micro_irn.csv with read_csv and store it as gfi_2021_tbl
load_tbl(
  name = "gfi_2021_tbl",
  path = file.path(paths$data, "micro_irn.csv"),
  argyear = 2021,
  reading_func = read_csv
)

# 2017 table: read micro_irn_varname.xls with read_excel and store it as gfi_2017_tbl
load_tbl(
  name = "gfi_2017_tbl",
  path = file.path(paths$data, "micro_irn_varname.xls"),
  argyear = 2017,
  reading_func = read_excel
)


In [None]:
# Build the cleaned 2021 table from the numerically coded gfi_2021_tbl:
#   1. keep rows with female == 1;
#   2. derive the digital-payment flag and the payment-channel variables;
#   3. keep and rename the analysis columns;
#   4. recode them to the shared scheme (0-based levels, 1/0 flags, NA for
#      every other code);
#   5. collapse the borrowing flags and the "reason" flags into single
#      categorical columns and drop the helper columns.
# NOTE(review): the `!is.na(account_fin)` / `!is.na(account_mob)` tests below
# only require the flag to be non-missing, not equal to 1 - confirm intended.
gfi_2021_cleaned_tbl <- gfi_2021_tbl |>
  filter(female == 1) |>
  mutate(
    # 1 when the respondent has an account and at least one of the numbered
    # conditions holds; commented-out terms are kept from the original.
    anydigpayment_s = as.integer(
      account == 1 & (
        account_mob == 1 | # 1
          fin4 == 1 | # 2
          fin8 == 1 | # 3
          (fin31a == 1 | (fin31b == 1 & account_mob == 1)) | # 4
          fin5 == 1 | # 5
          # fin27_1 == 1 | # 6
          (fin14a1 == 1 | fin14a == 1) | # (fin14b == 1 & (fin14c == 1 | fin14c == 3))) | # 7
          # fin29_1 == 1 | # 8
          # (fin43a == 1 | fin43b == 1) | # 9
          (fin34a == 1 | fin34b == 1 | fin34e == 1) | # 10
          ((fin37 == 1 & fin38 != 1) &
            ((fin39a == 1 | fin39e == 1) | fin39b == 1)) | # 11
          ((fin38 == 1) &
            ((fin39a == 1 | fin39e == 1) | fin39b == 1)) | # 12
          (fin14_1 == 1) # | fin8a == 1| fin4a == 1 13
      )
    ),
    # Channel codes 1-5; codes 4 and 5 are recoded to 0 / NA further down.
    pay_utilities_s = case_when(
      fin31a == 1 & !is.na(account_fin) ~ 1L, # from a FI account
      fin31b == 1 & !is.na(account_mob) ~ 1L, # …or from mobile money
      fin31c == 1 ~ 2L, # cash only
      fin30 == 1 ~ 3L, # other (non-cash, non-digital)
      fin30 == 2 ~ 4L, # did not pay
      fin30 %in% c(3, 4) ~ 5L, # DK / refused
      TRUE ~ NA_integer_
    ),
    receive_transfers_s = case_when(
      fin37 == 1 & fin39a == 1 & !is.na(account_fin) ~ 1L, # into FI account
      fin37 == 1 & fin39e == 1 & !is.na(account_fin) ~ 1L, # …or to a card
      fin37 == 1 & fin39b == 1 & !is.na(account_mob) ~ 1L, # …or mobile money
      fin37 == 1 & fin39d == 1 ~ 2L, # cash only
      fin37 == 1 ~ 3L, # other (non-cash, non-digital)
      fin37 == 2 ~ 4L, # did not receive
      fin37 %in% c(3, 4) ~ 5L, # DK / refused
      TRUE ~ NA_integer_
    ),
    receive_pension_s = case_when(
      fin38 == 1 & fin39a == 1 & !is.na(account_fin) ~ 1L, # into FI account
      fin38 == 1 & fin39e == 1 & !is.na(account_fin) ~ 1L, # …or to a card
      fin38 == 1 & fin39b == 1 & !is.na(account_mob) ~ 1L, # …or mobile money
      fin38 == 1 & fin39d == 1 ~ 2L, # cash only
      fin38 == 1 ~ 3L, # other (non-cash, non-digital)
      fin38 == 2 ~ 4L, # did not receive
      fin38 %in% c(3, 4) ~ 5L, # DK / refused
      TRUE ~ NA_integer_
    ),
    receive_wages_s = case_when(
      # 1: received into an account, card, or mobile
      (fin34a == 1 & !is.na(account_fin)) |
        (fin34e == 1 & !is.na(account_fin)) |
        (fin34b == 1 & !is.na(account_mob)) ~ 1,

      # 2: cash only (but only if not already coded 1)
      fin34d == 1 ~ 2,

      # 3: other methods
      fin32 == 1 ~ 3,

      # 0: did not receive payments
      fin32 == 2 ~ 0,
      TRUE ~ NA_real_
    )
  ) |>
  select(
    year, ID, wgt, age, educ, emp_in, inc_q, fin24,
    account, account_fin, account_mob, fin22a, borrowed,
    fin38, receive_pension_s, fin5,
    receive_transfers_s, pay_utilities_s, anydigpayment_s, receive_wages_s,
    mobileowner, saved, fin9, fin10, fin11a, fin11b, fin11c, fin11d, fin11e, fin11f, fin11g, fin11h, fin22b
  ) |>
  rename(
    id = ID,
    weight = wgt,
    respondent_age = age,
    respondent_education_level = educ,
    household_income_quintile = inc_q,
    in_the_workforce = emp_in,
    main_source_emergency_funds = fin24,
    payments_utility_bills = pay_utilities_s,
    has_account = account,
    has_financial_institution_account = account_fin,
    has_mobile_money_account = account_mob,
    used_phone_for_finance = fin5,
    borrowed_in_past_year = borrowed,
    borrowed_from_financial_institution = fin22a,
    borrowed_from_family_friends = fin22b,
    made_or_received_digital_payment = anydigpayment_s,
    payments_wage_payments = receive_wages_s,
    received_government_pension = fin38,
    received_government_transfers = receive_transfers_s,
    received_government_pension_payment = receive_pension_s,
    owns_mobile_phone = mobileowner,
    saved_in_past_year = saved,
    any_deposit_into_account = fin9,
    any_withdrawal_from_account = fin10,
    reason_too_far = fin11a,
    reason_too_expensive = fin11b,
    reason_no_documentation = fin11c,
    reason_no_trust = fin11d,
    reason_religious = fin11e,
    reason_insufficient_funds = fin11f,
    reason_family_member = fin11g,
    reason_no_need = fin11h
  ) |>
  mutate(
    # Explicit 1/2/3 -> 0/1/2 mapping. Any other code (e.g. DK / refused, if
    # present in the data) becomes NA, matching the 2017 recode and the
    # codebook, which only define levels 0-2. A plain `- 1` would leave such
    # codes as stray values 3+.
    respondent_education_level = case_match(
      respondent_education_level,
      1 ~ 0,
      2 ~ 1,
      3 ~ 2,
      .default = NA
    ),
    household_income_quintile = household_income_quintile - 1,
    # Codes 1-6 are kept, 7 becomes 0, everything else NA.
    main_source_emergency_funds = main_source_emergency_funds |> case_match(
      1 ~ 1,
      2 ~ 2,
      3 ~ 3,
      4 ~ 4,
      5 ~ 5,
      6 ~ 6,
      7 ~ 0,
      .default = NA
    ),
    # Channel variables: 4 ("did not pay / receive") -> 0, 5 (DK / refused) -> NA.
    across(
      c(
        payments_utility_bills, received_government_transfers, received_government_pension_payment
      ),
      ~ case_match(
        .x,
        1 ~ 1,
        2 ~ 2,
        3 ~ 3,
        4 ~ 0,
        .default = NA
      )
    ),
    # First reason flagged with 1, in the order listed, wins.
    reason_no_account = case_when(
      reason_too_far == 1 ~ 0L,
      reason_too_expensive == 1 ~ 1L,
      reason_no_documentation == 1 ~ 2L,
      reason_no_trust == 1 ~ 3L,
      reason_religious == 1 ~ 4L,
      reason_insufficient_funds == 1 ~ 5L,
      reason_family_member == 1 ~ 6L,
      reason_no_need == 1 ~ 7L,
      TRUE ~ NA_integer_
    ),
    # Flags coded 1 / 2 (or 0) -> 1 / 0; every other code -> NA.
    across(
      c(
        received_government_pension,
        borrowed_from_financial_institution, in_the_workforce,
        owns_mobile_phone, saved_in_past_year, any_deposit_into_account, any_withdrawal_from_account,
        borrowed_from_family_friends, used_phone_for_finance
      ),
      ~ case_match(.x, 1 ~ 1, 2 ~ 0, 0 ~ 0, .default = NA_integer_)
    )
  ) |>
  mutate(
    # 0 none, 1 family/friends only, 2 financial institution only, 3 both,
    # 4 borrowed but from neither of the two sources.
    borrowed_in_the_past_year = case_when(
      borrowed_in_past_year == 0 ~ 0,
      borrowed_from_family_friends == 1 & borrowed_from_financial_institution == 0 ~ 1,
      borrowed_from_family_friends == 0 & borrowed_from_financial_institution == 1 ~ 2,
      borrowed_from_financial_institution == 1 & borrowed_from_family_friends == 1 ~ 3,
      borrowed_in_past_year == 1 & borrowed_from_family_friends == 0 & borrowed_from_financial_institution == 0 ~ 4,
      TRUE ~ NA_integer_
    )
  ) |>
  # Drop the helper columns that were folded into the categorical variables.
  select(-c(
    borrowed_in_past_year, borrowed_from_financial_institution, reason_too_far, reason_too_expensive,
    reason_no_documentation, reason_no_trust, reason_religious, reason_insufficient_funds,
    reason_family_member, reason_no_need, borrowed_from_family_friends
  ))


In [None]:
# Build the cleaned 2017 table from the text-labelled gfi_2017_tbl:
#   1. keep rows with female == "Female";
#   2. recode the yes/no answers needed for the derived variables to 1/0 and
#      derive the digital-payment flag and the wage-payment channel;
#   3. keep and rename the analysis columns;
#   4. recode the labels to the same numeric scheme as the 2021 table and
#      cast every column except `weight` to integer;
#   5. collapse the borrowing flags and the "reason" flags into single
#      categorical columns and drop the helper columns.
# NOTE(review): the `!is.na(account_fin)` / `!is.na(account_mob_s)` tests only
# require the value to be non-missing, not "yes" - confirm intended.
gfi_2017_cleaned_tbl <- gfi_2017_tbl |>
  filter(female == "Female") |>
  mutate(
    # Working copy that is recoded to 1/0 right below; account_mob itself
    # stays as text until the yes/no recode after the rename.
    account_mob_s = account_mob,
    # yes/no text -> 1/0, any other value -> NA
    across(
      c(
        fin4, fin8, fin5, account, fin31a, fin31b,
        fin14a, fin34a, fin34b, fin34c2, fin37, fin38,
        fin39a, fin39b, fin39c2, account_mob_s, fin34c1, fin32
      ),
      ~ case_match(.x, c("yes","Yes") ~ 1, c("no","No", "0") ~ 0, .default = NA)
    ),
    anydigpayment_s = as.integer(
      account == 1 & (
        # Card and online account usage
        fin4 == 1 | # used debit card in past 12 months
          fin8 == 1 | # used credit card in past 12 months
          fin5 == 1 | # accessed FI account via mobile/internet

          # Utility-bill payments
          fin31a == 1 | # paid utility by account
          fin31b == 1 | # paid utility by mobile phone

          # Internet bill payments
          fin14a == 1 | # made bill payments online

          # Wage-payment channels
          fin34a == 1 | # wages into an account
          fin34b == 1 | # wages via mobile phone
          fin34c2 == 1 | # wages to a card

          # Government transfers (non-pension) via digital channels.
          # The outer parentheses spell out the grouping that `&` binding
          # tighter than `|` already produced.
          ((fin37 == 1 & fin38 != 1) &
            (fin39a == 1 | fin39b == 1 | fin39c2 == 1)) |

          # Government pensions via digital channels
          ((fin38 == 1) &
            (fin39a == 1 | fin39b == 1 | fin39c2 == 1))
      )
    ),
    receive_wages_s = case_when(
      # 1: received into an account
      fin34a == 1 & !is.na(account_fin) ~ 1,
      # also 1: received onto a card
      fin34c2 == 1 & !is.na(account_fin) ~ 1,
      # also 1: received via mobile
      fin34b == 1 & !is.na(account_mob_s) ~ 1,
      # 2: cash only (only reached if none of the above matched)
      fin34c1 == 1 ~ 2,
      # 3: other methods only
      fin32 == 1 ~ 3,
      # 0: did not receive payments
      fin32 == 0 ~ 0,
      TRUE ~ NA_real_
    )
  ) |>
  select(
    year, ID, wgt, age, educ, emp_in, inc_q, fin25, pay_utilities,
    account, account_fin, account_mob, fin22a, borrowed, anydigpayment_s,
    receive_wages_s, fin38, receive_transfers, receive_pension, fin5,
    mobileowner, saved, fin9, fin10, fin11a, fin11b, fin11c, fin11d, fin11e, fin11f, fin11g, fin11h, fin22b
  ) |>
  rename(
    id = ID,
    weight = wgt,
    respondent_age = age,
    respondent_education_level = educ,
    household_income_quintile = inc_q,
    in_the_workforce = emp_in,
    main_source_emergency_funds = fin25,
    payments_utility_bills = pay_utilities,
    has_account = account,
    has_financial_institution_account = account_fin,
    has_mobile_money_account = account_mob,
    used_phone_for_finance = fin5,
    borrowed_in_past_year = borrowed,
    borrowed_from_financial_institution = fin22a,
    borrowed_from_family_friends = fin22b,
    payments_wage_payments = receive_wages_s,
    received_government_pension = fin38,
    received_government_transfers = receive_transfers,
    received_government_pension_payment = receive_pension,
    made_or_received_digital_payment = anydigpayment_s,
    owns_mobile_phone = mobileowner,
    saved_in_past_year = saved,
    any_deposit_into_account = fin9,
    any_withdrawal_from_account = fin10,
    reason_too_far = fin11a,
    reason_too_expensive = fin11b,
    reason_no_documentation = fin11c,
    reason_no_trust = fin11d,
    reason_religious = fin11e,
    reason_insufficient_funds = fin11f,
    reason_family_member = fin11g,
    reason_no_need = fin11h
  ) |>
  mutate(
    respondent_education_level = respondent_education_level |> case_match(
      c("(rf)", "(dk)") ~ NA,
      "completed primary or less" ~ 0,
      "secondary" ~ 1,
      "completed tertiary or more" ~ 2
    ),
    # Ordered poorest -> richest as 0..4, the same coding the 2021 table gets
    # from `inc_q - 1` and the codebook labels (1 = Second, 2 = Middle,
    # 3 = Fourth). The previous mapping had these three levels permuted.
    household_income_quintile = household_income_quintile |> case_match(
      "Poorest 20%" ~ 0,
      "Second 20%" ~ 1,
      "Middle 20%" ~ 2,
      "Fourth 20%" ~ 3,
      "Richest 20%" ~ 4
    ),
    in_the_workforce = in_the_workforce |> case_match(
      "out of workforce" ~ 0,
      "in workforce" ~ 1
    ),
    # Missing answers are first turned into "0" so that they end up as code 0.
    main_source_emergency_funds = case_when(
      is.na(main_source_emergency_funds) ~ "0",
      TRUE ~ main_source_emergency_funds
    ),
    main_source_emergency_funds = main_source_emergency_funds |> case_match(
      "0" ~ 0,
      "Main source: Family or friends" ~ 2,
      "Main source: Borrowing from a bank, empoyer, or private lender" ~ 4,
      "Main source: Savings" ~ 1,
      "Main source: Money from working" ~ 3,
      "Main source: Selling assets" ~ 5,
      "Main source: Some other source" ~ 6,
      .default = NA
    ),
    across(
      c(received_government_pension_payment, payments_utility_bills, received_government_transfers),
      ~ case_match(.x, "1" ~ 1, "2" ~ 2, "3" ~ 3, "4" ~ 0, .default = NA)
    ),
    # receive government pension payments
    # 1 "into an account" 2 "in cash" 3 "using other methods only" 0 "did not receive payments" NA "(dk)/ref",
    # receive government transfers
    # 1 "into an account" 2 "in cash" 3 "using other methods only" 0 "did not receive payments" NA "(dk)/ref"
    # payments utility bills
    # 1 "into an account" 2 "in cash" 3 "using other methods only" 0 "did not receive payments" NA "(dk)/ref",
    across(
      c(
        has_financial_institution_account, has_mobile_money_account,
        borrowed_in_past_year, borrowed_from_financial_institution,
        owns_mobile_phone, saved_in_past_year, any_deposit_into_account, any_withdrawal_from_account,
        reason_too_far, reason_too_expensive, reason_no_documentation, reason_no_trust, reason_religious,
        reason_insufficient_funds, reason_family_member, reason_no_need, borrowed_from_family_friends
      ),
      ~ case_match(
        .x,
        c("No","no", "0") ~ 0,
        c("Yes","yes") ~ 1,
        .default = NA
      )
    ),
    # First reason flagged with 1, in the order listed, wins.
    reason_no_account = case_when(
      reason_too_far == 1 ~ 0L,
      reason_too_expensive == 1 ~ 1L,
      reason_no_documentation == 1 ~ 2L,
      reason_no_trust == 1 ~ 3L,
      reason_religious == 1 ~ 4L,
      reason_insufficient_funds == 1 ~ 5L,
      reason_family_member == 1 ~ 6L,
      reason_no_need == 1 ~ 7L,
      TRUE ~ NA_integer_
    ),
    # "99+" is replaced by "99" so the integer cast below does not yield NA.
    respondent_age = case_when(respondent_age == "99+" ~ "99", TRUE ~ respondent_age),
    across(-c(weight), ~ as.integer(.x))
  ) |>
  mutate(
    # 0 none, 1 family/friends only, 2 financial institution only, 3 both,
    # 4 borrowed but from neither of the two sources.
    borrowed_in_the_past_year = case_when(
      borrowed_in_past_year == 0 ~ 0,
      borrowed_from_family_friends == 1 & borrowed_from_financial_institution == 0 ~ 1,
      borrowed_from_family_friends == 0 & borrowed_from_financial_institution == 1 ~ 2,
      borrowed_from_financial_institution == 1 & borrowed_from_family_friends == 1 ~ 3,
      borrowed_in_past_year == 1 & borrowed_from_family_friends == 0 & borrowed_from_financial_institution == 0 ~ 4,
      TRUE ~ NA_integer_
    )
  ) |>
  # Drop the helper columns that were folded into the categorical variables.
  select(-c(
    borrowed_in_past_year, borrowed_from_financial_institution, reason_too_far, reason_too_expensive,
    reason_no_documentation, reason_no_trust, reason_religious, reason_insufficient_funds,
    reason_family_member, reason_no_need, borrowed_from_family_friends
  ))


In [None]:
# Stack the two cleaned tables, 2017 rows first
gfi_tbl <- list(gfi_2017_cleaned_tbl, gfi_2021_cleaned_tbl) |>
  bind_rows()

In [None]:
# Append a type suffix to every column name:
#   _b binary (also used for year), _i index, _d double, _o ordinal,
#   _c categorical. all_of() makes a missing column an error.
gfi_renamed_tbl <- gfi_tbl |>
  rename_with(
    \(nm) paste0(nm, "_b"),
    all_of(c(
      "year", "in_the_workforce", "has_account",
      "has_financial_institution_account", "has_mobile_money_account",
      "made_or_received_digital_payment", "received_government_pension",
      "owns_mobile_phone", "saved_in_past_year", "any_deposit_into_account",
      "any_withdrawal_from_account", "used_phone_for_finance"
    ))
  ) |>
  rename_with(\(nm) paste0(nm, "_i"), all_of("id")) |>
  rename_with(\(nm) paste0(nm, "_d"), all_of("weight")) |>
  rename_with(
    \(nm) paste0(nm, "_o"),
    all_of(c(
      "respondent_age", "respondent_education_level",
      "household_income_quintile"
    ))
  ) |>
  rename_with(
    \(nm) paste0(nm, "_c"),
    all_of(c(
      "main_source_emergency_funds", "payments_utility_bills",
      "payments_wage_payments", "received_government_transfers",
      "received_government_pension_payment", "reason_no_account",
      "borrowed_in_the_past_year"
    ))
  )
gfi_renamed_tbl

In [None]:
# Export the combined table to the output folder
write_excel_csv(gfi_renamed_tbl, file.path(paths$output, "gfi.csv"))

# CodeBook

In [None]:
# Codebook: one row per (variable, observed non-missing code) with a readable
# label in `varname`. id, weight and age are left out; a code without a
# matching label gets NA.
codebook_tbl <- gfi_renamed_tbl |>
  select(-c(id_i, weight_d, respondent_age_o)) |>
  pivot_longer(cols = -year_b, names_to = "vars", values_to = "vals") |>
  count(vars, vals) |>
  select(-n) |>
  filter(!is.na(vals)) |>
  mutate(
    # The outer case_when picks the variable family, the inner case_match
    # turns the code into its label (unmatched codes give NA).
    varname = case_when(
      ## 1. binary flags (_b)
      grepl("_b$", vars) ~ case_match(vals, 0 ~ "No", 1 ~ "Yes"),

      ## 2. education level
      vars == "respondent_education_level_o" ~ case_match(
        vals,
        0 ~ "Completed Primary or less",
        1 ~ "Secondary",
        2 ~ "Tertiary or more"
      ),

      ## 3. income quintile
      vars == "household_income_quintile_o" ~ case_match(
        vals,
        0 ~ "Poorest 20%",
        1 ~ "Second 20%",
        2 ~ "Middle 20%",
        3 ~ "Fourth 20%",
        4 ~ "Richest 20%"
      ),

      ## 4. borrowing source
      vars == "borrowed_in_the_past_year_c" ~ case_match(
        vals,
        0 ~ "Didn't borrow in the past year",
        1 ~ "Borrowed through family or friends",
        2 ~ "Borrowed from a financial institution",
        3 ~ "Borrowed from both",
        4 ~ "Borrowed from other methods"
      ),

      ## 5. emergency funds source
      vars == "main_source_emergency_funds_c" ~ case_match(
        vals,
        0 ~ "Couldn't come up with the money",
        1 ~ "Savings",
        2 ~ "Family or friends",
        3 ~ "Working",
        4 ~ "Bank / employer / private lender",
        5 ~ "Selling assets",
        6 ~ "Some other source"
      ),

      ## 6. payment channels (4 vars share same codes)
      vars %in% c(
        "payments_utility_bills_c",
        "payments_wage_payments_c",
        "received_government_transfers_c",
        "received_government_pension_payment_c"
      ) ~ case_match(
        vals,
        0 ~ "Did not receive payments",
        1 ~ "Into an account",
        2 ~ "In cash",
        3 ~ "Using other methods"
      ),

      ## 7. reason for no account
      vars == "reason_no_account_c" ~ case_match(
        vals,
        0 ~ "Too far",
        1 ~ "Too expensive",
        2 ~ "No documentation",
        3 ~ "No trust",
        4 ~ "Religious reasons",
        5 ~ "Insufficient funds",
        6 ~ "Family member has one",
        7 ~ "No need"
      ),

      ## fallback
      TRUE ~ NA_character_
    )
  )
codebook_tbl


In [None]:
# Export the codebook to the output folder
write_excel_csv(codebook_tbl, file.path(paths$output, "codebook.csv"))

# Descriptive Analysis of the Data

In [None]:
# 1. long format: one row per (year, variable, value); id, weight, age excluded
long_df <- pivot_longer(
  select(gfi_renamed_tbl, -c(id_i, weight_d, respondent_age_o)),
  cols = -year_b,
  names_to = "vars",
  values_to = "vals"
)

# 2. frequency of every value and its share within (variable, year);
#    rows with a missing value are counted as well
freq_tbl <- long_df |>
  count(vars, year_b, vals, name = "count") |>
  mutate(percent = count / sum(count) * 100, .by = c(vars, year_b))

# 3. median and mean of the codes per (variable, year), ignoring NA
stats_tbl <- long_df |>
  group_by(vars, year_b) |>
  summarise(
    median = median(vals, na.rm = TRUE),
    mean = mean(vals, na.rm = TRUE),
    .groups = "drop"
  )

# 4. the two smallest distinct years; yr2 is NA if only one year is present
yrs <- long_df$year_b |>
  unique() |>
  sort()
yr1 <- yrs[1]
yr2 <- yrs[2]

# 5. one row per (variable, value) with per-year columns, the change between
#    the two years, and the codebook label up front
result_tbl <- freq_tbl |>
  left_join(stats_tbl, by = c("vars", "year_b")) |>
  pivot_wider(
    names_from = year_b,
    values_from = c(count, percent, median, mean),
    names_glue = "{.value}_{year_b}"
  ) |>
  mutate(
    growth_count =
      .data[[paste0("count_", yr2)]] - .data[[paste0("count_", yr1)]],
    pct_growth_count =
      growth_count / .data[[paste0("count_", yr1)]] * 100,
    pct_growth_avg =
      (.data[[paste0("mean_", yr2)]] - .data[[paste0("mean_", yr1)]]) /
        .data[[paste0("mean_", yr1)]] * 100
  ) |>
  left_join(codebook_tbl, by = c("vars", "vals")) |>
  relocate(vars, varname, vals)

# show the result
result_tbl

In [None]:
# Export the descriptive statistics to the output folder
write_excel_csv(result_tbl, file.path(paths$output, "discription.csv"))

# The Unimportant Section

In [None]:
# Column names of the final table
names(gfi_renamed_tbl)

In [None]:
# Column names of gfi_2017_tbl that remain after dropping the listed columns
gfi_2017_tbl |>
  select(-c(
    year, ID, wgt, age, educ, emp_in, inc_q, fin25, pay_utilities,
    account, account_fin, account_mob, fin22a, borrowed, # anydigpayment_s,receive_wages_s,
    fin38, receive_transfers, receive_pension,
    mobileowner, saved, fin9, fin10,
    fin11a, fin11b, fin11c, fin11d, fin11e, fin11f, fin11g, fin11h,
    economy, economycode, wpid_random, female
  )) |>
  names()

# Possible todo:
- Look into year-specific special variables?