In [2]:
# Define a vector of package names to be used in the script
package_names_vec <- c(
  "tidyverse",
  "ggtext",
  "showtext",
  "tidymodels",
  "furrr",
  "readxl",
  "tictoc",
  "glmnet",
  "readxl"
)

# Define a function to install and load required packages
package_prep <- function(names_vec) {
  # Iterate through each package name in the provided vector
  for (name in names_vec) {
    # Check if the package is already installed
    if (!require(name, character.only = TRUE)) {
      # Install the package using renv if not already installed
      install.packages(name)
    }
  }
}

# Pipe the vector of package names into the package_prep function
package_names_vec |> package_prep()





if (grepl("kaggle", getwd())) {
  paths <- list(
    data = file.path("/kaggle", "input", "gfi-iran"),
    output = file.path("/kaggle", "working", "output"),
    temp = file.path("/kaggle", "working", "intermediate-data")
  )
} else {
  paths <- list(
    data = file.path("..", "data"),
    output = file.path("..", "output"),
    temp = file.path("..", "intermediate-data")
  )
  for (i in list.files(paths$data, full.names = TRUE)) {
    unzip(i, exdir = paths$data)
  }
  #file.rename(
  #  paths$data |> file.path("IRN_2017_FINDEX_v02_M_EXCEL\\micro_irn_varlabel.xls"),
  #  paths$data |> file.path("micro_irn_varlabel.xls")
  #)
  file.rename(
    paths$data |> file.path("IRN_2017_FINDEX_v02_M_EXCEL\\micro_irn_varname.xls"),
    paths$data |> file.path("micro_irn_varname.xls")
    )
}



# Define a function to load and process a table
# Parameters:
# - name: Name of the table to be assigned in the global environment
# - path: Path to the file to be read
# - argyear: Year to be added as a column in the resulting table
# - reading_func: Function to read the file (e.g., read_csv, read_excel)
load_tbl <- function(name, path, argyear, reading_func) {
  # Read the file, process it, and store it in a variable 'result'
  result <- file.path(path) |> # Generate the full file path
    reading_func() |> # Read the file using the provided reading function
    mutate(
      year = argyear, # Add a 'year' column with the specified year
      ID = row_number() # Add an 'ID' column with sequential row numbers
    ) |>
    select(where(~ !all(is.na(.x)))) # Select only the columns that are not entirely NA

  # Assign the processed table to the specified name in the global environment
  assign(name, result, envir = .GlobalEnv)
}

# Call the 'load_tbl' function to load and process data for the year 2021
load_tbl(
  "gfi_2021_tbl", # Name of the output table
  file.path(paths$data, "micro_irn.csv"), # File path
  2021, # Year to assign
  read_csv # Reading function
)

# Call the 'load_tbl' function to load and process data for the year 2017 (variable labels)
load_tbl(
  "gfi_2017_tbl", # Name of the output table
  file.path(paths$data, "micro_irn_varname.xls"), # File path
  2017, # Year to assign
  read_excel # Reading function
)


“error 1 in extracting from zip file”
“error 1 in extracting from zip file”
“error 1 in extracting from zip file”
“error 1 in extracting from zip file”


[1mRows: [22m[34m1005[39m [1mColumns: [22m[34m84[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): economy, economycode
[32mdbl[39m (78): wpid_random, wgt, female, age, educ, inc_q, emp_in, account, accou...
[33mlgl[39m  (4): urbanicity_f2f, receive_agriculture, remittances, merchantpay_dig

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


# Load the data and clean

In [55]:
gfi_2021_cleaned_tbl <- gfi_2021_tbl |>
  filter(female == 1) |>
  mutate(
    anydigpayment_s = as.integer(
      account == 1 & (
        account_mob == 1 | # 1
          fin4 == 1 | # 2
          fin8 == 1 | # 3
          (fin31a == 1 | (fin31b == 1 & account_mob == 1)) | # 4
          fin5 == 1 | # 5
          # fin27_1 == 1 | # 6
          (fin14a1 == 1 | fin14a == 1) | # (fin14b == 1 & (fin14c == 1 | fin14c == 3))) | # 7
          # fin29_1 == 1 | # 8
          # (fin43a == 1 | fin43b == 1) | # 9
          (fin34a == 1 | fin34b == 1 | fin34e == 1) | # 10
          ((fin37 == 1 & fin38 != 1) &
            ((fin39a == 1 | fin39e == 1) | fin39b == 1)) | # 11
          ((fin38 == 1) &
            ((fin39a == 1 | fin39e == 1) | fin39b == 1)) | # 12
          (fin14_1 == 1) # | fin8a == 1| fin4a == 1 13
      )
    ),
    pay_utilities_s = case_when(
      fin31a == 1 & !is.na(account_fin) ~ 1L, # from a FI account
      fin31b == 1 & !is.na(account_mob) ~ 1L, # …or from mobile money
      fin31c == 1 ~ 2L, # cash only
      fin30 == 1 ~ 3L, # other (non-cash, non-digital)
      fin30 == 2 ~ 4L, # did not pay
      fin30 %in% c(3, 4) ~ 5L, # DK / refused
      TRUE ~ NA_integer_
    ),
    receive_transfers_s = case_when(
      fin37 == 1 & fin39a == 1 & !is.na(account_fin) ~ 1L, # into FI account
      fin37 == 1 & fin39e == 1 & !is.na(account_fin) ~ 1L, # …or to a card
      fin37 == 1 & fin39b == 1 & !is.na(account_mob) ~ 1L, # …or mobile money
      fin37 == 1 & fin39d == 1 ~ 2L, # cash only
      fin37 == 1 ~ 3L, # other (non-cash, non-digital)
      fin37 == 2 ~ 4L, # did not receive
      fin37 %in% c(3, 4) ~ 5L, # DK / refused
      TRUE ~ NA_integer_
    ),
    receive_pension_s = case_when(
      fin38 == 1 & fin39a == 1 & !is.na(account_fin) ~ 1L, # into FI account
      fin38 == 1 & fin39e == 1 & !is.na(account_fin) ~ 1L, # …or to a card
      fin38 == 1 & fin39b == 1 & !is.na(account_mob) ~ 1L, # …or mobile money
      fin38 == 1 & fin39d == 1 ~ 2L, # cash only
      fin38 == 1 ~ 3L, # other (non-cash, non-digital)
      fin38 == 2 ~ 4L, # did not receive
      fin38 %in% c(3, 4) ~ 5L, # DK / refused
      TRUE ~ NA_integer_
    ),
    receive_wages_s = case_when(
      # 1: received into an account, card, or mobile
      (fin34a == 1 & !is.na(account_fin)) |
        (fin34e == 1 & !is.na(account_fin)) |
        (fin34b == 1 & !is.na(account_mob)) ~ 1,

      # 2: cash only (but only if not already coded 1)
      fin34d == 1 ~ 2,

      # 3: other methods
      fin32 == 1 ~ 3,

      # 4: did not receive payments
      fin32 == 2 ~ 4,

      # 5: don't know / refused
      fin32 %in% c(3, 4) ~ 5,

      # otherwise missing
      TRUE ~ NA_real_
    )
  ) |>
  select(
    year, ID, wgt, age, educ, emp_in, inc_q, fin24, pay_utilities,
    account, account_fin, account_mob, fin22a, borrowed, anydigpayment,
    fin32, fin38, fin30, receive_transfers, receive_pension, receive_pension_s,
    receive_transfers_s, pay_utilities_s, anydigpayment_s, receive_wages_s
  ) |>
  rename(
    id = ID,
    weight = wgt,
    respondent_age = age,
    respondent_education_level = educ,
    household_income_quintile = inc_q,
    in_the_workforce = emp_in,
    main_source_emergency_funds = fin24,
    payments_utility_bills = pay_utilities,
    payments_utility_bills_s = pay_utilities_s,
    has_account = account,
    has_financial_institution_account = account_fin,
    has_mobile_money_account = account_mob,
    borrowed_in_past_year = borrowed,
    borrowed_from_financial_institution = fin22a,
    made_or_received_digital_payment = anydigpayment,
    made_or_received_digital_payment_s = anydigpayment_s,
    payments_wage_payments = receive_wages_s,
    received_government_pension = fin38,
    paid_utility_bill = fin30,
    received_government_transfers = receive_transfers,
    received_government_transfers_s = receive_transfers_s,
    received_government_pension_payment = receive_pension,
    received_government_pension_payment_s = receive_pension_s
  ) |>
  mutate(
    respondent_education_level = respondent_education_level - 1,
    household_income_quintile = household_income_quintile - 1,
    main_source_emergency_funds = main_source_emergency_funds |> case_match(
      2 ~ 0,
      4 ~ 1,
      1 ~ 2,
      3 ~ 3,
      5 ~ 4,
      .default = NA
    ),
    across(
      c(
        payments_utility_bills, payments_utility_bills_s,payments_wage_payments,
        received_government_transfers, received_government_transfers_s,
        received_government_pension_payment, received_government_pension_payment_s
      ),
      ~ case_match(
        .x,
        1 ~ 1,
        2 ~ 2,
        3 ~ 3,
        4 ~ 0,
        .default = NA
      )
    ),
    across(
      c(
        received_government_pension, paid_utility_bill,
        borrowed_from_financial_institution, in_the_workforce
      ),
      ~ case_match(.x, 1 ~ 1, 2 ~ 0)
    )
  )




In [61]:

gfi_2017_cleaned_tbl <- gfi_2017_tbl |>
  filter(female == "Female") |>
  mutate(
    account_mob_s = account_mob,
    across(
      c(
        fin4, fin8, fin5, account, fin31a, fin31b,
        fin14a, fin34a, fin34b, fin34c2, fin37, fin38,
        fin39a, fin39b, fin39c2,fin34a,fin34c2,fin34b,account_mob_s,fin34c1,fin32
      ),
      ~ case_match(.x, "yes" ~ 1, c("no", "0") ~ 0, .default = NA)
    ),
    anydigpayment_s = as.integer(
      account == 1 & (
        # Card and online account usage
        fin4 == 1 | # used debit card in past 12 months :contentReference[oaicite:0]{index=0}
          fin8 == 1 | # used credit card in past 12 months :contentReference[oaicite:1]{index=1}
          fin5 == 1 | # accessed FI account via mobile/internet :contentReference[oaicite:2]{index=2}

          # Utility‐bill payments
          fin31a == 1 | # paid utility by account :contentReference[oaicite:3]{index=3}
          fin31b == 1 | # paid utility by mobile phone :contentReference[oaicite:4]{index=4}

          # Internet bill payments
          fin14a == 1 | # made bill payments online :contentReference[oaicite:5]{index=5}

          # Wage‐payment channels
          fin34a == 1 | # wages into an account :contentReference[oaicite:6]{index=6}
          fin34b == 1 | # wages via mobile phone :contentReference[oaicite:7]{index=7}
          fin34c2 == 1 | # wages to a card :contentReference[oaicite:8]{index=8}

          # Government transfers (non-pension) via digital channels
          (fin37 == 1 & fin38 != 1) &
            (fin39a == 1 | fin39b == 1 | fin39c2 == 1) | # government transfer (not pension) :contentReference[oaicite:9]{index=9}

          # Government pensions via digital channels
          (fin38 == 1) &
            (fin39a == 1 | fin39b == 1 | fin39c2 == 1) # government pension :contentReference[oaicite:10]{index=10}
      )
    ),
    receive_wages_s = case_when(
      # 1: received into an account
      fin34a == 1 & !is.na(account_fin) ~ '1',
      # also 1: received onto a card
      fin34c2 == 1 & !is.na(account_fin) ~ '1',
      # also 1: received via mobile
      fin34b == 1 & !is.na(account_mob_s) ~ '1',
      # 2: cash only (only reached if none of the above matched)
      fin34c1 == 1 ~ '2',
      # 3: other methods only
      fin32 == 1 ~ '3',
      # 4: did not receive payments
      fin32 == 0 ~ '4',
      # 5: don't know / refused
      is.na(fin32) ~ '5',
      # else: missing
      TRUE ~ NA_character_
    ),
  ) |>
  select(
    year, ID, wgt, age, educ, emp_in, inc_q, fin25, pay_utilities,
    account, account_fin, account_mob, fin22a, borrowed, anydigpayment_s,
    receive_wages_s, fin38, fin30, receive_transfers, receive_pension
  ) |>
  rename(
    id = ID,
    weight = wgt,
    respondent_age = age,
    respondent_education_level = educ,
    household_income_quintile = inc_q,
    in_the_workforce = emp_in,
    main_source_emergency_funds = fin25,
    payments_utility_bills = pay_utilities,
    has_account = account,
    has_financial_institution_account = account_fin,
    has_mobile_money_account = account_mob,
    borrowed_in_past_year = borrowed,
    borrowed_from_financial_institution = fin22a,
    payments_wage_payments = receive_wages_s,
    received_government_pension = fin38,
    paid_utility_bill = fin30,
    received_government_transfers = receive_transfers,
    received_government_pension_payment = receive_pension,
    made_or_received_digital_payment_s = anydigpayment_s
  ) |>
  mutate(
    respondent_education_level = respondent_education_level |> case_match(
      c("(rf)", "(dk)") ~ NA,
      "completed primary or less" ~ 0,
      "secondary" ~ 1,
      "completed tertiary or more" ~ 2
    ),
    household_income_quintile = household_income_quintile |> case_match(
      "Poorest 20%" ~ 0,
      "Middle 20%" ~ 1,
      "Fourth 20%" ~ 2,
      "Second 20%" ~ 3,
      "Richest 20%" ~ 4
    ),
    in_the_workforce = in_the_workforce |> case_match(
      "out of workforce" ~ 0,
      "in workforce" ~ 1
    ),
    main_source_emergency_funds = main_source_emergency_funds |> case_match(
      "Main source: Family or friends" ~ 0,
      "Main source: Borrowing from a bank, empoyer, or private lender" ~ 1,
      "Main source: Savings" ~ 2,
      "Main source: Money from working" ~ 3,
      "Main source: Selling assets" ~ 4,
      .default = NA
    ),
    across(
      c(received_government_pension_payment, payments_utility_bills, received_government_transfers,payments_wage_payments),
      ~ case_match(.x, "1" ~ 1, "2" ~ 2, "3" ~ 3, "4" ~ 0, .default = NA)
    ),
    # recieve government pension payments
    # 1 "into an account" 2 "in cash" 3 "using other methods only" 0 "did not receive payments" NA "(dk)/ref",
    # recieve government transfers
    # 1 "into an account" 2 "in cash" 3 "using other methods only" 0 "did not receive payments" NA "(dk)/ref"
    # payments utility bills
    # 1 "into an account" 2 "in cash" 3 "using other methods only" 0 "did not receive payments" NA "(dk)/ref",
    across(
      c(
        paid_utility_bill, has_financial_institution_account, has_mobile_money_account,
        borrowed_in_past_year, borrowed_from_financial_institution
      ),
      ~ case_match(
        .x,
        c("no", "0") ~ 0,
        "yes" ~ 1,
        .default = NA
      )
    ),
    respondent_age = case_when(respondent_age == "99+" ~ "99", TRUE ~ respondent_age),
    across(-c(weight), ~ as.integer(.x))
  )



In [62]:


gfi_tbl <- bind_rows(gfi_2017_cleaned_tbl, gfi_2021_cleaned_tbl)


In [68]:
gfi_tbl |> map(~unique(.x))

# Models

## Paremtric without Bootsrtap

In [None]:
weighted_sumw <- combined_tbl |>
  mutate(
    Within_economy_household_income_quintile = factor(
      Within_economy_household_income_quintile,
      levels = c("Poorest 20%", "Second 20%", "Middle 20%", "Fourth 20%", "Richest 20%"),
      ordered = TRUE
    ),
    Respondent_education_level = factor(
      Respondent_education_level,
      levels = c("completed primary or less", "secondary", "completed tertiary or more"),
      ordered = TRUE
    )
  )
workflow() |>
  add_model(
    logistic_reg() |> set_engine("glm") |> set_mode("classification")
  ) |>
  add_recipe(
    recipe(
      Respondent_is_in_the_workforce ~
        Respondent_is_female +
        Respondent_age +
        Has_an_account_at_a_financial_institution +
        Has_a_mobile_money_account +
        Within_economy_household_income_quintile +
        year +
        Respondent_education_level,
      data = weighted_sumw,
      weights = weight
    ) |>
      step_dummy(c(
        year, Respondent_is_female, Has_an_account_at_a_financial_institution, Has_a_mobile_money_account
        )) |>
      step_ordinalscore(c(Within_economy_household_income_quintile, Respondent_education_level)) |>
      step_interact(~ .:starts_with("year"))
  ) |>
  fit(data = weighted_sumw) |>
  extract_fit_parsnip() |>
  tidy()


term,estimate,std.error,statistic,p.value
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
(Intercept),-0.630387769,0.05791085,-10.88549,1.351747e-27
Respondent_age,-0.007826444,0.0006264152,-12.49402,8.048189e-36
Within_economy_household_income_quintile,-0.030123903,0.0068091,-4.424065,9.686082e-06
Respondent_education_level,0.55985779,0.01479642,37.83738,0.0
year_X2021,-18.93568075,274.4874,-0.06898561,0.9450011
Respondent_is_female_Female,-1.696339211,0.01908269,-88.89412,0.0
Has_an_account_at_a_financial_institution_Yes,0.749857829,0.04148945,18.07346,5.1576520000000003e-73
Has_a_mobile_money_account_Yes,0.461204722,0.02173457,21.21987,6.259682999999999e-100
Respondent_age_x_year_X2021,0.007826444,3.088155,0.002534343,0.9979779
Within_economy_household_income_quintile_x_year_X2021,0.030123903,35.13876,0.0008572843,0.999316
