In [132]:
import networkx as nx
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

In [133]:
# Load datasets: one CSV per source, read in the order the names are listed.
(
    africa_visa_openness_2019,
    africa_visa_openness_2023,
    passport_index_2019,
    passport_index_2023,
    henley_passport_index_2025,
    gdp_per_capita_2019_2023,
    country_region_income_group,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        "africa_visa_openness_2019.csv",
        "africa_visa_openness_2023.csv",
        "passport-index-2019.csv",
        "passport-index-2023.csv",
        "henley_passport_index_2025.csv",
        "gdp_per_capita_2019_2023.csv",
        "country_region_income_group.csv",
    )
)

In [134]:
# Rename columns
# Each mapping goes from the header found in the CSV to the snake_case name
# used by the rest of the notebook; year suffixes keep the 2019 and 2023
# measures apart once the tables are merged.
avoi_2019_columns = {
    "Country": "country",
    "ISO3": "iso3",
    "No visa required": "avoi_no_visa_2019",
    "Visa on arrival": "avoi_visa_on_arrival_2019",
    "Visa required": "avoi_visa_required_2019",
    "Score": "avoi_score_2019",
    "Rank": "avoi_rank_2019",
}
avoi_2023_columns = {
    "Country": "country",
    "ISO3": "iso3",
    "No visa required": "avoi_no_visa_2023",
    "Visa on arrival": "avoi_visa_on_arrival_2023",
    "Visa required": "avoi_visa_required_2023",
    "Score": "avoi_score_2023",
    "Rank": "avoi_rank_2023",
}
passport_2019_columns = {
    "Passport": "origin_iso3",
    "Destination": "destination_iso3",
    "Code": "code",
}
passport_2023_columns = {
    "Passport": "origin_iso3",
    "Destination": "destination_iso3",
    "Requirement": "requirement",
}
henley_columns = {
    "Passport": "country",
    "ISO3": "iso3",
    "Visa-free score": "henley_score_2025",
    "Rank": "henley_rank_2025",
}
gdp_columns = {
    "Country Code": "iso3",
    "2019": "gdp_ppc_2019",
    "2020": "gdp_ppc_2020",
    "2021": "gdp_ppc_2021",
    "2022": "gdp_ppc_2022",
    "2023": "gdp_ppc_2023",
}
region_income_columns = {
    "Country Code": "iso3",
    "Region": "region",
    "IncomeGroup": "income_group",
}

africa_visa_openness_2019 = africa_visa_openness_2019.rename(columns=avoi_2019_columns)
africa_visa_openness_2023 = africa_visa_openness_2023.rename(columns=avoi_2023_columns)
passport_index_2019 = passport_index_2019.rename(columns=passport_2019_columns)
passport_index_2023 = passport_index_2023.rename(columns=passport_2023_columns)
henley_passport_index_2025 = henley_passport_index_2025.rename(columns=henley_columns)
gdp_per_capita_2019_2023 = gdp_per_capita_2019_2023.rename(columns=gdp_columns)
country_region_income_group = country_region_income_group.rename(columns=region_income_columns)

In [135]:
# Recode passport index
# The 2019 table carries numeric codes and the 2023 table carries text
# requirements; after this cell both tables have a "code" and a
# "requirement" column.
code_to_requirement_2019 = dict([
    (3, 'visa free'),
    (2, 'visa on arrival'),
    (1, 'e-visa'),
    (0, 'visa required'),
    (-1, 'self-passport'),
])

requirement_to_code_2023 = dict([
    ('visa free', 3),
    ('visa on arrival', 2),
    ('e-visa', 1),
    ('visa required', 0),
    ('covid ban', -2),
    ('no admission', -3),
    ('Hayya Entry Permit', -4),
    ('-1', -1),
])

# Series.map leaves values that are not keys of the mapping as missing;
# the nullable "Int64" dtype keeps the 2023 codes integral despite that.
requirement_labels_2019 = passport_index_2019["code"].map(code_to_requirement_2019)
passport_index_2019["requirement"] = requirement_labels_2019.astype("category")

requirement_codes_2023 = passport_index_2023["requirement"].map(requirement_to_code_2023)
passport_index_2023["code"] = requirement_codes_2023.astype("Int64")

In [136]:
# Build graphs for passport index
# The 2019 and 2023 pipelines are identical, so they share the helpers below
# instead of being copy-pasted per year.

# Katz centrality parameters, identical for both years.
KATZ_ALPHA = 0.005
KATZ_BETA = 1.0
CENTRALITY_MEASURES = ("katz", "pagerank", "betweenness", "closeness")


def _build_access_graph(passport_index):
    """Return a DiGraph with one origin -> destination edge per row whose code is > 0.

    Parameters
    ----------
    passport_index : pandas.DataFrame
        Must provide the columns "origin_iso3", "destination_iso3" and "code".

    Returns
    -------
    networkx.DiGraph
        Nodes are the values found in the two ISO columns of the kept rows.
    """
    # Only include edges where the code is greater than zero
    accessible = passport_index[passport_index["code"] > 0]
    # Vectorised replacement for adding edges row by row with iterrows().
    return nx.from_pandas_edgelist(
        accessible,
        source="origin_iso3",
        target="destination_iso3",
        create_using=nx.DiGraph,
    )


def _compute_centralities(graph):
    """Return a dict measure name -> {node: score} for the four centrality measures."""
    # NOTE(review): edges point origin -> destination, and NetworkX computes
    # Katz centrality and PageRank from in-edges, so those scores reflect how
    # reachable a country is as a destination - confirm this is the intended
    # reading (use graph.reverse() for the passport-holder perspective).
    return {
        "katz": nx.katz_centrality_numpy(graph, alpha=KATZ_ALPHA, beta=KATZ_BETA),
        "pagerank": nx.pagerank(graph),
        "betweenness": nx.betweenness_centrality(graph),
        "closeness": nx.closeness_centrality(graph),
    }


def _centrality_frame(nodes, scores, year):
    """Return one row per node with columns iso3 and passport_index_<measure>_<year>."""
    columns = {"iso3": nodes}
    for measure in CENTRALITY_MEASURES:
        columns[f"passport_index_{measure}_{year}"] = [
            scores[measure].get(node, None) for node in nodes
        ]
    return pd.DataFrame(columns)


graph_2019 = _build_access_graph(passport_index_2019)
graph_2023 = _build_access_graph(passport_index_2023)

# Compute centrality measures
scores_2019 = _compute_centralities(graph_2019)
scores_2023 = _compute_centralities(graph_2023)

# Per-measure names kept so that any cell relying on them keeps working.
katz_2019, pagerank_2019, betweenness_2019, closeness_2019 = (
    scores_2019[measure] for measure in CENTRALITY_MEASURES
)
katz_2023, pagerank_2023, betweenness_2023, closeness_2023 = (
    scores_2023[measure] for measure in CENTRALITY_MEASURES
)

nodes_2019 = list(graph_2019.nodes())
nodes_2023 = list(graph_2023.nodes())

centrality_2019 = _centrality_frame(nodes_2019, scores_2019, 2019)
centrality_2023 = _centrality_frame(nodes_2023, scores_2023, 2023)

In [137]:
# Merge datasets
# Everything is keyed on iso3. The two AVOI tables and the Henley table each
# bring their own "country" column, so the later copies are suffixed
# (_dup1, _dup2) instead of colliding.
merged = africa_visa_openness_2019.merge(
    africa_visa_openness_2023, on="iso3", how="outer", suffixes=("", "_dup1")
)
merged = merged.merge(
    henley_passport_index_2025, on="iso3", how="outer", suffixes=("", "_dup2")
)
for extra_table in (centrality_2019, centrality_2023, gdp_per_capita_2019_2023):
    merged = merged.merge(extra_table, on="iso3", how="outer")
# Left join: region/income group only annotates rows that already exist.
merged = merged.merge(country_region_income_group, on="iso3", how="left")

In [138]:
# Handle duplicate country names
# Fill gaps in "country" from the suffixed copies created by the merge,
# trying country_dup1 first and country_dup2 second.
country_name = merged["country"]
for fallback_column in ("country_dup1", "country_dup2"):
    country_name = country_name.combine_first(merged.get(fallback_column))
merged["country"] = country_name

In [139]:
# Drop duplicate columns
# The suffixed country columns are no longer needed once "country" is filled.
country_dup_columns = [name for name in merged.columns if name.startswith("country_dup")]
merged = merged.drop(columns=country_dup_columns)

In [140]:
# Drop duplicate rows
# Rows identical across every column are collapsed to their first occurrence.
merged = merged.drop_duplicates(keep="first")

In [141]:
# Remove rows with no country, iso3, or region
required_identifier_columns = ["country", "iso3", "region"]
merged_cleaned = merged.dropna(subset=required_identifier_columns, how="any")

In [142]:
# Remove rows with no GDP per capita
# NOTE(review): a gap in any single year from 2019 to 2023 drops the row, even
# though only the 2019 and 2023 columns are kept afterwards - confirm intended.
gdp_year_columns = [
    "gdp_ppc_2019",
    "gdp_ppc_2020",
    "gdp_ppc_2021",
    "gdp_ppc_2022",
    "gdp_ppc_2023",
]
merged_cleaned = merged_cleaned.dropna(subset=gdp_year_columns, how="any")

In [143]:
# Remove GDP columns other than 2019 and 2023
intermediate_gdp_columns = ["gdp_ppc_2020", "gdp_ppc_2021", "gdp_ppc_2022"]
merged_cleaned = merged_cleaned.drop(intermediate_gdp_columns, axis="columns")

In [None]:
# Add log GDP per capita
# Uses NumPy directly: the `pd.np` alias was deprecated in pandas 1.0 and
# removed in pandas 2.0, where it raises AttributeError.
merged_cleaned["log_gdp_ppc_2019"] = np.log(merged_cleaned["gdp_ppc_2019"])
merged_cleaned["log_gdp_ppc_2023"] = np.log(merged_cleaned["gdp_ppc_2023"])

In [None]:
# Adding differences between 2023 and 2019
# For every stem below, "<stem>_change" = "<stem>_2023" - "<stem>_2019".
# The list order fixes the order in which the new columns are appended.
change_stems = [
    "avoi_no_visa",
    "avoi_visa_on_arrival",
    "avoi_visa_required",
    "avoi_score",
    "avoi_rank",
    "gdp_ppc",
    "log_gdp_ppc",
    "passport_index_katz",
    "passport_index_pagerank",
    "passport_index_betweenness",
    "passport_index_closeness",
]
for stem in change_stems:
    merged_cleaned[f"{stem}_change"] = (
        merged_cleaned[f"{stem}_2023"] - merged_cleaned[f"{stem}_2019"]
    )


In [146]:
# Preview the first rows of the cleaned, merged table.
merged_cleaned.head()

Unnamed: 0,country,avoi_no_visa_2019,avoi_visa_on_arrival_2019,avoi_visa_required_2019,avoi_score_2019,avoi_rank_2019,iso3,avoi_no_visa_2023,avoi_visa_on_arrival_2023,avoi_visa_required_2023,avoi_score_2023,avoi_rank_2023,henley_rank_2025,henley_score_2025,passport_index_katz_2019,passport_index_pagerank_2019,passport_index_betweenness_2019,passport_index_closeness_2019,passport_index_katz_2023,passport_index_pagerank_2023,passport_index_betweenness_2023,passport_index_closeness_2023,gdp_ppc_2019,gdp_ppc_2023,region,income_group,avoi_no_visa_change,avoi_visa_on_arrival_change,avoi_visa_required_change,avoi_score_change,avoi_rank_change,gdp_ppc_change,passport_index_katz_change,passport_index_pagerank_change,passport_index_betweenness_change,passport_index_closeness_change
2,Afghanistan,,,,,,AFG,,,,,,105.0,25.0,0.031939,0.000754,0.0,0.0,0.042792,0.000754,0.0,0.0,2927.245144,1992.424394,South Asia,Low income,,,,,,-934.82075,0.010853,0.0,0.0,0.0
4,Angola,10.0,7.0,36.0,0.294,34.0,AGO,10.0,0.0,43.0,0.189,41.0,89.0,49.0,0.055551,0.00326,0.000229,0.607362,0.044244,0.000983,5e-05,0.491315,8274.542562,7244.893053,Sub-Saharan Africa,Lower middle income,0.0,-7.0,7.0,-0.105,7.0,-1029.649509,-0.011307,-0.002276,-0.000179,-0.116047
5,Albania,,,,,,ALB,,,,,,44.0,122.0,0.05972,0.0027,0.000441,0.63871,0.083346,0.010026,0.001777,0.682759,15065.74957,17975.79839,Europe & Central Asia,Upper middle income,,,,,,2910.04882,0.023626,0.007326,0.001336,0.044049
6,Andorra,,,,,,AND,,,,,,16.0,171.0,0.063283,0.003165,0.000669,0.653465,0.050859,0.001058,0.000408,0.378585,63215.89979,64631.29639,Europe & Central Asia,High income,,,,,,1415.3966,-0.012424,-0.002107,-0.000261,-0.27488
8,United Arab Emirates,,,,,,ARE,,,,,,9.0,183.0,0.056267,0.002449,0.005887,0.62069,0.045092,0.001209,0.010504,0.507692,68887.84541,68577.54057,Middle East & North Africa,High income,,,,,,-310.30484,-0.011175,-0.00124,0.004617,-0.112997


In [147]:
# List the column labels available for the regressions below.
merged_cleaned.columns

Index(['country', 'avoi_no_visa_2019', 'avoi_visa_on_arrival_2019',
       'avoi_visa_required_2019', 'avoi_score_2019', 'avoi_rank_2019', 'iso3',
       'avoi_no_visa_2023', 'avoi_visa_on_arrival_2023',
       'avoi_visa_required_2023', 'avoi_score_2023', 'avoi_rank_2023',
       'henley_rank_2025', 'henley_score_2025', 'passport_index_katz_2019',
       'passport_index_pagerank_2019', 'passport_index_betweenness_2019',
       'passport_index_closeness_2019', 'passport_index_katz_2023',
       'passport_index_pagerank_2023', 'passport_index_betweenness_2023',
       'passport_index_closeness_2023', 'gdp_ppc_2019', 'gdp_ppc_2023',
       'region', 'income_group', 'avoi_no_visa_change',
       'avoi_visa_on_arrival_change', 'avoi_visa_required_change',
       'avoi_score_change', 'avoi_rank_change', 'gdp_ppc_change',
       'passport_index_katz_change', 'passport_index_pagerank_change',
       'passport_index_betweenness_change', 'passport_index_closeness_change'],
      dtype='object')

In [148]:
# Summary statistics for the numeric columns; the "count" row shows how many
# non-missing values each measure has.
merged_cleaned.describe()

Unnamed: 0,avoi_no_visa_2019,avoi_visa_on_arrival_2019,avoi_visa_required_2019,avoi_score_2019,avoi_rank_2019,avoi_no_visa_2023,avoi_visa_on_arrival_2023,avoi_visa_required_2023,avoi_score_2023,avoi_rank_2023,henley_rank_2025,henley_score_2025,passport_index_katz_2019,passport_index_pagerank_2019,passport_index_betweenness_2019,passport_index_closeness_2019,passport_index_katz_2023,passport_index_pagerank_2023,passport_index_betweenness_2023,passport_index_closeness_2023,gdp_ppc_2019,gdp_ppc_2023,avoi_no_visa_change,avoi_visa_on_arrival_change,avoi_visa_required_change,avoi_score_change,avoi_rank_change,gdp_ppc_change,passport_index_katz_change,passport_index_pagerank_change,passport_index_betweenness_change,passport_index_closeness_change
count,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0,52.0,52.0,52.0,52.0,52.0,184.0,184.0,184.0,184.0,184.0
mean,13.5,14.403846,24.961538,0.474712,26.326923,15.192308,14.211538,23.596154,0.501173,26.307692,50.163043,110.423913,0.06933,0.005212,0.002446,0.720124,0.067465,0.005185,0.003558,0.552368,24773.420524,25666.785919,1.692308,-0.192308,-1.365385,0.026462,-0.019231,893.365395,-0.001865,-2.7e-05,0.001112,-0.167755
std,10.861878,18.87031,19.184309,0.304847,15.415451,13.244368,18.693961,18.955324,0.308059,15.549464,31.60576,53.61301,0.01958,0.003288,0.003362,0.16849,0.02412,0.004748,0.010194,0.296483,25672.841591,26033.410823,6.238836,12.541755,11.575886,0.178786,8.779411,4092.96325,0.023794,0.004189,0.010211,0.279225
min,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.019,1.0,1.0,25.0,0.031939,0.000754,0.0,0.0,0.042792,0.000754,0.0,0.0,855.73272,828.861074,-5.0,-36.0,-47.0,-0.476,-34.0,-28587.5445,-0.056361,-0.010051,-0.015792,-1.0
25%,6.0,0.0,1.0,0.25625,13.75,6.75,0.0,1.0,0.25175,13.75,18.75,60.75,0.057488,0.002997,0.000664,0.62859,0.045239,0.001036,4.2e-05,0.375712,6039.42114,6082.314148,0.0,-0.25,-1.0,-0.001,-0.25,-91.698706,-0.012929,-0.002131,-0.000941,-0.277754
50%,14.0,2.0,31.5,0.402,26.5,14.0,2.0,29.5,0.4225,26.5,52.5,95.0,0.063283,0.003514,0.000978,0.653465,0.055167,0.002602,0.001129,0.557746,15393.80985,16067.446905,0.0,0.0,0.0,0.0,1.0,349.553402,-0.004099,-0.000699,-0.000133,-0.120508
75%,17.0,32.5,39.25,0.80575,39.25,19.0,26.0,39.25,0.80275,39.25,79.25,165.25,0.088871,0.008705,0.002659,0.848901,0.091909,0.01003,0.003652,0.804878,35532.983287,39317.201222,1.0,0.0,0.25,0.019,3.0,1528.000647,0.008961,0.001826,0.001873,-0.005025
max,53.0,53.0,53.0,1.0,54.0,53.0,53.0,52.0,1.0,54.0,105.0,193.0,0.099153,0.010804,0.017582,1.0,0.108631,0.012932,0.094845,1.0,133549.188,130373.4779,36.0,47.0,32.0,0.71,25.0,35912.7388,0.074349,0.011883,0.086619,0.528143


In [86]:
# Regression 1: GDP per capita 2023 on Henley score 2025
# Bivariate OLS with no controls; the formula is stored once so the printed
# label always matches the fitted model.
formula1 = "gdp_ppc_2023 ~ henley_score_2025"
model1 = smf.ols(formula=formula1, data=merged_cleaned).fit()

print(f"Regression 1: {formula1}")
print(model1.summary())

Regression 1: gdp_ppc_2023 ~ henley_score_2025
                            OLS Regression Results                            
Dep. Variable:           gdp_ppc_2023   R-squared:                       0.510
Model:                            OLS   Adj. R-squared:                  0.508
Method:                 Least Squares   F-statistic:                     189.7
Date:                Tue, 25 Mar 2025   Prob (F-statistic):           4.89e-30
Time:                        13:21:32   Log-Likelihood:                -2065.6
No. Observations:                 184   AIC:                             4135.
Df Residuals:                     182   BIC:                             4142.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------

In [88]:
# Regression 2: Henley score with region as a categorical control
# (income_group is not part of this specification).
formula2 = "gdp_ppc_2023 ~ henley_score_2025 + C(region)"
model2 = smf.ols(formula=formula2, data=merged_cleaned).fit()

print(f"\nRegression 2: {formula2}")
print(model2.summary())


Regression 2: gdp_ppc_2023 ~ henley_score_2025 + C(region)
                            OLS Regression Results                            
Dep. Variable:           gdp_ppc_2023   R-squared:                       0.612
Model:                            OLS   Adj. R-squared:                  0.596
Method:                 Least Squares   F-statistic:                     39.63
Date:                Tue, 25 Mar 2025   Prob (F-statistic):           4.66e-33
Time:                        15:04:39   Log-Likelihood:                -2044.3
No. Observations:                 184   AIC:                             4105.
Df Residuals:                     176   BIC:                             4130.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                                              coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------

In [90]:
# Regression 3: Henley score with income_group as a categorical control
# (region is not part of this specification).
# The comment and printed label previously said "Regression 2", which made
# this output indistinguishable from the region-only model.
model3 = smf.ols("gdp_ppc_2023 ~ henley_score_2025 + C(income_group)", data=merged_cleaned).fit()

print("\nRegression 3: gdp_ppc_2023 ~ henley_score_2025 + C(income_group)")
print(model3.summary())


Regression 2: gdp_ppc_2023 ~ henley_score_2025 + C(income_group)
                            OLS Regression Results                            
Dep. Variable:           gdp_ppc_2023   R-squared:                       0.683
Model:                            OLS   Adj. R-squared:                  0.676
Method:                 Least Squares   F-statistic:                     96.33
Date:                Tue, 25 Mar 2025   Prob (F-statistic):           1.45e-43
Time:                        15:08:29   Log-Likelihood:                -2025.7
No. Observations:                 184   AIC:                             4061.
Df Residuals:                     179   BIC:                             4077.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                                             coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------

In [None]:
# Regression 3 (AVOI): Africa Visa Openness score 2023 with income_group as a
# categorical control. The comment and printed label previously said
# "Regression 2".
# NOTE(review): this rebinds `model3`, replacing whatever an earlier cell
# stored under that name; the name is kept so cells that rely on it keep
# working - consider a dedicated name once callers are checked.
model3 = smf.ols("gdp_ppc_2023 ~ avoi_score_2023 + C(income_group)", data=merged_cleaned).fit()

print("\nRegression 3 (AVOI): gdp_ppc_2023 ~ avoi_score_2023 + C(income_group)")
print(model3.summary())


Regression 2: gdp_ppc_2023 ~ avoi_score_2023
                            OLS Regression Results                            
Dep. Variable:           gdp_ppc_2023   R-squared:                       0.009
Model:                            OLS   Adj. R-squared:                 -0.011
Method:                 Least Squares   F-statistic:                    0.4566
Date:                Tue, 25 Mar 2025   Prob (F-statistic):              0.502
Time:                        16:11:41   Log-Likelihood:                -529.27
No. Observations:                  52   AIC:                             1063.
Df Residuals:                      50   BIC:                             1066.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------

In [91]:
# Regression 4: Henley score with both region and income_group as categorical
# controls. The comment and printed label previously said "Regression 2".
model4 = smf.ols("gdp_ppc_2023 ~ henley_score_2025 + C(region) + C(income_group)", data=merged_cleaned).fit()

print("\nRegression 4: gdp_ppc_2023 ~ henley_score_2025 + C(region) + C(income_group)")
print(model4.summary())


Regression 2: gdp_ppc_2023 ~ henley_score_2025 + C(region) + C(income_group)
                            OLS Regression Results                            
Dep. Variable:           gdp_ppc_2023   R-squared:                       0.712
Model:                            OLS   Adj. R-squared:                  0.696
Method:                 Least Squares   F-statistic:                     42.87
Date:                Tue, 25 Mar 2025   Prob (F-statistic):           1.02e-41
Time:                        15:08:30   Log-Likelihood:                -2016.7
No. Observations:                 184   AIC:                             4055.
Df Residuals:                     173   BIC:                             4091.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                                              coef    std err          t      P>|t|      [0.025      0.975]
------------------------

In [156]:
# Same column labels as a plain Python list, one name per line when displayed.
merged_cleaned.columns.tolist()

['country',
 'avoi_no_visa_2019',
 'avoi_visa_on_arrival_2019',
 'avoi_visa_required_2019',
 'avoi_score_2019',
 'avoi_rank_2019',
 'iso3',
 'avoi_no_visa_2023',
 'avoi_visa_on_arrival_2023',
 'avoi_visa_required_2023',
 'avoi_score_2023',
 'avoi_rank_2023',
 'henley_rank_2025',
 'henley_score_2025',
 'passport_index_katz_2019',
 'passport_index_pagerank_2019',
 'passport_index_betweenness_2019',
 'passport_index_closeness_2019',
 'passport_index_katz_2023',
 'passport_index_pagerank_2023',
 'passport_index_betweenness_2023',
 'passport_index_closeness_2023',
 'gdp_ppc_2019',
 'gdp_ppc_2023',
 'region',
 'income_group',
 'avoi_no_visa_change',
 'avoi_visa_on_arrival_change',
 'avoi_visa_required_change',
 'avoi_score_change',
 'avoi_rank_change',
 'gdp_ppc_change',
 'passport_index_katz_change',
 'passport_index_pagerank_change',
 'passport_index_betweenness_change',
 'passport_index_closeness_change']

In [154]:
# GDP per capita 2023 on 2023 betweenness centrality, with income_group as a
# categorical control; the summary table is the cell's displayed value.
betweenness_fit = smf.ols(
    formula="gdp_ppc_2023 ~ passport_index_betweenness_2023 + C(income_group)",
    data=merged_cleaned,
).fit()
betweenness_fit.summary()

0,1,2,3
Dep. Variable:,gdp_ppc_2023,R-squared:,0.676
Model:,OLS,Adj. R-squared:,0.668
Method:,Least Squares,F-statistic:,93.22
Date:,"Tue, 25 Mar 2025",Prob (F-statistic):,1.0599999999999999e-42
Time:,17:32:49,Log-Likelihood:,-2027.7
No. Observations:,184,AIC:,4065.0
Df Residuals:,179,BIC:,4082.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.466e+04,1978.631,27.625,0.000,5.08e+04,5.86e+04
C(income_group)[T.Low income],-5.253e+04,3797.506,-13.832,0.000,-6e+04,-4.5e+04
C(income_group)[T.Lower middle income],-4.804e+04,2875.917,-16.703,0.000,-5.37e+04,-4.24e+04
C(income_group)[T.Upper middle income],-3.585e+04,2824.401,-12.693,0.000,-4.14e+04,-3.03e+04
passport_index_betweenness_2023,-2.096e+04,1.09e+05,-0.192,0.848,-2.37e+05,1.95e+05

0,1,2,3
Omnibus:,104.781,Durbin-Watson:,2.238
Prob(Omnibus):,0.0,Jarque-Bera (JB):,808.226
Skew:,2.008,Prob(JB):,3.13e-176
Kurtosis:,12.449,Cond. No.,107.0
