# Beyond The Basic Model



In [2]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

pd.set_option("mode.copy_on_write", True)

# Load data on infant mortality, gdp per capita, and
# World Bank CPIA public sector transparency, accountability,
# and corruption in the public sector scores
# (1 = low transparency and accountability, 6 = high transparency and accountability).

wdi = pd.read_csv("data/wdi_corruption.csv")

# Check one observation to get a feel for things.
wdi.sample().T

Unnamed: 0,4
country_name,Bolivia
gdp_per_capita_ppp,6444.375115
CPIA_public_sector_rating,3.0
mortality_rate_under5_per_1000,37.5
"Mortality rate, under-5, female (per 1,000 live births)",33.9
"Mortality rate, under-5, male (per 1,000 live births)",40.9
"Population, total",10869730.0
region,Latin America and Caribbean


In [3]:
# Fit model
corruption_model = smf.ols(
    "mortality_rate_under5_per_1000 ~ np.log(gdp_per_capita_ppp) + CPIA_public_sector_rating + region",
    data=wdi,
).fit()

# Get regression result
corruption_model.summary()

0,1,2,3
Dep. Variable:,mortality_rate_under5_per_1000,R-squared:,0.586
Model:,OLS,Adj. R-squared:,0.541
Method:,Least Squares,F-statistic:,13.12
Date:,"Wed, 05 Jun 2024",Prob (F-statistic):,2.11e-10
Time:,13:53:58,Log-Likelihood:,-322.68
No. Observations:,73,AIC:,661.4
Df Residuals:,65,BIC:,679.7
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,169.9397,36.430,4.665,0.000,97.183,242.696
region[T.Europe and Central Asia],-15.9265,12.304,-1.294,0.200,-40.499,8.646
region[T.Latin America and Caribbean],1.9023,9.226,0.206,0.837,-16.523,20.327
region[T.Middle East and North Africa],3.7668,23.057,0.163,0.871,-42.280,49.814
region[T.South Asia],4.9372,9.818,0.503,0.617,-14.671,24.545
region[T.Sub-Saharan Africa],27.8448,7.360,3.783,0.000,13.145,42.544
np.log(gdp_per_capita_ppp),-13.3790,4.547,-2.942,0.005,-22.461,-4.297
CPIA_public_sector_rating,-7.1417,4.387,-1.628,0.108,-15.902,1.619

0,1,2,3
Omnibus:,4.467,Durbin-Watson:,1.617
Prob(Omnibus):,0.107,Jarque-Bera (JB):,4.375
Skew:,0.592,Prob(JB):,0.112
Kurtosis:,2.813,Cond. No.,128.0


In [7]:
model_w_robust_ses = corruption_model.get_robustcov_results(cov_type="HC2")
model_w_robust_ses.summary()

0,1,2,3
Dep. Variable:,mortality_rate_under5_per_1000,R-squared:,0.586
Model:,OLS,Adj. R-squared:,0.541
Method:,Least Squares,F-statistic:,48.92
Date:,"Wed, 05 Jun 2024",Prob (F-statistic):,1.68e-23
Time:,13:55:46,Log-Likelihood:,-322.68
No. Observations:,73,AIC:,661.4
Df Residuals:,65,BIC:,679.7
Df Model:,7,,
Covariance Type:,HC2,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,169.9397,37.846,4.490,0.000,94.357,245.522
region[T.Europe and Central Asia],-15.9265,5.763,-2.764,0.007,-27.436,-4.417
region[T.Latin America and Caribbean],1.9023,6.687,0.284,0.777,-11.453,15.257
region[T.Middle East and North Africa],3.7668,8.304,0.454,0.652,-12.817,20.351
region[T.South Asia],4.9372,9.361,0.527,0.600,-13.759,23.633
region[T.Sub-Saharan Africa],27.8448,7.238,3.847,0.000,13.389,42.300
np.log(gdp_per_capita_ppp),-13.3790,4.550,-2.941,0.005,-22.465,-4.293
CPIA_public_sector_rating,-7.1417,3.966,-1.801,0.076,-15.063,0.779

0,1,2,3
Omnibus:,4.467,Durbin-Watson:,1.617
Prob(Omnibus):,0.107,Jarque-Bera (JB):,4.375
Skew:,0.592,Prob(JB):,0.112
Kurtosis:,2.813,Cond. No.,128.0


In [30]:
corruption_model_reg = smf.ols(
    "mortality_rate_under5_per_1000 ~ np.log(gdp_per_capita_ppp) + CPIA_public_sector_rating + region",
    data=wdi,
).fit_regularized(method="elastic_net")
corruption_model_reg.summary()

NotImplementedError: 

In [26]:
x = model_w_robust_ses.summary(
    xname=[
        "Intercept",
        "Europe and Central Asia",
        "Latin America",
        "Middle East and North Africa",
        "South Asia",
        "SSA",
        "Log GDP per Capita",
        "CPIA Public Sector Score",
    ]
)
print(x.tables[1].as_latex_tabular())

\begin{center}
\begin{tabular}{lcccccc}
\toprule
                                      & \textbf{coef} & \textbf{std err} & \textbf{t} & \textbf{P$> |$t$|$} & \textbf{[0.025} & \textbf{0.975]}  \\
\midrule
\textbf{Intercept}                    &     169.9397  &       37.846     &     4.490  &         0.000        &       94.357    &      245.522     \\
\textbf{Europe and Central Asia}      &     -15.9265  &        5.763     &    -2.764  &         0.007        &      -27.436    &       -4.417     \\
\textbf{Latin America}                &       1.9023  &        6.687     &     0.284  &         0.777        &      -11.453    &       15.257     \\
\textbf{Middle East and North Africa} &       3.7668  &        8.304     &     0.454  &         0.652        &      -12.817    &       20.351     \\
\textbf{South Asia}                   &       4.9372  &        9.361     &     0.527  &         0.600        &      -13.759    &       23.633     \\
\textbf{SSA}                          &      27.8

## Prediction

