In [1]:
import numpy as np
import statsmodels.api as sm

In [8]:
df = sm.datasets.ccard.load_pandas().data
df.head()

Unnamed: 0,AVGEXP,AGE,INCOME,INCOMESQ,OWNRENT,intercept
0,124.98,38.0,4.52,20.4304,1.0,1
1,9.85,33.0,2.42,5.8564,0.0,1
2,15.00,34.0,4.50,20.2500,1.0,1
3,137.87,31.0,2.54,6.4516,0.0,1
4,546.50,32.0,9.79,95.8441,1.0,1
...,...,...,...,...,...,...
67,68.38,43.0,2.40,5.7600,0.0,1
68,474.15,33.0,6.00,36.0000,1.0,1
69,234.05,25.0,3.60,12.9600,0.0,1
70,451.20,26.0,5.00,25.0000,1.0,1


# Logistic Regression

In [10]:
x = df[['intercept', 'AGE', 'INCOME']]
x = sm.add_constant(x)
x.head()

Unnamed: 0,intercept,AGE,INCOME
0,1,38.0,4.52
1,1,33.0,2.42
2,1,34.0,4.50
3,1,31.0,2.54
4,1,32.0,9.79
...,...,...,...
67,1,43.0,2.40
68,1,33.0,6.00
69,1,25.0,3.60
70,1,26.0,5.00


In [11]:
model = sm.Logit(df.OWNRENT, x)
result = model.fit()
result.summary()

Optimization terminated successfully.
         Current function value: 0.492144
         Iterations 6


0,1,2,3
Dep. Variable:,OWNRENT,No. Observations:,72.0
Model:,Logit,Df Residuals:,69.0
Method:,MLE,Df Model:,2.0
Date:,"Mon, 18 Jul 2022",Pseudo R-squ.:,0.2561
Time:,11:30:56,Log-Likelihood:,-35.434
converged:,True,LL-Null:,-47.633
Covariance Type:,nonrobust,LLR p-value:,5.039e-06

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-6.0978,1.570,-3.885,0.000,-9.174,-3.021
AGE,0.1056,0.046,2.300,0.021,0.016,0.196
INCOME,0.6411,0.246,2.605,0.009,0.159,1.123


Variables whose p-value is less than 0.05 are considered to be statistically significant

# Odds Ratios
None of the confidence intervals for the OR includes one, which indicates that all the variables included in the model are significant

In [16]:
np.exp(0.6411)

1.8985681560639371

In [15]:
conf = result.conf_int() # calculates confidence intervals for parameters
conf['OR'] = result.params # identify the model parameter estimates
conf.columns = ['2.5%', '97.5%', 'OR']
print(np.exp(conf))

               2.5%     97.5%        OR
intercept  0.000104  0.048739  0.002248
AGE        1.015729  1.216077  1.111398
INCOME     1.172104  3.075528  1.898642


`INCOME` multiplies by 1.89 the probability of having `OWNRENT`.
Based on our data, we can expect an increase between 1.01 - 1 = 0.1 (10%) and 1.21 - 1 = 0.21 (21%) in the odds of `OWNRENT`.