In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
from sklearn.metrics import confusion_matrix

In [2]:
# Toy patient data, one tuple per patient: (age, gender, smoker, disease).
# `disease` is the binary outcome (1 = has disease, 0 = does not).
patient_records = [
    (22, 'female', 'non-smoker', 1),
    (25, 'female', 'smoker', 1),
    (18, 'male', 'smoker', 0),
    (45, 'male', 'non-smoker', 0),
    (12, 'female', 'smoker', 0),
    (43, 'male', 'smoker', 1),
    (23, 'male', 'smoker', 0),
    (33, 'male', 'smoker', 1),
]
data = pd.DataFrame(
    patient_records,
    columns=['age', 'gender', 'smoker', 'disease'],
)

In [3]:
# Encode the two categorical columns as 0/1 indicator variables.
# Reference levels (coded 0) are 'female' and 'non-smoker'.
data['gender_male'] = data['gender'].eq('male').astype(int)
data['smoker_yes'] = data['smoker'].eq('smoker').astype(int)

In [4]:
# Design matrix: age plus the two indicator columns; response: disease flag.
feature_columns = ['age', 'gender_male', 'smoker_yes']
target_column = 'disease'

X = data[feature_columns]
y = data[target_column]

In [5]:
# statsmodels does not add an intercept on its own, so prepend a 'const' column.
# The arguments below are add_constant's defaults, written out explicitly;
# has_constant='skip' makes re-running this cell a no-op.
X = sm.add_constant(X, prepend=True, has_constant='skip')

In [6]:
# Cast predictors and response to float64 before handing them to the model.
X = X.astype(np.float64)
y = y.astype(np.float64)

In [7]:
# Binary logistic regression of disease on the design matrix, fit by maximum
# likelihood. disp=False suppresses the optimizer's convergence printout.
model = sm.Logit(endog=y, exog=X)
result = model.fit(disp=False)

In [8]:
# Plain-text coefficient table and fit statistics for the fitted model.
print(result.summary().as_text())

                           Logit Regression Results                           
Dep. Variable:                disease   No. Observations:                    8
Model:                          Logit   Df Residuals:                        4
Method:                           MLE   Df Model:                            3
Date:                Wed, 10 Sep 2025   Pseudo R-squ.:                  0.4218
Time:                        05:36:53   Log-Likelihood:                -3.2061
converged:                       True   LL-Null:                       -5.5452
Covariance Type:            nonrobust   LLR p-value:                    0.1969
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const          -7.1622      6.833     -1.048      0.295     -20.554       6.229
age             0.3188      0.291      1.095      0.274      -0.252       0.889
gender_male    -7.0319      6.564     -1.071    

In [9]:
# Coefficients are on the log-odds scale; exponentiate to get odds ratios.
params = result.params
odds_ratios = np.exp(params)

# 95% confidence bounds on the coefficients (alpha=0.05 is the default),
# exponentiated onto the same odds-ratio scale.
conf = result.conf_int(alpha=0.05)
conf_odds = np.exp(conf)

print("\nOdds Ratios:\n", odds_ratios)
print("\n95% CI for Odds Ratios:\n", conf_odds)


Odds Ratios:
 const           0.000775
age             1.375409
gender_male     0.000883
smoker_yes     34.414345
dtype: float64

95% CI for Odds Ratios:
                         0             1
const        1.184718e-09    507.435846
age          7.773859e-01      2.433475
gender_male  2.283408e-09    341.635124
smoker_yes   2.528397e-02  46841.826989


In [10]:
# In-sample fitted probabilities of disease for each row of the design matrix.
data['pred_prob'] = result.predict(X)
# Classify as diseased (1) when the fitted probability is at least 0.5.
data['pred_class'] = data['pred_prob'].ge(0.5).astype(int)

In [11]:
# Show the original inputs next to the fitted probability and predicted class.
report_columns = ['age', 'gender', 'smoker', 'disease', 'pred_prob', 'pred_class']
print("\nPredictions:")
print(data[report_columns])


Predictions:
   age  gender      smoker  disease  pred_prob  pred_class
0   22  female  non-smoker        1   0.462651           0
1   25  female      smoker        1   0.987195           1
2   18    male      smoker        0   0.007260           0
3   45    male  non-smoker        0   0.537349           1
4   12  female      smoker        0   0.550153           1
5   43    male      smoker        1   0.954810           1
6   23    male      smoker        0   0.034744           0
7   33    male      smoker        1   0.465837           0


In [12]:
# In-sample evaluation: rows of the matrix are true classes, columns are
# predicted classes. Accuracy is the share of rows where the two agree.
cm = confusion_matrix(y_true=y, y_pred=data['pred_class'])
accuracy = data['pred_class'].eq(y).mean()

print("\nConfusion Matrix:\n", cm)
print(f"\nAccuracy: {accuracy:.2f}")


Confusion Matrix:
 [[2 2]
 [2 2]]

Accuracy: 0.50
