In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots
import statsmodels.api as sm
from ISLP import load_data
from ISLP.models import (ModelSpec as MS,
                         summarize)
from ISLP import confusion_table
from ISLP.models import contrast
from sklearn.discriminant_analysis import \
     (LinearDiscriminantAnalysis as LDA,
      QuadraticDiscriminantAnalysis as QDA)
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import seaborn as sns

In [2]:
default = load_data("Default")

In [14]:
raw_X = default[["income","balance"]]
y = default["default"].transform(lambda x : x == "Yes")

In [8]:
design = MS(['income', 'balance'])
X = design.fit_transform(raw_X)

In [16]:
model = sm.Logit(y, X).fit()

Optimization terminated successfully.
         Current function value: 0.078948
         Iterations 10


In [17]:
model.summary()

0,1,2,3
Dep. Variable:,default,No. Observations:,10000.0
Model:,Logit,Df Residuals:,9997.0
Method:,MLE,Df Model:,2.0
Date:,"Tue, 02 Sep 2025",Pseudo R-squ.:,0.4594
Time:,22:18:10,Log-Likelihood:,-789.48
converged:,True,LL-Null:,-1460.3
Covariance Type:,nonrobust,LLR p-value:,4.541e-292

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-11.5405,0.435,-26.544,0.000,-12.393,-10.688
income,2.081e-05,4.99e-06,4.174,0.000,1.1e-05,3.06e-05
balance,0.0056,0.000,24.835,0.000,0.005,0.006


In [41]:
def boot_fn(df, idx):
    bootstraped = df.iloc[idx]
    raw_X = bootstraped[["income","balance"]]
    y = bootstraped["default"].transform(lambda x : x == "Yes")
    design = MS(['income', 'balance'])
    X = design.fit_transform(raw_X)
    model = sm.Logit(y, X).fit()
    return model.params[1:]

In [75]:
def gen_bootstrap(dataset, n=None, seed=42):
    n = len(dataset) if n == None else n
    rng = np.random.default_rng(seed)
    idx = rng.choice(dataset.index, n, replace=True)
    return idx

In [76]:
params = []
for i in range(100):
    idx = gen_bootstrap(default, seed=i)
    try:
        param = boot_fn(default, idx)
        params.append(param)
    except:
        pass

Optimization terminated successfully.
         Current function value: 0.075669
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.078323
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.073743
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.075895
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.083063
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.075904
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.076002
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.074152
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.073475
         Iterations 10
Optimization terminated successfully.
         Current function value: 0.

In [77]:
len(params), np.mean(params, axis=0), np.std(params, axis=0)

(100,
 array([2.11433136e-05, 5.66866101e-03]),
 array([4.85114196e-06, 2.33184818e-04]))

In [82]:
# Pretty similar :
# 4.99e-06 vs 4.85114196e-06
# 0.000 vs 2.33184818e-04