In [5]:
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
X, y = make_classification(
    n_samples=1000,        # 100k samples (big data)
    n_features=100,          # 100 features (high dimensional)
    n_informative=10,        # 10 informative features
    n_redundant=10,          # 10 redundant (correlated) features11
    n_clusters_per_class=3,  # multiple clusters per class (complex boundaries)
    weights=[0.9, 0.1],      # class imbalance: 90% class 0, 10% class 1
    flip_y=0.05,             # 5% label noise
    random_state=42
)

(1000, 100)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(X_train,y_train)
y_pred = dt.predict(X_test)
print("accuracy :",accuracy_score(y_pred,y_test))
print("confusion_matrix \n:",confusion_matrix(y_pred,y_test))
print("recall_score \n:",recall_score(y_pred,y_test))
print("precision_score \n:",precision_score(y_pred,y_test))
print("f1_score \n:",f1_score(y_pred,y_test))

accuracy : 0.85
confusion_matrix 
: [[163  13]
 [ 17   7]]
recall_score 
: 0.2916666666666667
precision_score 
: 0.35
f1_score 
: 0.31818181818181823


In [3]:
#bagging with replacement  === bootstrap = true
from sklearn.ensemble import BaggingClassifier
basemodel = BaggingClassifier(estimator=dt,n_estimators=100,bootstrap=True)
basemodel.fit(X_train,y_train)
y_pred = basemodel.predict(X_test)
print("accuracy :",accuracy_score(y_pred,y_test))
print("confusion_matrix \n:",confusion_matrix(y_pred,y_test))
print("recall_score \n:",recall_score(y_pred,y_test))
print("precision_score \n:",precision_score(y_pred,y_test))
print("f1_score \n:",f1_score(y_pred,y_test))

accuracy : 0.91
confusion_matrix 
: [[178  16]
 [  2   4]]
recall_score 
: 0.6666666666666666
precision_score 
: 0.2
f1_score 
: 0.30769230769230765


In [4]:
#bagging with without replacement  === bootstrap = False or bydefault
from sklearn.ensemble import BaggingClassifier
basemodel = BaggingClassifier(estimator=dt,n_estimators=100,bootstrap=False)
basemodel.fit(X_train,y_train)
y_pred = basemodel.predict(X_test)
print("accuracy :",accuracy_score(y_pred,y_test))
print("confusion_matrix \n:",confusion_matrix(y_pred,y_test))
print("recall_score \n:",recall_score(y_pred,y_test))
print("precision_score \n:",precision_score(y_pred,y_test))
print("f1_score \n:",f1_score(y_pred,y_test))

accuracy : 0.875
confusion_matrix 
: [[168  13]
 [ 12   7]]
recall_score 
: 0.3684210526315789
precision_score 
: 0.35
f1_score 
: 0.358974358974359
