In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [7]:
# Synthetic binary-classification problem: 1000 samples with 10 features
# (8 informative, 2 redundant, 0 repeated), generated with a fixed seed.
X, y = make_classification(
    n_samples=1000, n_features=10,
    n_informative=8, n_redundant=2, n_repeated=0,
    n_classes=2, random_state=42,
)

# Hold out 25% of the samples as a test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [8]:
# Baseline: logistic regression with default hyperparameters, fitted on the
# training split (fit() returns the estimator, so the call can be chained).
model_lr = LogisticRegression().fit(X_train, y_train)

# Per-class precision / recall / F1 measured on the held-out test split.
y_pred = model_lr.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.73      0.65      0.69       130
           1       0.66      0.74      0.70       120

    accuracy                           0.70       250
   macro avg       0.70      0.70      0.70       250
weighted avg       0.70      0.70      0.70       250



In [14]:
from sklearn.model_selection import KFold

# Shuffled 5-fold splitter with a fixed seed; the cross-validation cells
# below pass this same `kf` object as `cv=`.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Manual cross-validation: fit a fresh default LogisticRegression on each
# training fold and print its accuracy on the matching validation fold.
# Fold-local names are used on purpose so the loop does not overwrite the
# hold-out split (X_train / X_test / y_train / y_test) or the fitted
# `model_lr` created by the earlier cells.
for fold_train_idx, fold_test_idx in kf.split(X):
    fold_model = LogisticRegression()
    fold_model.fit(X[fold_train_idx], y[fold_train_idx])
    print(fold_model.score(X[fold_test_idx], y[fold_test_idx]))

0.675
0.715
0.72
0.645
0.72


### Evaluate Logistic Regression

In [15]:
from sklearn.model_selection import cross_val_score

# Cross-validated score of a default logistic regression on the `kf` folds
# (default scoring, i.e. the estimator's own `score`), averaged over folds.
scores_lg = cross_val_score(estimator=LogisticRegression(), X=X, y=y, cv=kf)
scores_lg.mean()

0.6950000000000001

### Evaluate Decision Tree

In [16]:
from sklearn.tree import DecisionTreeClassifier

# Cross-validated score of a decision tree on the same `kf` folds.
# The tree is seeded because scikit-learn permutes candidate features at each
# split, so an unseeded DecisionTreeClassifier can give a different score on
# every run; 42 matches the seed used for the data, the split and `kf`.
scores_dt = cross_val_score(DecisionTreeClassifier(random_state=42), X, y, cv=kf)
np.average(scores_dt)

0.784

In [17]:
### Evaluate Random Forest Classifier

In [18]:
from sklearn.ensemble import RandomForestClassifier

# Cross-validated accuracy (default scoring) of a 40-tree random forest on the
# `kf` folds. The forest is seeded: bootstrap sampling and per-split feature
# sub-sampling are random, so without `random_state` the reported mean changes
# from run to run.
scores_rf = cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=42), X, y, cv=kf
)
np.average(scores_rf)

0.893

In [19]:
# Same 40-tree random forest and `kf` folds, scored with ROC AUC instead of
# the default metric. The forest is seeded so the result is reproducible.
# NOTE: this rebinds `scores_rf`; after this cell it holds ROC AUC values.
scores_rf = cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=42),
    X,
    y,
    cv=kf,
    scoring='roc_auc',
)
np.average(scores_rf)

0.9511150756681174

In [20]:
from sklearn.model_selection import cross_validate

# Evaluate accuracy and ROC AUC in a single pass over the `kf` folds; the
# returned dict also carries per-fold fit and score times. The forest is
# seeded so the per-fold metrics are reproducible across runs.
cross_validate(
    RandomForestClassifier(n_estimators=40, random_state=42),
    X,
    y,
    cv=kf,
    scoring=['accuracy', 'roc_auc'],
)

{'fit_time': array([0.26354122, 0.40298629, 0.23116398, 0.29280853, 0.41118717]),
 'score_time': array([0.02406096, 0.01981044, 0.02697015, 0.02301073, 0.04688549]),
 'test_accuracy': array([0.865, 0.91 , 0.87 , 0.88 , 0.89 ]),
 'test_roc_auc': array([0.93145323, 0.94692877, 0.95513205, 0.95888421, 0.95890205])}