In [1]:
import numpy as np
import pandas as pd

In [5]:
from sklearn.datasets import make_classification
X,y = make_classification(
    n_features = 10,
    n_samples = 1000,
    n_informative = 4,
    n_repeated = 0,
    n_classes = 2,
    random_state = 42)

In [6]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)

In [7]:
from sklearn.linear_model import LogisticRegression
model_log = LogisticRegression()
model_log.fit(X_train,y_train)
y_pred = model_log.predict(X_test)
from sklearn.metrics import classification_report
report = classification_report(y_test,y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.70      0.72      0.71       124
           1       0.72      0.70      0.71       126

    accuracy                           0.71       250
   macro avg       0.71      0.71      0.71       250
weighted avg       0.71      0.71      0.71       250



## now we are using kfold cross validation

In [9]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=5,shuffle=True,random_state=42)
for train_index,test_index in kf.split(X,y):
    X_train,X_test = X[train_index],X[test_index]
    y_train,y_test = y[train_index],y[test_index]
    print(model_log.score(X_test,y_test))


0.7
0.7
0.735
0.665
0.73


### these above lines of code can be reduced using the sk library cross val score

## these codes the logistic regression with accuracy and roc_auc curve

In [11]:
from sklearn.model_selection import cross_val_score
score_log=cross_val_score(LogisticRegression(),X,y,cv=kf)
print(score_log)
print(np.average(score_log))

[0.695 0.7   0.725 0.66  0.73 ]
0.7020000000000001


In [12]:
from sklearn.model_selection import cross_val_score
score_log=cross_val_score(LogisticRegression(),X,y,cv=kf,scoring='roc_auc')
print(score_log)
print(np.average(score_log))

[0.74359744 0.77287729 0.79201281 0.73829532 0.76378741]
0.7621140530059585


## these codes the decision with accuracy and roc_auc curve

In [27]:
from sklearn.tree import DecisionTreeClassifier
score_dt=cross_val_score(DecisionTreeClassifier(criterion='entropy'),X,y,cv=kf)
print(score_dt)
print(np.average(score_dt))

[0.87  0.87  0.89  0.895 0.88 ]
0.881


In [28]:
from sklearn.tree import DecisionTreeClassifier
score_dt=cross_val_score(DecisionTreeClassifier(criterion='gini'),X,y,cv=kf)
print(score_dt)
print(np.average(score_dt))

[0.88  0.89  0.875 0.83  0.88 ]
0.8710000000000001


In [20]:
from sklearn.tree import DecisionTreeClassifier
score_dt=cross_val_score(DecisionTreeClassifier(),X,y,cv=kf,scoring='roc_auc')
print(score_dt)
print(np.average(score_dt))

[0.87464986 0.86938694 0.87413672 0.83053221 0.89140226]
0.8680215993219157


## these codes the random forest with accuracy and roc_auc curve


In [29]:
from sklearn.ensemble import RandomForestClassifier
score_rand = cross_val_score(RandomForestClassifier(),X,y,cv=kf)
print(score_rand)
print(np.average(score_rand))

[0.905 0.935 0.94  0.9   0.9  ]
0.916


In [30]:
from sklearn.ensemble import RandomForestClassifier
score_rand = cross_val_score(RandomForestClassifier(),X,y,cv=kf,scoring='roc_auc')
print(score_rand)
print(np.average(score_rand))

[0.97113846 0.97479748 0.97402662 0.97338936 0.96636973]
0.9719443295186947


### now we are using cross validate library both are same but here we can pass mutiple score parameters

In [25]:
from sklearn.model_selection import cross_validate
cross_validate(LogisticRegression(),X,y,cv=kf,scoring=['accuracy','roc_auc'])

{'fit_time': array([0.01479936, 0.00999689, 0.01182795, 0.01054239, 0.01042962]),
 'score_time': array([0.00300241, 0.00299954, 0.00299931, 0.00301409, 0.00359154]),
 'test_accuracy': array([0.695, 0.7  , 0.725, 0.66 , 0.73 ]),
 'test_roc_auc': array([0.74359744, 0.77287729, 0.79201281, 0.73829532, 0.76378741])}

In [26]:
cross_validate(DecisionTreeClassifier(),X,y,scoring=['accuracy','roc_auc'])

{'fit_time': array([0.01100016, 0.00998497, 0.00899887, 0.00823808, 0.0093174 ]),
 'score_time': array([0.00299597, 0.00300074, 0.00300694, 0.00300002, 0.00337648]),
 'test_accuracy': array([0.86 , 0.9  , 0.855, 0.835, 0.85 ]),
 'test_roc_auc': array([0.859986, 0.9     , 0.855   , 0.835   , 0.85    ])}

In [31]:
cross_validate(RandomForestClassifier(),X,y,scoring=['accuracy','roc_auc'])

{'fit_time': array([0.43010211, 0.49112725, 0.48757672, 0.44298625, 0.46902585]),
 'score_time': array([0.03205872, 0.03175616, 0.03729963, 0.03298402, 0.03454041]),
 'test_accuracy': array([0.94 , 0.925, 0.915, 0.9  , 0.92 ]),
 'test_roc_auc': array([0.97949795, 0.97045   , 0.9714    , 0.95005   , 0.9587    ])}