In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import cross_val_score, LeavePOut, StratifiedKFold
from sklearn.model_selection import ShuffleSplit
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score  
from tabulate import tabulate

In [2]:
iris = datasets.load_iris()
X = iris.data
y = iris.target
print(X.shape, y.shape)

(150, 4) (150,)


## Monte Carlo Cross Validation

In [3]:
clf = LogisticRegression(max_iter = 2000)  
clf.fit(X, y)

In [4]:
# Performing Shuffle Split cross-validation test  
shuffle_split = ShuffleSplit(test_size = 0.3, train_size = 0.5, n_splits = 10)  

In [5]:
# Printing accuracy scores  
print("Shuffle Split Cross Validation Scores are: \n", cross_val_score(clf, X, y, cv = shuffle_split)) 

Shuffle Split Cross Validation Scores are: 
 [0.88888889 0.97777778 0.93333333 0.91111111 0.95555556 0.97777778
 0.97777778 0.93333333 0.97777778 0.97777778]


In [6]:
score = cross_val_score(clf, X, y, cv = shuffle_split)
mean_score =score.mean()
print("Mean Cross Validation score is: ", mean_score)  

Mean Cross Validation score is:  0.9711111111111113


## Leave P Out Cross-Validation

In [7]:
lpo = LeavePOut(p = 2) 

In [8]:
# clf1 = LogisticRegression()  
# Printing accuracy scores  
print("Leave P Out Cross Validation Scores are: \n", cross_val_score(clf, X, y, cv = lpo)) 

Leave P Out Cross Validation Scores are: 
 [1. 1. 1. ... 1. 1. 1.]


In [9]:
scorelpo = cross_val_score(clf, X, y, cv = lpo)

In [10]:
score_lpo = scorelpo.mean()
print("Mean Cross Validation score is: ", score_lpo) 

Mean Cross Validation score is:  0.965413870246085


## Stratified 3-fold Cross Validation

In [11]:
clf2 = LogisticRegression(max_iter = 2000)  

In [12]:
stratified = StratifiedKFold(n_splits = 3)
# Printing accuracy scores  
print("Stratified 3-fold Cross Validation Scores are: \n", cross_val_score(clf2, X, y, cv = stratified)) 

Stratified 3-fold Cross Validation Scores are: 
 [0.98 0.96 0.98]


In [13]:
startified_score = cross_val_score(clf2, X, y, cv = stratified)

In [14]:
startified_score = startified_score.mean()
print("Mean Cross Validation score is: ", startified_score)

Mean Cross Validation score is:  0.9733333333333333


## Hold Out Cross-Validation 

In [15]:
clf3 = LogisticRegression(max_iter = 2000)  
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=41)  

In [16]:
clf3.fit(x_train,y_train)    
y_pred = clf3.predict(x_test)

In [17]:
acc =  accuracy_score(y_pred, y_test)

In [18]:
print("Accuracy score for the training dataset is: ", accuracy_score(clf3.predict(x_train), y_train))  
print("Accuracy score for the testing dataset is: ", accuracy_score(y_pred, y_test))

Accuracy score for the training dataset is:  0.9833333333333333
Accuracy score for the testing dataset is:  0.9333333333333333


## CV-Score Table

In [23]:
mydata = [
    ["Monte Carlo Cross Validation", mean_score],
    ["Leave P Out Cross-Validation", score_lpo],
    ["Stratified 3-fold Cross Validation", startified_score],
      ["Hold Out Cross-Validation", acc]
]
head = ["Cross-Validation Technique", "CV-Score"]
 
# display table
print(tabulate(mydata, headers=head, tablefmt="grid"))

+------------------------------------+------------+
| Cross-Validation Technique         |   CV-Score |
| Monte Carlo Cross Validation       |   0.971111 |
+------------------------------------+------------+
| Leave P Out Cross-Validation       |   0.965414 |
+------------------------------------+------------+
| Stratified 3-fold Cross Validation |   0.973333 |
+------------------------------------+------------+
| Hold Out Cross-Validation          |   0.933333 |
+------------------------------------+------------+


Straitified 3-fold Cross-Validation is better than others because of the following reasons:
<br>
1. In Monte Carlo Cross-Validation it gives a different score everytime we run it.
2. For Leave P Out Cross-Validation it takes longer time to execute than Stratified.
3. For Hold Out Cross-Validation it gives less score than Stratified.
</br>
<p>Also there is an imbalanced class distribution in the dataset Straitified 3-fold is more favoured one.