# K-Fold

In [None]:
import numpy as np
from sklearn.model_selection import KFold

# Toy feature matrix: four samples with two features each.
X = np.array([[0, 11],
              [1, 22],
              [2, 33],
              [3, 44]])

# Four folds over four samples, shuffled before splitting.
kf = KFold(n_splits=4, shuffle=True)

# split() yields arrays of row *indices*, not the rows themselves, so the
# indices are used to select the actual training/test rows from X.
for train_idx, test_idx in kf.split(X):
    X_train, X_test = X[train_idx], X[test_idx]
    print(X_train)

# Stratified K-Fold

In [None]:
import numpy as np
from sklearn.model_selection import StratifiedKFold 

# Toy feature matrix (four samples, two features) and one binary label
# per sample.
X = np.array([[0, 11],
              [1, 22],
              [2, 33],
              [3, 44]])
y = np.array([0, 0, 1, 1])

# Two shuffled folds; the labels are passed to split() so that the folds
# are built with respect to y.
skf = StratifiedKFold(n_splits=2, shuffle=True)

# split() yields arrays of row *indices*, not the rows themselves, so the
# indices are used to select the actual training/test rows from X.
for train_idx, test_idx in skf.split(X, y):
    X_train, X_test = X[train_idx], X[test_idx]
    print(X_train)


## Train Test Split

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split 

# Four samples with two features each, plus one binary label per sample.
X = np.array([[0, 11], [1, 22], [2, 33], [3, 44]])
y = np.array([0, 0, 1, 1])

# Hold out half of the samples for testing; the labels are passed as
# `stratify` so the split is made with respect to y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    stratify=y,
)

print(X_train)


# Grid Search

In [None]:
from sklearn import datasets
# Load the iris dataset and keep its feature matrix and labels under the
# names used by the grid-search cell.
iris = datasets.load_iris()
gs_data, gs_target = iris.data, iris.target

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Hyper-parameter grid for the k-nearest-neighbours classifier.
parameters = {
    'p': list(range(1, 6)),
    'n_neighbors': [3, 5, 7, 11, 13, 15, 17],
}

knn = KNeighborsClassifier()
gsc = GridSearchCV(estimator=knn, param_grid=parameters)
gsc.fit(gs_data, gs_target)

# Last expression of the cell: display the mean test score entries.
gsc.cv_results_['mean_test_score']

In [None]:
# Display the parameter settings stored under 'params' in the grid-search
# results.
gsc.cv_results_['params']

# Performance Metrics

In [None]:
# Ground-truth labels and the corresponding predictions used by the
# metric cells below; they disagree only on the last sample.
y_true = [0, 1, 0, 0]
y_pred = [0, 1, 0, 1]

In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the predictions against the true labels.
acc = accuracy_score(y_true=y_true, y_pred=y_pred)
acc

In [None]:
from sklearn.metrics import f1_score
# F1 score of the predictions against the true labels.
f1 = f1_score(y_true=y_true, y_pred=y_pred)
f1

In [None]:
from sklearn.metrics import matthews_corrcoef 
# Matthews correlation coefficient of the predictions against the true
# labels.
mcc = matthews_corrcoef(y_true=y_true, y_pred=y_pred)
mcc

# ROC & PRC Curves

In [None]:
#Loading data from the iris dataset to generate ROC/PRC Curves
from sklearn import svm, datasets
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
iris = datasets.load_iris()
X, y = iris.data, iris.target

# One-vs-rest wrapper around a linear-kernel SVM.
clf = OneVsRestClassifier(
    svm.SVC(kernel='linear', probability=True, random_state=0)
)

# 95% of the samples are held out for testing; the remainder is used
# for fitting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.95, random_state=0)

# Fit on the training split, then score the held-out samples.
clf.fit(X_train, y_train)
y_score = clf.decision_function(X_test)

# Convert the multi-class labels to binary labels: class 1 vs. everything
# else.
y_true_svc = (y_test == 1).astype(int)

#https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html?highlight=roc%20curve

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, precision_recall_curve, average_precision_score, auc
def plotROC(yscore, true, title=None, outfile=None):
    """Plot a receiver operating characteristic (ROC) curve.

    A new matplotlib figure is created; the ROC AUC is shown in the legend.

    Parameters
    ----------
    yscore : 2-D array
        Per-sample scores. Only column 1 (``yscore[:, 1]``) is used, so
        the object must support NumPy-style 2-D indexing.

    true : array-like
        True binary labels, one per row of ``yscore``.

    title : str, optional
        The title of the plot. No title is set when omitted.

    outfile : str, optional
        The destination of the .pdf file generated. Nothing is written
        when omitted.
    """
    # Compute the curve first so that invalid input fails before an empty
    # figure is created.
    fpr, tpr, _ = roc_curve(true, yscore[:, 1])
    roc_auc = auc(fpr, tpr)

    fig = plt.figure()
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    if title:
        plt.title(title)

    plt.plot(fpr, tpr, label='(ROC AUC = %0.2f)' % (roc_auc), linewidth=2)
    plt.legend(loc="lower right")

    if outfile:
        # PdfPages is not imported anywhere else in this file; import it
        # here so that saving works instead of raising NameError.
        from matplotlib.backends.backend_pdf import PdfPages

        # The context manager closes the PDF even if savefig() fails.
        with PdfPages(outfile) as pdfplot:
            pdfplot.savefig(fig)
    

def plotPRC(yscore, true, title=None, outfile=None):
    """Plot a precision-recall curve.

    A new matplotlib figure is created; the average precision is shown in
    the legend.

    Parameters
    ----------
    yscore : 2-D array
        Per-sample scores. Only column 1 (``yscore[:, 1]``) is used, so
        the object must support NumPy-style 2-D indexing.

    true : array-like
        True binary labels, one per row of ``yscore``.

    title : str, optional
        The title of the plot. No title is set when omitted.

    outfile : str, optional
        The destination of the .pdf file generated. Nothing is written
        when omitted.
    """
    # Compute the curve first so that invalid input fails before an empty
    # figure is created.
    positive_scores = yscore[:, 1]
    precision, recall, _ = precision_recall_curve(true, positive_scores)
    prc_auc = average_precision_score(true, positive_scores)

    fig = plt.figure()
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    if title:
        plt.title(title)

    plt.plot(recall, precision,
             label='(Avg. Precision = %0.2f)' % (prc_auc), linewidth=1)
    plt.legend(loc="lower right")

    if outfile:
        # PdfPages is not imported anywhere else in this file; import it
        # here so that saving works instead of raising NameError.
        from matplotlib.backends.backend_pdf import PdfPages

        # The context manager closes the PDF even if savefig() fails.
        with PdfPages(outfile) as pdfplot:
            pdfplot.savefig(fig)

In [None]:
plotROC(y_score, y_true_svc)

In [None]:
plotPRC(y_score, y_true_svc)