## K-fold cross validation

In [106]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold

In [90]:
# Load the dataset from a CSV file in the notebook's working directory.
df = pd.read_csv("diabetes.csv")

In [91]:
# Only the last expression of a cell is rendered automatically, so the
# preview has to be displayed explicitly; a bare `df.head()` in the middle of
# the cell is evaluated and then silently thrown away.
display(df.head())

# Number of missing values in each column.
df.isnull().sum()

glucose          0
bloodpressure    0
diabetes         0
dtype: int64

In [92]:
# Target: the 'diabetes' column. Features: every other column.
y = df['diabetes']
x = df.drop(columns='diabetes')

In [93]:
# Classifier that is fitted and scored on each fold below.
model = GaussianNB()

In [94]:
# 5 folds, taken in file order (shuffle=False is the default, spelled out here
# so it is obvious the rows are not shuffled before splitting).
kf = KFold(n_splits=5, shuffle=False)

In [95]:
# One independent list per metric; each receives one value per fold.
accuracies, precisions, recalls, fscores = [], [], [], []

In [96]:
# Fit and score the model once per fold, collecting one value per metric.
for train_index, test_index in kf.split(x):
    # kf.split yields integer row positions, so both the features and the
    # target must be sliced positionally with .iloc. Plain y[train_index] is a
    # label-based lookup: it only works when the index happens to be 0..n-1 and
    # otherwise picks the wrong rows or raises a KeyError.
    x_train, x_test = x.iloc[train_index, :], x.iloc[test_index, :]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model on this fold's training rows.
    model.fit(x_train, y_train)

    # Predict labels for the held-out rows.
    y_pred = model.predict(x_test)

    # Score the predictions against the held-out labels.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    fscore = f1_score(y_test, y_pred)

    # Append this fold's results to the per-metric lists.
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    fscores.append(fscore)


In [100]:
# Header line followed by the per-fold accuracy values.
print("ACCURACY FOR 5 FOLDS", accuracies, sep="\n")

ACCURACY FOR 5 FOLDS
[0.9597989949748744, 0.964824120603015, 0.914572864321608, 0.8994974874371859, 0.9246231155778895]


In [104]:
# Header line followed by the per-fold precision values.
print("PRECISION FOR 5 FOLDS", precisions, sep="\n")

PRECISION FOR 5 FOLDS
[0.946236559139785, 0.9553571428571429, 0.9183673469387755, 0.9326923076923077, 0.8936170212765957]


In [102]:
# Header line followed by the per-fold recall values.
print("RECALL FOR 5 FOLDS", recalls, sep="\n")

RECALL FOR 5 FOLDS
[0.967032967032967, 0.981651376146789, 0.9090909090909091, 0.8818181818181818, 0.9438202247191011]


In [105]:
# Header line followed by the per-fold F1 values.
print("F-SCORE FOR 5 FOLDS", fscores, sep="\n")

F-SCORE FOR 5 FOLDS
[0.9565217391304348, 0.9683257918552036, 0.9137055837563451, 0.9065420560747663, 0.9180327868852459]
