In the example below we use the handy StratifiedKFold class from the scikit-learn Python
machine learning library to split up the training dataset into 10 folds. The folds are stratified,
meaning that the algorithm attempts to balance the number of instances of each class in each
fold. The example creates and evaluates 10 models using the 10 splits of the data and collects
all of the scores.

In [11]:

from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import StratifiedKFold
import numpy

In [12]:
# Seed NumPy's global random number generator with a fixed value.
seed = 7
numpy.random.seed(seed)

# Read the comma-separated data file into a NumPy array.
dataset = numpy.loadtxt(fname="pima-indians-diabetes.csv", delimiter=",")

In [13]:
# Columns 0-7 are the model inputs; column 8 is the output to predict.
X, Y = dataset[:, :8], dataset[:, 8]

In [14]:
# Stratified 10-fold cross-validation splitter; rows are shuffled using
# the fixed seed before being split into folds.
kfold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=seed,
)

In [15]:
# Accumulates one accuracy score (in percent) per cross-validation fold.
cvscores = list()

In [16]:
# Train and evaluate one model per split produced by kfold, printing each
# fold's accuracy and collecting it (as a percentage) in cvscores.
for train, test in kfold.split(X, Y):
    # Create a fresh model for every fold so that no trained weights carry
    # over from the previous fold.
    # NOTE: `kernel_initializer` is the Keras 2+ name for the Keras 1
    # argument `init`; 'random_uniform' is the documented identifier for
    # the uniform initializer.
    model = Sequential()
    model.add(Dense(12, input_dim=8, kernel_initializer='random_uniform', activation='relu'))  # layer 1 - input
    model.add(Dense(8, kernel_initializer='random_uniform', activation='relu'))  # layer 2
    model.add(Dense(1, kernel_initializer='random_uniform', activation='sigmoid'))  # layer 3 - output

    # Compile the model for binary classification, tracking accuracy.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Fit on this fold's training rows only.
    # NOTE: `epochs` is the Keras 2+ name for the Keras 1 argument `nb_epoch`.
    model.fit(X[train], Y[train], epochs=150, batch_size=10, verbose=0)

    # Evaluate on the held-out rows; with the metrics configured above,
    # scores[0] is the loss and scores[1] is the accuracy.
    scores = model.evaluate(X[test], Y[test], verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)
    


acc: 72.73%
acc: 72.73%
acc: 76.62%
acc: 79.22%
acc: 77.92%
acc: 71.43%
acc: 77.92%
acc: 68.83%
acc: 69.74%
acc: 75.00%


In [17]:
# Summarise the run: mean of the collected fold scores and their
# standard deviation.
mean_score = numpy.mean(cvscores)
score_std = numpy.std(cvscores)
print("%.2f%% (+/- %.2f%%)" % (mean_score, score_std))

74.21% (+/- 3.47%)
