#### Cross Validation Classification Accuracy

In [11]:
import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

**Dataset** <br>
Pregnancies - Number of times pregnant <br>
Glucose - Plasma glucose concentration a 2 hours in an oral glucose tolerance test<br>
BloodPressure - Diastolic blood pressure (mm Hg)<br>
SkinThickness - Triceps skin fold thickness (mm)<br>
Insulin - 2-Hour serum insulin (mu U/ml)<br>
BMI - Body mass index (weight in kg/(height in m)^2)<br>
DiabetesPedigree - FunctionDiabetes pedigree function<br>
Age - Age (years)<br>
Outcome - Class variable (0 or 1)

In [73]:
dataframe = pd.read_csv('Pima_India_Diabetes.csv')#, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]

In [13]:
seed = 7
kfold = model_selection.KFold(n_splits=10, random_state=seed)

In [49]:
myModel = LogisticRegression()
scoring = 'accuracy'
results = model_selection.cross_val_score(myModel, X, Y, cv=kfold, scoring=scoring)
print(("Accuracy: %.3f (SD:%.3f)") % (results.mean()*100, results.std()))

Accuracy: 76.951 (SD:0.048)


#### Confusion Matrix

In [64]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [65]:
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=.3, random_state=seed)
model = LogisticRegression()
model.fit(X_train, Y_train)
predicted = model.predict(X_test)
cm = confusion_matrix(Y_test, predicted)
print(cm)

[[130  17]
 [ 38  46]]


In [66]:
print('Accuracy: %.2f' % (cm.diagonal().sum()*100/cm.sum()))

Accuracy: 76.19


In [67]:
report = classification_report(Y_test, predicted)
print(report)

             precision    recall  f1-score   support

        0.0       0.77      0.88      0.83       147
        1.0       0.73      0.55      0.63        84

avg / total       0.76      0.76      0.75       231



#### AUC

In [70]:
#AUC - Area Under the Curve
scoring = 'roc_auc'
results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print("AUC: %.3f (%.3f)" % (results.mean(), results.std()))

AUC: 0.823 (0.041)
