# Model Validation Techniques

##### Evaluate using a train/test split

In [1]:
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
# Column labels applied to the CSV on load; 'class' is the last column.
# Presumably the file has no header row of its own -- TODO confirm.
names = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]

# NOTE(review): this is an absolute path at the filesystem root -- confirm the
# data really lives there; a path relative to the notebook is more portable.
filename = '/pima-indians-diabetes.csv'

dataframe = read_csv(filename, names=names)

In [3]:
# Convert the frame to a plain NumPy array, then take the first eight columns
# as the feature matrix X and column index 8 as the target vector Y.
array = dataframe.to_numpy()
X, Y = array[:, :8], array[:, 8]

In [4]:
# Hold out a 0.33 fraction of the rows for testing; the fixed random_state
# keeps the split the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=7,
)

In [5]:
# Fit logistic regression on the training rows (fit() returns the estimator
# itself, so `model` is the fitted classifier), then score it on the
# held-out rows.
model = LogisticRegression(max_iter=400).fit(X_train, Y_train)
result = model.score(X_test, Y_test)

In [6]:
# Display the held-out labels the model was scored against.
# NOTE(review): this shows the entire array; a slice such as Y_test[:10]
# would keep the output short.
Y_test

array([0., 1., 1., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
       0., 0., 1., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 0., 1.,
       1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 0.,
       0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0., 1.,
       1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0.,
       1., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0., 1., 1.,
       0., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
       0., 1., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0.,
       1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1., 0.,
       0., 1., 0., 1., 0.

In [7]:
# Score on the held-out test rows, scaled to a percentage.
result*100

78.74015748031496

##### Evaluate using K-Fold Cross-Validation

In [8]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression

In [9]:
# Five-fold splitter. Shuffling is not requested, so (per scikit-learn's
# default) the folds are consecutive chunks of rows in file order.
kfold = KFold(n_splits=5)

In [10]:
# Fresh, unfitted estimator: cross_val_score fits its own copy on each fold
# and returns one score per fold.
model1 = LogisticRegression(max_iter=400)
result1 = cross_val_score(
    estimator=model1,
    X=X,
    y=Y,
    cv=kfold,
)

In [11]:
# Accuracy on each of the five folds (one entry per fold)
result1

array([0.77272727, 0.72077922, 0.76623377, 0.82352941, 0.77124183])

In [12]:
# Mean accuracy across all folds, as a percentage
result1.mean()*100

77.0902300314065

In [13]:
# Standard deviation of the per-fold accuracies, in percentage points;
# shows how much the score varies from fold to fold.
result1.std()*100

3.2579677643937455

##### Evaluate using Leave-One-Out Cross-Validation

In [14]:
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression

In [15]:
# Leave-one-out: every row is used once as a single-sample test set, so the
# model is refit once per row and result2 holds one score per row.
model2 = LogisticRegression(max_iter=400)
loocv = LeaveOneOut()
result2 = cross_val_score(
    estimator=model2,
    X=X,
    y=Y,
    cv=loocv,
)

In [16]:
# Mean of the per-row leave-one-out scores, as a percentage.
result2.mean()*100

77.60416666666666

In [17]:
# Standard deviation of the leave-one-out scores, in percentage points.
# Each fold tests a single sample, so every score is either 0 or 1; the
# spread is therefore large by construction and is not comparable to the
# k-fold standard deviation.
result2.std()*100

41.68944689773287

In [18]:
# Show the loaded frame for a visual check of its columns and values.
dataframe

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1
