In [72]:
from sklearn import datasets
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import linear_model, svm, neighbors
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [62]:
# Load the Iris dataset through sklearn.datasets; the following cells read
# its `feature_names`, `data` and `target` attributes.
iris = datasets.load_iris()

In [63]:
# Pull the feature matrix, the label vector and the column names out of the
# loaded dataset object.
X, y = iris.data, iris.target
headers = iris.feature_names

# Tabular view of the features only (labels are kept separately in `y`).
# The bare last expression makes the notebook render the frame.
df = pd.DataFrame(data=X, columns=headers)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [57]:
# Hold out 30% of the samples for testing; the fixed seed makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Sanity check: report (features, labels) shapes for the train and test parts.
for part_X, part_y in ((X_train, y_train), (X_test, y_test)):
    print(part_X.shape, part_y.shape)

(105, 4) (105,)
(45, 4) (45,)


In [58]:
# Train a logistic-regression classifier with default settings on the
# training split. fit() hands back the estimator, so creation and training
# are chained into one statement.
lg = linear_model.LogisticRegression().fit(X_train, y_train)

# Predicted class labels for the held-out test samples.
y_pred = lg.predict(X_test)
print(y_pred)

[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]


In [59]:
# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

1.0

In [64]:
# Step 1: set aside 20% of all samples as the final test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Step 2: take 25% of what is left for validation. 0.25 * 0.8 = 0.2 of the
# full data, so the result is a 60/20/20 train/validation/test partition.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42
)

# Report (features, labels) shapes in train, validation, test order.
for part_X, part_y in ((X_train, y_train), (X_val, y_val), (X_test, y_test)):
    print(part_X.shape, part_y.shape)

(90, 4) (90,)
(30, 4) (30,)
(30, 4) (30,)


In [71]:
# Model selection on the validation split: fit three default classifiers on
# the training data, keep the one with the highest validation accuracy, and
# only then score that single model on the untouched test split.
lg = linear_model.LogisticRegression()
svc = svm.SVC()
knn = neighbors.KNeighborsClassifier()
# fit() returns the fitted estimator, so the list holds trained models.
models = [lg.fit(X_train, y_train), svc.fit(X_train, y_train), knn.fit(X_train, y_train)]

best_acc = 0.0
best_model = lg  # fallback if no candidate scores above 0.0
for model in models:
    y_pred = model.predict(X_val)
    acc = accuracy_score(y_val, y_pred)
    print(model, ', accracy: ', acc)
    # Replace the incumbent only when this candidate is strictly better.
    # (The comparison was previously inverted -- `best_acc > acc` -- which can
    # never be true starting from 0.0, so no selection ever took place.)
    if acc > best_acc:
        best_model = model
        best_acc = acc

# Final, one-off evaluation of the selected model on the test split.
y_pred = best_model.predict(X_test)
print(accuracy_score(y_test, y_pred))

LogisticRegression() , accracy:  0.9666666666666667
SVC() , accracy:  0.9333333333333333
KNeighborsClassifier() , accracy:  0.9333333333333333
1.0


In [77]:
# Manual stratified 10-fold cross-validation of a scale-then-classify pipeline.
# Scaling lives inside the pipeline, so the scaler is re-fitted on the
# training indices of every fold.
pipeline = make_pipeline(StandardScaler(), linear_model.LogisticRegression())
strtfdKFold = StratifiedKFold(n_splits=10)
kfold = strtfdKFold.split(X, y)

scores = []
for fold_no, (train_idx, test_idx) in enumerate(kfold, start=1):
    # Refit on this fold's training rows, then score on its held-out rows.
    pipeline.fit(X[train_idx], y[train_idx])
    fold_score = pipeline.score(X[test_idx], y[test_idx])
    scores.append(fold_score)
    # np.bincount gives the per-class sample counts of the training part.
    class_counts = np.bincount(y[train_idx])
    print('Fold: %2d, Training/Test Split Distribution: %s, Accuracy: %.3f' % (fold_no, class_counts, fold_score))

# Mean and spread of the per-fold accuracies.
print('\n\nCross-Validation accuracy: %.3f +/- %.3f' %(np.mean(scores), np.std(scores)))

Fold:  1, Training/Test Split Distribution: [45 45 45], Accuracy: 1.000
Fold:  2, Training/Test Split Distribution: [45 45 45], Accuracy: 0.933
Fold:  3, Training/Test Split Distribution: [45 45 45], Accuracy: 1.000
Fold:  4, Training/Test Split Distribution: [45 45 45], Accuracy: 1.000
Fold:  5, Training/Test Split Distribution: [45 45 45], Accuracy: 0.933
Fold:  6, Training/Test Split Distribution: [45 45 45], Accuracy: 0.933
Fold:  7, Training/Test Split Distribution: [45 45 45], Accuracy: 0.800
Fold:  8, Training/Test Split Distribution: [45 45 45], Accuracy: 1.000
Fold:  9, Training/Test Split Distribution: [45 45 45], Accuracy: 1.000
Fold: 10, Training/Test Split Distribution: [45 45 45], Accuracy: 1.000


Cross-Validation accuracy: 0.960 +/- 0.061


In [78]:
# Stratified 10-fold cross-validation of the scale-then-classify pipeline,
# delegated to cross_val_score instead of a hand-written fold loop.
pipeline = make_pipeline(StandardScaler(), linear_model.LogisticRegression())
strtfdKFold = StratifiedKFold(n_splits=10)
# Pass the splitter explicitly; it was previously built but never used
# because `cv=10` was given instead. For a classifier an integer `cv`
# already resolves to an unshuffled StratifiedKFold, so scores are unchanged.
scores = cross_val_score(pipeline, X, y, cv=strtfdKFold, n_jobs=1)
print('Cross Validation accuracy scores: %s' % scores)
print('Cross Validation accuracy: %.3f +/- %.3f' % (np.mean(scores),np.std(scores)))

Cross Validation accuracy scores: [1.         0.93333333 1.         1.         0.93333333 0.93333333
 0.8        1.         1.         1.        ]
Cross Validation accuracy: 0.960 +/- 0.061
