# load data

In [13]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
X_iris, y_iris = iris.data, iris.target
X, y = X_iris[:, :2], y_iris
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)

# preprocess data

In [15]:
from sklearn import preprocessing

scalar = preprocessing.StandardScaler()
X_train = scalar.fit_transform(X_train)
X_test = scalar.fit_transform(X_test)

# learn data

In [18]:
from sklearn.linear_model import SGDClassifier

clf = SGDClassifier()
clf.fit(X_train, y_train)

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)

# predict data

In [24]:
from sklearn import metrics

# test in train_set
y_train_predict = clf.predict(X_train)
print(metrics.classification_report(y_train, y_train_predict, target_names=iris.target_names))

# test in test_set
y_test_predict = clf.predict(X_test)
print(metrics.classification_report(y_test, y_test_predict, target_names=iris.target_names))

              precision    recall  f1-score   support

      setosa       1.00      0.98      0.99        42
  versicolor       0.80      0.21      0.33        39
   virginica       0.49      0.97      0.65        31

    accuracy                           0.71       112
   macro avg       0.76      0.72      0.66       112
weighted avg       0.79      0.71      0.66       112

              precision    recall  f1-score   support

      setosa       0.89      1.00      0.94         8
  versicolor       0.75      0.27      0.40        11
   virginica       0.68      0.89      0.77        19

    accuracy                           0.74        38
   macro avg       0.77      0.72      0.70        38
weighted avg       0.74      0.74      0.70        38



# KFold cross validation score (*)

In [32]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

clf = Pipeline([('scaler', StandardScaler()), ('sgd_classifier', SGDClassifier())])
scores = cross_val_score(clf, X, y, cv=5)
print(scores)
print(np.mean(scores), np.std(scores))

[0.73333333 0.66666667 0.76666667 0.5        0.8       ]
0.6933333333333334 0.10624918300339486
