In [19]:
# 取資料

from sklearn import datasets
import numpy as np

iris = datasets.load_iris()
X = iris.data[:, [2, 3]] # 只拿最後2維的特徵資料
y = iris.target

X.shape, y.shape


((150, 2), (150,))

In [20]:
# 分資料

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

y_train.shape, y_test.shape


((105,), (45,))

In [32]:
# 訓練(fit)、辨識(pred)

from sklearn.linear_model import Perceptron
#from sklearn.linear_model import SGDClassifier  # Perceptron 的兄弟

ppn= Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train, y_train)
y_pred= ppn.predict(X_test)

ppn, y_pred

(Perceptron(alpha=0.0001, class_weight=None, eta0=0.1, fit_intercept=True,
       n_iter=40, n_jobs=1, penalty=None, random_state=0, shuffle=True,
       verbose=0, warm_start=False),
 array([2, 0, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 0,
        0, 2, 0, 0, 2, 0, 0, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0]))

In [33]:
# 辨識率 (acc)

from sklearn.metrics import accuracy_score
acc= accuracy_score(y_test, y_pred)

acc # 辨識率



0.59999999999999998

# 畫圖、分析、討論

有點複雜，
很有趣的作圖，
值得研究一下。


In [34]:
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import warnings


def versiontuple(v):
    return tuple(map(int, (v.split("."))))


def plot_decision_regions(X, y, classifier, test_idx= None, resolution= 0.02):

    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=cmap(idx),
                    marker=markers[idx], label=cl)

    # highlight test samples
    if test_idx:
        # plot all samples
        if not versiontuple(np.__version__) >= versiontuple('1.9.0'):
            X_test, y_test = X[list(test_idx), :], y[list(test_idx)]
            warnings.warn('Please update to NumPy 1.9.0 or newer')
        else:
            X_test, y_test = X[test_idx, :], y[test_idx]

        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    c='',
                    alpha=1.0,
                    linewidths=1,
                    marker='o',
                    s=55, label='test set')

X_combined= np.vstack((X_train, X_test))
y_combined= np.hstack((y_train, y_test))

plot_decision_regions(X= X_combined, 
                      y= y_combined,
                      classifier= ppn, 
                      test_idx= range(105, 150))

plt.xlabel('petal length')
plt.ylabel('petal width')
plt.legend(loc='upper left')

plt.tight_layout()
# plt.savefig('./figures/iris_perceptron_scikit.png', dpi=300)
plt.show()

# 初步做出來，但辨識效果不佳，簡單改進一下。
## 把特徵值標準化(正規化)

In [112]:
## Standardizing the features: 

    
from sklearn.preprocessing import StandardScaler

sc= StandardScaler()
sc.fit(X_train)

X_train_std= sc.transform(X_train)
X_test_std=  sc.transform(X_test)

ppn2= Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn2.fit(X_train_std, y_train)
y_pred_std= ppn2.predict(X_test_std)
acc_std= accuracy_score(y_test, y_pred_std)

acc_std # 辨識率


0.91111111111111109

In [None]:
#
# 實驗一下改變 這些參數: n_iter=40, eta0=0.1, random_state=0
# 皆對辨識率造成影響！
#
# 知道原理可進一步理解修改參數增加辨識率的方向。
# 尚不知原理時，也可地毯式、有系統的改變可調整的參數，使得辨識率最佳化。
#

# 習題：

把數字資料庫拿來做做看。
看看 0~9 等10個數字初步可辨識到如何的辨識率。

