In [None]:
"""
A hand-written implementation of the perceptron algorithm; it matters less than logistic regression (LR).
"""
import numpy as np
import pandas as pd
from sklearn import datasets
import math
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import random
from sklearn.metrics import accuracy_score

# Load the breast-cancer dataset through sklearn.datasets.
cancer = datasets.load_breast_cancer()

# Feature table: one column per entry in cancer.feature_names.
df_X = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
# Label table: a single "target" column holding cancer.target.
df_y = pd.DataFrame(data=cancer.target, columns=["target"])

In [None]:
class Perceptron(object):
    """Binary perceptron trained with random single-sample updates.

    The bias is stored as the last entry of the weight vector, so every
    sample is extended with a constant 1 before the dot product is taken.

    Attributes:
        lr: step size applied to each weight update.
        max_iteration: number of random samples drawn by ``train``.
        w: weight vector of length ``feature_num + 1`` (bias last).
    """

    def __init__(self, iteration, feature_num, lr=0.001):
        """Create a perceptron with small random initial weights.

        Args:
            iteration: number of random samples ``train`` will draw.
            feature_num: number of input features (the bias is added on top).
            lr: learning rate; defaults to the previously hard-coded 0.001.
        """
        self.lr = lr
        self.max_iteration = iteration
        # Weights (bias included as the last entry) start as Gaussian noise
        # with mean 0 and standard deviation 0.1.
        self.w = np.random.normal(0, 0.1, feature_num + 1)

    def predict_single(self, x):
        """Classify one sample that already carries the trailing bias input.

        Args:
            x: sequence of length ``len(self.w)``; the last entry is the
                constant bias input.

        Returns:
            1 if ``w . x > 0``, otherwise 0.
        """
        output = np.dot(self.w, np.array(x))
        return int(output > 0)

    def train(self, features, labels):
        """Run ``max_iteration`` perceptron updates on randomly drawn samples.

        Each label is mapped with ``2 * label - 1`` (so 0/1 becomes -1/+1)
        before it is compared with the signed prediction.

        Args:
            features: indexable collection of samples, without the bias input.
            labels: indexable collection of labels aligned with ``features``.

        Raises:
            ValueError: if ``labels`` is empty.
        """
        sample_count = len(labels)
        if sample_count == 0:
            raise ValueError("cannot train a Perceptron on an empty data set")

        # Exactly max_iteration draws; correctly classified draws still count.
        for _ in range(self.max_iteration):
            # Pick one training sample uniformly at random.
            index = random.randint(0, sample_count - 1)
            # The input paired with the bias weight is always 1.
            x = np.append(np.asarray(features[index], dtype=float), 1.0)
            y = 2 * labels[index] - 1
            yhat = 1 if np.dot(self.w, x) > 0 else -1

            if yhat * y > 0:
                # Prediction has the right sign: leave the weights alone.
                continue

            # The perceptron loss cannot be differentiated with respect to w,
            # so the classic update rule is used instead of a gradient step.
            # If yhat = 1 but y = -1, w . x has to shrink: for x[i] > 0 the
            # term lr * (y - yhat) * x[i] is negative, so w[i] and therefore
            # w . x decrease. The case x[i] < 0 and the opposite mistake
            # (yhat = -1, y = 1) follow by the same argument.
            # All weights are updated at once for the misclassified sample.
            self.w += self.lr * (y - yhat) * x

    def predict_batch(self, features):
        """Classify every sample in ``features``.

        Args:
            features: iterable of samples, without the bias input.

        Returns:
            A list with one 0/1 prediction per sample, in input order.
        """
        return [
            self.predict_single(np.append(np.asarray(sample, dtype=float), 1.0))
            for sample in features
        ]

In [None]:
# Seed both random sources the Perceptron class uses (numpy for the initial
# weights, the stdlib `random` module for sample selection) so a fresh
# Restart & Run All prints the same numbers every time.
SEED = 23323
random.seed(SEED)
np.random.seed(SEED)

np_X = df_X.to_numpy()
np_y = df_y["target"].to_numpy()

# Standardize the features with StandardScaler.
# NOTE(review): the scaler is fitted on the full data set before the split, so
# test-set statistics leak into training. Left unchanged because later cells may
# rely on the scaled np_X; consider fitting on train_features only.
scaler = StandardScaler()
np_X = scaler.fit_transform(np_X)

train_features, test_features, train_labels, test_labels = train_test_split(
    np_X, np_y, test_size=0.2, random_state=SEED
)


def _fit_and_report(max_iteration):
    """Train a Perceptron on the train split and print its test-set report.

    Prints the predicted test labels, the learned weights and the test
    accuracy, in that order.

    Args:
        max_iteration: iteration budget passed to the Perceptron constructor.

    Returns:
        Tuple ``(model, predicted_labels)`` for the freshly trained model.
    """
    model = Perceptron(max_iteration, np_X.shape[1])
    model.train(train_features, train_labels)
    predicted = model.predict_batch(test_features)
    print(predicted)
    print(model.w)
    print(accuracy_score(test_labels, predicted))
    return model, predicted


perceptron, predict_labels = _fit_and_report(100)

# The second report now prints predict_labels2; it used to print the first
# model's predictions by mistake.
perceptron2, predict_labels2 = _fit_and_report(1000)