In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, scale
from sklearn.utils import shuffle

# Read the feature matrix and the label column from CSV and convert both to NumPy arrays.
x_data, y_label = (
    pd.read_csv(csv_path).to_numpy()
    for csv_path in ('data/X_data.csv', 'data/y_label.csv')
)

# Display both array shapes as the cell output.
x_data.shape, y_label.shape

((4999, 400), (4999, 1))

In [16]:
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)``, which always lies in ``[0, 1]``,
    so large-magnitude inputs cannot trigger an overflow warning the way a
    direct ``np.exp(-z)`` does for very negative ``z``.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Sigmoid of ``z`` with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + e^-z).  For z < 0 the algebraically equal form
    # e^z / (1 + e^z) is used so the exponent is never positive.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays untouched, so scalar inputs still give scalar outputs.
    return result[()]


class NN:
    """Fully connected network with one sigmoid hidden layer and a sigmoid output layer.

    Training is full-batch gradient descent on the summed squared error
    ``0.5 * sum((y - output) ** 2)``. Each layer has a single scalar bias that
    is shared by all of its units.

    Parameters
    ----------
    x : ndarray of shape (samples, features)
        Training inputs.
    y : ndarray of shape (samples, classes)
        Training targets, one row per sample (one-hot rows are expected by
        ``score``, which compares row-wise argmax values).
    classes : int
        Number of output units.
    hidden_features : int
        Number of hidden units.
    """

    def __init__(self, x, y, classes, hidden_features):
        self.x = x
        self.y = y
        self.classifications = classes
        self.samples, self.features = x.shape
        self.hidden_features = hidden_features

        # Weights are drawn uniformly from [0, 1) using NumPy's global generator;
        # weights1 is drawn before weights2 so seeded runs stay reproducible.
        self.weights1 = np.random.rand(self.features, self.hidden_features)
        self.weights2 = np.random.rand(self.hidden_features, self.classifications)
        self.bias1 = 1.0
        self.bias2 = 1.0

        # Activations cached by the most recent forward pass.
        self.net_hidden = None
        self.sigmoid_hidden = None
        self.net_out = None
        self.sigmoid_out = None

    def fit(self, epochs=10000, eta=1e-2):
        """Run ``epochs`` full-batch gradient-descent steps with learning rate ``eta``.

        Every 10th epoch prints the epoch number, the training squared error and
        the training accuracy; the best accuracy seen is printed at the end.
        """
        print("--------------训练开始--------------")
        max_score = 0
        for epoch in range(1, epochs + 1):
            self.x, self.y = shuffle(self.x, self.y)
            self._forward_propagation()
            self._back_propagation(eta)

            s = self.score()
            if s > max_score:
                max_score = s

            if epoch % 10 == 0:
                print(epoch, self._calculate_square_error(), s)
        print("最好的准确率:", max_score)

    def predict(self, x_test):
        """Return one-hot predictions for ``x_test``.

        The result has one row per row of ``x_test`` (not per training sample),
        with a 1 in the column of the largest output activation.
        """
        self._forward_propagation(x_test)
        n_rows = self.sigmoid_out.shape[0]
        one_hot = np.zeros((n_rows, self.classifications))
        one_hot[np.arange(n_rows), np.argmax(self.sigmoid_out, axis=1)] = 1
        return one_hot

    def score(self, x_test=None, y_test=None):
        """Return the fraction of rows whose predicted class matches ``y_test``.

        With no arguments the training data is used.

        Raises
        ------
        ValueError
            If only one of ``x_test`` / ``y_test`` is supplied.
        """
        x_test, y_test = self._resolve_eval_data(x_test, y_test)
        predicted = np.argmax(self.predict(x_test), axis=1)
        actual = np.argmax(y_test, axis=1)
        return float(np.count_nonzero(predicted == actual)) / x_test.shape[0]

    def _resolve_eval_data(self, x_test, y_test):
        """Return ``(x, y)`` to evaluate on, defaulting to the training data."""
        if x_test is None and y_test is None:
            return self.x, self.y
        if x_test is None or y_test is None:
            raise ValueError("x_test and y_test must be provided together")
        return x_test, y_test

    def _forward_propagation(self, x_test=None):
        """Forward pass; caches pre-activations and activations on ``self``.

        Uses the training inputs when ``x_test`` is None.
        """
        inputs = self.x if x_test is None else x_test
        # Input layer -> hidden layer
        self.net_hidden = np.dot(inputs, self.weights1) + self.bias1
        self.sigmoid_hidden = sigmoid(self.net_hidden)
        # Hidden layer -> output layer
        self.net_out = np.dot(self.sigmoid_hidden, self.weights2) + self.bias2
        self.sigmoid_out = sigmoid(self.net_out)

    def _back_propagation(self, eta):
        """Backward pass: one gradient-descent step on the training data.

        Relies on the activations cached by a preceding
        ``_forward_propagation()`` call on the training inputs.
        """
        # Error signal at the output layer: dE/d(net_out).
        delta_out = -(self.y - self.sigmoid_out) * (self.sigmoid_out * (1 - self.sigmoid_out))
        theta_weight2 = np.dot(self.sigmoid_hidden.T, delta_out)

        # Error signal at the hidden layer: each hidden unit receives the
        # output deltas weighted by its outgoing weights (delta_out @ weights2.T).
        delta_hidden = np.dot(delta_out, self.weights2.T) * (
            self.sigmoid_hidden * (1 - self.sigmoid_hidden))
        theta_weight1 = np.dot(self.x.T, delta_hidden)

        self.weights1 -= eta * theta_weight1  # input -> hidden weights
        self.bias1 -= eta * np.sum(delta_hidden)  # scalar bias of the hidden layer
        self.weights2 -= eta * theta_weight2  # hidden -> output weights
        self.bias2 -= eta * np.sum(delta_out)  # scalar bias of the output layer

    def _calculate_square_error(self, x_test=None, y_test=None):
        """Return ``0.5 * sum((y - output) ** 2)`` for the given data.

        With no arguments the training data is used. A forward pass is run on
        the chosen inputs so the error always corresponds to them rather than
        to whatever activations happened to be cached.

        Raises
        ------
        ValueError
            If only one of ``x_test`` / ``y_test`` is supplied.
        """
        x_test, y_test = self._resolve_eval_data(x_test, y_test)
        self._forward_propagation(x_test)
        return 0.5 * np.sum((y_test - self.sigmoid_out) ** 2)

In [17]:
# Seed NumPy's global generator so NN's np.random.rand weight initialisation is repeatable.
np.random.seed(64)
x_data = scale(x_data)
# Encode the labels as dense one-hot rows. Calling .toarray() on the encoder's
# default sparse result avoids the `sparse=` keyword, which newer scikit-learn
# releases renamed to `sparse_output` and later removed.
y_label_oh = OneHotEncoder().fit_transform(y_label).toarray()

# 10 output classes, 25 hidden units.
nn = NN(x_data, y_label_oh, 10, 25)
print("未训练的模型预测准确率:", nn.score())

未训练的模型预测准确率: 0.135627125425085


In [18]:
# Train with learning rate 0.001 for fit()'s default number of epochs,
# then report accuracy on the training data (score() with no arguments).
nn.fit(eta=0.001)
print("训练后的模型预测准确率:", nn.score())

--------------训练开始--------------
10 7859.210756060044 0.15783156631326264
20 2445.1743125222765 0.16463292658531706
30 2409.724047762197 0.16463292658531706
40 2394.912117960067 0.16883376675335068
50 2387.9367791223062 0.17263452690538109
60 2377.062765165369 0.1764352870574115
70 2367.879885889522 0.1770354070814163
80 2366.0733119213915 0.17863572714542908
90 2364.4518007141996 0.1822364472894579
100 2362.659242653648 0.1842368473694739
110 2360.6661839859926 0.1868373674734947
120 2358.518400149572 0.1884376875375075
130 2356.34093446764 0.19243848769753952
140 2354.1368675856897 0.1956391278255651
150 2351.758257366321 0.2012402480496099
160 2349.103613603336 0.20724144828965793
170 2346.113671368686 0.20884176835367074
180 2342.7252440532834 0.21004200840168033
190 2338.865969160748 0.21404280856171234
200 2334.450291120498 0.21824364872974594
210 2329.375580403336 0.22264452890578115
220 2323.506264407666 0.22564512902580516
230 2316.657771660819 0.23024604920984196
240 2308.613