In [None]:
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
import numpy as np
import os
import pandas as pd


# Read the text file into a numpy array.
all_data = np.loadtxt("claim_toiawase_data_python.txt")

# Wrap the array in a DataFrame with named columns (only to make the data easier to inspect).
all_data_pd = pd.DataFrame(
    all_data,
    columns=[
        "seq", "Imp", "Imp_val",
        "m_sum_neg", "c_neg", "m_sum_all", "m_sum_nzr", "Mag",
        "c_nzr", "c_all", "m_max", "s_min", "m_sdv_neg",
        "s_sdv_neg", "m_avg_neg", "s_avg_neg", "s_sdv_nzr", "m_sdv_all",
        "s_sdv_all", "s_avg_nzr", "m_avg_nzr", "s_avg_all", "Sc",
        "m_avg_all", "m_sum_pos", "c_pos", "m_min", "s_max",
        "m_sdv_pos", "s_sdv_pos", "m_avg_pos", "s_avg_pos", "m_sdv_nzr",
    ],
)
print(all_data_pd)   # original author's note: Imp and Imp_val are the same

# Feature matrix: columns 3..32 of the first 9813 rows.
all_X = all_data[:9813, 3:33]
# Label vector: column 2 ("Imp_val") of the same rows.
all_y = all_data[:9813, 2]
print(all_X.shape)
print(all_y)
# print(type(all_X))

In [None]:
import matplotlib.pyplot as plt

# Boolean row masks for each class, computed once and reused below.
class1_mask = all_y == 1
class0_mask = all_y == 0

# Report the number of class-1 and class-0 samples before upsampling.
print("Number of class 1 samples before:", all_y[class1_mask].shape[0])
print("Number of class 0 samples before:", all_y[class0_mask].shape[0])

# Draw class-1 samples with replacement until there are as many of them
# as there are class-0 samples.
X_upsampled, y_upsampled = resample(all_X[class1_mask],
                                    all_y[class1_mask],
                                    replace=True,
                                    n_samples=all_X[class0_mask].shape[0],
                                    random_state=123)
# Report the number of class-1 samples after upsampling.
print("Number of class 1 samples after:", X_upsampled.shape[0])

# Stack the original class-0 samples on top of the upsampled class-1 subset.
X_bal = np.vstack((all_X[class0_mask], X_upsampled))  # balanced feature set
y_bal = np.hstack((all_y[class0_mask], y_upsampled))  # balanced label set
print(X_bal.shape, y_bal.shape)

# NOTE: X_bal is intentionally not re-sliced with hard-coded bounds here.
# A fixed row limit would truncate X_bal without truncating y_bal whenever
# the class-0 count changes, and train_test_split would then be handed
# arrays of different lengths.
print(X_bal)
# Split the balanced features and labels 70/30 into training and test sets,
# stratified on the label so both splits keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X_bal, y_bal, test_size=0.3, random_state=0, stratify=y_bal)

In [None]:
import numpy
import math
import random
from matplotlib import pyplot

class Neural:
    """Feed-forward network with one hidden layer and sigmoid activations.

    Both layers carry a bias term (the extra ``+ 1`` column of each weight
    matrix). Training is per-sample gradient descent on the squared error,
    with an optional momentum term.

    Attributes:
        hidden_weight: array of shape (n_hidden, n_input + 1).
        output_weight: array of shape (n_output, n_hidden + 1).
        hidden_momentum / output_momentum: the most recent weight step of
            each layer (same shapes as the weights); zero before training.
        error: per-epoch summed squared error, set by ``train``.
    """

    # constructor
    def __init__(self, n_input, n_hidden, n_output, random_state):
        """Initialise weights uniformly in [-1, 1) and momentum to zero.

        Args:
            n_input: number of input features.
            n_hidden: number of hidden units.
            n_output: number of output units.
            random_state: seed passed to ``numpy.random.seed`` (this reseeds
                numpy's global random generator).
        """
        numpy.random.seed(seed=random_state)
        # random_sample draws from [0, 1); "* 2 - 1" rescales to [-1, 1).
        self.hidden_weight = numpy.random.random_sample((n_hidden, n_input + 1)) * 2 - 1
        self.output_weight = numpy.random.random_sample((n_output, n_hidden + 1)) * 2 - 1
        # Momentum holds the previous weight step, so it must start at zero.
        self.hidden_momentum = numpy.zeros((n_hidden, n_input + 1))
        self.output_momentum = numpy.zeros((n_output, n_hidden + 1))

    # public method
    def train(self, X, T, epsilon, mu, epoch):
        """Train on every row of X once per epoch, in row order.

        Args:
            X: 2-D array, one sample per row.
            T: targets indexed like the rows of X.
            epsilon: learning rate (scales the gradient step).
            mu: momentum coefficient (scales the previous weight step).
            epoch: number of passes over X.

        After each pass the summed squared error over X is stored in
        ``self.error[epo]``.
        """
        self.error = numpy.zeros(epoch)
        N = X.shape[0]
        for epo in range(epoch):
            for i in range(N):
                x = X[i, :]
                t = T[i]

                self.__update_weight(x, t, epsilon, mu)

            self.error[epo] = self.__calc_error(X, T)

    def predict(self, X):
        """Return the raw output-layer activations for every row of X.

        Returns:
            Array of shape (number of rows in X, n_output) with values in
            [0, 1]; thresholding into class labels is left to the caller.
        """
        N = X.shape[0]
        # One column per output unit (not per input feature).
        Y = numpy.zeros((N, self.output_weight.shape[0]))
        for i in range(N):
            _, y = self.__forward(X[i, :])
            Y[i] = y      # store the network output for sample i

        return Y

    def error_graph(self):
        """Plot the per-epoch training error recorded by ``train``.

        The y-axis is fixed to [0, 2]; errors outside that range are not
        visible in the plot.
        """
        pyplot.ylim(0.0, 2.0)
        pyplot.plot(numpy.arange(0, self.error.shape[0]), self.error)
        pyplot.show()

    # private method
    def __sigmoid(self, arr):
        """Element-wise logistic sigmoid (the activation function).

        Uses 0.5 * (1 + tanh(x / 2)), which is algebraically equal to
        1 / (1 + exp(-x)) but does not overflow for large |x|.
        """
        return 0.5 * (1.0 + numpy.tanh(0.5 * numpy.asarray(arr, dtype=float)))

    def __forward(self, x):
        """Forward pass for one sample; a constant 1 is prepended as bias input.

        Returns:
            (z, y): hidden-layer activations and output-layer activations.
        """
        # z: output in hidden layer, y: output in output layer
        z = self.__sigmoid(self.hidden_weight.dot(numpy.r_[numpy.array([1]), x]))
        y = self.__sigmoid(self.output_weight.dot(numpy.r_[numpy.array([1]), z]))

        return (z, y)

    def __update_weight(self, x, t, epsilon, mu):
        """Apply one gradient-descent-with-momentum step for sample (x, t)."""
        z, y = self.__forward(x)
        x_bias = numpy.r_[numpy.array([1]), x]
        z_bias = numpy.r_[numpy.array([1]), z]

        # Both deltas are computed before any weight changes, so the hidden
        # layer is back-propagated through the same output weights that
        # produced y in the forward pass.
        output_delta = (y - t) * y * (1.0 - y)
        hidden_delta = (self.output_weight[:, 1:].T.dot(output_delta)) * z * (1.0 - z)

        # update output_weight: step = -epsilon * gradient + mu * previous step
        output_step = mu * self.output_momentum - epsilon * output_delta.reshape((-1, 1)) * z_bias
        self.output_weight += output_step
        self.output_momentum = output_step

        # update hidden_weight with the same rule
        hidden_step = mu * self.hidden_momentum - epsilon * hidden_delta.reshape((-1, 1)) * x_bias
        self.hidden_weight += hidden_step
        self.hidden_momentum = hidden_step

    def __calc_error(self, X, T):
        """Return the summed half squared error over all rows of X as a float."""
        N = X.shape[0]
        err = 0.0
        for i in range(N):
            _, y = self.__forward(X[i, :])
            diff = y - T[i]
            # Reduce to a plain float so the result fits a scalar array slot.
            err += float(numpy.sum(diff * diff)) / 2.0

        return err

In [None]:
from sklearn.metrics import confusion_matrix

# Train the network on the training split and report a confusion matrix
# computed on that same training split.
if __name__ == '__main__':

    X = X_train
    T = y_train
    N = X.shape[0]  # number of training samples

    # Network and training settings.
    input_size = X.shape[1]
    hidden_size = 5
    output_size = 1
    epsilon = 0.1   # learning rate
    mu = 0          # momentum coefficient (0 disables momentum)
    epoch = 1       # number of passes over the training data
    seed = 2        # seed for the random weight initialisation

    nn = Neural(input_size, hidden_size, output_size, seed)
    nn.train(X, T, epsilon, mu, epoch)
    nn.error_graph()

    # Collapse the prediction matrix to one score per sample, then threshold
    # at 0.5. numpy.where always yields exactly one label per score (a NaN
    # score maps to 0.0), so y_pre always has the same length as y_train.
    Y = nn.predict(X)
    y_out = Y.mean(axis=1)
    y_pre = numpy.where(y_out >= 0.5, 1.0, 0.0)

    # labels=[1, 0] orders the rows/columns with class 1 first.
    print(confusion_matrix(y_train, y_pre, labels=[1, 0]))