In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
import math
import random
import datetime,time

# BP神经网络 乳腺癌二分类问题

# Ⅰ 
* 隐藏层与输出层激活函数均为Sigmoid
* 原始数据集
* 配置了dropout参数，但本节的训练循环并未实际使用dropout（dropout仅在 Ⅲ 中生效）

In [None]:
# Load the breast-cancer CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv("/kaggle/input/breast-cancer-wisconsin-data/data.csv")

## 数据预处理

### 检查缺失数据

In [None]:
# Per-column count of missing (null) entries.
data.isnull().sum()

In [None]:
# (rows, columns) of the raw table.
data.shape

In [None]:
# Hard-coded total sample count; presumably equals data.shape[0] shown above — TODO confirm.
# NOTE(review): this name is not referenced again in the visible notebook.
samples = 569

In [None]:
# Preview the first rows of the raw table.
data.head()

In [None]:
# Bar chart of how many samples fall into each diagnosis class.
class_counts = data["diagnosis"].value_counts()
class_counts.plot.bar()
plt.show()

In [None]:
# Keep only the columns at positions 1..31 (column 0 and everything from position 32 on are dropped).
# NOTE(review): positional and non-idempotent — re-running this cell shifts the selection again.
data = data.iloc[:,1:32]
data

### 标签数值化

In [None]:
# Encode the diagnosis string as a numeric label (M -> 0, B -> 1) in a new
# 'diagnosis_value' column, then remove the original string column.
for label, code in (('M', 0), ('B', 1)):
    is_label = data['diagnosis'] == label
    data.loc[is_label, 'diagnosis_value'] = code
data.drop(columns='diagnosis', inplace=True)
data

### 每一维特征数据归一化

In [None]:
# Min-max scale every column (features and label) to [0, 1]:
#   scaled = (value - column_min) / (column_max - column_min)
# NOTE(review): the statistics come from the full table, i.e. before the train/test split below.
col_min = data.min()
col_max = data.max()
data = (data - col_min) / (col_max - col_min)
data

### 测试集和训练集分割

In [None]:
# Hold out a random 30% of the rows as the test set; the remaining rows form the training set.
# A fixed random_state makes the split (and therefore every metric below) reproducible
# across Restart & Run All; without it each run evaluates on different rows.
test = data.sample(frac=0.3, random_state=0)
train = data.drop(index=test.index)
# Re-number both frames from 0 so they can be indexed positionally later.
test = test.reset_index(drop=True)
train = train.reset_index(drop=True)

In [None]:
# Display the held-out test frame.
test

In [None]:
# Display the training frame.
train

In [None]:
# Class balance of the test set.
test_label_counts = test["diagnosis_value"].value_counts()
test_label_counts.plot.bar()
plt.show()

In [None]:
# Class balance of the training set.
train_label_counts = train["diagnosis_value"].value_counts()
train_label_counts.plot.bar()
plt.show()

## BP神经网络训练与预测

In [None]:
# Network and training hyper-parameters.
input_nodes =30        # input layer size
hidden_nodes =10       # hidden layer size
n_classes = output_nodes =1        # output layer size
learning_rate =0.1             # initial learning rate
epoch = 800             # number of training epochs
train_samples = 398   # number of training samples (hard-coded)
test_samples = 171     # number of test samples (hard-coded)
dropout = 2          # number of hidden neurons disabled per training step
epsilon = 0.5        # presumably the AdaGrad denominator term — TODO confirm
bias = 1             # value the training loop writes into the last hidden activation

### 特征与标签分割

In [None]:
# Training labels as a NumPy array; show the first one as a sanity check.
trainY = train["diagnosis_value"].values
trainY[0]

In [None]:
# Training features: every column except the label, as a NumPy array.
trainX = train.drop(columns='diagnosis_value').values
trainX

### 激活函数配置

In [None]:
def hidden_activate_function(x):
    """Logistic sigmoid 1 / (1 + exp(-x)) used as the hidden-layer activation.

    Evaluated in a numerically stable way: the exponential is only ever taken of a
    non-positive number, so large-magnitude negative inputs no longer overflow
    ``np.exp`` (which raised a RuntimeWarning in the naive form).

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    Scalar for scalar input, otherwise an ndarray with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    z = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) — algebraically identical.
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    return out[()]  # unwrap 0-d results back to a scalar

In [None]:
def op_activate_function(x):
    """Logistic sigmoid 1 / (1 + exp(-x)) used as the output-layer activation.

    Evaluated in a numerically stable way: the exponential is only ever taken of a
    non-positive number, so large-magnitude negative inputs no longer overflow
    ``np.exp`` (which raised a RuntimeWarning in the naive form).

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    Scalar for scalar input, otherwise an ndarray with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    z = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) — algebraically identical.
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    return out[()]  # unwrap 0-d results back to a scalar

In [None]:
# Random Gaussian initialisation (mean 0) of both weight matrices.
# weights1 maps input -> hidden, weights2 maps hidden -> output; the standard
# deviation of each is (size of the layer it feeds) ** -0.5.
weights1 = np.random.normal(loc=0.0, scale=hidden_nodes ** -0.5, size=(hidden_nodes, input_nodes))
weights2 = np.random.normal(loc=0.0, scale=output_nodes ** -0.5, size=(output_nodes, hidden_nodes))

### 训练

In [None]:
# Train the network with per-sample (online) gradient updates for `epoch` passes
# over the training set, and measure the wall-clock time of the whole run.
starttime = datetime.datetime.now()
for e in range(epoch):

    for i in range(train_samples):



        inputs = trainX[i].reshape(trainX[i].shape[0],1)     # sample i as a column vector
        targets = trainY[i]

        # Forward pass
        hidden_ip = np.dot(weights1,inputs)
        hidden_op = hidden_activate_function(hidden_ip)

        final_ip = np.dot(weights2,hidden_op)
        op = op_activate_function(final_ip)
        # NOTE(review): the last hidden activation is overwritten with `bias` only AFTER
        # `op` has been computed, so the forward output above does not see the bias while
        # the gradient terms below do. Confirm whether this ordering is intended.
        hidden_op[hidden_nodes - 1] = bias

        # Backpropagation
        output_errors = (targets - op)

        # Output error scaled by the sigmoid derivative op * (1 - op).
        delta_op = output_errors * op * (1 - op)

        hidden_errors = np.dot(weights2.T,delta_op)

        # The zeros array is immediately replaced by the next assignment (dead store).
        delta_hidden = np.zeros(shape=(hidden_nodes,1))
        delta_hidden = hidden_op * (1 - hidden_op) * hidden_errors

        # Weight update: w += learning_rate * delta * (layer input)^T
        weights2 = weights2 + learning_rate * np.dot(delta_op,hidden_op.T)
        weights1 = weights1 + learning_rate * np.dot(delta_hidden,inputs.T)

endtime = datetime.datetime.now()
runtime = endtime - starttime
print(runtime)

In [None]:
# Test features: every column except the label, as a NumPy array.
testX = test.drop(columns='diagnosis_value').values
testX

In [None]:
# Test labels as a NumPy array.
test_label_column = test["diagnosis_value"]
testY = test_label_column.values
testY

### 预测

In [None]:
# Forward-propagate every test sample through the trained network and store the
# sigmoid output (one score per sample) in pred_op.
# NOTE(review): unlike the training loop, no bias overwrite is applied to the hidden layer here.
pred_op = np.zeros((test_samples, output_nodes))
for idx in range(test_samples):
    sample_col = testX[idx].reshape(-1, 1)
    hidden_act = hidden_activate_function(np.dot(weights1, sample_col))
    net_out = op_activate_function(np.dot(weights2, hidden_act))
    pred_op[idx] = net_out.T

In [None]:
# Alias the raw network outputs as the ranking score used for the ROC curve.
score = pred_op
score

## 结果评估 

### ROC曲线

In [None]:
# ROC curve and AUC of the section-I model on the test set.
fpr,tpr,threshold = roc_curve(testY, score)  # false/true positive rate at each score threshold
roc_auc = auc(fpr,tpr)  # area under the ROC curve

lw = 2
# Create exactly one figure; the former bare plt.figure() call before this one
# only produced an additional empty figure in the output.
plt.figure(figsize=(10,10))
# False positive rate on the x-axis, true positive rate on the y-axis.
plt.plot(fpr, tpr, color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

### 准确率

In [None]:
# Binarise the scores at 0.5 and compute the fraction of test samples whose
# predicted label equals the true label.
threshold = 0.5
prednum = (pred_op >= threshold).astype(float)   # same (n, 1) float layout as pred_op

matches = testY[:test_samples] == prednum[:test_samples, 0]
s = int(np.sum(matches))
accuracy = s / test_samples
accuracy

### 精确率 特异度 召回率

In [None]:
# Confusion counts with label 1 treated as the positive class, followed by
# precision, specificity and recall collected into a small summary table.
y_true = testY[:test_samples]
y_pred = np.ravel(prednum)[:test_samples]
TP = int(np.sum((y_true == 1) & (y_pred == 1)))
FP = int(np.sum((y_true == 0) & (y_pred == 1)))
FN = int(np.sum((y_true == 1) & (y_pred == 0)))
TN = int(np.sum((y_true == 0) & (y_pred == 0)))

# Guard each ratio: when a denominator is 0 (e.g. the model never predicts 1)
# the metric is undefined, so report NaN instead of raising ZeroDivisionError.
undefined = float('nan')
precision = TP / (TP + FP) if (TP + FP) else undefined
specificity = TN / (TN + FP) if (TN + FP) else undefined
recall = TP / (TP + FN) if (TP + FN) else undefined

assess = pd.DataFrame([precision,specificity,recall])
assess['Assess'] = ['precision','specificity','recall']
assess

### F1-score

In [None]:
# F1 = harmonic mean of precision (row 0) and recall (row 2) from the summary table.
precision_val = assess.iat[0,0]
recall_val = assess.iat[2,0]
F1 = 2 * precision_val * recall_val / (precision_val + recall_val)
F1

### 混淆矩阵

In [None]:
# Confusion matrix heat-map: rows are the true label, columns the predicted label,
# both ordered [1, 0].
confusion = np.array(([TP,FN],[FP,TN]))
plt.imshow(confusion, cmap=plt.cm.Blues)
indices = range(len(confusion))
plt.xticks(indices, ['1', '0'])
plt.yticks(indices, ['1', '0'])

plt.colorbar()

plt.xlabel('Predict')
plt.ylabel('True')
plt.title('Confusion Matrix')

# plt.text takes (x, y) = (column, row). Passing (row, column) wrote the FN and FP
# counts into each other's cells, so the annotation did not match the colours.
for row_idx in range(confusion.shape[0]):
    for col_idx in range(confusion.shape[1]):
        plt.text(col_idx, row_idx, confusion[row_idx][col_idx], ha='center', va='center')

plt.show()

# Ⅱ PCA降维
* 将原始数据集的特征用PCA的方法进行降维
* BP神经网络配置除了输入神经元个数改变之外，其余设置与 Ⅰ 相同

## PCA降维过程

In [None]:
# Full (normalised) feature matrix without the label column, as a NumPy array.
X = data.drop(columns='diagnosis_value').values
X

### 协方差矩阵与协方差矩阵的特征值与特征向量

In [None]:
# Covariance matrix of the centred features and its eigen-decomposition.
# NOTE(review): the mean is taken over all rows of X but subtracted from trainX
# (the training subset); confirm this mix is intended.
Xmean = X.mean(axis=0)
Xnomean = trainX - np.tile(Xmean, (train_samples, 1))   # zero-mean feature matrix
scatter = np.dot(Xnomean.T, Xnomean)
cov = (1 / train_samples) * scatter                     # covariance matrix
d, v = np.linalg.eig(cov)                               # eigenvalues d, eigenvectors in the columns of v
d

### 特征向量与特征值按特征值大小排序

In [None]:
# Order of the eigenvalues from largest to smallest (indx), and the
# eigenvalues themselves in that descending order (D).
indx = np.argsort(-d)
D = sorted(d, reverse=True)
print(D,indx)

In [None]:
# Reorder the eigenvector columns so they follow the descending eigenvalue order.
rankV = v[:, indx]
rankV

### 挑选特征值与特征向量

In [None]:
# Accumulate eigenvalues (largest first) until they reach 99% of the total,
# then keep the leading eigenvector columns as the projection basis P1.
# NOTE(review): the loop stops at index n after summing n + 1 eigenvalues, yet the
# slice below keeps only n columns — possible off-by-one. The later input_nodes
# setting is hard-coded, so verify both together before changing either.
energy = 0.99
tt = sum(D)
t = 0
for n, eigval in enumerate(D[:30]):
    t = t + eigval
    ratio = t / tt
    if ratio >= energy:
        break
P1 = rankV[:, :n]
print(n,P1)

In [None]:
# (original feature count, number of kept components).
P1.shape

### 单位化后获得需要的新基

In [None]:
# Scale every selected eigenvector to unit Euclidean length; P holds them as columns.
P1_T = P1.T
P_T = np.ones(shape=P1_T.shape)
for row_idx, vec in enumerate(P1_T):
    vec_norm = math.sqrt(np.dot(vec, vec))
    P_T[row_idx] = vec / vec_norm
P = P_T.T
P.shape

### 将原特征投影到新基上得到新特征

In [None]:
# Project the feature matrix onto the new basis to obtain the reduced features.
newX = np.dot(X,P)
newX

In [None]:
# (samples, reduced feature count).
newX.shape

In [None]:
# Re-attach the label column to the projected features.
newdfX = pd.DataFrame(newX)
label_column = data["diagnosis_value"]
newdata = newdf = pd.concat([newdfX, label_column], axis=1)
newdata

## 数据预处理

In [None]:
# Min-max scale every column of the projected data (and the label) to [0, 1]:
#   scaled = (value - column_min) / (column_max - column_min)
def _minmax(column):
    """Rescale one column linearly so its minimum maps to 0 and its maximum to 1."""
    low = column.min()
    high = column.max()
    return (column - low) / (high - low)

newdata = newdata.apply(_minmax)
newdata

In [None]:
# Hold out a random 30% of the rows as the test set; the remaining rows form the training set.
# A fixed random_state makes the split (and therefore every metric below) reproducible
# across Restart & Run All; without it each run evaluates on different rows.
newtest = newdata.sample(frac=0.3, random_state=0)
newtrain = newdata.drop(index=newtest.index)
# Re-number both frames from 0 so they can be indexed positionally later.
newtest = newtest.reset_index(drop=True)
newtrain = newtrain.reset_index(drop=True)

In [None]:
# Training labels and features of the PCA-reduced data as NumPy arrays.
trainY = newtrain["diagnosis_value"].values
trainX = newtrain.drop(columns='diagnosis_value').values

## BP神经网络训练与预测

In [None]:
# Network and training hyper-parameters for the PCA-reduced input.
input_nodes =15        # input layer size (hard-coded; must equal the number of reduced features)
hidden_nodes =9       # hidden layer size
n_classes = output_nodes =1        # output layer size
learning_rate =0.1             # initial learning rate
epoch = 800             # number of training epochs
train_samples = 398   # number of training samples (hard-coded)
test_samples = 171     # number of test samples (hard-coded)
dropout = 2          # number of hidden neurons disabled per training step
epsilon = 0.5        # presumably the AdaGrad denominator term — TODO confirm
bias = 1             # value the training loop writes into the last hidden activation

In [None]:
# Fresh random Gaussian initialisation (mean 0) of both weight matrices.
# weights1 maps input -> hidden, weights2 maps hidden -> output; the standard
# deviation of each is (size of the layer it feeds) ** -0.5.
weights1 = np.random.normal(loc=0.0, scale=hidden_nodes ** -0.5, size=(hidden_nodes, input_nodes))
weights2 = np.random.normal(loc=0.0, scale=output_nodes ** -0.5, size=(output_nodes, hidden_nodes))

In [None]:
# Training: per-sample (online) gradient updates for `epoch` passes over the
# training set, with the wall-clock time of the whole run measured.
starttime = datetime.datetime.now()
for e in range(epoch):

    for i in range(train_samples):



        inputs = trainX[i].reshape(trainX[i].shape[0],1)     # sample i as a column vector
        targets = trainY[i]

        # Forward pass
        hidden_ip = np.dot(weights1,inputs)
        hidden_op = hidden_activate_function(hidden_ip)

        final_ip = np.dot(weights2,hidden_op)
        op = op_activate_function(final_ip)
        # NOTE(review): the last hidden activation is overwritten with `bias` only AFTER
        # `op` has been computed, so the forward output above does not see the bias while
        # the gradient terms below do. Confirm whether this ordering is intended.
        hidden_op[hidden_nodes - 1] = bias

        # Backpropagation
        output_errors = (targets - op)

        # Output error scaled by the sigmoid derivative op * (1 - op).
        delta_op = output_errors * op * (1 - op)

        hidden_errors = np.dot(weights2.T,delta_op)

        # The zeros array is immediately replaced by the next assignment (dead store).
        delta_hidden = np.zeros(shape=(hidden_nodes,1))
        delta_hidden = hidden_op * (1 - hidden_op) * hidden_errors

        # Weight update: w += learning_rate * delta * (layer input)^T
        weights2 = weights2 + learning_rate * np.dot(delta_op,hidden_op.T)
        weights1 = weights1 + learning_rate * np.dot(delta_hidden,inputs.T)

endtime = datetime.datetime.now()
runtime1 = endtime - starttime
print(runtime1)

In [None]:
# Test features and labels of the PCA-reduced data as NumPy arrays.
testX = newtest.drop(columns='diagnosis_value').values
testY = newtest['diagnosis_value'].values

In [None]:
# Forward-propagate every test sample through the trained network and store the
# sigmoid output (one score per sample) in pred_op.
# NOTE(review): unlike the training loop, no bias overwrite is applied to the hidden layer here.
pred_op = np.zeros((test_samples, output_nodes))
for idx in range(test_samples):
    sample_col = testX[idx].reshape(-1, 1)
    hidden_act = hidden_activate_function(np.dot(weights1, sample_col))
    net_out = op_activate_function(np.dot(weights2, hidden_act))
    pred_op[idx] = net_out.T

In [None]:
# Alias the raw network outputs as the ranking score used for the ROC curve.
score = pred_op
score

## 结果评估

In [None]:
# ROC curve and AUC of the PCA-input model on the test set.
fpr1,tpr1,threshold1 = roc_curve(testY, score)  # false/true positive rate at each score threshold
roc_auc1 = auc(fpr1,tpr1)  # area under the ROC curve

lw = 2
# Create exactly one figure; the former bare plt.figure() call before this one
# only produced an additional empty figure in the output.
plt.figure(figsize=(10,10))
# False positive rate on the x-axis, true positive rate on the y-axis.
plt.plot(fpr1, tpr1, color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc1)
# Diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Binarise the scores at 0.5 and compute the fraction of test samples whose
# predicted label equals the true label.
threshold = 0.5
prednum = (pred_op >= threshold).astype(float)   # same (n, 1) float layout as pred_op

matches = testY[:test_samples] == prednum[:test_samples, 0]
s = int(np.sum(matches))
accuracy1 = s / test_samples
accuracy1

In [None]:
# Confusion counts with label 1 treated as the positive class, followed by
# precision, specificity and recall collected into a small summary table.
y_true = testY[:test_samples]
y_pred = np.ravel(prednum)[:test_samples]
TP = int(np.sum((y_true == 1) & (y_pred == 1)))
FP = int(np.sum((y_true == 0) & (y_pred == 1)))
FN = int(np.sum((y_true == 1) & (y_pred == 0)))
TN = int(np.sum((y_true == 0) & (y_pred == 0)))

# Guard each ratio: when a denominator is 0 (e.g. the model never predicts 1)
# the metric is undefined, so report NaN instead of raising ZeroDivisionError.
undefined = float('nan')
precision = TP / (TP + FP) if (TP + FP) else undefined
specificity = TN / (TN + FP) if (TN + FP) else undefined
recall = TP / (TP + FN) if (TP + FN) else undefined

assess = pd.DataFrame([precision,specificity,recall])
assess['Assess'] = ['precision','specificity','recall']
assess

In [None]:
# F1 = harmonic mean of precision (row 0) and recall (row 2) from the summary table.
precision_val = assess.iat[0,0]
recall_val = assess.iat[2,0]
F11 = 2 * precision_val * recall_val / (precision_val + recall_val)
F11

In [None]:
# Confusion matrix heat-map: rows are the true label, columns the predicted label,
# both ordered [1, 0].
confusion = np.array(([TP,FN],[FP,TN]))
plt.imshow(confusion, cmap=plt.cm.Blues)
indices = range(len(confusion))
plt.xticks(indices, ['1', '0'])
plt.yticks(indices, ['1', '0'])

plt.colorbar()

plt.xlabel('Predict')
plt.ylabel('True')
plt.title('Confusion Matrix')

# plt.text takes (x, y) = (column, row). Passing (row, column) wrote the FN and FP
# counts into each other's cells, so the annotation did not match the colours.
for row_idx in range(confusion.shape[0]):
    for col_idx in range(confusion.shape[1]):
        plt.text(col_idx, row_idx, confusion[row_idx][col_idx], ha='center', va='center')

plt.show()

# I 和 Ⅱ 对比

In [None]:
# Overlay the ROC curves of section II (fpr1/tpr1) and section I (fpr/tpr).
lw = 2
# Create exactly one figure; the former bare plt.figure() call before this one
# only produced an additional empty figure in the output.
plt.figure(figsize=(10,10))
# False positive rate on the x-axis, true positive rate on the y-axis.
plt.plot(fpr1, tpr1, color='darkorange',
         lw=lw, label='ROC curve1 (area = %0.2f)' % roc_auc1)
plt.plot(fpr, tpr, color='green',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
# Chance diagonal drawn once; it was previously plotted twice (navy, then red on
# top of it), so only the red line was ever visible.
plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='--')

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Side-by-side training time and F1 score of sections I and II.
ca = pd.DataFrame(
    {"runtime": [runtime, runtime1], "F1": [F1, F11]},
    columns=['runtime','F1'],
)
ca

# Ⅲ 自适应梯度更新

AdaGrad方法（Adaptive gradient 自适应梯度）

![](https://private.codecogs.com/gif.latex?%5Cdpi%7B120%7D%20%5Cfn_jvn%20%5CDelta%20w_t%20%3D%20-%5Cfrac%7B%5Ceta%20%7D%7B%5Csqrt%7B%5Csum_%7Bk%3D1%7D%5E%7Bt%7Dg_k%5E2%20&plus;%20%5Cvarepsilon%20%7D%7Dg_t)

In [None]:
# Network and training hyper-parameters for the AdaGrad experiment.
input_nodes =15        # input layer size (hard-coded; must equal the number of reduced features)
hidden_nodes =10       # hidden layer size
n_classes = output_nodes =1        # output layer size
learning_rate =0.1             # initial learning rate
epoch = 800             # number of training epochs
train_samples = 398   # number of training samples (hard-coded)
test_samples = 171     # number of test samples (hard-coded)
dropout = 2          # number of hidden neurons disabled per training step
epsilon = 0.5        # added to the accumulated squared gradients inside the square root of the AdaGrad step
bias = 1

In [None]:
# Fresh random Gaussian initialisation (mean 0) of both weight matrices.
# weights1 maps input -> hidden, weights2 maps hidden -> output; the standard
# deviation of each is (size of the layer it feeds) ** -0.5.
weights1 = np.random.normal(loc=0.0, scale=hidden_nodes ** -0.5, size=(hidden_nodes, input_nodes))
weights2 = np.random.normal(loc=0.0, scale=output_nodes ** -0.5, size=(output_nodes, hidden_nodes))

## 以 Ⅱ 中的准确率为结束训练迭代的条件，评估采用自适应梯度更新的模型性能

In [None]:

# AdaGrad training with hidden-unit dropout.
#
# Each pass performs per-sample updates
#     w += g / sqrt(sum of all past g**2 + epsilon)        (base step size 1)
# and then scores the test set. Training stops as soon as the test accuracy
# exceeds `accuracy1` (the section-II accuracy).
#
# Changes from the previous version of this cell:
#   * The loop is capped at `epoch` passes. `while accuracy2 <= accuracy1` alone
#     never terminates when accuracy1 is 1.0 or is simply never exceeded.
#   * pred_op is created before prednum, so the cell no longer relies on a
#     pred_op left over from an earlier cell.
#   * Unused names (lr, lrho_sum, lrih_sum, hidden_errors) were removed.
#
# NOTE(review): the stopping rule is evaluated on the test set, so the metrics
# reported afterwards are optimistically biased.
starttime = datetime.datetime.now()
gho_sum = np.zeros(shape=weights2.shape)   # running sum of squared hidden->output gradients
gih_sum = np.zeros(shape=weights1.shape)   # running sum of squared input->hidden gradients
lrho = np.ones(shape=weights2.shape)       # base step size for weights2
lrih = np.ones(shape=weights1.shape)       # base step size for weights1
pred_op = np.zeros(shape=(test_samples, output_nodes))
prednum = np.ones(shape=pred_op.shape)
threshold = 0.5
accuracy2 = 0
epochs_run = 0

while accuracy2 <= accuracy1 and epochs_run < epoch:
    epochs_run += 1

    for i in range(train_samples):

        # Hidden units silenced for this sample.
        dropout_list = random.sample(range(0, hidden_nodes), dropout)

        inputs = trainX[i].reshape(trainX[i].shape[0], 1)
        targets = trainY[i]

        # Forward pass
        hidden_op = hidden_activate_function(np.dot(weights1, inputs))
        hidden_op[dropout_list, 0] = 0
        op = op_activate_function(np.dot(weights2, hidden_op))

        # Backpropagation
        delta_op = (targets - op) * op * (1 - op)
        delta_hidden = hidden_op * (1 - hidden_op) * np.dot(weights2.T, delta_op)

        gho = np.dot(delta_op, hidden_op.T)
        gih = np.dot(delta_hidden, inputs.T)

        gho_sum = gho_sum + gho * gho
        gih_sum = gih_sum + gih * gih

        weights2 = weights2 + lrho * gho / np.sqrt(gho_sum + epsilon)
        weights1 = weights1 + lrih * gih / np.sqrt(gih_sum + epsilon)

    # Score the test set with the current weights (no dropout at inference).
    for n in range(test_samples):
        inputs = testX[n].reshape(testX[n].shape[0], 1)
        hidden_op = hidden_activate_function(np.dot(weights1, inputs))
        pred_op[n] = op_activate_function(np.dot(weights2, hidden_op)).T

    prednum = (pred_op >= threshold).astype(float)
    s = int(np.sum(testY[:test_samples] == prednum[:test_samples, 0]))
    accuracy2 = s / test_samples

if accuracy2 <= accuracy1:
    print('stopped after', epochs_run, 'epochs without exceeding accuracy1 =', accuracy1)

endtime = datetime.datetime.now()
runtime2 = endtime - starttime
print(runtime2,accuracy2)

In [None]:
# Alias the raw network outputs as the ranking score used for the ROC curve.
score = pred_op
score

In [None]:
# ROC curve and AUC of the AdaGrad model, overlaid on the section-II curve.
fpr2,tpr2,threshold2 = roc_curve(testY, score)  # false/true positive rate at each score threshold
roc_auc2 = auc(fpr2,tpr2)  # area under the ROC curve

lw = 2
# Create exactly one figure; the former bare plt.figure() call before this one
# only produced an additional empty figure in the output.
plt.figure(figsize=(10,10))
# False positive rate on the x-axis, true positive rate on the y-axis.
plt.plot(fpr2, tpr2, color='blue',
         lw=lw, label='ROC curve AG (area = %0.2f)' % roc_auc2)
plt.plot(fpr1, tpr1, color='green',
         lw=lw, label='ROC curve1 (area = %0.2f)' % roc_auc1)
# Diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1], color='red', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Confusion counts with label 1 treated as the positive class, followed by
# precision, specificity and recall collected into a small summary table.
y_true = testY[:test_samples]
y_pred = np.ravel(prednum)[:test_samples]
TP = int(np.sum((y_true == 1) & (y_pred == 1)))
FP = int(np.sum((y_true == 0) & (y_pred == 1)))
FN = int(np.sum((y_true == 1) & (y_pred == 0)))
TN = int(np.sum((y_true == 0) & (y_pred == 0)))

# Guard each ratio: when a denominator is 0 (e.g. the model never predicts 1)
# the metric is undefined, so report NaN instead of raising ZeroDivisionError.
undefined = float('nan')
precision = TP / (TP + FP) if (TP + FP) else undefined
specificity = TN / (TN + FP) if (TN + FP) else undefined
recall = TP / (TP + FN) if (TP + FN) else undefined

assess = pd.DataFrame([precision,specificity,recall])
assess['Assess'] = ['precision','specificity','recall']
assess

In [None]:
# F1 = harmonic mean of precision (row 0) and recall (row 2) from the summary table.
precision_val = assess.iat[0,0]
recall_val = assess.iat[2,0]
F12 = 2 * precision_val * recall_val / (precision_val + recall_val)
F12

# Ⅰ Ⅱ Ⅲ 对比

In [None]:
# Side-by-side training time and F1 score of sections I, II and III.
ca = pd.DataFrame(
    {"runtime": [runtime, runtime1, runtime2], "F1": [F1, F11, F12]},
    columns=['runtime','F1'],
)
ca