In [3]:
import scipy.io as scio
import os
import numpy as np
import sklearn.metrics as metrics
from sklearn import svm
from keras import layers
from keras import models
import pandas as pd
from sklearn.metrics import f1_score,recall_score,precision_score,auc,accuracy_score,confusion_matrix
from sklearn.metrics import precision_recall_curve, average_precision_score,roc_auc_score,roc_curve
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from itertools import cycle
from scipy import interp
plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
import seaborn as sns

In [4]:
path='./CWRU/'
#寻找文件夹下所有的数据文件
path_list=os.listdir(path)
data_x=[]
data_y=[]
for i in path_list:
    path1=path+i
    dd=scio.loadmat(path1)
    for j in dd.keys():
        #只使用DE的数据
        if 'DE' in j:
            ddd=dd[j]
            #截前12W个数据
            data_x.append(ddd[:120000].ravel())
            data_y.append(i)
            # print(len(data_x))
            # print(len(data_y))
# data_x=np.array(data_x)

In [5]:
#切分成多个样本数据
def yuchuli(a,label):
    """
    前200个样本用于训练
    后100个样本用于测试
    """
    a  = a.reshape(300,400)
    np.random.shuffle(a)
    train = a[:200,:]
    test = a[200:,:]
    label_test =[label for i in range(0,100)]
    label_train = [label for i in range(0, 200)]
    return train,test,label_train ,label_test



#绘制P-R曲线
def micro_PR(y_test,y_score):
    # 对每一个类别计算性能指标
    precision = dict()
    recall = dict()
    average_precision = dict()
    # .shape会返回一个元组，存储行和列，取第二个数，也就是列
    n_classes = y_score.shape[1]
    for i in range(n_classes):
        precision[i], recall[i], _ =  precision_recall_curve(y_test[:, i], y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])
    precision["micro"],  recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")
    return precision, recall, average_precision

def plot_a_model_PR_curve(precision,recall,average_precision,title=None,path=None):
    plt.figure(figsize=(6,6))
    for i in precision.keys():
        label = str(i)+"，AP = {0:0.4f}".format(average_precision[i])
        plt.step(recall[i], precision[i], where='post', lw=2,label=label)
    plt.legend()
    plt.xlabel("Recall", fontdict={'weight': 'normal', 'size': 15})
    plt.ylabel("Precision", fontdict={'weight': 'normal', 'size': 15})
    if title != None:
        plt.title(title)
        
    if path!= None:
        plt.savefig(path,dpi=700)
    plt.show()

    
#将y转变为onehot向量
def to_one_hot(labels,label_dict):
    dimension=len(label_dict.keys())
    results = np.zeros((len(labels),dimension))
    for i,label in enumerate(labels):
        results[i,label_dict[label]] = 1
    return results

    
#创建CNN模型
def creat_model(input_shape,output,train_or_test='train'):
    """
    train_or_test：用于识别该模型的任务是什么，如果是train则为训练，需要所有的结构；如果为test，则只需要最后隐藏层的输出
    """
    model = models.Sequential()
    model.add(layers.Conv2D(60,(5,5),padding='same',activation='relu',input_shape=input_shape,name='conv1'))
    model.add(layers.MaxPooling2D((3,3),strides=2,name='max'))
    model.add(layers.Conv2D(128,(5,5),padding='same',activation='relu'))
    model.add(layers.MaxPooling2D((3,3),strides=2))
    model.add(layers.Conv2D(128,(3,3),padding='same',activation='relu'))
    model.add(layers.Conv2D(128,(3,3),padding='same',activation='relu'))
    model.add(layers.Conv2D(128,(3,3),padding='same',activation='relu'))
    model.add(layers.MaxPooling2D((3,3),strides=2))
    model.add(layers.Flatten(name='flatten'))
    model.add(layers.Dense(64,activation='relu',name='dense1'))
    if train_or_test=='train':
        model.add((layers.Dense(output,activation='softmax')))
        model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])    
    return model

def get_result_picture(all_result,title=None,path=None):
    x = np.arange(all_result.shape[1])# 生成横坐标范围
    columns=all_result.columns# 生成横坐标
    model_name_list=list(all_result.index)
    plt.figure(figsize=(10,8))
    for i in range(len(model_name_list)):
        plt.bar(x+0.15*i, all_result.iloc[i], width=0.15, label=model_name_list[i])
    plt.xticks(x+0.3*(i//2),columns)# 横坐标绑定
    plt.ylim(np.min(np.min(all_result))*0.99,1)
    plt.legend(loc="best")
    plt.xlabel('Evaluation',fontdict={'weight': 'normal', 'size': 15})
    plt.ylabel('Value',fontdict={'weight': 'normal', 'size': 15})
    plt.ylim(collect_every_model_result.values.min()*0.99,collect_every_model_result.values.max()*1.01)
    if title != None:
        plt.title(title)
    if path!=None:
        plt.savefig(path,dpi=700)
    plt.show()

In [6]:
#将多条数据的所有样本合并起来
x_train,x_test,y_train,y_test=[],[],[],[]
for i in range(len(data_x)):
    train,test,label_train ,label_test=yuchuli(data_x[i],data_y[i])
    x_train+=list(train)
    x_test+=list(test)
    y_train+=label_train
    y_test+=label_test
    
#转换数据格式
x_train=np.array(x_train)
x_test=np.array(x_test)

#将x转变为二维矩阵，方便CNN训练
x_train = x_train.reshape(-1,20,20,1)
x_test = x_test.reshape(-1,20,20,1)

state = np.random.get_state()
np.random.shuffle(x_train)
np.random.set_state(state)
np.random.shuffle(y_train)

#转换y的数据格式
y_train=np.array(y_train)
y_test=np.array(y_test)

#生成label的字典，用于将y转换为onehot格式
label_dict={}
label_dict_anti={}
for i,j in enumerate(list(set(list(y_train)+list(y_test)))):
    label_dict[j]=i
    label_dict_anti[i]=j
    

one_hot_train_labels = to_one_hot(y_train,label_dict)
one_hot_test_labels = to_one_hot(y_test,label_dict)

y_test_transform=np.argmax(one_hot_test_labels,axis=1)
y_train_transform=np.argmax(one_hot_train_labels,axis=1)

input_shape=x_train.shape[1:]
output=one_hot_train_labels.shape[1]

In [7]:
y_test_transform

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,

In [8]:
#创建回收器，保存必要的结果
collect_every_model_result=[]
collect_every_model_PR=[]

In [11]:
#创建新的模型，结构与原来的结构一致
model2 = creat_model(input_shape,output,train_or_test='test')


In [12]:
#加载原来保存的权重参数

x_temp1=model2.predict(x_train)
# x_temp2=model2.predict(x_test)

In [13]:
#加载原来保存的权重参数

# x_temp1=model2.predict(x_train)
x_temp2=model2.predict(x_test)

In [14]:
x_temp1.shape,x_temp2.shape

((2000, 64), (1000, 64))

In [13]:
#创建CNN+svm模型，此处核函数为线性核函数
svm_line = svm.SVC(C=0.9, kernel='linear',probability=True)  # linear kernel
svm_line.fit(x_temp1, y_train_transform)
svm_line_pred_test=svm_line.predict_proba(x_temp2)
svm_line_pred_test_1=np.argmax(svm_line_pred_test,axis=1)

# acc,pre,rec,f1=get_result(y_test_transform,svm_line_pred_test_1)
# print('CNN\'s result \n accuracy= {} \t precision= {} \t recall= {} \t F1= {}'.format(round(acc,5),round(pre,5),round(rec,5),round(f1,5)))

# #模型分类结果的混淆矩阵   
# plt_heatmap(y_test_transform,svm_line_pred_test_1,title=None,path=None)

# #模型的P-R曲线    
# precision, recall, average_precision = micro_PR(one_hot_test_labels,svm_line_pred_test)
# plot_a_model_PR_curve(precision,recall,average_precision,title=None,path=None)

# #画模型的ROC曲线
# roc_auc=get_ROC_AUC(one_hot_test_labels,svm_line_pred_test,list(label_dict.values()),title=None,path=None)

# #回收结果
# collect_every_model_result.append([acc,pre,rec,f1,roc_auc,'CNN+linear_svm'])
# collect_every_model_PR.append([precision, recall, average_precision,'CNN+linear_svm'])

In [15]:
svm_line_pred_test_1.shape

(1000,)

In [16]:
y_test.shape

(1000,)

In [20]:
y_test_transform.shape

(1000,)

In [21]:
import tensorflow as tf

In [27]:
svm_line_pred_test_1

array([2, 2, 4, 4, 2, 2, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2,
       4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 4, 2, 4, 2, 4,
       2, 2, 4, 4, 2, 4, 2, 2, 4, 2, 2, 4, 4, 4, 2, 4, 4, 4, 4, 2, 2, 4,
       2, 4, 2, 2, 2, 4, 2, 2, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 7, 7, 7, 5, 5, 7, 7, 7, 7, 5, 7, 7,
       7, 7, 7, 7, 7, 7, 5, 7, 7, 5, 5, 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7,
       7, 7, 7, 7, 7, 5, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1,

In [28]:
y_test_transform

array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1,

In [30]:
correct_prediction = tf.equal(svm_line_pred_test_1, y_test_transform)
acc = tf.reduce_mean(tf.cast(correct_prediction, "float"))
acc=float(acc)
# f1=metrics.f1_score(svm_line_pred_test_1, y_test_transform)

In [31]:
acc

0.5419999957084656