
# CNN-KCL: Automatic Myocarditis Diagnosis using Convolutional Neural Network Combined with K-means Clustering
**Danial Sharifrazi et al.**
* This code is related to the mentioned paper. Please cite the paper.

In [1]:
def clusteringFunc(x_data,y_data,k):    
    import numpy as np
    from sklearn.cluster import KMeans


 
    print('Start Clustering.............!')
    normals=[]
    sicks=[]
    for i in range(len(y_data)):
        if y_data[i]==0:
            normals.append(x_data[i])
        else:
            sicks.append(x_data[i])

    normals=np.array(normals)
    sicks=np.array(sicks)


    model=KMeans(n_clusters=k)   
    y_n=model.fit_predict(normals)  
    y_s=model.fit_predict(sicks)
    

    y_s2=[]
    for item in y_s:
        y_s2.append(item+k)


    y_n=list(y_n)
    y_n.extend(y_s2)
    y=np.array(y_n)


    normals=list(normals)
    sicks=list(sicks)
    normals.extend(sicks)
    x=np.array(normals)

    return x,y

In [2]:
def NetPlot(net_histories,n_epch):
    import numpy as np
    import matplotlib.pyplot as plt
  
    losses=[]
    val_losses=[]
    accuracies=[]
    val_accuracies=[]

    for item in net_histories:
        
        history=item.history
        loss=history['loss']
        val_loss=history['val_loss']
        accuracy=history['acc']
        val_accuracy=history['val_acc']
        
        losses.append(loss)
        val_losses.append(val_loss)
        accuracies.append(accuracy)
        val_accuracies.append(val_accuracy)


    losses2=np.zeros((1,n_epch))
    val_losses2=np.zeros((1,n_epch))
    accuracies2=np.zeros((1,n_epch))
    val_accuracies2=np.zeros((1,n_epch))

    for i in losses:
        losses2+=i

    for i in val_losses:
        val_losses2+=i
    
    for i in accuracies:
        accuracies2+=i
    
    for i in val_accuracies:
        val_accuracies2+=i


    # 10 is number of folds
    losses2=(losses2/10).flatten()
    accuracies2=(accuracies2/10).flatten()
    val_losses2=(val_losses2/10).flatten()
    val_accuracies2=(val_accuracies2/10).flatten()

    plt.figure('Accracy Diagram',dpi=600)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.plot(accuracies2,color='black')
    plt.plot(val_accuracies2,color='green')
    plt.legend(['Train Data','Validation Data'])
    plt.savefig('Accuracy Diagram.jpg')

    plt.figure('Loss Diagram',dpi=600)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.plot(losses2,color='black')
    plt.plot(val_losses2,color='green')
    plt.legend(['Train Data','Validation Data'])
    plt.savefig('Loss Diagram.jpg')

In [3]:
def DeepCNN(x_data,y_data,k):

    import datetime
    from sklearn.metrics import ( auc, classification_report,
                                confusion_matrix, roc_curve)
    from sklearn.model_selection import KFold, train_test_split
    from keras.layers import Conv1D,Dense, Dropout, Flatten
    from keras.losses import binary_crossentropy
    from keras.models import Sequential
    from keras.optimizers import Adam
    from keras.utils import np_utils
    from keras.callbacks import CSVLogger



    print('Start Deep Learning............!')


    lst_loss=[]
    lst_acc=[]
    lst_reports=[]
    lst_AUC=[]
    lst_matrix=[]
    lst_times=[]
    lst_history=[]
    fold_number=1
    n_epch=30

    kfold=KFold(n_splits=10,shuffle=True,random_state=None)
    for train,test in kfold.split(x_data,y_data):

        x_train=x_data[train]
        x_test=x_data[test]
        y_train=y_data[train]
        y_test=y_data[test]

        x_train,y_train=clusteringFunc(x_train,y_train,k)
        x_test,y_test=clusteringFunc(x_test,y_test,k)


        x_train=x_train.reshape((x_train.shape[0],100,100))
        x_test=x_test.reshape((x_test.shape[0],100,100))

        x_train,x_valid,y_train,y_valid=train_test_split(x_train,y_train,test_size=0.2,random_state=None)


        print(f'train: {x_train.shape}  {y_train.shape}')
        print(f'test: {x_test.shape}  {y_test.shape}')
        print(f'valid: {x_test.shape}  {y_valid.shape}')


        calback=CSVLogger(f'logger_fold{fold_number}.log')

        y_train=np_utils.to_categorical(y_train)
        y_test=np_utils.to_categorical(y_test)
        y_valid=np_utils.to_categorical(y_valid)



        # CNN Architecture
        model=Sequential()
        model.add(Conv1D(32,3,padding='same',activation='relu',strides=2,input_shape=(100,100)))
        model.add(Conv1D(64,3,padding='same',activation='relu',strides=2))
        model.add(Conv1D(128,3,padding='same',activation='relu',strides=2))
        model.add(Conv1D(256,3,padding='same',activation='relu',strides=1))
        model.add(Conv1D(256,3,padding='same',activation='relu',strides=1))
        model.add(Conv1D(256,3,padding='same',activation='relu',strides=1))
        model.add(Flatten())
        model.add(Dense(256,activation='relu'))
        model.add(Dense(128,activation='relu'))
        model.add(Dense(64,activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(k*2,activation='sigmoid'))



        model.compile(optimizer=Adam(),loss=binary_crossentropy,metrics=['accuracy'])
            
        start=datetime.datetime.now()
        net_history=model.fit(x_train, y_train, batch_size=512, epochs=n_epch,validation_data=(x_valid,y_valid),callbacks=[calback])
        end=datetime.datetime.now()
        training_time=end-start

        model.save(f'CNN_fold{fold_number}.h5')

        test_loss, test_acc=model.evaluate(x_test,y_test)

        predicts=model.predict(x_test)
        predicts=predicts.argmax(axis=1)
        actuals=y_test.argmax(axis=1)

        fpr, tpr, thresholds = roc_curve(actuals, predicts, pos_label=1)
        a=auc(fpr,tpr)
        r=classification_report(actuals,predicts)
        c=confusion_matrix(actuals,predicts)



        lst_history.append(net_history)
        lst_times.append(training_time)
        lst_acc.append(test_acc)
        lst_loss.append(test_loss)
        lst_AUC.append(a)
        lst_reports.append(r)
        lst_matrix.append(c)

        fold_number+=1

        
    # plot loss and accuracy diagrams
    NetPlot(lst_history,n_epch)

    path=f'CNN_Kmeans_Results.txt' 
    f1=open(path,'a')
    f1.write('\nAccuracies: '+str(lst_acc)+'\nLosses: '+str(lst_loss))
    f1.write('\n\nMetrics for all Folds: \n\n')
    for i in range(len(lst_reports)):
        f1.write(str(lst_reports[i]))
        f1.write('\n\nTraining Time: '+str(lst_times[i])+'\nAUC: '+str(lst_AUC[i]))
        f1.write('\n\nCofusion Matrix: \n'+str(lst_matrix[i])+'\n\n__________________________________________________________\n')
    f1.close()

In [1]:
actuals

NameError: name 'actuals' is not defined

In [4]:
def Read_Data():    
    import numpy as np
    import cv2
    from skimage.io import imread
    import glob
    import os


    normals=[]
    #main_path='/kaggle/input/myocarditis-dataset/Normal/'
    main_path='E:/Datasets/dataset_myocarditis/Normal/'
    main_folders=next(os.walk(main_path))[1]
    for i in main_folders:
        path=main_path+i+'/'
        folders=next(os.walk(path))[1]
        for x in folders:
            new_path=path+x+'/'
            data=glob.glob(new_path+'*.jpg')
            if len(data)<1:
                indent_folders=next(os.walk(new_path))[1]
                for y in indent_folders:
                    new_path=new_path+y+'/'
                    data=glob.glob(new_path+'*.jpg')
            normals.extend(data)




    #read sicks files
    sicks=[]
    #main_path='/kaggle/input/myocarditis-dataset/Sick/'
    main_path='E:/Datasets/dataset_myocarditis/Sick/'
    main_folders=next(os.walk(main_path))[1]
    for i in main_folders:
        path=main_path+i+'/'
        folders=next(os.walk(path))[1]
        for x in folders:
            new_path=path+x+'/'
            data=glob.glob(new_path+'*.jpg')
            if len(data)<1:
                indent_folders=next(os.walk(new_path))[1]
                for y in indent_folders:
                    new_path=new_path+y+'/'
                    data=glob.glob(new_path+'*.jpg')
            sicks.extend(data)
    
    #load normal files
    labels_n=[]
    train_data_n=[]
    for id in normals:    
        img=imread(id)
        img=cv2.resize(img,(100,100))
        # img=img.astype('float32')
        img=img.flatten()
        train_data_n.append(img)
        labels_n.append(0)



    #load sick files
    labels_s=[]
    train_data_s=[]
    for id in sicks:    
        img=imread(id)
        img=cv2.resize(img,(100,100))
        # img=img.astype('float32')
        img=img.flatten()
        train_data_s.append(img)
        labels_s.append(1)

    train_data_n.extend(train_data_s)
    labels_n.extend(labels_s)

    x_data=np.array(train_data_n)
    y_data=np.array(labels_n)

    
    # calling deep learning method
    k=2
    DeepCNN(x_data,y_data,k)

In [5]:
Read_Data()

Start Deep Learning............!
Start Clustering.............!
Start Clustering.............!
train: (70907, 100, 100)  (70907,)
test: (9849, 100, 100)  (9849,)
valid: (9849, 100, 100)  (17727,)
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


ValueError: multiclass format is not supported

import tensorflow as tf
tf.version.VERSION