# Comparative methods
To demonstrate the effectiveness of the DCGN method, the following approaches are used as comparison methods.

First, load the data and ensure that the shape of the data is the same as the shape of the tag. Feature normalization is then performed.

## Random Forest (RF): a classifier that contains multiple decision trees and whose output is determined by the plurality vote of the classes predicted by the individual trees.

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score

from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import hamming_loss
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import KFold
# Repeated hold-out evaluation of the Random Forest baseline.
# Every repetition draws a new 80/20 split of (da1, tag), fits a fresh forest
# and scores it on the held-out part. The per-split scores are collected in
# `rf_scores` so that all repetitions can be summarised; previously each
# repetition overwrote the scores of the one before it.
epoch = 10
rf_scores = {
    "accuracy": [],
    "f1": [],
    "precision": [],
    "recall": [],
    "kappa": [],
    "hamming_loss": [],
}
for x in range(epoch):
    # train_test_split is called without random_state, so each repetition
    # works on a different split.
    Xtrain, Xval, Ytrain, Yval = train_test_split(da1, tag, test_size=0.2)

    rfc = RandomForestClassifier(random_state=0)
    rfc = rfc.fit(Xtrain, Ytrain)
    pre_rfc = rfc.predict(Xval)

    # All metrics are called as (y_true, y_pred).
    acc_rfc = accuracy_score(Yval, pre_rfc)
    f1_rfc = f1_score(Yval, pre_rfc, average='weighted')
    p_rfc = precision_score(Yval, pre_rfc, average='weighted')
    r_rfc = recall_score(Yval, pre_rfc, average='weighted')
    kappa = cohen_kappa_score(Yval, pre_rfc)
    ham_distance = hamming_loss(Yval, pre_rfc)

    rf_scores["accuracy"].append(acc_rfc)
    rf_scores["f1"].append(f1_rfc)
    rf_scores["precision"].append(p_rfc)
    rf_scores["recall"].append(r_rfc)
    rf_scores["kappa"].append(kappa)
    rf_scores["hamming_loss"].append(ham_distance)

# Report the mean of every metric over all repetitions.
for metric_name, values in rf_scores.items():
    if values:
        print("RF mean {} over {} splits: {:.4f}".format(
            metric_name, len(values), sum(values) / len(values)))

## Support Vector Machine (SVM): the basic model is a linear classifier with the largest margin defined on the feature space.

In [None]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score,confusion_matrix
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import hamming_loss

# numpy is used below, but the import list of this cell does not import it.
import numpy as np

# Repeated hold-out evaluation of a one-vs-rest linear SVM baseline.
# Every repetition draws a new 80/20 split of (data, tag), fits a fresh model
# and scores it on the held-out part. The per-split scores are collected in
# `svm_scores` (and the confusion matrices in `svm_confusions`) so that all
# repetitions can be summarised; previously each repetition overwrote the
# scores of the one before it.
# NOTE(review): this cell splits `data`, whereas the Random Forest and Deep
# Forest cells split `da1` — confirm that this difference is intended.
random_state = np.random.RandomState(0)
data = np.array(data)
tag = np.array(tag)
epoch = 10
svm_scores = {
    "accuracy": [],
    "f1": [],
    "precision": [],
    "recall": [],
    "kappa": [],
    "hamming_loss": [],
}
svm_confusions = []
for t in range(epoch):
    # train_test_split is called without random_state, so each repetition
    # works on a different split.
    Xtrain, Xval, Ytrain, Yval = train_test_split(data, tag, test_size=0.2)

    # NOTE(review): probability=True is kept from the original; only predict()
    # is used below, so confirm whether probability estimates are needed.
    model = OneVsRestClassifier(
        svm.SVC(kernel='linear', probability=True, random_state=random_state))
    clt = model.fit(Xtrain, Ytrain)
    y_test_pred = clt.predict(Xval)

    # All metrics are called as (y_true, y_pred).
    acc = accuracy_score(Yval, y_test_pred)
    f1 = f1_score(Yval, y_test_pred, average='weighted')
    p = precision_score(Yval, y_test_pred, average='weighted')
    r = recall_score(Yval, y_test_pred, average='weighted')
    cm = confusion_matrix(Yval, y_test_pred)
    kappa = cohen_kappa_score(Yval, y_test_pred)
    ham_distance = hamming_loss(Yval, y_test_pred)

    svm_scores["accuracy"].append(acc)
    svm_scores["f1"].append(f1)
    svm_scores["precision"].append(p)
    svm_scores["recall"].append(r)
    svm_scores["kappa"].append(kappa)
    svm_scores["hamming_loss"].append(ham_distance)
    svm_confusions.append(cm)

# Report the mean of every scalar metric over all repetitions.
for metric_name, values in svm_scores.items():
    if values:
        print("SVM mean {} over {} splits: {:.4f}".format(
            metric_name, len(values), sum(values) / len(values)))

## GBDT (Gradient Boosting Decision Tree): decision trees trained with the gradient boosting strategy.

In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score,confusion_matrix

# Repeated hold-out evaluation of the gradient-boosted trees baseline.
# Every repetition draws a new 80/20 split of (data, tag), fits a fresh model
# and scores it on the held-out part. The per-split scores are collected in
# `gbdt_scores` (and the confusion matrices in `gbdt_confusions`) so that all
# repetitions can be summarised; previously each repetition overwrote the
# scores of the one before it.
epoch = 10
gbdt_scores = {
    "accuracy": [],
    "f1": [],
    "precision": [],
    "recall": [],
}
gbdt_confusions = []
for i in range(epoch):
    # train_test_split is called without random_state, so each repetition
    # works on a different split.
    Xtrain, Xval, Ytrain, Yval = train_test_split(data, tag, test_size=0.2)

    gbm = GradientBoostingClassifier(
        learning_rate=0.1,
        n_estimators=230,
        max_depth=3,
        min_samples_leaf=5,
        min_samples_split=5,
        max_features='sqrt',
        subsample=0.8,
        random_state=10,
    )
    gbm.fit(Xtrain, Ytrain)
    y_pred = gbm.predict(Xval)

    # All metrics are called as (y_true, y_pred).
    acc = accuracy_score(Yval, y_pred)
    f1 = f1_score(Yval, y_pred, average='weighted')
    p = precision_score(Yval, y_pred, average='weighted')
    r = recall_score(Yval, y_pred, average='weighted')
    cm = confusion_matrix(Yval, y_pred)

    gbdt_scores["accuracy"].append(acc)
    gbdt_scores["f1"].append(f1)
    gbdt_scores["precision"].append(p)
    gbdt_scores["recall"].append(r)
    gbdt_confusions.append(cm)

# Report the mean of every scalar metric over all repetitions.
for metric_name, values in gbdt_scores.items():
    if values:
        print("GBDT mean {} over {} splits: {:.4f}".format(
            metric_name, len(values), sum(values) / len(values)))

## Deep Forest: the deep forest model applies the principles of deep neural networks to the traditional machine learning algorithm "random forest".

In [None]:
from deepforest import CascadeForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score,confusion_matrix
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import hamming_loss

# Repeated hold-out evaluation of the Deep Forest (cascade forest) baseline.
# Every repetition draws a new 80/20 split of (da1, tag), fits a fresh model
# and scores it on the held-out part. The per-split scores are collected in
# `df_scores` so that all repetitions can be summarised; previously each
# repetition overwrote the scores of the one before it.
epoch = 10
df_scores = {
    "accuracy": [],
    "f1": [],
    "precision": [],
    "recall": [],
    "kappa": [],
    "hamming_loss": [],
}
for i in range(epoch):
    # train_test_split is called without random_state, so each repetition
    # works on a different split.
    Xtrain, Xval, Ytrain, Yval = train_test_split(da1, tag, test_size=0.2)

    model = CascadeForestClassifier(random_state=0)
    model.fit(Xtrain, Ytrain)
    y_pred = model.predict(Xval)

    # All metrics are called as (y_true, y_pred).
    # The confusion matrix is not computed for this model.
    acc = accuracy_score(Yval, y_pred)
    f1 = f1_score(Yval, y_pred, average='weighted')
    p = precision_score(Yval, y_pred, average='weighted')
    r = recall_score(Yval, y_pred, average='weighted')
    kappa = cohen_kappa_score(Yval, y_pred)
    ham_distance = hamming_loss(Yval, y_pred)

    df_scores["accuracy"].append(acc)
    df_scores["f1"].append(f1)
    df_scores["precision"].append(p)
    df_scores["recall"].append(r)
    df_scores["kappa"].append(kappa)
    df_scores["hamming_loss"].append(ham_distance)

# Report the mean of every metric over all repetitions.
for metric_name, values in df_scores.items():
    if values:
        print("Deep Forest mean {} over {} splits: {:.4f}".format(
            metric_name, len(values), sum(values) / len(values)))

## SAE: a deep neural network model composed of multiple layers of Sparse AutoEncoders (sparse self-encoders).

The SAE loss function also includes a reconstruction loss, which distinguishes it from a general classification model.

In [None]:
import numpy as np
import scipy.io
import tensorflow as tf
from tensorflow.keras.layers import Reshape
from tensorflow.keras import Input
from matplotlib import pyplot as plt
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Activation,Dropout,Flatten,Dense
from tensorflow.keras import Model,datasets
import os

class MyModel(tf.keras.Model):
    """Dense autoencoder that returns the reconstruction joined with its code.

    The encoder (``f1``-``f4``) passes the input through 1024, 256 and 64 ReLU
    units and then through a linear layer of ``code_dim`` units. The decoder
    (``f5``-``f8``) maps that code back through 64, 256 and 1024 ReLU units to
    a final ReLU layer of ``input_dim`` units.

    Args:
        input_dim: Number of units of the last decoder layer ``f8``, i.e. the
            width of the reconstruction. Defaults to 20000.
        code_dim: Number of units of the linear bottleneck layer ``f4``.
            Defaults to 10.
            NOTE(review): presumably these are the class scores used by the
            classification part of the loss — confirm against the training code.
    """

    def __init__(self, input_dim: int = 20000, code_dim: int = 10):
        super().__init__()
        # Encoder.
        self.f1 = tf.keras.layers.Dense(1024, activation='relu')
        self.f2 = tf.keras.layers.Dense(256, activation='relu')
        self.f3 = tf.keras.layers.Dense(64, activation='relu')
        # Bottleneck: no activation, so its output is unbounded.
        self.f4 = tf.keras.layers.Dense(code_dim)
        # Decoder, mirroring the encoder widths.
        self.f5 = tf.keras.layers.Dense(64, activation='relu')
        self.f6 = tf.keras.layers.Dense(256, activation='relu')
        self.f7 = tf.keras.layers.Dense(1024, activation='relu')
        self.f8 = tf.keras.layers.Dense(input_dim, activation='relu')

    def call(self, x):
        """Run the forward pass.

        Args:
            x: Input batch; assumed to be 2-D (batch, features), since the
                outputs are concatenated along axis 1 — TODO confirm.

        Returns:
            The reconstruction (``input_dim`` columns) followed by the
            bottleneck output (``code_dim`` columns), concatenated along
            axis 1.
        """
        x = self.f1(x)
        x = self.f2(x)
        x = self.f3(x)
        # Keep the bottleneck output: it feeds the decoder and is also
        # returned alongside the reconstruction.
        x1 = self.f4(x)
        x = self.f5(x1)
        x = self.f6(x)
        x = self.f7(x)
        y = self.f8(x)
        z = tf.concat([y, x1], axis=1)
        return z
    
    
# Instantiate the SAE network defined above.
model = MyModel()
# Build the model for inputs with 20000 features
# (909 is presumably the number of samples in the dataset — TODO confirm).
model.build(input_shape=(909,20000))
# Run the forward pass once on a symbolic Keras Input; presumably this is done
# so that summary() below can report concrete output shapes — TODO confirm.
model.call(Input(shape=(20000,)))

# Print the layer-by-layer overview of the model.
model.summary()