In [1]:
# Mount Google Drive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Make the project folder the working directory: the relative paths used by
# the cells below (./results, ./output, ../models, ...) are resolved from here.
%cd /content/drive/MyDrive/MLinApp_project_mine/NNI_ste

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1wXg2dci4kAWzFZAvhlShcmwk3t0dUHTP/MLinApp_project_mine/NNI_ste


In [2]:
import numpy as np
import pickle as pk  

from sklearn.cluster import KMeans, DBSCAN
from sklearn.model_selection import train_test_split
from sklearn import svm
import sklearn.metrics
import csv, pprint
import scipy.stats
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import math
import json
import zipfile, sqlite3, json, os, pandas as pd
import tensorflow as tf
from keras.models import load_model

# Seed and hold-out fraction (random_state is reassigned to 0 in the
# "Paper Inference" section further down).
random_state = 42
test_size = 0.2


In [3]:
%%capture 
# Install the project requirements; %%capture hides the output of this cell.
import os
# Fail early when the requirements file is not in the working directory.
if "requirements_pip.txt" not in os.listdir("./"):
  raise SystemError('requirements_pip.txt missing!')
else:
  ! pip install -r requirements_pip.txt

# NOTE(review): the disabled line below would kill the kernel process,
# presumably to force a runtime restart after the install - confirm.
#os.kill(os.getpid(), 9)

# KMeans


In [4]:
# Dataset Functions
def label_processing(labels):
    """Translate every label into its integer class index.

    The index of each label is the one assigned by ``create_dictionary``.
    Returns a list with one index per element of ``labels``, in input order.
    """
    index_of = create_dictionary(labels)
    return [index_of[label] for label in labels]
def create_dictionary(labels):
    """Map each distinct label to its position in ``np.unique(labels)``."""
    return {name: position for position, name in enumerate(np.unique(labels))}
def extract_label(file_name):
    """Read a comma-separated label file into a NumPy array of rows.

    Parameters
    ----------
    file_name : str
        Path of the CSV file holding the labels.

    Returns
    -------
    numpy.ndarray
        One entry per non-blank line; each entry is the list of fields of
        that line, so a single-column file yields an array of shape (n, 1).
    """
    # newline='' is what the csv module asks for, so that the reader itself
    # interprets line endings and quoted fields.
    with open(file_name, "r", newline="") as fin:
        # csv.reader yields an empty list for a blank line; keeping it would
        # make the rows ragged, so blank lines are dropped.
        rows = [row for row in csv.reader(fin, delimiter=',') if row]
    return np.array(rows)
def load_dataset(data_file_name, label_file_name):
    """Load the feature matrix and its labels from two CSV files.

    The labels are read with ``extract_label`` and the features with
    ``np.genfromtxt``. The number of feature rows is printed, then the pair
    ``(features, labels)`` is returned.
    """
    labels = extract_label(label_file_name)
    features = np.genfromtxt(data_file_name, delimiter=',')
    row_count = features.shape[0]
    print(f"Dataset dimensions: {row_count}")
    return features, labels




### Reload CNN and SCNN models

In [5]:
# CNN and SCNN functions

def load_model_from_experiment(optim_nni_experiment, dataset_name):
  """Load the saved Keras CNN of one NNI experiment and print its summary.

  Args:
    optim_nni_experiment: NNI experiment id; part of the model folder name.
    dataset_name: dataset identifier; part of the model folder name.

  Returns:
    The object returned by ``load_model`` for
    ``./output/tmp_cnn_<experiment>_<dataset>/model_cnn``.
  """
  model_path = "./output/tmp_{}_{}_{}/model_cnn".format("cnn", optim_nni_experiment, dataset_name)
  model = load_model(model_path)
  print(f"Model for {dataset_name} loaded correctly")
  model.summary()
  return model



def get_batch(optim_nni_experiment, dataset_name):
  """Return the batch size of the top-ranked trial of a CNN NNI experiment.

  The experiment archive is searched in
  ``./results/Experiment_cnn_<experiment>/`` (any entry whose name contains
  the experiment id). Its NNI sqlite database is queried for the FINAL
  metrics, the trial ranked first by the ``data`` column (descending) is
  selected, and that trial's ``parameter.cfg`` is read from the same archive.
  Both files are extracted into the current working directory.

  Args:
    optim_nni_experiment: NNI experiment id.
    dataset_name: not used; kept so existing callers keep working.

  Returns:
    The value stored under ``nni_network/batch_size/randint``.

  Raises:
    FileNotFoundError: if the folder holds no archive for the experiment.
  """
  ##### GET NETWORK STRUCTURE PARAMETERS from previous NNI optimization of non-spiking CNN #####
  optim_db_filepath = "root/nni-experiments/{}/db/nni.sqlite".format(optim_nni_experiment)
  local_experiment_folder = "./results/Experiment_{}_{}/".format("cnn", optim_nni_experiment)

  # The last matching entry wins; an explicit error replaces the
  # UnboundLocalError that an empty match would otherwise cause.
  candidates = [name for name in os.listdir(local_experiment_folder) if optim_nni_experiment in name]
  if not candidates:
    raise FileNotFoundError(
        "No archive containing '{}' found in {}".format(optim_nni_experiment, local_experiment_folder))
  target = candidates[-1]

  # One archive handle serves both extractions and is closed on exit.
  with zipfile.ZipFile(local_experiment_folder + target) as zf:
    con = sqlite3.connect(zf.extract(optim_db_filepath))
    try:
      df = pd.read_sql_query("SELECT * from MetricData", con)
    finally:
      con.close()

    # NOTE(review): 'data' is sorted with whatever dtype the database returns;
    # if it is stored as text the ranking is lexicographic - confirm.
    df_default = df[df["type"] == "FINAL"].sort_values(by='data', ascending=False)
    optim_nni_trial = df_default["trialJobId"].iloc[0]

    optim_nni_ref = 'nni-experiments/' + optim_nni_experiment + '/trials/' + optim_nni_trial
    optim_filepath = os.path.join(os.path.expanduser('~'), optim_nni_ref, 'parameter.cfg')

    # [1:] drops the first character of the home-based path (its leading
    # separator) to obtain the member name inside the archive.
    with open(zf.extract(optim_filepath[1:]), 'r') as f:
      param_data = json.load(f)

  network_parameters = param_data['parameters']
  return network_parameters['nni_network/batch_size/randint']

# SNN parameters 

def get_params_scnn(optim_nni_experiment, dataset_name):
  """Return the parameters of the top-ranked trial of an SCNN NNI experiment.

  The experiment archive is searched in
  ``./results/Experiment_scnn_<experiment>/`` (any entry whose name contains
  the experiment id). Its NNI sqlite database is queried for the FINAL
  metrics, the trial ranked first by the ``data`` column (descending) is
  selected, and that trial's ``parameter.cfg`` is read from the same archive.
  Both files are extracted into the current working directory.

  Args:
    optim_nni_experiment: NNI experiment id.
    dataset_name: not used; kept so existing callers keep working.

  Returns:
    The ``parameters`` dictionary of the selected trial.

  Raises:
    FileNotFoundError: if the folder holds no archive for the experiment.
  """
  ##### GET NETWORK STRUCTURE PARAMETERS from previous NNI optimization of non-spiking CNN #####
  optim_db_filepath = "root/nni-experiments/{}/db/nni.sqlite".format(optim_nni_experiment)
  local_experiment_folder = "./results/Experiment_{}_{}/".format("scnn", optim_nni_experiment)

  # The last matching entry wins; an explicit error replaces the
  # UnboundLocalError that an empty match would otherwise cause.
  target = None
  for entry in os.listdir(local_experiment_folder):
    if optim_nni_experiment in entry:
      target = entry
  if target is None:
    raise FileNotFoundError(
        "No archive containing '{}' found in {}".format(optim_nni_experiment, local_experiment_folder))

  # One archive handle serves both extractions and is closed on exit.
  with zipfile.ZipFile(local_experiment_folder + target) as zf:
    con = sqlite3.connect(zf.extract(optim_db_filepath))
    try:
      df = pd.read_sql_query("SELECT * from MetricData", con)
    finally:
      con.close()

    # NOTE(review): 'data' is sorted with whatever dtype the database returns;
    # if it is stored as text the ranking is lexicographic - confirm.
    df_default = df[df["type"] == "FINAL"].sort_values(by='data', ascending=False)
    optim_nni_trial = df_default["trialJobId"].iloc[0]

    optim_nni_ref = 'nni-experiments/' + optim_nni_experiment + '/trials/' + optim_nni_trial
    optim_filepath = os.path.join(os.path.expanduser('~'), optim_nni_ref, 'parameter.cfg')

    # [1:] drops the first character of the home-based path (its leading
    # separator) to obtain the member name inside the archive.
    with open(zf.extract(optim_filepath[1:]), 'r') as f:
      param_data = json.load(f)

  network_parameters = param_data['parameters']
  return(network_parameters)

import nengo, nengo_dl

# SCNN evaluation


def scnn(keras_model, optim_snn, params, dataset_name, test_data):
    """Convert a Keras CNN to a spiking network and predict with saved weights.

    The model is converted with ``nengo_dl.Converter`` (ReLU swapped for
    ``nengo.SpikingRectifiedLinear``), a simulator is built and compiled, the
    parameters stored at
    ``./output/tmp_scnn_<optim_snn>_<dataset_name>/best_test_<optim_snn>`` are
    loaded, and ``test_data`` is run through the network.

    Args:
        keras_model: the Keras model to convert.
        optim_snn: NNI experiment id of the spiking-network optimization.
        params: dictionary with the ``nni_keras2snn_network/...`` entries read
            below (firing-rate scale, synapse, batch size, learning rate,
            per-layer loss weights, number of steps).
        dataset_name: dataset identifier; part of the parameter folder name.
        test_data: input array; it is tiled ``n_steps`` times along axis 1
            (the caller in this notebook passes shape (n, 1, features)).

    Returns:
        The argmax over the last axis of the output probe, taken at the last
        index along axis 1.

    Raises:
        Exception: any error raised while loading the parameters or
            predicting is printed and then re-raised.
    """
    out_dir = './output/tmp_' + "scnn" + '_' + optim_snn + '_' + dataset_name + '/'
    trained_converter = nengo_dl.Converter(
        keras_model,
        max_to_avg_pool=True,
        swap_activations={tf.nn.relu: nengo.SpikingRectifiedLinear()},
        scale_firing_rates=params['nni_keras2snn_network/scale_firing_rates/randint'],
        synapse=params['nni_keras2snn_network/synapse/quniform'],
    )

    with trained_converter.net:
        nengo_dl.configure_settings(planner=nengo_dl.graph_optimizer.noop_planner)
        output_p = trained_converter.outputs[keras_model.output]
        # Probes on layers 1, 3, 5 and 7 of the Keras model
        # (presumably its four convolutional layers - confirm).
        conv0_p = nengo.Probe(trained_converter.layers[keras_model.layers[1].get_output_at(-1)])
        conv1_p = nengo.Probe(trained_converter.layers[keras_model.layers[3].get_output_at(-1)])
        conv2_p = nengo.Probe(trained_converter.layers[keras_model.layers[5].get_output_at(-1)])
        conv3_p = nengo.Probe(trained_converter.layers[keras_model.layers[7].get_output_at(-1)])

    sim = nengo_dl.Simulator(trained_converter.net, minibatch_size=params['nni_keras2snn_network/batch_size/randint'])
    # try/finally guarantees the simulator is closed on success and on error;
    # a close() placed after the return/raise would never run.
    try:
        sim.compile(
            optimizer=tf.optimizers.Adam(params['nni_keras2snn_network/lr/quniform']),
            loss={
                output_p: tf.losses.SparseCategoricalCrossentropy(from_logits=True),
                conv0_p: tf.losses.mse,
                conv1_p: tf.losses.mse,
                conv2_p: tf.losses.mse,
                conv3_p: tf.losses.mse,
            },
            loss_weights={
                output_p: 1,
                conv0_p: params['nni_keras2snn_network/reg_conv0/quniform'],
                conv1_p: params['nni_keras2snn_network/reg_conv1/quniform'],
                conv2_p: params['nni_keras2snn_network/reg_conv2/quniform'],
                conv3_p: params['nni_keras2snn_network/reg_conv3/quniform'],
            },
            metrics=["accuracy"],
        )

        n_steps = params['nni_keras2snn_network/n_steps/randint']
        # Repeat the input along axis 1 so the network sees it for n_steps steps.
        tiled_test_data = np.tile(test_data, (1, n_steps, 1))
        try:
            sim.load_params(out_dir+'best_test_'+optim_snn)
            data = sim.predict({trained_converter.inputs[keras_model.input]: tiled_test_data})
            predictions = np.argmax(data[trained_converter.outputs[keras_model.output]][:, -1], axis=-1)
            return predictions
        except Exception as e:
            print(e)
            raise
    finally:
        sim.close()

# Matrix for RFC 
def metadata_matrix_generator(ovo, ovr, train_data, verbose=False):
  """Stack the OvO and OvR decision scores of ``train_data`` side by side.

  Args:
    ovo: object exposing ``decision_function``; its scores come first.
    ovr: object exposing ``decision_function``; its scores come second.
    train_data: samples passed unchanged to both ``decision_function`` calls.
    verbose: when equal to True, print the shape of each score block and of
      the stacked matrix.

  Returns:
    ``np.hstack`` of the two score arrays.
  """
  score_blocks = []
  for classifier in (ovo, ovr):
    scores = classifier.decision_function(train_data)
    if verbose == True:
      print(scores.shape)
    score_blocks.append(scores)

  stacked = np.hstack(tuple(score_blocks))
  if verbose == True:
    print(stacked.shape)
  return stacked


In [6]:
# CNN models loading
numberOfSuperclass = 5
# NNI experiment id of the CNN of each superclass, keyed by the superclass
# index written as a string.
CNN_models_code = {
                  '0': 'Xs68DgU3',
                  '1': 'FnrO3zG8',
                  '2': 'Kw4Amaru',
                  '3': 'zQ402ChL',
                   '4': 'RgpmIaYS',
                  }

# Both lists are filled in superclass order, so position i belongs to
# superclass i.
CNN_models = []
batch_sizes = []
for i in range(numberOfSuperclass):
  cnn_code = CNN_models_code[f'{i}']
  dataset_name = f'miRNA-superclass{i}'
  CNN_models.append(load_model_from_experiment(cnn_code, dataset_name))
  batch_sizes.append(get_batch(cnn_code, dataset_name))




Model for miRNA-superclass0 loaded correctly
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 415, 1)]          0         
                                                                 
 Conv1D_1 (Conv1D)           (None, 412, 73)           365       
                                                                 
 MaxPooling1D_1 (MaxPooling1  (None, 206, 73)          0         
 D)                                                              
                                                                 
 Conv1D_2 (Conv1D)           (None, 201, 75)           32925     
                                                                 
 MaxPooling1D_2 (MaxPooling1  (None, 50, 75)           0         
 D)                                                              
                                                                 
 Conv1D_3 (Conv1

In [7]:
# Local label mapping for later conversion
# Each stored dictionary is inverted (values become keys) so that the index
# predicted inside a superclass can be looked up directly.
local_label_mapping = []
for i in range(numberOfSuperclass):
  # Context manager so the JSON file handle is closed right after reading.
  with open(f"../data-knn/superclass{i}/label_mapping_dict.json", "r") as mapping_file:
    temp = json.load(mapping_file)
  local_label_mapping.append({v: k for k, v in temp.items()})

In [8]:
# PCA Superclass
# NOTE: pickle can execute arbitrary code while loading; only load model
# files that come from a trusted source.
PCA_per_class = []
for i in range(numberOfSuperclass):
  # Context manager so each pickle file handle is closed after loading.
  with open(f"../models/pca_superclass_{i}.pkl", 'rb') as model_file:
    pca = pk.load(model_file)
  PCA_per_class.append(pca)

# Classificator loading
with open("../models/final_RFC-RAW.pkl", 'rb') as model_file:
  rfc = pk.load(model_file)
with open("../models/final_ovo.pkl", 'rb') as model_file:
  ovo = pk.load(model_file)
with open("../models/final_ovr.pkl", 'rb') as model_file:
  ovr = pk.load(model_file)


### Data loading

In [9]:
# Load the test features and labels; no row or column is removed in this cell.
miRna_data, miRna_label = load_dataset("../train_test/test_data.csv","../train_test/test_label.csv")

# Global statistics computed over the whole feature matrix.
print("Standard deviation: ",miRna_data.std())
print("Mean value: ", miRna_data.mean())
# Sanity check: the feature matrix must not contain NaN values.
assert np.isnan(miRna_data).sum() == 0

print(miRna_label.shape, miRna_data.shape)

# The labels come back with one row per line, so the first column is taken to
# obtain a 1-D label vector.
test_data, test_label = miRna_data, miRna_label[:,0]
print(test_data.shape, test_label.shape)

Dataset dimensions: 2217
Standard deviation:  1.0059083350556282
Mean value:  -2.113397681330766e-14
(2217, 1) (2217, 1881)
(2217, 1881) (2217,)


## Inference

### CNN 

In [10]:
# NOTE(review): this empty list is overwritten by rfc.predict a few lines below.
superclasses = []
# Superclass predictions
# rfc classifies every sample from the stacked OvO/OvR decision scores.
matrix = metadata_matrix_generator(ovo, ovr, test_data)

superclasses = rfc.predict(matrix)

# Report how many test samples were assigned to each superclass.
for i in range(numberOfSuperclass):
  data = test_label[superclasses == i]
  print(f"Superclass{i}: {len(data)}\n")
  # for label in np.unique(data):
  #   print(f"\t{label}, {len(data[data==label])}")
tf.autograph.set_verbosity(0)
# NOTE(review): the next line only evaluates the decorator object (its repr is
# what the cell displays); it is not applied to any function.
tf.autograph.experimental.do_not_convert

Superclass0: 195

Superclass1: 109

Superclass2: 857

Superclass3: 103

Superclass4: 953



<function tensorflow.python.autograph.impl.api.do_not_convert(func=None)>

In [11]:
# Sample per Sample classification

# One final label per test sample, in test-set order.
predicted = []
for i, test_sample in enumerate(test_data):
  # Turn the 1-D feature vector into a single-row 2-D array.
  test_sample=test_sample.reshape(1,-1)
  # Stage 1: rfc picks the superclass from the OvO/OvR decision scores.
  matrix = metadata_matrix_generator(ovo, ovr, test_sample)
  SVM_prediction = rfc.predict(matrix)[0]
  # Stage 2: project with the PCA of that superclass and classify with its CNN.
  test_sample_red = PCA_per_class[SVM_prediction].transform(test_sample)
  test_sample = test_sample_red
  local_map = local_label_mapping[SVM_prediction]
  final_prediction_to_translate = CNN_models[SVM_prediction].predict(test_sample, verbose=0)
  # Index of the highest CNN output, then translated through the mapping of
  # the chosen superclass.
  final_prediction_to_translate = np.argmax(np.array(final_prediction_to_translate))
  final_prediction = local_map[final_prediction_to_translate]
  predicted.append(final_prediction)
    

Cause: Unable to locate the source code of <function Model.make_predict_function.<locals>.predict_function at 0x7f0d5fdb98b0>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: lineno is out of bounds
Cause: Unable to locate the source code of <function Model.make_predict_function.<locals>.predict_function at 0x7f0d5fcc6550>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: lineno is out of bounds
Cause: Unable to locate the source code of <function Model.make_pred

In [12]:
# Superclass division and classification (same results, way faster)
for i in range(numberOfSuperclass):
  group_mask = (superclasses == i)
  real_per_superclass = test_label[group_mask]
  if len(real_per_superclass) == 0:
    # No sample was assigned to this superclass: there is nothing to predict
    # and the accuracy would be a division by zero.
    print(f"Accuracy for superclass {i} = n/a (no samples)")
    continue

  # Every sample selected by the mask was already assigned to superclass i by
  # rfc, so i selects the PCA, the CNN and the label mapping directly; the
  # OvO/OvR/RFC stage does not need to be run again on the group.
  test_sample = PCA_per_class[i].transform(test_data[group_mask])
  local_map = local_label_mapping[i]
  final_prediction_to_translate = CNN_models[i].predict(test_sample, verbose=0)
  # One predicted index per row (argmax over the CNN outputs).
  final_prediction_to_translate2 = np.argmax(np.array(final_prediction_to_translate), axis=1)
  # Indices missing from the mapping are kept as they are.
  final_prediction = [local_map.get(item,item)  for item in final_prediction_to_translate2]
  pred_per_superclass = np.array(final_prediction)
  predicted_labels_boolean = (pred_per_superclass == real_per_superclass)
  accuracy = predicted_labels_boolean.sum()/len(real_per_superclass) * 100
  print(f"Accuracy for superclass {i} = {accuracy:.1f}%")


Accuracy for superclass 0 = 55.9%
Accuracy for superclass 1 = 65.1%
Accuracy for superclass 2 = 39.0%
Accuracy for superclass 3 = 49.5%
Accuracy for superclass 4 = 47.3%


In [13]:
# Sample per Sample
# Convert the list of predictions once; it is the same for every superclass
# and makes the element-wise comparisons below explicit array operations.
predicted_array = np.array(predicted)
for i in range(numberOfSuperclass):
  pred_per_superclass = predicted_array[superclasses == i]
  real_per_superclass = test_label[superclasses == i]
  predicted_labels_boolean = (pred_per_superclass == real_per_superclass)
  accuracy = predicted_labels_boolean.sum()/len(real_per_superclass) * 100
  print(f"Accuracy for superclass {i} = {accuracy:.1f}%")
# Accuracy over the whole test set.
predicted_labels_boolean = (predicted_array == test_label)
accuracy = predicted_labels_boolean.sum()/len(test_label) * 100
print(f"Overall accuracy={accuracy:.1f}%")

Accuracy for superclass 0 = 55.9%
Accuracy for superclass 1 = 65.1%
Accuracy for superclass 2 = 39.0%
Accuracy for superclass 3 = 49.5%
Accuracy for superclass 4 = 47.3%
Overall accuracy=45.8%


In [None]:
# Information about accuracy for each class
class_names = np.unique(test_label)

# match: correct predictions per class; tot: test samples per class.
match = {}
tot = {}
for label in class_names:
  match[label] = 0
  tot[label] = sum(1 for actual in test_label if actual == label)

# A hit is a prediction equal to the true label at the same position.
for position, guess in enumerate(predicted):
  if guess == test_label[position]:
    match[guess] += 1

percentage = {}
print("\t\t Class accuracy")
for label in class_names:
  hits, total = match[label], tot[label]
  percentage[label] = (hits/total) * 100
  print(f"Accuracy for class {label} = {percentage[label]:.1f}% ({hits}/{total})")


### SCNN

In [None]:
## Loading of SNN parameters

# NNI experiment id of the spiking-network optimization of each superclass.
SNN_models_code = {
                  0: '1bGwCNz9',
                  1: '4YcOS1Nl',
                  2: 'suDcUgiF',
                  3: 'BYb0Vo26',
                  4: 'yZlABRhf',
}
# Parameter dictionary of each superclass, in superclass order.
params = []
for i in range(numberOfSuperclass):
  # NOTE(review): cnn_model is assigned but not used in this cell.
  cnn_model = CNN_models[i]
  dataset_name = f'miRNA-superclass{i}'
  params.append(get_params_scnn(SNN_models_code[i], dataset_name))

In [None]:
# SCNN
predicted = []
superclass_data = []
superclass_label = []

# Sample division
# numberOfSuperclass replaces the literal 5 so this cell agrees with the
# other cells of the notebook.
for i in range(numberOfSuperclass):
  superclass_data.append(test_data[superclasses == i])
  superclass_label.append(test_label[superclasses == i])


for i in range(numberOfSuperclass):
  print(f"Superclass {i} evaluation started...\n")
  rfc_pred = i
  samples = PCA_per_class[rfc_pred].transform(superclass_data[i])
  # Insert a middle axis of length 1: (n_samples, 1, n_features).
  samples = samples.reshape((samples.shape[0], 1, samples.shape[1]))

  local_map = local_label_mapping[rfc_pred]
  final_prediction_to_translate = scnn(CNN_models[rfc_pred], SNN_models_code[rfc_pred], params[rfc_pred], f"miRNA-superclass{rfc_pred}",samples)
  # Indices missing from the mapping are kept as they are.
  final_prediction = [local_map.get(item,item)  for item in final_prediction_to_translate]
  # predicted[i] holds the labels predicted for superclass i.
  predicted.append(final_prediction)

  print(f"\nSuperclass {i} evaluation finished.\n")

  

Superclass 0 evaluation started...

|                     Building network (0%)                    | ETA:  --:--:--



Build finished in 0:00:00                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               


Cause: Unable to locate the source code of <function Model.make_predict_function.<locals>.predict_function at 0x7f9972566ca0>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: lineno is out of bounds



Superclass 0 evaluation finished.

Superclass 1 evaluation started...

|#                     Building network (2%)                     | ETA: 0:00:07



Build finished in 0:00:00                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               


Cause: Unable to locate the source code of <function Model.make_predict_function.<locals>.predict_function at 0x7f998f522d30>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: lineno is out of bounds







Superclass 1 evaluation finished.

Superclass 2 evaluation started...

|                     Building network (0%)                    | ETA:  --:--:--



Build finished in 0:00:00                                                      
Optimization finished in 0:00:00                                               
|##############Constructing graph: build stage (81%)#            | ETA: 0:00:00



Construction finished in 0:00:01                                               


Cause: Unable to locate the source code of <function Model.make_predict_function.<locals>.predict_function at 0x7f9972801ee0>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: lineno is out of bounds



Superclass 2 evaluation finished.

Superclass 3 evaluation started...

|#                     Building network (2%)                     | ETA: 0:00:06



Build finished in 0:00:00                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               


Cause: Unable to locate the source code of <function Model.make_predict_function.<locals>.predict_function at 0x7f9994249670>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: lineno is out of bounds



Superclass 3 evaluation finished.

Superclass 4 evaluation started...

|#                     Building network (2%)                     | ETA: 0:00:08



Build finished in 0:00:00                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               


Cause: Unable to locate the source code of <function Model.make_predict_function.<locals>.predict_function at 0x7f997255a9d0>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: lineno is out of bounds



Superclass 4 evaluation finished.



In [None]:
# Per-superclass accuracy of the SCNN predictions.
for i in range(numberOfSuperclass):
  pred_per_superclass = np.array(predicted[i])
  real_per_superclass = np.array(superclass_label[i])
  # Only the first len(predictions) true labels are compared: there may be
  # fewer predictions than samples (presumably the simulator drops the last
  # incomplete minibatch - confirm).
  predicted_labels_boolean = (pred_per_superclass == real_per_superclass[:pred_per_superclass.shape[0]])
  # NOTE(review): the denominator is the full group size, so samples without a
  # prediction count as errors - confirm this is intended.
  accuracy = predicted_labels_boolean.sum()/len(real_per_superclass) * 100
  print(f"Accuracy for superclass {i} = {accuracy:.1f}%")


Accuracy for superclass 0 = 40.0%
Accuracy for superclass 1 = 63.3%
Accuracy for superclass 2 = 8.6%
Accuracy for superclass 3 = 60.2%
Accuracy for superclass 4 = 3.3%


In this case, the results for superclasses 2 and 4 are affected by computational limits in the SNN parameter optimization, due to the fact that most of the samples fall into these two categories. Moreover, we can see that, for the other superclasses, the performance is mostly unaffected compared to the CNN counterparts.

# Paper Inference

In this part, we try to reproduce all the paper experiments (but without any superclass classification technique, just using the superclass predictions they made and testing whether the network gives the same results as they had). As we will see, the results are lower than those of the paper, indicating that even with their superclass predictions and using the same model, we cannot reproduce the same results.

### Data Loading

In [None]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans, DBSCAN
from sklearn.model_selection import train_test_split
from sklearn import svm
import sklearn.metrics
import csv, pprint
import scipy.stats
from sklearn.manifold import TSNE
from sklearn.mixture import GaussianMixture
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import math
import json 

# Replaces the value 42 assigned to random_state at the top of the notebook.
random_state = 0

def label_processing(labels):
    """Return the integer class index of every label, in input order.

    The label-to-index table is built by ``create_dictionary``.
    """
    lookup = create_dictionary(labels)
    return list(map(lookup.__getitem__, labels))
def create_dictionary(labels):
    """Build a ``{label: index}`` table following ``np.unique(labels)`` order."""
    class_names = np.unique(labels)
    return dict(zip(class_names, range(len(class_names))))

def extract_label(file_name):
    """Read the first column of a comma-separated label file.

    Parameters
    ----------
    file_name : str
        Path of the CSV file holding the labels.

    Returns
    -------
    numpy.ndarray
        1-D array with the first field of every non-blank line.
    """
    # newline='' is what the csv module asks for, so that the reader itself
    # interprets line endings and quoted fields.
    with open(file_name, "r", newline="") as fin:
        # csv.reader yields an empty list for a blank line; row[0] would raise
        # IndexError on it, so blank lines are skipped.
        first_fields = [row[0] for row in csv.reader(fin, delimiter=',') if row]
    return np.array(first_fields)


def load_dataset(data_file_name, label_file_name):
    """Read labels and features from their CSV files.

    Labels are read first with ``extract_label``, then the features with
    ``np.genfromtxt``. Prints the number of feature rows and returns
    ``(features, labels)``.
    """
    label_values = extract_label(label_file_name)
    feature_matrix = np.genfromtxt(data_file_name, delimiter=',')
    print(f"Dataset dimensions: {feature_matrix.shape[0]}")
    return (feature_matrix, label_values)


In [None]:
# Load the paper test features and labels.
miRna_data, miRna_label = load_dataset("../train_test/test_data_paper.csv","../train_test/test_label_paper.csv")

# Deleting all the data that came from TARGET-* labels
# Drops as many leading feature rows as the row counts of data and labels
# differ.
# NOTE(review): assumes the rows without a label are the first ones of the
# data file - confirm.
number_to_delete = abs(len(miRna_label) - miRna_data.shape[0])
miRna_data = miRna_data[number_to_delete:, :]

# Statistics only: no normalization is applied in this cell (the data is
# presumably already z-scored - confirm).
print("Standard deviation: ",miRna_data.std())
print("Mean value: ", miRna_data.mean())
# Sanity check: the feature matrix must not contain NaN values.
assert np.isnan(miRna_data).sum() == 0

print(miRna_label.shape, miRna_data.shape)

test_data, test_label = miRna_data, miRna_label
print(test_data.shape, test_label.shape)

numberOfSuperclass = 5


Dataset dimensions: 1952
Standard deviation:  0.9797580089364685
Mean value:  0.0053002307994000335
(1952,) (1952, 1881)
(1952, 1881) (1952,)


Feature for each superclass


In [None]:
# selected_features[k] is the list of feature indices parsed from the k-th
# non-blank row of the CSV file.
selected_features = []
# newline="" is what the csv module asks for when opening a file for a reader.
with open("../Selected features.csv", "r", newline="") as fin:
    for row in csv.reader(fin, delimiter=','):
        if not row:
            # Blank line: it must not create an entry.
            continue
        features = []
        for el in row:
            for e in el.split(";"):
                # Skip empty tokens (e.g. after a trailing ';') and the token
                # containing "Super" (presumably the row header - confirm).
                if not e or e.find("Super") != -1:
                    continue
                # The index is the text between the first and second '-'.
                f = e.split("-")[1]
                features.append(int(f))
        # Appended once per row: appending inside the per-cell loop would add
        # the same list several times for a row with several cells and shift
        # the position of the following rows.
        selected_features.append(features)
        print(features)


[0, 4, 5, 8, 16, 17, 20, 21, 22, 24, 26, 31, 33, 37, 39, 40, 44, 45, 49, 51, 54, 59, 64, 72, 77, 78, 83, 86, 89, 94, 98, 104, 106, 111, 113, 118, 126, 128, 131, 135, 138, 139, 140, 144, 151, 155, 156, 158, 164, 169, 170, 177, 183, 195, 196, 202, 212, 224, 226, 228, 230, 234, 240, 249, 251, 252, 253, 254, 256, 257, 260, 261, 262, 263, 267, 280, 281, 289, 294, 296, 298, 299, 300, 303, 304, 306, 309, 311, 314, 316, 318, 321, 325, 327, 328, 330, 332, 336, 337, 340, 345, 347, 348, 350, 359, 369, 370, 376, 377, 382, 384, 387, 388, 389, 390, 397, 399, 400, 401, 403, 405, 410, 411, 412, 413, 414, 420, 425, 441, 444, 445, 449, 451, 455, 457, 458, 459, 462, 469, 471, 477, 479, 481, 482, 487, 490, 491, 492, 495, 498, 505, 515, 520, 521, 523, 531, 537, 538, 539, 541, 542, 544, 549, 557, 562, 565, 572, 577, 582, 587, 591, 597, 609, 611, 612, 614, 625, 626, 630, 633, 640, 641, 643, 645, 646, 651, 654, 656, 664, 665, 668, 671, 673, 682, 686, 689, 690, 703, 712, 725, 727, 730, 731, 736, 743, 746, 750,

## Reloading CNN Models

In [None]:
import zipfile, sqlite3, json, os, pandas as pd
import tensorflow as tf
from keras.models import load_model

# NNI experiment id of the CNN of each superclass for the paper-inference
# section, keyed by the superclass index written as a string. Replaces the
# dictionary of the same name defined earlier in the notebook.
CNN_models_code = {
                  '0': 'TR2aXqPw',
                  '1': 'ln6H275F',
                  '2': '6fN7r5ou',
                  '3': '0cZk25wY',
                  '4': '0VRjyXHQ',
                  }
                  
def load_model_from_experiment(optim_nni_experiment, dataset_name):
  """Load the saved Keras CNN of one NNI experiment.

  The model is read from
  ``./output/tmp_cnn_<experiment>_<dataset>/model_cnn``; that folder must
  already exist (this function does not extract any archive).

  Args:
    optim_nni_experiment: NNI experiment id; part of the model folder name.
    dataset_name: dataset identifier; part of the model folder name.

  Returns:
    The object returned by ``load_model``.
  """
  model_path = "./output/tmp_{}_{}_{}/model_cnn".format("cnn", optim_nni_experiment, dataset_name)
  model = load_model(model_path)
  print(f"Model for {dataset_name} loaded correctly")
  return model
def get_params(optim_nni_experiment, dataset_name):
  """Return the batch size of the top-ranked trial of a paper CNN experiment.

  The experiment archive is searched in
  ``../results-papers/results/Experiment_cnn_<experiment>/`` (any entry whose
  name contains the experiment id). Its NNI sqlite database is queried for
  the FINAL metrics, the trial ranked first by the ``data`` column
  (descending) is selected, and that trial's ``parameter.cfg`` is read from
  the same archive. Both files are extracted into the current working
  directory.

  Args:
    optim_nni_experiment: NNI experiment id.
    dataset_name: not used; kept so existing callers keep working.

  Returns:
    The value stored under ``nni_network/batch_size/randint``.

  Raises:
    FileNotFoundError: if the folder holds no archive for the experiment.
  """
  ##### GET NETWORK STRUCTURE PARAMETERS from previous NNI optimization of non-spiking CNN #####
  optim_db_filepath = "root/nni-experiments/{}/db/nni.sqlite".format(optim_nni_experiment)
  local_experiment_folder = "../results-papers/results/Experiment_{}_{}/".format("cnn", optim_nni_experiment)

  # The last matching entry wins; an explicit error replaces the
  # UnboundLocalError that an empty match would otherwise cause.
  matching = [entry for entry in os.listdir(local_experiment_folder) if optim_nni_experiment in entry]
  if not matching:
    raise FileNotFoundError(
        "No archive containing '{}' found in {}".format(optim_nni_experiment, local_experiment_folder))
  target = matching[-1]

  # One archive handle serves both extractions and is closed on exit.
  with zipfile.ZipFile(local_experiment_folder + target) as zf:
    con = sqlite3.connect(zf.extract(optim_db_filepath))
    try:
      df = pd.read_sql_query("SELECT * from MetricData", con)
    finally:
      con.close()

    # NOTE(review): 'data' is sorted with whatever dtype the database returns;
    # if it is stored as text the ranking is lexicographic - confirm.
    df_default = df[df["type"] == "FINAL"].sort_values(by='data', ascending=False)
    optim_nni_trial = df_default["trialJobId"].iloc[0]

    optim_nni_ref = 'nni-experiments/' + optim_nni_experiment + '/trials/' + optim_nni_trial
    optim_filepath = os.path.join(os.path.expanduser('~'), optim_nni_ref, 'parameter.cfg')

    # [1:] drops the first character of the home-based path (its leading
    # separator) to obtain the member name inside the archive.
    with open(zf.extract(optim_filepath[1:]), 'r') as f:
      param_data = json.load(f)

  network_parameters = param_data['parameters']
  return network_parameters['nni_network/batch_size/randint']

def get_mapping():
  """Load the inverted label mapping of every superclass.

  Reads ``../data-paper/superclass<i>/label_mapping_dict.json`` for each
  ``i`` in ``range(numberOfSuperclass)`` and swaps keys and values.

  Returns:
    A list with one inverted dictionary per superclass, in superclass order.
  """
  local_label_mapping = []
  for i in range(numberOfSuperclass):
    # Context manager closes the file; the name avoids shadowing builtin map.
    with open(f"../data-paper/superclass{i}/label_mapping_dict.json", "r") as mapping_file:
      stored_mapping = json.load(mapping_file)
    local_label_mapping.append({v: k for k, v in stored_mapping.items()})
  return local_label_mapping


In [None]:
# Load the CNN of every superclass, in superclass order.
CNN_models = []
# NOTE(review): batch_sizes stays empty because the get_params call below is
# commented out.
batch_sizes = []
for i in range(numberOfSuperclass):
  cnn_code = CNN_models_code[f'{i}']
  dataset_name = f'miRNA-superclass{i}'
  CNN_models.append(load_model_from_experiment(cnn_code, dataset_name))
  #batch_sizes.append(get_params(cnn_code, dataset_name))

# One inverted label dictionary per superclass.
local_label_mapping = get_mapping()


Model for miRNA-superclass0 loaded correctly
Model for miRNA-superclass1 loaded correctly
Model for miRNA-superclass2 loaded correctly




Model for miRNA-superclass3 loaded correctly
Model for miRNA-superclass4 loaded correctly


## Superclass Predictions

In [None]:
# Labels belonging to each superclass index.
superclass_division = {
    0:['BLCA', 'KIRC', 'SKCM', 'UCEC', 'UVM'],
    1:['ACC', 'BRCA', 'CHOL', 'DLBC', 'ESCA', 'LIHC', 'PCPG'],
    2:['HNSC', 'KIRP', 'LGG', 'LUSC', 'MESO', 'TGCT'],
    3:['CESC', 'KICH', 'LUAD', 'PAAD', 'PRAD', 'THYM'],
    4:['FPPP', 'SARC', 'UCS', 'STAD', 'THCA'] # The FPPP class does not exist in our dataset
}
miRna_superclasses = []

# Arrays for superclass data and label divisions
superclass_array = []
superclasses_labels = []
numberOfSuperclass = 5

# Data division
# Reverse lookup: label -> superclass index.
label_to_superclass = {}
for i in range(numberOfSuperclass):
  for class_name in superclass_division[i]:
    label_to_superclass[class_name] = i

for label in miRna_label:
  # A label outside every superclass must not be skipped silently: the
  # resulting array would be shorter than miRna_label and every later
  # position would be paired with the wrong sample.
  if label not in label_to_superclass:
    raise ValueError(f"Label {label!r} does not belong to any superclass")
  miRna_superclasses.append(label_to_superclass[label])
# One superclass index per sample, aligned with miRna_label.
predicted_labels = np.array(miRna_superclasses)

In [None]:
# One final label per sample, in data order.
pred = []
for i, test_sample in enumerate(miRna_data):
  # Superclass assigned to this sample from its label (see superclass_division).
  superclass_pred = predicted_labels[i] 
  # Feature Selection
  # Keep only the feature indices listed for the superclass.
  idx = selected_features[superclass_pred]
  test_sample_red = test_sample[idx]
  # Turn the 1-D feature vector into a single-row 2-D array.
  test_sample = test_sample_red.reshape(1,-1)
  local_map = local_label_mapping[superclass_pred]
  # CNN Prediction
  final_prediction_to_translate = CNN_models[superclass_pred].predict(test_sample, verbose=0) 
  # Index of the highest CNN output.
  final_prediction_to_translate = np.argmax(np.array(final_prediction_to_translate))
  # Mapping from superclass labels numeration to the absolute one 
  final_prediction = local_map[final_prediction_to_translate]
  pred.append(final_prediction) 
pred = np.array(pred)
print(pred.shape)

(1952,)


In [None]:
# Accuracy inside each superclass.
for i in range(numberOfSuperclass):
  pred_per_superclass = pred[predicted_labels == i]
  real_per_superclass = test_label[predicted_labels == i]
  predicted_labels_boolean = (pred_per_superclass == real_per_superclass)
  accuracy = predicted_labels_boolean.sum()/len(real_per_superclass) * 100
  print(f"Accuracy for superclass {i} = {accuracy}")
# Accuracy over the whole test set, computed once after the loop (it does not
# depend on the loop variable).
predicted_labels_boolean = (pred == test_label)
accuracy = predicted_labels_boolean.sum()/len(test_label) * 100
print("Total accuracy:",accuracy)

Accuracy for superclass 0 = 87.99076212471132
Accuracy for superclass 1 = 72.31121281464532
Accuracy for superclass 2 = 69.24829157175398
Accuracy for superclass 3 = 83.87978142076503
Accuracy for superclass 4 = 81.2274368231047
Total accuracy: 78.53483606557377
