# Paths

In [1]:
# Root of the G-NeuroDAVIS project tree; every directory below hangs off it.
# Each value keeps its trailing '/' because later cells build file names from
# these by plain string concatenation.
path = '/home/tih_isi_7/G-NeuroDAVIS/'
res_plots = f'{path}Results/Plots/'
path_emb = f'{path}Data/Embeddings/'
path_tab = f'{path}Results/Tables/'

# Importing libraries

In [2]:
# Silence FutureWarning messages for the rest of the notebook.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Put the project root on the import path. This has to happen before the
# `from utils import ...` lines below (presumably `utils` lives under `path`).
import sys
sys.path.append(path)

# Set before keras is imported further down.
# NOTE(review): presumably lowers TensorFlow's native log verbosity — confirm.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# NOTE(review): keras, load_model and plt are not referenced by the cells
# visible in this notebook; kept in case other code relies on them.
import keras
import ipynbname
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from utils import Datasets as ds
from keras.models import load_model
from utils import DownstreamAnalysis as DA

# The dataset name is the part of this notebook's file name before the first
# underscore.
dataname = ipynbname.name().split("_", 1)[0]

# The method name is the name of the directory the notebook runs in.
# os.getcwd() replaces the `%pwd` magic so the cell is plain Python, and
# os.path.basename replaces splitting on a literal "/" so the last path
# component is extracted with the platform's own separator rules.
txt = os.getcwd()
methodname = os.path.basename(txt)

# Load labels and embeddings

In [3]:
# Fetch the labels through the project's dataset loader; the second value it
# returns is not used in this notebook.
# NOTE(review): DType is the string 'None', not the None object — confirm that
# this is what ds.LoadData expects.
y, _ = ds.LoadData(dataname, DType='None', LType=True)

# Embedding CSV for this dataset/method pair. The first column is read as the
# index and the first row as the header, so only the values end up in the array.
embedding_csv = f'{path_emb}{dataname}/{dataname}_{methodname}.csv'
x_embedding = np.array(pd.read_csv(embedding_csv, index_col=0, header=0))

# Downstream analysis

### k-NN

In [4]:
# Evaluate the embedding with DA.Knn once per neighbourhood size. Each call
# contributes one row; the rows are labelled with the four metric names below.
neighbour = [5, 15, 25, 35, 45]
Score_knn = pd.DataFrame(
    [DA.Knn(x_embedding, y, k) for k in neighbour],
    index=[f'neighbour {k}' for k in neighbour],
    columns=['Accuracy', 'Precision', 'Recall', 'F1-Score'],
)

# Save the table, then leave it as the last expression so the cell displays it.
Score_knn.to_csv(f'{path_tab}{dataname}_{methodname}_knn.csv')
Score_knn

Unnamed: 0,Accuracy,Precision,Recall,F1-Score
neighbour 5,0.775833,0.777903,0.775833,0.77578
neighbour 15,0.791833,0.795934,0.791833,0.79294
neighbour 25,0.794333,0.79895,0.794333,0.795562
neighbour 35,0.794417,0.799522,0.794417,0.79574
neighbour 45,0.79525,0.8008,0.79525,0.796643


### Random Forest

In [5]:
# Evaluate the embedding with DA.RFC once per value in n_tree. Each call
# contributes one row; the rows are labelled with the four metric names below.
n_tree = [20, 40, 60, 80, 100]
Score_rfc = pd.DataFrame(
    [DA.RFC(x_embedding, y, trees) for trees in n_tree],
    index=[f'n_tree {trees}' for trees in n_tree],
    columns=['Accuracy', 'Precision', 'Recall', 'F1-Score'],
)

# Save the table, then leave it as the last expression so the cell displays it.
Score_rfc.to_csv(f'{path_tab}{dataname}_{methodname}_rfc.csv')
Score_rfc

Unnamed: 0,Accuracy,Precision,Recall,F1-Score
n_tree 20,0.774583,0.776267,0.774583,0.775022
n_tree 40,0.77725,0.779558,0.77725,0.777851
n_tree 60,0.778583,0.780941,0.778583,0.779297
n_tree 80,0.7775,0.779744,0.7775,0.77824
n_tree 100,0.7795,0.781822,0.7795,0.780223


### k-Means

In [6]:
# Call DA.kmeans on the same embedding `iteration` times, one table row per
# run, with a progress bar. The loop index itself is never used.
iteration = 10
Results_km = pd.DataFrame(
    [DA.kmeans(x_embedding, y) for _ in tqdm(range(iteration), desc='Progress')],
    columns=['ARI', 'FMI', 'NMI'],
)
Results_km

Progress: 100%|█████████████████████████████████| 10/10 [00:04<00:00,  2.15it/s]


Unnamed: 0,ARI,FMI,NMI
0,0.370969,0.546549,0.448367
1,0.371272,0.54672,0.448627
2,0.37038,0.546317,0.447934
3,0.371272,0.54672,0.448627
4,0.370421,0.546216,0.447951
5,0.369277,0.545412,0.446994
6,0.370353,0.546275,0.447908
7,0.370053,0.545977,0.447628
8,0.370354,0.546182,0.447898
9,0.370656,0.54633,0.448141


### Agglomerative clustering

In [7]:
# Single DA.Agglomerative run; its result is indexed by the three score names
# and then transposed so the scores appear as columns in the displayed table.
# NOTE(review): the original trailing comment listed "seurat_clusters, Cluster,
# celltype.l2" — presumably candidate label columns; confirm or remove.
idx = ['ARI', 'FMI', 'NMI']
agl_values = DA.Agglomerative(x_embedding, y)
AglScore = pd.DataFrame(agl_values, index=idx)
AglScore.transpose()

Unnamed: 0,ARI,FMI,NMI
0,0.285714,0.480608,0.378466
