# Paths

In [1]:
path = '/home/chayan/NeuroGDAVIS/'
res_plots = path + 'Results/Plots/'
path_emb = path + 'Data/Embeddings/'

# Importing libraries

In [2]:
import sys
sys.path.append(path)

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import keras
import ipynbname
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from utils import Datasets as ds
from keras.models import load_model
from utils import DownstreamAnalysis as DA

dataname = ipynbname.name().rsplit("_")[0]
txt = %pwd
methodname = txt.rsplit("/")[-1]

# Load labels and embeddings

In [3]:
y = ds.LoadData(dataname, DType='None', LType=True)
x_embedding = np.array(pd.read_csv(path_emb+dataname+'/'+dataname+'_'+methodname+'.csv', index_col=0, header=0))

# Downstream analysis

### k-NN

In [4]:
neighbour = [5, 15, 25, 35, 45]
Score_knn = []
for n in neighbour:
    Score_knn.append(DA.Knn(x_embedding, y['x'], n))
Score_knn = pd.DataFrame(Score_knn, index=['neighbour '+ str(s) for s in neighbour],
                         columns = ['Accuracy', 'Precision','Recall','F1-Score'])
Score_knn

Unnamed: 0,Accuracy,Precision,Recall,F1-Score
neighbour 5,0.888889,0.882728,0.888889,0.882873
neighbour 15,0.8125,0.806089,0.8125,0.800439
neighbour 25,0.763889,0.755387,0.763889,0.744752
neighbour 35,0.722222,0.709003,0.722222,0.697933
neighbour 45,0.690972,0.687523,0.690972,0.659324


### Random Forest

In [5]:
n_tree = [20,40,60,80,100]
Score_rfc = []
for n in n_tree:
    Score_rfc.append(DA.RFC(x_embedding, y['x'], n))
Score_rfc = pd.DataFrame(Score_rfc, index=['n_tree '+ str(s) for s in neighbour],
                         columns = ['Accuracy', 'Precision','Recall','F1-Score'])
Score_rfc

Unnamed: 0,Accuracy,Precision,Recall,F1-Score
n_tree 5,0.90625,0.897388,0.90625,0.900219
n_tree 15,0.920139,0.920815,0.920139,0.914742
n_tree 25,0.913194,0.906644,0.913194,0.905992
n_tree 35,0.920139,0.912979,0.920139,0.914284
n_tree 45,0.913194,0.903644,0.913194,0.906387


### k-Means

In [6]:
iteration = 10
Results_km = []

for i in tqdm(range(iteration), desc='Progress'):
    Results_km.append(DA.kmeans(x_embedding, y['x']))
    
Results_km = pd.DataFrame(Results_km,columns = ['ARI', 'FMI','NMI'])
Results_km

Progress: 100%|█████████████████████████████████| 10/10 [00:00<00:00, 22.88it/s]


Unnamed: 0,ARI,FMI,NMI
0,0.36636,0.614779,0.405101
1,0.390072,0.630163,0.429565
2,0.38459,0.614065,0.417809
3,0.382051,0.620426,0.41781
4,0.403222,0.632187,0.43628
5,0.340594,0.601751,0.382733
6,0.373479,0.625541,0.4124
7,0.403861,0.627807,0.4373
8,0.375511,0.633369,0.412535
9,0.400198,0.633855,0.437198


### Agglomerative 

In [7]:
idx = ['ARI', 'FMI','NMI']
AglScore = pd.DataFrame(DA.Agglomerative(x_embedding, y['x']), index = idx)    #seurat_clusters, Cluster, celltype.l2
AglScore.T

Unnamed: 0,ARI,FMI,NMI
0,0.395762,0.64543,0.43056
