# Librerías

In [1]:
from sklearn.neighbors import kneighbors_graph
from scipy.stats import pearsonr, spearmanr
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import (calinski_harabasz_score, davies_bouldin_score,
                             silhouette_score, normalized_mutual_info_score, adjusted_rand_score)
from sklearn import metrics
from tqdm import tqdm
import networkx as nx
import pandas as pd
import scanpy as sc
import numpy as np
import pickle
import h5py 

from sklearn.decomposition import PCA

In [2]:
# NOTE(review): star import from the project-local graph_based_corr module.
# main_pca and main_no_pca, used in later cells, are presumably defined there
# (no other import in this notebook provides them) — confirm, and consider
# importing the needed names explicitly.
from graph_based_corr import *

# Lectura de los tres datasets 

In [3]:
# Load the HumanLiver dataset

# Raw data: read the 'X' and 'Y' datasets of the HDF5 file into NumPy arrays.
# The file is opened explicitly read-only ('r'): h5py releases before 3.0 did
# not default to read-only and could create the file if the path was mistyped.
with h5py.File('../../data/HumanLiver_counts_top5000.h5', 'r') as file:
    X = np.array(file['X'])
    y = np.array(file['Y'])

# Wrap X in an AnnData object and store y as the 'Group' column of .obs
# (presumably one label per cell — confirm against the data file).
adata_hl = sc.AnnData(X)
adata_hl.obs['Group'] = y

# AE+GMM data: location of the Z.csv file for this dataset
path_z_hl = '../../results/HumanLiver_counts_top5000/Z.csv'

In [4]:
# Load the 10X PBMC dataset
# Raw data: read the 'X' and 'Y' datasets of the HDF5 file into NumPy arrays.
# The file is opened explicitly read-only ('r'): h5py releases before 3.0 did
# not default to read-only and could create the file if the path was mistyped.
with h5py.File('../../data/10X_PBMC_select_2100.h5', 'r') as file:
    X = np.array(file['X'])
    y = np.array(file['Y'])

# Wrap X in an AnnData object and store y as the 'Group' column of .obs
# (presumably one label per cell — confirm against the data file).
adata_10x = sc.AnnData(X)
adata_10x.obs['Group'] = y

# AE+GMM data: location of the Z.csv file for this dataset
path_z_10x = '../../results/10X_PBMC_select_2100/Z.csv'

In [5]:
# Load the Macosko mouse retina dataset
# Raw data: read the 'X' and 'Y' datasets of the HDF5 file into NumPy arrays.
# The file is opened explicitly read-only ('r'): h5py releases before 3.0 did
# not default to read-only and could create the file if the path was mistyped.
with h5py.File('../../data/Macosko_mouse_retina.h5', 'r') as file:
    X = np.array(file['X'])
    y = np.array(file['Y'])

# Wrap X in an AnnData object and store y as the 'Group' column of .obs
# (presumably one label per cell — confirm against the data file).
adata_macosko = sc.AnnData(X)
adata_macosko.obs['Group'] = y

# AE+GMM data: location of the Z.csv file for this dataset
path_z_macosko = '../../results/Macosko_mouse_retina/Z.csv'

# Human Liver

In [8]:
# PCA variant: main_pca is given the path of the raw HumanLiver HDF5 file.
results_hl_pca = main_pca(
    path='../../data/HumanLiver_counts_top5000.h5',
)

# No-PCA variant: main_no_pca is given the AE+GMM Z.csv path together with
# the 'Group' column stored on adata_hl.
results_hl_no_pca = main_no_pca(
    path=path_z_hl,
    y=adata_hl.obs['Group'],
)

### Autoencoder: Successfully preprocessed 4999 genes and 8444 cells.


100%|██████████| 30/30 [19:30<00:00, 39.01s/it]
100%|██████████| 30/30 [18:39<00:00, 37.31s/it]


In [12]:
# Persist both HumanLiver result objects in a single pickle file,
# stored under the keys 'pca' and 'nopca'.
with open('results_human_liver.pickle', mode='wb') as handle:
    pickle.dump(
        {
            'pca': results_hl_pca,
            'nopca': results_hl_no_pca,
        },
        handle,
    )

In [10]:
# Display the HumanLiver PCA results (value returned by main_pca) as the cell output.
results_hl_pca

Unnamed: 0,N,r,acc,nmi,ari,chs,dbs,ss,num_clusters
0,5,0.1,0.83124,0.85615,0.76445,1388.52579,1.19884,0.29561,10
1,5,0.2,0.76658,0.81939,0.61743,1136.79468,1.2731,0.24352,15
2,5,0.3,0.59901,0.76995,0.44167,1112.57323,1.24429,0.26444,17
3,7,0.1,0.8329,0.86044,0.76649,1392.29415,1.1901,0.29062,10
4,7,0.2,0.83787,0.85419,0.7518,1196.17706,1.24834,0.2517,14
5,7,0.3,0.62636,0.77488,0.45552,1108.62829,1.23286,0.26618,17
6,9,0.1,0.82307,0.8707,0.75144,1394.43488,1.18783,0.29333,10
7,9,0.2,0.86333,0.8655,0.76406,1202.61455,1.16863,0.25395,13
8,9,0.3,0.66497,0.7943,0.52345,1163.12556,1.28275,0.24651,16
9,11,0.1,0.76765,0.82386,0.70216,1503.33856,1.13287,0.34664,9


# 10X PBMC

In [5]:
# PCA variant: main_pca is given the path of the raw 10X PBMC HDF5 file.
results_10x_pca = main_pca(
    path='../../data/10X_PBMC_select_2100.h5',
)

# No-PCA variant: main_no_pca is given the AE+GMM Z.csv path together with
# the 'Group' column stored on adata_10x.
results_10x_no_pca = main_no_pca(
    path=path_z_10x,
    y=adata_10x.obs['Group'],
)

### Autoencoder: Successfully preprocessed 16653 genes and 2100 cells.


100%|██████████| 30/30 [05:18<00:00, 10.60s/it]
100%|██████████| 30/30 [04:41<00:00,  9.39s/it]


In [11]:
# Persist both 10X PBMC result objects in a single pickle file,
# stored under the keys 'pca' and 'nopca'.
with open('results_10x.pickle', mode='wb') as handle:
    pickle.dump(
        {
            'pca': results_10x_pca,
            'nopca': results_10x_no_pca,
        },
        handle,
    )

In [7]:
# Display the 10X PBMC PCA results (value returned by main_pca) as the cell output.
results_10x_pca

Unnamed: 0,N,r,acc,nmi,ari,chs,dbs,ss,num_clusters
0,5,0.1,0.70143,0.7672,0.68427,354.93189,1.65128,0.23076,4
1,5,0.2,0.73714,0.76486,0.6807,258.10871,1.68872,0.2315,6
2,5,0.3,0.73333,0.76312,0.68827,247.65515,1.40088,0.21981,7
3,7,0.1,0.60952,0.64683,0.42548,413.36818,1.42368,0.27845,3
4,7,0.2,0.72476,0.75783,0.68153,297.71734,1.8116,0.23454,5
5,7,0.3,0.74286,0.78113,0.71333,250.8659,1.39422,0.2272,7
6,9,0.1,0.70714,0.75929,0.64761,368.27204,1.60706,0.28131,4
7,9,0.2,0.72143,0.78155,0.70512,293.62568,1.52231,0.23661,5
8,9,0.3,0.7281,0.77383,0.70405,282.65359,1.43814,0.25152,6
9,11,0.1,0.60952,0.65306,0.42482,415.15532,1.42796,0.28131,3


# Macosko

In [6]:
# PCA variant: main_pca is given the path of the raw Macosko HDF5 file.
results_macosko_pca = main_pca(
    path='../../data/Macosko_mouse_retina.h5',
)

# No-PCA variant: main_no_pca is given the AE+GMM Z.csv path together with
# the 'Group' column stored on adata_macosko.
results_macosko_no_pca = main_no_pca(
    path=path_z_macosko,
    y=adata_macosko.obs['Group'],
)

### Autoencoder: Successfully preprocessed 11422 genes and 14653 cells.


100%|██████████| 30/30 [36:41<00:00, 73.40s/it] 
100%|██████████| 30/30 [31:59<00:00, 63.99s/it]


In [7]:
# Persist both Macosko result objects in a single pickle file,
# stored under the keys 'pca' and 'nopca'.
with open('results_macosko.pickle', mode='wb') as handle:
    pickle.dump(
        {
            'pca': results_macosko_pca,
            'nopca': results_macosko_no_pca,
        },
        handle,
    )