In [7]:
import h5py
import scanpy as sc
import anndata
import loompy as lp

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sys
import glob

In [4]:
import sys
sys.path.insert(0, '/home/tchari/monod/src/')

In [5]:
import monod
#importlib.reload(monod)
from monod import preprocess, extract_data, cme_toolbox, analysis, mminference

In [71]:
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score
from sklearn.cluster import KMeans
from sklearn.neighbors import kneighbors_graph

import igraph as ig
import leidenalg as la

In [44]:
# ********* Get meK_looms.tar.gz and meKMeans_data_samp_params.csv *********

samps = pd.read_csv('meKMeans_data_samp_params.csv')
samps.head()

Unnamed: 0,Dataset,Transcrip,Clusters,Cu,Lam
0,allen_b08,/home/tchari/perturbCME/notebooks/gg_200524_mo...,10,-6.777778,-1.25
1,allen_b02h01,/home/tchari/perturbCME/notebooks/gg_200524_mo...,11,-6.777778,-1.25
2,cl3,/home/tchari/perturbCME/notebooks/gg_200525_ge...,3,-6.777778,-1.25
3,cl5,/home/tchari/perturbCME/notebooks/gg_200525_ge...,5,-6.777778,-0.7
4,brca1,/home/tchari/perturbCME/notebooks/gg_200524_mo...,-1,-6.777778,-0.7


In [45]:
d_strings  = glob.glob('./hvg_objs/*.loom')
d_strings 

['./hvg_objs/allen_b08_300hvgs.loom',
 './hvg_objs/cl5_2000hvgs.loom',
 './hvg_objs/allen_b02h01_300hvgs.loom',
 './hvg_objs/cl5_300hvgs.loom',
 './hvg_objs/allen_b08_2000hvgs.loom',
 './hvg_objs/allen_b02h01_1000hvgs.loom',
 './hvg_objs/allen_b08_4000hvgs.loom',
 './hvg_objs/cl3_1000hvgs.loom',
 './hvg_objs/allen_b08_1000hvgs.loom',
 './hvg_objs/cl3_2000hvgs.loom',
 './hvg_objs/cl5_1000hvgs.loom',
 './hvg_objs/cl5_4000hvgs.loom',
 './hvg_objs/allen_b02h01_4000hvgs.loom',
 './hvg_objs/cl3_4000hvgs.loom',
 './hvg_objs/allen_b02h01_2000hvgs.loom',
 './hvg_objs/cl3_300hvgs.loom']

In [46]:
datas = pd.unique(samps.Dataset)
datas

array(['allen_b08', 'allen_b02h01', 'cl3', 'cl5', 'brca1', 'pbmc',
       'e11e13'], dtype=object)

In [68]:
attribute_names =[('unspliced','spliced'),'gene_name','barcode']

### **meK-Means**

Test with 2, correct K, K+5, K+10

In [None]:
#For each loom in looms from ./hvg_objs

!mkdir /home/tchari/perturbCME/notebooks/fits/meKruns_0209

dir_strings = []
data_strings = []
result_strings = []

#Save method, clustering, mat_in (U,S, etc), hyper (K or res), hvgs, data, ARI, AMI
meK_res = pd.DataFrame()
meths = []
result = []
mats = []
hyper = []
hvgs = []
data = []
ari = []
ami = []

for d in d_strings:
    
    #Get tech param by keyword (datas) in loom paths
    ind = [i in d for i in datas]
    
    #Inference params for sampling
    tech_tup = [list(samps.Cu[ind])[0],list(samps.Lam[ind])[0]]
    
    #Preprocessing params for filepaths
    trans = list(samps.Transcrip[ind])[0]
    k = list(samps.Clusters[ind])[0]
    data_name = list(samps.Dataset[ind])[0]
    
    
    ds = lp.connect(d)
    num_genes = len(ds.ra[attribute_names[1]])
    num_cells = len(ds.ca[attribute_names[2]])
    true_labs = list(ds.ca['subclass_label'])
    ds.close()
    print(data_name+' with '+str(num_genes)+' HVGs')
    
    Ks = [2,k,k+5,k+10]

    for clus in Ks:
        #Run Monod preprocessing to set up files for run, leave room for genes to be removed with low U/S
        dir_string,dataset_string = monod.preprocess.construct_batch([d], \
                                                     trans, \
                                                     [data_name], \
                                                     attribute_names=attribute_names,\
                                                     batch_location='./fits/meKruns_0209', \
                                                     meta=data_name+'_K'+str(clus), \
                                                     batch_id=1, \
                                                     n_genes=num_genes-60,exp_filter_threshold=None,viz=False)
        dir_strings += [dir_string]
        data_strings += [dataset_string]

        #Set model and run meK-Means
        
        #Define bounds for params
        phys_lb = [-2.0, -1.8, -1.8 ] 
        phys_ub = [4.2, 2.5, 2.5] 
        samp_lb = tech_tup 
        samp_ub = tech_tup  
        gridsize = [1,1] #Already have tech params
    

        epochs = 10

        # ---------------- meK-Means Inference ---------------- 
        #Define model with bursty transcription and Poisson molecule capture/sampling
        fitmodel = monod.cme_toolbox.CMEModel('Bursty','Poisson')

        #Set up mminference parameters
        inference_parameters = monod.mminference.InferenceParameters(phys_lb,phys_ub,samp_lb,samp_ub,gridsize,\
                    dataset_string[0],fitmodel,k=clus,epochs=epochs,use_lengths = True,
                    gradient_params = {'max_iterations':5,'init_pattern':'moments','num_restarts':1})

        #Read in loom file with filtered barcodes
        search_data = monod.extract_data.extract_data(d, trans, data_name,
                    dataset_string[0], dir_string, viz=False, dataset_attr_names=attribute_names)

        #Run inference(fit_all_grid_points()) and Save result file strings
        full_result_string = inference_parameters.fit_all_grid_points(40,search_data) 

        result_strings.append(full_result_string)
        

        # ----------------  Save output ---------------- 
        #Read in results and get cluster assignments
        sr = [monod.analysis.load_search_results(i) for i in full_result_string]
        
        
        cat_assigns = np.array([None]*num_cells) #cat_assigns is empty list of length = no. cell barcodes
        for i in range(len(sr)):
            r = sr[i]
            cat_assigns[r.filt] = r.assigns #denotes which cluster (k), cells were assigned to

        print('No. clus: ',len(np.unique(cat_assigns)))
        

        meths += ['meK-Means']
        result += [cat_assigns]
        mats += ['U,S']
        hyper += [clus]
        hvgs += [num_genes]
        data += [data_name]
        ari += [adjusted_rand_score(true_labs,cat_assigns)]
        ami += [adjusted_mutual_info_score(true_labs,cat_assigns)]


meK_res['Method'] = meths
meK_res['Clustering'] = result
meK_res['Matrices'] = mats
meK_res['Hyperparam'] = hyper
meK_res['HVGs'] = hvgs
meK_res['Data'] = data
meK_res['ARI'] = ari
meK_res['AMI'] = ami
    

allen_b08 with 299 HVGs


100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:14<00:00,  7.46s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.02s/it]


mstep self.weights:  [0.8630301 0.1369699]
Q Function:  -1169045.4176676944



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.29s/it]


mstep self.weights:  [0.87036381 0.12963619]
Q Function:  -1109316.3334968125



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.35s/it]


mstep self.weights:  [0.87218347 0.12781653]
Q Function:  -1109059.4199196238



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:11<00:00,  5.65s/it]


mstep self.weights:  [0.87347886 0.12652114]
Q Function:  -1109416.6973109464



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:11<00:00,  5.55s/it]


mstep self.weights:  [0.87282665 0.12717335]
Q Function:  -1109491.6192687005



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.29s/it]


mstep self.weights:  [0.87285625 0.12714375]
Q Function:  -1108699.4987426656



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:11<00:00,  5.53s/it]


mstep self.weights:  [0.87285626 0.12714374]
Q Function:  -1108699.498779183



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.38s/it]


mstep self.weights:  [0.87285626 0.12714374]
Q Function:  -1108699.4987791968



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.39s/it]


mstep self.weights:  [0.87285626 0.12714374]
Q Function:  -1108699.4987791968



100%|████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.30s/it]


mstep self.weights:  [0.87285626 0.12714374]
Q Function:  -1108699.4987791968

No. clus:  2


100%|██████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.05s/it]


mstep self.weights:  [0.00386678 0.02386963 0.06490772 0.05793504 0.04672846 0.43901911
 0.05838434 0.08044751 0.21079607 0.01404533]
Q Function:  -1191773.1900790024



100%|██████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.04s/it]


mstep self.weights:  [1.74926588e-04 3.17959111e-02 3.35701154e-02 5.58659841e-02
 1.75653758e-04 5.30054430e-01 5.26368663e-02 5.35632221e-02
 2.23985395e-01 1.81774953e-02]
Q Function:  -1119046.5771273316



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.33s/it]


mstep self.weights:  [1.75654312e-10 3.37198331e-02 3.81491597e-02 5.61597822e-02
 1.75654312e-10 4.97996994e-01 5.38398532e-02 7.60762837e-02
 2.27460504e-01 1.65975897e-02]
Q Function:  -1141014.8065862232



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.39s/it]


mstep self.weights:  [1.75654312e-10 3.35328295e-02 6.86886201e-03 5.53466068e-02
 1.75654312e-10 5.69004589e-01 5.59212695e-02 3.62457571e-02
 2.27890283e-01 1.51898023e-02]
Q Function:  -1137321.0063579483



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.30s/it]


mstep self.weights:  [1.75654312e-10 3.34544904e-02 6.25897372e-03 5.52741410e-02
 1.75654312e-10 5.73661223e-01 5.68036493e-02 3.17300679e-02
 2.28248612e-01 1.45688425e-02]
Q Function:  -1150337.6133785865



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.32s/it]


mstep self.weights:  [1.75654312e-10 3.32687250e-02 3.31297031e-03 5.51231295e-02
 1.75654312e-10 5.78007281e-01 5.76615364e-02 2.95119969e-02
 2.29252267e-01 1.38620936e-02]
Q Function:  -1141765.3606356462



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.29s/it]


mstep self.weights:  [1.75654312e-10 3.31022102e-02 2.45197339e-03 5.50774483e-02
 1.75654312e-10 5.78673874e-01 5.77575613e-02 2.99190635e-02
 2.29206588e-01 1.38112806e-02]
Q Function:  -1132489.4570568576



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.32s/it]


mstep self.weights:  [1.75654312e-10 3.30925205e-02 2.11446132e-03 5.50691938e-02
 1.75654312e-10 5.79248485e-01 5.76948234e-02 2.97347783e-02
 2.29163639e-01 1.38820979e-02]
Q Function:  -1132601.9179071845



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:11<00:00,  1.40s/it]


mstep self.weights:  [1.75654312e-10 3.28796161e-02 2.10785192e-03 5.50677353e-02
 1.75654312e-10 5.78798937e-01 5.76946462e-02 2.98713941e-02
 2.29697594e-01 1.38822258e-02]
Q Function:  -1129394.7719237613



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.28s/it]


mstep self.weights:  [1.75654312e-10 3.22683977e-02 2.10785192e-03 5.50692816e-02
 1.75654312e-10 5.79306116e-01 5.76946472e-02 2.96637369e-02
 2.30007743e-01 1.38822260e-02]
Q Function:  -1133431.426791057

No. clus:  8


100%|██████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:13<00:00,  1.13it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:17<00:00,  1.17s/it]


mstep self.weights:  [1.14332221e-02 2.85437167e-01 6.91538101e-03 4.02575345e-01
 7.94124679e-04 3.52369171e-02 3.51304852e-04 1.49450122e-04
 9.71877419e-02 5.75039901e-02 2.74172688e-02 7.13187738e-02
 1.59662884e-04 3.28613011e-03 2.33521022e-04]
Q Function:  -1237916.9560637702



100%|██████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.13s/it]


mstep self.weights:  [2.23638027e-02 2.66990863e-01 1.75654312e-10 5.75583417e-01
 1.75654488e-04 4.69127142e-02 1.75654488e-04 1.75654312e-10
 7.90283143e-02 1.56277023e-03 6.84395276e-03 3.62856171e-04
 1.75654312e-10 1.75733367e-10 1.75654312e-10]
Q Function:  -1179713.5933503222



100%|████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:10<00:00,  1.77s/it]


mstep self.weights:  [1.68727644e-02 2.46261027e-01 1.75654312e-10 6.05777237e-01
 1.75654312e-10 4.90994027e-02 1.75654312e-10 1.75654312e-10
 7.77079042e-02 1.75655134e-10 4.28166256e-03 1.75654312e-10
 1.75654312e-10 1.75654312e-10 1.75654312e-10]
Q Function:  -1097151.7731452289



100%|████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:10<00:00,  1.77s/it]


mstep self.weights:  [1.85497522e-02 2.34668120e-01 1.75654312e-10 6.17891604e-01
 1.75654312e-10 5.20891847e-02 1.75654312e-10 1.75654312e-10
 7.49338518e-02 1.75654312e-10 1.86748523e-03 1.75654312e-10
 1.75654312e-10 1.75654312e-10 1.75654312e-10]
Q Function:  -1128563.5620754394



100%|████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:10<00:00,  1.77s/it]


mstep self.weights:  [1.58012223e-02 2.45865809e-01 1.75654312e-10 6.11021562e-01
 1.75654312e-10 5.41458190e-02 1.75654312e-10 1.75654312e-10
 7.29899284e-02 1.75654312e-10 1.75657354e-04 1.75654312e-10
 1.75654312e-10 1.75654312e-10 1.75654312e-10]
Q Function:  -1095411.5304958571



100%|████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.08s/it]


mstep self.weights:  [1.70620477e-02 2.40107341e-01 1.75654312e-10 6.15129805e-01
 1.75654312e-10 5.30554689e-02 1.75654312e-10 1.75654312e-10
 7.46453354e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 1.75654312e-10 1.75654312e-10]
Q Function:  -1106175.801290434



100%|████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.15s/it]


mstep self.weights:  [1.47239479e-02 2.47290969e-01 1.75654312e-10 6.10497854e-01
 1.75654312e-10 5.46384882e-02 1.75654312e-10 1.75654312e-10
 7.28487391e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 1.75654312e-10 1.75654312e-10]
Q Function:  -1084186.4741080375



100%|████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.04s/it]


mstep self.weights:  [1.50889915e-02 2.47162932e-01 1.75654312e-10 6.10117915e-01
 1.75654312e-10 5.52557959e-02 1.75654312e-10 1.75654312e-10
 7.23743641e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 1.75654312e-10 1.75654312e-10]
Q Function:  -1083189.1903504808



100%|████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.09s/it]


mstep self.weights:  [1.35494911e-02 2.48709927e-01 1.75654312e-10 6.10208887e-01
 1.75654312e-10 5.54384691e-02 1.75654312e-10 1.75654312e-10
 7.20932236e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 1.75654312e-10 1.75654312e-10]
Q Function:  -1083978.6516400182



100%|████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.03s/it]


mstep self.weights:  [1.74591222e-02 2.44799730e-01 1.75654312e-10 6.10216106e-01
 1.75654312e-10 5.56305731e-02 1.75654312e-10 1.75654312e-10
 7.18944665e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 1.75654312e-10 1.75654312e-10]
Q Function:  -1082193.6490742634

No. clus:  5


100%|██████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.64it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:11<00:00,  1.25it/s]


mstep self.weights:  [1.43615495e-06 3.50207766e-02 1.81189152e-04 3.36451138e-04
 5.37581373e-02 1.75820597e-04 7.53603730e-01 1.89179253e-10
 1.01319354e-01 4.01218008e-04 1.83770377e-06 2.53530596e-03
 1.48765536e-03 3.26597009e-02 3.34669404e-05 6.33309849e-06
 5.45813740e-03 5.01381461e-03 8.00271695e-03 2.91763664e-06]
Q Function:  -1224678.3023931282



100%|██████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:10<00:00,  1.10it/s]


mstep self.weights:  [1.88808671e-06 3.26003472e-02 1.75654312e-10 1.75654312e-10
 2.34582974e-02 1.75654312e-10 8.26988835e-01 1.75911644e-10
 3.37498725e-02 1.75654312e-10 1.71785963e-04 2.04653338e-08
 8.78274638e-04 6.50751471e-02 4.89836682e-04 4.87604391e-06
 2.28213405e-03 1.75654184e-03 1.21443092e-02 3.97832806e-04]
Q Function:  -1190192.8390566187



100%|██████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:10<00:00,  1.09it/s]


mstep self.weights:  [2.78285957e-05 3.13846581e-02 1.75654312e-10 1.75654312e-10
 2.92881714e-02 1.75654312e-10 8.33293842e-01 1.76032887e-10
 1.59083427e-02 1.75654312e-10 1.75654312e-10 1.76370358e-10
 3.54080835e-04 6.55702685e-02 1.75654312e-10 8.12143710e-04
 4.00196773e-03 1.75653110e-03 1.74274295e-02 1.74734245e-04]
Q Function:  -1155839.812048054



100%|████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:10<00:00,  1.14s/it]


mstep self.weights:  [7.67207336e-04 2.80022949e-02 1.75654312e-10 1.75654312e-10
 2.91392000e-02 1.75654312e-10 8.40438418e-01 7.06702325e-09
 1.41532709e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.81781882e-10 6.23331161e-02 1.75654312e-10 1.75656508e-10
 7.40794204e-03 1.93214553e-03 1.58263965e-02 1.75654312e-10]
Q Function:  -1092506.011672021



100%|██████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.02s/it]


mstep self.weights:  [3.18722662e-04 2.63730321e-02 1.75654312e-10 1.75654312e-10
 2.74409149e-02 1.75654312e-10 8.44497766e-01 2.03843197e-04
 1.29673129e-02 1.75654312e-10 1.75654312e-10 1.09827615e-06
 1.75654312e-10 5.52654822e-02 1.75654312e-10 1.75654312e-10
 1.44743194e-02 1.93219759e-03 1.65253093e-02 1.75654312e-10]
Q Function:  -1163394.595169723



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.25s/it]


mstep self.weights:  [1.75654312e-10 2.06253661e-02 1.75654312e-10 1.75654312e-10
 2.91189076e-02 1.75654312e-10 8.50963213e-01 1.75654312e-10
 1.15584175e-02 1.75654312e-10 1.75654312e-10 1.87974732e-10
 1.75654312e-10 5.36440494e-02 1.75654312e-10 1.75654312e-10
 1.64409693e-02 2.10717953e-03 1.55418955e-02 1.75654312e-10]
Q Function:  -1102118.5710938273



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.27s/it]


mstep self.weights:  [1.75654312e-10 2.09168709e-02 1.75654312e-10 1.75654312e-10
 2.66557811e-02 1.75654312e-10 8.50915790e-01 1.75654312e-10
 1.36863685e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 5.38464262e-02 1.75654312e-10 1.75654312e-10
 1.62396446e-02 2.27337069e-03 1.54657455e-02 1.75654312e-10]
Q Function:  -1096056.9309551835



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.27s/it]


mstep self.weights:  [1.75654312e-10 2.01714180e-02 1.75654312e-10 1.75654312e-10
 2.41333389e-02 1.75654312e-10 8.51930866e-01 1.75654312e-10
 1.58319986e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 5.42354020e-02 1.75654312e-10 1.75654312e-10
 1.58506694e-02 2.12483368e-03 1.57214714e-02 1.75654312e-10]
Q Function:  -1096201.4360908377



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.36s/it]


mstep self.weights:  [1.75654312e-10 1.65039817e-02 1.75654312e-10 1.75654312e-10
 2.13705159e-02 1.75654312e-10 8.55715902e-01 1.75654312e-10
 1.63432689e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 5.33865400e-02 1.75654312e-10 1.75654312e-10
 1.66996493e-02 2.10150087e-03 1.78786388e-02 1.75654312e-10]
Q Function:  -1099695.9985633735



100%|████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.28s/it]


mstep self.weights:  [1.75654312e-10 2.17762527e-02 1.75654312e-10 1.75654312e-10
 2.19066792e-02 1.75654312e-10 8.50348311e-01 1.75654312e-10
 1.55906579e-02 1.75654312e-10 1.75654312e-10 1.75654312e-10
 1.75654312e-10 5.39058998e-02 1.75654312e-10 1.75654312e-10
 1.61801710e-02 2.10744433e-03 1.81845823e-02 1.75654312e-10]
Q Function:  -1096936.2207330945

No. clus:  8
cl5 with 1999 HVGs


100%|███████████████████████████████████████████████████████████████████████████████████████| 2/2 [05:23<00:00, 161.69s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████| 2/2 [05:38<00:00, 169.50s/it]


mstep self.weights:  [0.84860063 0.15139937]
Q Function:  -5128509.070303669



100%|███████████████████████████████████████████████████████████████████████████████████████| 1/1 [04:40<00:00, 280.60s/it]


mstep self.weights:  [1.00000000e+00 2.02142713e-10]
Q Function:  -5151588.072666363



100%|███████████████████████████████████████████████████████████████████████████████████████| 1/1 [04:44<00:00, 284.09s/it]


mstep self.weights:  [1.00000000e+00 2.02142713e-10]
Q Function:  -5103427.4975165585



100%|███████████████████████████████████████████████████████████████████████████████████████| 1/1 [04:47<00:00, 287.37s/it]


mstep self.weights:  [1.00000000e+00 2.02142713e-10]
Q Function:  -5103427.4975165585



100%|███████████████████████████████████████████████████████████████████████████████████████| 1/1 [04:29<00:00, 269.10s/it]


mstep self.weights:  [1.00000000e+00 2.02142713e-10]
Q Function:  -5103427.4975165585



  0%|                                                                                                | 0/1 [00:00<?, ?it/s]

In [None]:
#Save method, clustering, mat_in (U,S, etc), hyper (K or res), hvgs, data, ARI, NMI
meK_res.to_csv('./fits/meKruns_0209/meK_bench_results.csv',index=None)
meK_res.head()


In [96]:
# dataset_string

['./fits/meKruns_0209/gg_240209_025_allen_b08_K2_1/allen_b08']

In [99]:
# !rm -r ./fits/meKruns_0209

### **Leiden & K-Means**


In [90]:
#Perform standard normalization (read-depth + log1p)
def norm_mat(mat,new_tot=1e4):
    '''
    mat: cell x gene matrix
    '''  
    tots = np.sum(mat,axis=1)
    divids = (new_tot/tots)[:,None]
    mat_norm = mat*divids
    mat_norm = np.log1p(mat_norm)
    
    mat_norm[np.isnan(mat_norm)] = 0
    
    return mat_norm

In [77]:
def get_graph(mat,neigh=30):
    A = kneighbors_graph(mat, neigh, mode='connectivity', include_self=True)
    sources, targets = A.nonzero()
    weights = A[sources, targets]
    if isinstance(weights, np.matrix):
        weights = weights.A1
    g = ig.Graph(directed=False) #True in scanpy, which is odd
    g.add_vertices(A.shape[0])  # this adds adjacency.shape[0] vertices
    g.add_edges(list(zip(sources, targets)))

    g.es['weight'] = weights
    
    return g

In [91]:
# ! rm -r /home/tchari/perturbCME/notebooks/fits/LeidKMeansruns_0209

In [None]:
#Make the U,S, U+S, Concat matrices, res 0.75,1,1.5,2

!mkdir /home/tchari/perturbCME/notebooks/fits/LeidKMeansruns_0209


#Save method, clustering, mat_in (U,S, etc), hyper (K or res), hvgs, data, ARI, AMI
LK_res = pd.DataFrame()
meths = []
result = []
mats = []
hyper = []
hvgs = []
data = []
ari = []
ami = []

for d in d_strings:
    
    #Get tech param by keyword (datas) in loom paths
    ind = [i in d for i in datas]
    
    k = list(samps.Clusters[ind])[0]
    data_name = list(samps.Dataset[ind])[0]
    
    
    ds = lp.connect(d)
    num_genes = len(ds.ra[attribute_names[1]])
    num_cells = len(ds.ca[attribute_names[2]])
    U = ds.layers[attribute_names[0][0]][:,:].T
    S = ds.layers[attribute_names[0][1]][:,:].T
    true_labs = list(ds.ca['subclass_label'])
    ds.close()
    print(data_name+' with '+str(num_genes)+' HVGs')
    
    res = [0.75,1,1.5,2]
    
    #Get U,S,U_p_S,U_c_S
        
    #Norm mats
    U_norm = norm_mat(U)
    S_norm = norm_mat(S)
    U_p_S_norm = norm_mat(U+S)
    U_c_S_norm = norm_mat(np.concatenate([U,S],axis=1))
    
    g_U = get_graph(U_norm)
    g_S = get_graph(S_norm)
    g_U_p_S = get_graph(U_p_S_norm)
    g_U_c_S = get_graph(U_c_S_norm)
    
    # -------- Fit Leiden -------
    for r in res:
        
        partition_type = la.RBConfigurationVertexPartition
        #Make Knn graphs
        for g in [g_U,g_S,g_U_p_S,g_U_c_S]:
            part = la.find_partition(g, partition_type, 
                                        weights=np.array(g.es['weight']).astype(np.float64), 
                                        n_iterations=-1, resolution_parameter=r)

            labels = np.array(part.membership)
            
            # ----- Save Results ---- ARI,AMI etc
            meths += ['Leiden']
            result += [labels]
            hyper += [r]
            hvgs += [num_genes]
            data += [data_name]
            ari += [adjusted_rand_score(true_labs,labels)]
            ami += [adjusted_mutual_info_score(true_labs,labels)]
            
#             print('No. clus Leiden: ',len(np.unique(labels)))
            
        mats += ['U','S','U+S','UcatS']
        
        
        
    # -------- Fit K-Means with k -------
    
    for x in [U_norm,S_norm,U_p_S_norm,U_c_S_norm]:
        kmeans = KMeans(n_clusters=k, random_state=0).fit(x)
        labs = kmeans.labels_
        
        meths += ['K-Means']
        result += [labs]
        hyper += [k]
        hvgs += [num_genes]
        data += [data_name]
        ari += [adjusted_rand_score(true_labs,labs)]
        ami += [adjusted_mutual_info_score(true_labs,labs)]
        
#         print('No. clus K-Means: ',len(np.unique(labs)))
        
    mats += ['U','S','U+S','UcatS']

    
LK_res['Method'] = meths
LK_res['Clustering'] = result
LK_res['Matrices'] = mats
LK_res['Hyperparam'] = hyper
LK_res['HVGs'] = hvgs
LK_res['Data'] = data
LK_res['ARI'] = ari
LK_res['AMI'] = ami

In [93]:
#Save method, clustering, mat_in (U,S, etc), hyper (K or res), hvgs, data, ARI, NMI
LK_res.to_csv('./fits/LeidKMeansruns_0209/LeidKMeans_bench_results.csv',index=None)
LK_res.head()



Unnamed: 0,Method,Clustering,Matrices,Hyperparam,HVGs,Data,ARI,AMI
0,Leiden,"[1, 3, 5, 1, 0, 7, 2, 8, 3, 1, 0, 2, 1, 3, 3, ...",U,0.75,299,allen_b08,0.70171,0.832537
1,Leiden,"[1, 2, 6, 1, 0, 8, 3, 9, 2, 10, 0, 3, 1, 2, 2,...",S,0.75,299,allen_b08,0.675147,0.849593
2,Leiden,"[1, 2, 6, 1, 0, 8, 3, 9, 2, 10, 0, 3, 1, 2, 2,...",U+S,0.75,299,allen_b08,0.662079,0.836243
3,Leiden,"[4, 1, 7, 4, 0, 9, 3, 10, 1, 2, 0, 3, 2, 1, 1,...",UcatS,0.75,299,allen_b08,0.570947,0.802221
4,Leiden,"[4, 2, 5, 4, 0, 8, 1, 9, 2, 3, 0, 1, 3, 2, 2, ...",U,1.0,299,allen_b08,0.617054,0.79936
