In [1]:
# Execute the helper notebook so that the names it defines are available in this kernel.
# NOTE(review): later cells use readin, getdistance_wasserstein, getdistance_mnn,
# getdistance_mnn_debug, pd, os and plt without defining or importing them in this
# notebook, so they presumably all come from this %run — confirm.
%run megatron_ak.ipynb

### Read in Data

In [2]:
# Directory holding the preprocessed input files (relative to this notebook).
datadir = '../../preprocess/kleinlab/output_subset/'

# readin() hands back five objects; unpack them one per line so each name is
# easy to spot. The later cells pass the last three to the distance functions.
(
    cellid_to_idx,
    cloneid_to_idx,
    clonegrouping_spmtx,
    coords_mtx,
    time_vec,
) = readin(datadir)

Number of cells:  3221
Number of clones:  365
Number of dimensions:  2
Time Steps:  [2. 4. 6.]


## Trials

In [3]:
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.metrics.cluster import adjusted_rand_score
# Ground-truth table; its 'celltype' column is what the clusterings are scored against.
df_truth = pd.read_csv(
    os.path.join(datadir, './clones_truth.tsv.gz'),
    sep='\t',
    index_col=0,
)

# Linkage methods to try. Only 'ward' is active; the wider candidate list is
# kept below for reference.
#linkage_options = ['complete', 'ward', 'average', 'weighted', 'centroid', 'median']
linkage_options = ['ward']

# fcluster criteria to try.
cluster_options = ['maxclust']

### Wasserstein / Energy Distance Trials

In [7]:
# Distance definitions to compare; each is passed as `choice` to getdistance_wasserstein.
dist_options = ['wasserstein', 'energy']

# Value handed to fcluster as `t`. With the 'maxclust' criterion this is the
# maximum number of flat clusters to form.
n_clusters = 5

# The reference labels are the same for every trial, so build the list once
# instead of once per (distance, linkage, criterion) combination.
# NOTE(review): assumes the rows of df_truth are in the same order as the clones
# underlying the distance matrix — confirm against readin().
truth_labels = df_truth['celltype'].tolist()

# (distance, linkage, criterion) -> adjusted Rand score against truth_labels.
wass_score_dict = {}
for do in dist_options:
    # One distance computation per distance choice; it is reused for every
    # linkage / criterion combination below.
    condensed_dist_mat = getdistance_wasserstein(clonegrouping_spmtx, coords_mtx, time_vec, choice=do)
    for lo in linkage_options:
        Z = linkage(condensed_dist_mat, lo)
        for co in cluster_options:
            clone_clusters = fcluster(Z, t=n_clusters, criterion=co)
            score = adjusted_rand_score(truth_labels, clone_clusters.tolist())
            wass_score_dict[(do, lo, co)] = score

Out of 66430 clonal distances, 0 are noninformative
Out of 66430 clonal distances, 0 are noninformative


In [10]:
# Rank the trials stored in wass_score_dict from highest to lowest score and
# print one "(parameters)  score" line per trial.
# NOTE(review): the saved output of this cell lists linkage methods such as
# 'single' and 'median' that the current linkage_options (['ward']) does not
# contain, so that output predates the current code — re-run to refresh it.
ranked_trials = sorted(wass_score_dict.items(), key=lambda item: item[1], reverse=True)
for params, trial_score in ranked_trials:
    print(params, "\t\t", trial_score)

('energy', 'ward', 'maxclust') 		 0.7899530119959177
('wasserstein', 'ward', 'maxclust') 		 0.7885719124781362
('wasserstein', 'median', 'maxclust') 		 0.7616486910947495
('energy', 'average', 'maxclust') 		 0.7502881477816572
('energy', 'weighted', 'maxclust') 		 0.7412519902286092
('wasserstein', 'centroid', 'maxclust') 		 0.7294044632136572
('energy', 'complete', 'maxclust') 		 0.7289260391327324
('wasserstein', 'average', 'maxclust') 		 0.7105797278643441
('wasserstein', 'complete', 'maxclust') 		 0.6990144268531757
('wasserstein', 'weighted', 'maxclust') 		 0.5355769532850246
('energy', 'median', 'maxclust') 		 0.5007467235162486
('energy', 'centroid', 'maxclust') 		 0.47804527941231256
('energy', 'single', 'maxclust') 		 0.18600624102198973
('wasserstein', 'single', 'maxclust') 		 -0.0034818974557002946


### MNN Trials - Radius Graph

In [6]:
# run my code
%run megatron_ak.ipynb
r_options = [x for x in range(100, 570, 30)]
mode_options = ["distance", "connectivity"]
slope_options = [x for x in range(1, 3)]
xshift_options = [x for x in range(1, 10)]

mnn_r_score_dict = {}
for ro in r_options:
    for mo in mode_options:
        #for so in slope_options:
            #for xo in xshift_options:
        condensed_dist_mat = getdistance_mnn(
        clonegrouping_spmtx, coords_mtx, time_vec, 
        dist="radius", radius=ro, mode=mo, 
        slope = 1, xshift = 1)
        for lo in linkage_options:
            Z = linkage(condensed_dist_mat, lo)
            for co in cluster_options:
                clone_clusters = fcluster(Z, 5, criterion=co)
                score = adjusted_rand_score(df_truth['celltype'].tolist(), clone_clusters.tolist())
                mnn_r_score_dict[(ro, mo, lo, co)]=score
                print((ro, mo, lo, co), "\t", score)

(100, 'distance', 'ward', 'maxclust') 	 0.01812479115635514
(100, 'connectivity', 'ward', 'maxclust') 	 0.0085998029590482
(130, 'distance', 'ward', 'maxclust') 	 0.039539259332314294
(130, 'connectivity', 'ward', 'maxclust') 	 0.0013159239912256615


KeyboardInterrupt: 

In [None]:
# run my code
%run megatron_ak.ipynb
r_options = [x for x in range(100, 570, 30)]
mode_options = ["distance", "connectivity"]
slope_options = [x for x in range(1, 3)]
xshift_options = [x for x in range(1, 10)]

mnn_r_score_dict = {}
for ro in r_options:
    for mo in mode_options:
        #for so in slope_options:
            #for xo in xshift_options:
        condensed_dist_mat = getdistance_mnn(
        clonegrouping_spmtx, coords_mtx, time_vec, 
        dist="radius", radius=ro, mode=mo, 
        slope = 1, xshift = 1)
        for lo in linkage_options:
            Z = linkage(condensed_dist_mat, lo)
            for co in cluster_options:
                clone_clusters = fcluster(Z, 5, criterion=co)
                score = adjusted_rand_score(df_truth['celltype'].tolist(), clone_clusters.tolist())
                mnn_r_score_dict[(ro, mo, lo, co)]=score
                print((ro, mo, lo, co), "\t", score)

In [None]:
# Rank the radius-graph trials stored in mnn_r_score_dict from highest to
# lowest score and print one "(parameters)  score" line per trial.
ranked_trials = sorted(mnn_r_score_dict.items(), key=lambda item: item[1], reverse=True)
for params, trial_score in ranked_trials:
    print(params, "\t\t", trial_score)

### MNN Trials - KNN Graph

In [4]:
# Neighbour counts to sweep: 350, 400, 450.
k_options = list(range(350, 500, 50))
# Only "connectivity" is evaluated here; the two-mode list is kept for reference.
#mode_options = ["distance", "connectivity"]
mode_options = ["connectivity"]
# Candidate slope / xshift values. They are NOT swept at present: both
# parameters are held at 1 in the getdistance_mnn call below.
slope_options = list(range(1, 3))
xshift_options = list(range(1, 10))

# Value handed to fcluster as `t`. With the 'maxclust' criterion this is the
# maximum number of flat clusters to form.
n_clusters = 5

# The reference labels are the same for every trial, so build the list once.
# NOTE(review): assumes the rows of df_truth are in the same order as the clones
# underlying the distance matrix — confirm against readin().
truth_labels = df_truth['celltype'].tolist()

# (neighbors, mode, linkage, criterion) -> adjusted Rand score against truth_labels.
mnn_k_score_dict = {}
for ko in k_options:
    for mo in mode_options:
        # One distance computation per (neighbors, mode); reused for every
        # linkage / criterion combination below.
        condensed_dist_mat = getdistance_mnn(
            clonegrouping_spmtx, coords_mtx, time_vec,
            dist="kneighbors", neighbors=ko, mode=mo,
            slope=1, xshift=1)
        for lo in linkage_options:
            Z = linkage(condensed_dist_mat, lo)
            for co in cluster_options:
                clone_clusters = fcluster(Z, t=n_clusters, criterion=co)
                score = adjusted_rand_score(truth_labels, clone_clusters.tolist())
                mnn_k_score_dict[(ko, mo, lo, co)] = score
                # Print as we go so partial results survive an interrupted run.
                print((ko, mo, lo, co), "\t", score)

(350, 'connectivity', 'ward', 'maxclust') 	 0.39069940300884487
(400, 'connectivity', 'ward', 'maxclust') 	 0.41058581088603674
(450, 'connectivity', 'ward', 'maxclust') 	 0.11549967340088849


In [8]:
# NOTE(review): this cell repeats the kNN sweep of the preceding cell (same
# option lists, same result dict, and it resets mnn_k_score_dict), so a full
# top-to-bottom run does the work twice — keep only one of the two cells.

# Neighbour counts to sweep: 350, 400, 450.
k_options = list(range(350, 500, 50))
# Only "connectivity" is evaluated here; the two-mode list is kept for reference.
#mode_options = ["distance", "connectivity"]
mode_options = ["connectivity"]
# Candidate slope / xshift values. They are NOT swept at present: both
# parameters are held at 1 in the getdistance_mnn call below.
slope_options = list(range(1, 3))
xshift_options = list(range(1, 10))

# Value handed to fcluster as `t`. With the 'maxclust' criterion this is the
# maximum number of flat clusters to form.
n_clusters = 5

# The reference labels are the same for every trial, so build the list once.
# NOTE(review): assumes the rows of df_truth are in the same order as the clones
# underlying the distance matrix — confirm against readin().
truth_labels = df_truth['celltype'].tolist()

# (neighbors, mode, linkage, criterion) -> adjusted Rand score against truth_labels.
mnn_k_score_dict = {}
for ko in k_options:
    for mo in mode_options:
        # One distance computation per (neighbors, mode); reused for every
        # linkage / criterion combination below.
        condensed_dist_mat = getdistance_mnn(
            clonegrouping_spmtx, coords_mtx, time_vec,
            dist="kneighbors", neighbors=ko, mode=mo,
            slope=1, xshift=1)
        for lo in linkage_options:
            Z = linkage(condensed_dist_mat, lo)
            for co in cluster_options:
                clone_clusters = fcluster(Z, t=n_clusters, criterion=co)
                score = adjusted_rand_score(truth_labels, clone_clusters.tolist())
                mnn_k_score_dict[(ko, mo, lo, co)] = score
                # Print as we go so partial results survive an interrupted run.
                print((ko, mo, lo, co), "\t", score)

(350, 'connectivity', 'ward', 'maxclust') 	 0.39069940300884487


KeyboardInterrupt: 

In [5]:
# Rank the kNN-graph trials stored in mnn_k_score_dict from highest to lowest
# score and print one "(parameters)  score" line per trial.
ranked_trials = sorted(mnn_k_score_dict.items(), key=lambda item: item[1], reverse=True)
for params, trial_score in ranked_trials:
    print(params, "\t\t", trial_score)

(400, 'connectivity', 'ward', 'maxclust') 		 0.41058581088603674
(350, 'connectivity', 'ward', 'maxclust') 		 0.39069940300884487
(450, 'connectivity', 'ward', 'maxclust') 		 0.11549967340088849


### MNN Troubleshooting

In [None]:
%run megatron_ak.ipynb
r_options = [x/10 for x in range(10, 100, 30)]
mode_options = ["distance", "connectivity"]
slope_options = [x for x in range(1, 3)]
xshift_options = [x for x in range(1, 3)]

for ro in r_options:
    for mo in mode_options:
        for so in slope_options:
            for xo in xshift_options:
                dm, fracs, trans, numers, denoms = getdistance_mnn_debug(
                    clonegrouping_spmtx, coords_mtx, time_vec, dist="radius", 
                    radius=ro, mode=mo, slope=so, xshift=xo)
                Z = linkage(dm, 'ward')
                print((ro,mo,so,xo))
                plt.hist(numers)
                plt.title('Numerators')
                plt.show()
                plt.hist(denoms)
                plt.title('Denominators')
                plt.show()
                plt.hist(fracs)
                plt.title('Fraction - 1')
                plt.show()
                plt.hist(trans)
                plt.title('Transformed with sigmoid')
                plt.show()

In [9]:
# Line-profile a single getdistance_mnn call.
# Re-execute the helper notebook first — presumably so the profiled function is
# the latest version; confirm.
%run megatron_ak.ipynb
# (Re)load the line_profiler IPython extension, which provides the %lprun magic.
%reload_ext line_profiler
# -f names the function to instrument; the statement after it is what gets run:
# one radius-graph call with fixed radius, mode, slope and xshift.
%lprun -f getdistance_mnn getdistance_mnn(clonegrouping_spmtx, coords_mtx, time_vec, dist="radius", radius=100, mode="connectivity", slope=4.6, xshift=1.1)