In [1]:
import sys
sys.path.append('/home/miaochangjiu/miniconda3/envs/snakemake/lib/python3.12/site-packages/')
import pickle, os, gzip, json, sys, itertools
from pathlib import Path
from importlib import reload
from dataclasses import dataclass, field
import collections
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import pysam
import scipy as sp
import seaborn
import sharedmem

# Figure defaults for every plot in this notebook: white background, 300 dpi.
plt.rcParams.update({"figure.facecolor": "white", "figure.dpi": 300})

# Extend the import search path with the two relative script directories
# (presumably where the project modules imported in the next cell live -- confirm).
sys.path.extend(["scripts", "../../scripts"])

In [2]:
from data_io import is_fwd_id, get_fwd_id, get_sibling_id
from dim_reduction import SpectralEmbedding, scBiMapEmbedding
from nearest_neighbors import (
    ExactNearestNeighbors,
    NNDescent,
    WeightedLowHash,
    PAFNearestNeighbors,
    LowHash,
    HNSW,
    ProductQuantization,
    _NearestNeighbors,
    IVFProductQuantization,
)
from graph import OverlapGraph, GenomicInterval, get_overlap_statistics, remove_false_edges
from truth import get_overlaps
from evaluate import NearestNeighborsConfig, mp_compute_nearest_neighbors
from plots import plot_read_graph, mp_plot_read_graphs, get_graphviz_layout, get_umap_layout

/home/miaochangjiu/kNN-overlap-finder/scripts/../lib


In [6]:
# Build the grid of candidate pipelines, named
# "<method>_<metric>_<dimension reduction>_<pre-processing>", then render the
# source text of a NearestNeighborsConfig(...) call for every supported
# combination. The text is only collected here (simple_names / configs_str);
# nothing is evaluated in this cell.
methods = ['Exact_Euclidean','Exact_Cosine',
          'NNDescent_Euclidean','PQ_Euclidean','PQ_Cosine','HNSW_Euclidean','HNSW_Cosine',
          'LowHash_Jaccard','WeightedLowHash_Jaccard','MinHash_Jaccard','WeightedMinHash_Jaccard']
dim_redu_method = ['Spectural_100d','Spectural_500d','scBiMap_100d','scBiMap_500d','None']
pre_process = ['TF','IDF','TF-IDF','None']

# Cartesian product; the right-most factor varies fastest, i.e. the same order
# as three nested loops over methods / dim_redu_method / pre_process.
find_neighbor_method = [
    '_'.join(parts)
    for parts in itertools.product(methods, dim_redu_method, pre_process)
]

# Short tokens used in the names -> identifiers / literals used in the generated code.
simple_to_complete = {'Exact':'ExactNearestNeighbors',
      'PQ':'ProductQuantization',
      'scBiMap':'scBiMapEmbedding',
      'Spectural':'SpectralEmbedding',
      '100d':'100',
      '500d':'500'}

# Methods configured through a `metric` keyword. The spelling must match the
# method token exactly: the previous 'NNdescent' never matched 'NNDescent', so
# every NNDescent combination was silently skipped.
metric_based_methods = ('HNSW', 'ProductQuantization', 'NNDescent', 'ExactNearestNeighbors')

simple_names = []
configs_str = []
for simple_name in find_neighbor_method:
    # Expand the short tokens; tokens without a mapping are kept as they are.
    complete_name = [simple_to_complete.get(token, token) for token in simple_name.split('_')]
    des = ','.join(complete_name)
    method, metric, reduction = complete_name[:3]
    # The pre-processing token is always last (index 3 without a dimension
    # reduction, index 4 with one, because "<reduction>_<N>d" splits in two).
    tfidf = complete_name[-1]
    no_reduction = reduction == 'None'

    if 'LowHash' in method and no_reduction:
        # LowHash / WeightedLowHash: bucket by a fraction of the lowest hashes.
        config = f'''NearestNeighborsConfig(
        nearest_neighbors_method={method},
        description='{des}',
        tfidf='{tfidf}',
        nearest_neighbors_kw=dict(
        lowhash_fraction=0.01,
        max_bucket_size=max_bucket_size,
        repeats=100,
        seed=458,
        ))'''
    elif 'MinHash' in method and no_reduction:
        # The MinHash variants reuse the LowHash classes with a fixed hash count.
        hash_class = 'WeightedLowHash' if 'Weight' in method else 'LowHash'
        config = f'''NearestNeighborsConfig(
            nearest_neighbors_method={hash_class},
            description='{des}',
            tfidf='{tfidf}',
            nearest_neighbors_kw=dict(
            lowhash_count=20,
            max_bucket_size=max_bucket_size,
            repeats=100,
            seed=458,
            ))'''
    elif method in metric_based_methods:
        if no_reduction:
            config = f'''NearestNeighborsConfig(
            nearest_neighbors_method={method},
            description='{des}',
            tfidf='{tfidf}',
            nearest_neighbors_kw=dict(metric='{metric.lower()}')) '''
        else:
            config = f'''NearestNeighborsConfig(
            nearest_neighbors_method={method},
            description='{des}',
            tfidf='{tfidf}',
            dimension_reduction_method={reduction},
            dimension_reduction_kw=dict(n_dimensions={complete_name[3]}),
            nearest_neighbors_kw=dict(metric='{metric.lower()}')) '''
    else:
        # Hash-based methods combined with a dimension reduction are not generated.
        continue

    simple_names.append(simple_name)
    configs_str.append(config)

In [46]:
# Print one `config_dict["<name>"]=<config source>` statement per generated
# configuration, pairing each short name with its rendered constructor call.
for name, config_source in zip(simple_names, configs_str):
    print('config_dict["' + name + '"]=' + config_source)

config_dict["Exact_Euclidean_Spectural_100d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,100,TF',
            tfidf='TF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_100d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,100,IDF',
            tfidf='IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_100d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description=

In [3]:
# Settings for this cell.
MAX_SAMPLE_SIZE = 1_000_000_000
COVERAGE_DEPTH = 20
# 1.5x the coverage depth; the product is a float (30.0). The generated
# LowHash/MinHash config source refers to this variable by name.
max_bucket_size = 1.5 * COVERAGE_DEPTH
# Filled by the config_dict[...] assignments that follow, keyed by short pipeline name.
config_dict = dict()
# NOTE(review): `kw` is not used in the visible part of the notebook -- confirm it is still needed.
kw = dict(data=1)
config_dict["Exact_Euclidean_Spectural_100d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,100,TF',
            tfidf='TF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_100d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,100,IDF',
            tfidf='IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_100d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,100,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_100d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,100,None',
            tfidf='None',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_500d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,500,TF',
            tfidf='TF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_500d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,500,IDF',
            tfidf='IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_500d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,500,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_Spectural_500d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,SpectralEmbedding,500,None',
            tfidf='None',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_scBiMap_100d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,100,TF',
            tfidf='TF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_scBiMap_100d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,100,IDF',
            tfidf='IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_scBiMap_100d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,100,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_scBiMap_100d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,100,None',
            tfidf='None',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_scBiMap_500d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,500,TF',
            tfidf='TF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_scBiMap_500d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,500,IDF',
            tfidf='IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_scBiMap_500d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,500,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_scBiMap_500d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,500,None',
            tfidf='None',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_None_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,None,TF',
            tfidf='TF',
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_None_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,None,IDF',
            tfidf='IDF',
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_None_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,None,TF-IDF',
            tfidf='TF-IDF',
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Euclidean_None_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Euclidean,None,None',
            tfidf='None',
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["Exact_Cosine_Spectural_100d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,SpectralEmbedding,100,TF',
            tfidf='TF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_Spectural_100d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,SpectralEmbedding,100,IDF',
            tfidf='IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_Spectural_100d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,SpectralEmbedding,100,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_Spectural_100d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,SpectralEmbedding,100,None',
            tfidf='None',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_Spectural_500d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,SpectralEmbedding,500,TF',
            tfidf='TF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_Spectural_500d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,SpectralEmbedding,500,IDF',
            tfidf='IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_Spectural_500d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,SpectralEmbedding,500,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_Spectural_500d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,SpectralEmbedding,500,None',
            tfidf='None',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_scBiMap_100d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,scBiMapEmbedding,100,TF',
            tfidf='TF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_scBiMap_100d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,scBiMapEmbedding,100,IDF',
            tfidf='IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_scBiMap_100d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,scBiMapEmbedding,100,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_scBiMap_100d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,scBiMapEmbedding,100,None',
            tfidf='None',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_scBiMap_500d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,scBiMapEmbedding,500,TF',
            tfidf='TF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_scBiMap_500d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,scBiMapEmbedding,500,IDF',
            tfidf='IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_scBiMap_500d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,scBiMapEmbedding,500,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_scBiMap_500d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,scBiMapEmbedding,500,None',
            tfidf='None',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_None_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,None,TF',
            tfidf='TF',
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_None_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,None,IDF',
            tfidf='IDF',
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_None_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,None,TF-IDF',
            tfidf='TF-IDF',
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["Exact_Cosine_None_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ExactNearestNeighbors,
            description='ExactNearestNeighbors,Cosine,None,None',
            tfidf='None',
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["PQ_Euclidean_Spectural_100d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,SpectralEmbedding,100,TF',
            tfidf='TF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_Spectural_100d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,SpectralEmbedding,100,IDF',
            tfidf='IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_Spectural_100d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,SpectralEmbedding,100,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_Spectural_100d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,SpectralEmbedding,100,None',
            tfidf='None',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_Spectural_500d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,SpectralEmbedding,500,TF',
            tfidf='TF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_Spectural_500d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,SpectralEmbedding,500,IDF',
            tfidf='IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_Spectural_500d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,SpectralEmbedding,500,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_Spectural_500d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,SpectralEmbedding,500,None',
            tfidf='None',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_scBiMap_100d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,scBiMapEmbedding,100,TF',
            tfidf='TF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_scBiMap_100d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,scBiMapEmbedding,100,IDF',
            tfidf='IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_scBiMap_100d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,scBiMapEmbedding,100,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_scBiMap_100d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,scBiMapEmbedding,100,None',
            tfidf='None',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_scBiMap_500d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,scBiMapEmbedding,500,TF',
            tfidf='TF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_scBiMap_500d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,scBiMapEmbedding,500,IDF',
            tfidf='IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_scBiMap_500d_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,scBiMapEmbedding,500,TF-IDF',
            tfidf='TF-IDF',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_scBiMap_500d_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,scBiMapEmbedding,500,None',
            tfidf='None',
            dimension_reduction_method=scBiMapEmbedding,
            dimension_reduction_kw=dict(n_dimensions=500),
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_None_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,None,TF',
            tfidf='TF',
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_None_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,None,IDF',
            tfidf='IDF',
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_None_TF-IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,None,TF-IDF',
            tfidf='TF-IDF',
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Euclidean_None_None"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Euclidean,None,None',
            tfidf='None',
            nearest_neighbors_kw=dict(metric='euclidean')) 
config_dict["PQ_Cosine_Spectural_100d_TF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Cosine,SpectralEmbedding,100,TF',
            tfidf='TF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
config_dict["PQ_Cosine_Spectural_100d_IDF"]=NearestNeighborsConfig(
            nearest_neighbors_method=ProductQuantization,
            description='ProductQuantization,Cosine,SpectralEmbedding,100,IDF',
            tfidf='IDF',
            dimension_reduction_method=SpectralEmbedding,
            dimension_reduction_kw=dict(n_dimensions=100),
            nearest_neighbors_kw=dict(metric='cosine')) 
# ---------------------------------------------------------------------------
# Metric-based searchers: remaining ProductQuantization/cosine entries, then
# HNSW with euclidean and cosine metrics.
#
# All of these entries follow one naming scheme, so they are generated from
# a small table instead of being written out one by one:
#   key         = "<Prefix>_<Metric>_<Embedding>_<dims>d_<tfidf>"
#                 or "<Prefix>_<Metric>_None_<tfidf>" without dimension reduction
#   description = "<Method>,<Metric>,<EmbeddingClass>,<dims>,<tfidf>"
#                 or "<Method>,<Metric>,None,<tfidf>" without dimension reduction
# The 'Spectural' spelling in the keys is intentional: keys are runtime
# strings and must keep matching the names used in the rest of the notebook.
# ---------------------------------------------------------------------------

_TFIDF_MODES = ('TF', 'IDF', 'TF-IDF', 'None')
_EMBEDDINGS = (
    # (key fragment, description fragment, embedding class)
    ('Spectural', 'SpectralEmbedding', SpectralEmbedding),
    ('scBiMap', 'scBiMapEmbedding', scBiMapEmbedding),
)
# Registration order of the dimension-reduction variants; (None, None) means
# "no dimension reduction" and always comes last within a method/metric group.
_REDUCTIONS = [
    (embedding, n_dims) for embedding in _EMBEDDINGS for n_dims in (100, 500)
] + [(None, None)]


def _register_metric_config(key_prefix, method_label, method_cls, metric,
                            tfidf, embedding=None, n_dimensions=None):
    """Build one metric-based ``NearestNeighborsConfig`` and store it in ``config_dict``.

    Parameters
    ----------
    key_prefix : str
        Short method name used in the dictionary key (e.g. ``'PQ'``).
    method_label : str
        Method name used in the human-readable description.
    method_cls : type
        Class passed as ``nearest_neighbors_method``.
    metric : str
        Lower-case metric name passed to the searcher (``'cosine'`` or
        ``'euclidean'``); its capitalised form is used in key and description.
    tfidf : str
        Pre-processing mode, passed through unchanged as ``tfidf``.
    embedding : tuple or None
        ``(key fragment, description fragment, class)`` of the dimension
        reduction, or ``None`` to leave the dimension-reduction arguments at
        the ``NearestNeighborsConfig`` defaults.
    n_dimensions : int or None
        Target dimensionality; only used when ``embedding`` is given.
    """
    metric_label = metric.capitalize()
    key_parts = [key_prefix, metric_label]
    description_parts = [method_label, metric_label]
    # Fresh dict objects are created for every entry so that no two configs
    # share mutable keyword-argument dictionaries.
    config_kw = dict(
        nearest_neighbors_method=method_cls,
        tfidf=tfidf,
        nearest_neighbors_kw=dict(metric=metric),
    )
    if embedding is None:
        key_parts.append('None')
        description_parts.append('None')
    else:
        embedding_key, embedding_label, embedding_cls = embedding
        key_parts += [embedding_key, f'{n_dimensions}d']
        description_parts += [embedding_label, str(n_dimensions)]
        config_kw.update(
            dimension_reduction_method=embedding_cls,
            dimension_reduction_kw=dict(n_dimensions=n_dimensions),
        )
    key_parts.append(tfidf)
    description_parts.append(tfidf)
    config_dict['_'.join(key_parts)] = NearestNeighborsConfig(
        description=','.join(description_parts), **config_kw)


# ProductQuantization / cosine. This block starts part-way through the
# Spectural-100d group (the preceding code already ends with the IDF entry),
# so only the TF-IDF and None modes are registered for that first group.
for _tfidf in ('TF-IDF', 'None'):
    _register_metric_config('PQ', 'ProductQuantization', ProductQuantization,
                            'cosine', _tfidf, _EMBEDDINGS[0], 100)
for _embedding, _n_dims in _REDUCTIONS[1:]:
    for _tfidf in _TFIDF_MODES:
        _register_metric_config('PQ', 'ProductQuantization', ProductQuantization,
                                'cosine', _tfidf, _embedding, _n_dims)

# HNSW: the full grid, first with the euclidean metric, then with cosine.
for _metric in ('euclidean', 'cosine'):
    for _embedding, _n_dims in _REDUCTIONS:
        for _tfidf in _TFIDF_MODES:
            _register_metric_config('HNSW', 'HNSW', HNSW,
                                    _metric, _tfidf, _embedding, _n_dims)
# ---------------------------------------------------------------------------
# Hash-based searchers (Jaccard, no dimension reduction).
#
# The four families differ only in the searcher class and in how hashes are
# selected: the "LowHash" variants pass ``lowhash_fraction=0.01`` while the
# "MinHash" variants reuse the same classes with ``lowhash_count=20``.
# Everything else (bucket size, repeats, seed) is shared, so the entries are
# generated from a table instead of being written out one by one.
#   key         = "<Family>_Jaccard_None_<tfidf>"
#   description = "<Family>,Jaccard,None,<tfidf>"
# ---------------------------------------------------------------------------
_HASH_FAMILIES = (
    # (family label, searcher class, family-specific keyword arguments)
    ('LowHash', LowHash, dict(lowhash_fraction=0.01)),
    ('WeightedLowHash', WeightedLowHash, dict(lowhash_fraction=0.01)),
    ('MinHash', LowHash, dict(lowhash_count=20)),
    ('WeightedMinHash', WeightedLowHash, dict(lowhash_count=20)),
)

for _family, _hash_cls, _selection_kw in _HASH_FAMILIES:
    for _tfidf in ('TF', 'IDF', 'TF-IDF', 'None'):
        config_dict[f'{_family}_Jaccard_None_{_tfidf}'] = NearestNeighborsConfig(
            nearest_neighbors_method=_hash_cls,
            description=f'{_family},Jaccard,None,{_tfidf}',
            tfidf=_tfidf,
            # A new dict per entry; the family-specific argument comes first
            # so the keyword order matches the hand-written configuration.
            nearest_neighbors_kw=dict(
                **_selection_kw,
                max_bucket_size=max_bucket_size,
                repeats=100,
                seed=458,
            ),
        )

In [4]:
import pickle

# Persist all registered configurations to 'config_dict.pkl' in the current
# working directory. NOTE: pickle stores classes by their importable name, so
# loading this file needs the same project modules on the import path.
with open('config_dict.pkl', mode='wb') as pkl_out:
    pickle.dump(config_dict, pkl_out)

In [42]:
# Select the scBiMap configurations whose name does not contain '100',
# restricted to the HNSW, PQ and Exact searchers; then show the selection
# and its size.
mytest1 = []
for method in simple_names:
    if '100' in method or 'scBiMap' not in method:
        continue
    if any(tag in method for tag in ('HNSW', 'PQ', 'Exact')):
        mytest1.append(method)
print(mytest1)
len(mytest1)

['Exact_Euclidean_scBiMap_500d_TF', 'Exact_Euclidean_scBiMap_500d_IDF', 'Exact_Euclidean_scBiMap_500d_TF-IDF', 'Exact_Euclidean_scBiMap_500d_None', 'Exact_Cosine_scBiMap_500d_TF', 'Exact_Cosine_scBiMap_500d_IDF', 'Exact_Cosine_scBiMap_500d_TF-IDF', 'Exact_Cosine_scBiMap_500d_None', 'PQ_Euclidean_scBiMap_500d_TF', 'PQ_Euclidean_scBiMap_500d_IDF', 'PQ_Euclidean_scBiMap_500d_TF-IDF', 'PQ_Euclidean_scBiMap_500d_None', 'PQ_Cosine_scBiMap_500d_TF', 'PQ_Cosine_scBiMap_500d_IDF', 'PQ_Cosine_scBiMap_500d_TF-IDF', 'PQ_Cosine_scBiMap_500d_None', 'HNSW_Euclidean_scBiMap_500d_TF', 'HNSW_Euclidean_scBiMap_500d_IDF', 'HNSW_Euclidean_scBiMap_500d_TF-IDF', 'HNSW_Euclidean_scBiMap_500d_None', 'HNSW_Cosine_scBiMap_500d_TF', 'HNSW_Cosine_scBiMap_500d_IDF', 'HNSW_Cosine_scBiMap_500d_TF-IDF', 'HNSW_Cosine_scBiMap_500d_None']


24

In [11]:
# Select three groups of configurations, then show the selection and its size:
#   * scBiMap names without '100' -> kept only for the Exact searcher
#   * every other name            -> kept if it is a Min/Low hash method, or
#                                    an Exact method whose name contains 'None_'
mytest2 = []
for method in simple_names:
    if 'scBiMap' in method and '100' not in method:
        # Names in this branch are never tested against the hash/None rules.
        if 'Exact' not in method:
            continue
    elif ('Min' not in method and 'Low' not in method
          and not ('Exact' in method and 'None_' in method)):
        continue
    mytest2.append(method)
print(mytest2)
len(mytest2)

['Exact_Euclidean_scBiMap_500d_TF', 'Exact_Euclidean_scBiMap_500d_IDF', 'Exact_Euclidean_scBiMap_500d_TF-IDF', 'Exact_Euclidean_scBiMap_500d_None', 'Exact_Euclidean_None_TF', 'Exact_Euclidean_None_IDF', 'Exact_Euclidean_None_TF-IDF', 'Exact_Euclidean_None_None', 'Exact_Cosine_scBiMap_500d_TF', 'Exact_Cosine_scBiMap_500d_IDF', 'Exact_Cosine_scBiMap_500d_TF-IDF', 'Exact_Cosine_scBiMap_500d_None', 'Exact_Cosine_None_TF', 'Exact_Cosine_None_IDF', 'Exact_Cosine_None_TF-IDF', 'Exact_Cosine_None_None', 'LowHash_Jaccard_None_TF', 'LowHash_Jaccard_None_IDF', 'LowHash_Jaccard_None_TF-IDF', 'LowHash_Jaccard_None_None', 'WeightedLowHash_Jaccard_None_TF', 'WeightedLowHash_Jaccard_None_IDF', 'WeightedLowHash_Jaccard_None_TF-IDF', 'WeightedLowHash_Jaccard_None_None', 'MinHash_Jaccard_None_TF', 'MinHash_Jaccard_None_IDF', 'MinHash_Jaccard_None_TF-IDF', 'MinHash_Jaccard_None_None', 'WeightedMinHash_Jaccard_None_TF', 'WeightedMinHash_Jaccard_None_IDF', 'WeightedMinHash_Jaccard_None_TF-IDF', 'WeightedMin

32

In [44]:
# Spot-check one registered entry; as the last expression of the cell its repr is displayed.
config_dict['Exact_Euclidean_scBiMap_500d_TF']

NearestNeighborsConfig(description='ExactNearestNeighbors,Euclidean,scBiMapEmbedding,500,TF', tfidf='TF', dimension_reduction_method=<class 'dim_reduction.scBiMapEmbedding'>, nearest_neighbors_method=<class 'nearest_neighbors.ExactNearestNeighbors'>)