##### Enrichment analysis

In [28]:
def prepare_gsea_ora(data, data_type, ora_top_n, top_n_dotplot):
    """Write per-community gene tables for GSEA and ORA, then print the R commands to run on them.

    Reads the community-info pickle for the selected cell type, parses the
    ``all_sorted_genes`` column and writes two tab-separated files under ``tmp/``
    (relative to the current working directory):

    * ``tmp/tmp_gsea_input.tsv`` -- columns ``cluster``, ``gene``, ``centrality``
      with every gene of every community;
    * ``tmp/tmp_ora_input.tsv``  -- columns ``cluster``, ``gene`` with only the
      first ``ora_top_n`` genes of each community.

    Each ``all_sorted_genes`` value is parsed as ``'; '``-separated entries in which
    the text before the first space is the gene name and the text between the
    first ``'='`` and the first ``')'`` is a float centrality.

    Parameters
    ----------
    data : str
        Cell-type directory name under ``<_DATA_HOME>/cell_types``.
    data_type : str
        ``'all'`` loads ``raw_data_communities_info.pickle``; any other value
        loads ``raw_data_<data_type>_type_communities_info.pickle``.
    ora_top_n : int
        Number of leading genes per community kept for the ORA table.
    top_n_dotplot : int
        Forwarded verbatim into the printed ``run_ora(...)`` R call.

    Returns
    -------
    None
        The results are the two files on disk and the printed instructions.
    """
    algo = 'leiden'

    # The pooled ('all') table and the per-type tables differ only in file name.
    if data_type == 'all':
        pickle_name = 'raw_data_communities_info.pickle'
    else:
        pickle_name = f'raw_data_{data_type}_type_communities_info.pickle'

    df = pd.read_pickle(os.path.join(
        _DATA_HOME, 'cell_types', data, 'data', 'grnboost2', f'{algo}_communities',
        pickle_name
    ))

    gsea_frames = []
    ora_frames = []
    # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
    for i, vals in df['all_sorted_genes'].items():
        entries = vals.split('; ')
        # split(' ', 1)[0] keeps the whole token when an entry has no space,
        # whereas slicing with find(' ') == -1 would drop its last character.
        genes = [el.split(' ', 1)[0] for el in entries]
        centrality = [float(el[el.find('=') + 1:el.find(')')]) for el in entries]

        gsea_frames.append(pd.DataFrame({
            'cluster': f'cluster_{i}',
            'gene': genes,
            'centrality': centrality
        }))
        ora_frames.append(pd.DataFrame({
            'cluster': f'cluster_{i}',
            'gene': genes[:ora_top_n]
        }))

    gsea_input = pd.concat(gsea_frames, axis=0).reset_index(drop=True)
    ora_input = pd.concat(ora_frames, axis=0).reset_index(drop=True)

    # The output directory may not exist yet (e.g. on a fresh checkout).
    os.makedirs('tmp', exist_ok=True)

    # index_label=False omits the header field for the index column, the layout
    # pandas documents for easier importing in R.
    gsea_input.to_csv('tmp/tmp_gsea_input.tsv', sep='\t', index_label=False)
    ora_input.to_csv('tmp/tmp_ora_input.tsv', sep='\t', index_label=False)

    print('Saved data, please run in terminal:')
    print()
    print('source("src/func.R")')
    # Raw strings so that the R code receives a literal backslash-t as separator.
    print(r"markers_df_ora <- read.table('tmp/tmp_ora_input.tsv', sep='\t')")
    print(f'out_ora <- run_ora(markers_df_ora, is_clusters=T, '
          f'cell_type_for_community_ana="{data} {data_type}", top_n_dotplot={top_n_dotplot})')
    print(r"markers_df_gsea <- read.table('tmp/tmp_gsea_input.tsv', sep='\t')")
    print("out_gsea <- run_gsea(markers_df_gsea, is_clusters=T)")
    print()
    
    
# Manually-triggered form for prepare_gsea_ora: with manual=True ipywidgets adds a
# run button, so the export happens on click rather than on every dropdown change.
widget = interactive(
    prepare_gsea_ora,
    dict(manual=True),
    data=[
        'Macrophage',
        'T_cells',
        'DC',
        'Monocyte',
        'NK_cell',
        'B_cell',
        'Epithelial_cells',
        'Neutrophils',
        'Pre-B_cell_CD34-',
    ],
    ora_top_n=[50, 60, 40, 70, 30, 80, 20, 10],
    data_type=['all', 'C', 'M', 'S'],
    top_n_dotplot=[3, 5, 10],
)
display(widget)

interactive(children=(Dropdown(description='data', options=('Macrophage', 'T_cells', 'DC', 'Monocyte', 'NK_cel…

##### Enrichr analysis

In [29]:
def prepare_enrichr(data, data_type, top_n):
    """Print the leading genes of every community as copy-pasteable gene lists.

    Reads the community-info pickle for the selected cell type, takes the first
    ``top_n`` genes of each entry in the ``all_sorted_genes`` column and, for
    every community, prints:

    1. the community label (``cluster_<index>``) in cyan,
    2. the genes joined by single spaces on one line,
    3. the same genes one per line.

    Each ``all_sorted_genes`` value is parsed as ``'; '``-separated entries in which
    the text before the first space is the gene name.

    Parameters
    ----------
    data : str
        Cell-type directory name under ``<_DATA_HOME>/cell_types``.
    data_type : str
        ``'all'`` loads ``raw_data_communities_info.pickle``; any other value
        loads ``raw_data_<data_type>_type_communities_info.pickle``.
    top_n : int
        Number of leading genes per community to print.

    Returns
    -------
    None
        Output is printed only.
    """
    algo = 'leiden'

    # Load exactly one table. The pooled file is read only when it is the one
    # requested, so a per-type selection does not depend on the pooled pickle
    # being present.
    if data_type == 'all':
        pickle_name = 'raw_data_communities_info.pickle'
    else:
        pickle_name = f'raw_data_{data_type}_type_communities_info.pickle'

    df = pd.read_pickle(os.path.join(
        _DATA_HOME, 'cell_types', data, 'data', 'grnboost2', f'{algo}_communities',
        pickle_name
    ))

    # Series.items() is used because Series.iteritems() was removed in pandas 2.0.
    # split(' ', 1)[0] keeps the whole token when an entry has no space, whereas
    # slicing with find(' ') == -1 would drop its last character.
    df_input = pd.concat([
        pd.DataFrame({
            'cluster': f'cluster_{i}',
            'gene': [el.split(' ', 1)[0] for el in vals.split('; ')][:top_n]
        }) for i, vals in df['all_sorted_genes'].items()
    ], axis=0).reset_index(drop=True)

    for cl in df_input['cluster'].unique():
        genes = df_input.loc[df_input['cluster'] == cl, 'gene']
        print(colored(cl, 'cyan'))
        print(' '.join(genes))
        print('\n'.join(genes))
    
# Manually-triggered form for prepare_enrichr: with manual=True ipywidgets adds a
# run button, so the gene lists are printed on click rather than on every change.
widget = interactive(
    prepare_enrichr,
    dict(manual=True),
    data=[
        'Macrophage',
        'T_cells',
        'DC',
        'Monocyte',
        'NK_cell',
        'B_cell',
        'Epithelial_cells',
        'Neutrophils',
        'Pre-B_cell_CD34-',
    ],
    data_type=['all', 'C', 'M', 'S'],
    top_n=[50, 60, 40, 70, 30, 80, 20, 10],
)
display(widget)

interactive(children=(Dropdown(description='data', options=('Macrophage', 'T_cells', 'DC', 'Monocyte', 'NK_cel…