# This notebook demonstrates Workflow 9

In [1]:
import asyncio
import concurrent.futures
import importlib

import biclusterco

In [2]:
# Create the workflow helper object; every later cell calls one of its methods.
GeneCoocurrenceByBiclusterObject = biclusterco.GeneCoocurrenceByBicluster()

# In the cell below, provide the URL of a text file containing the list of input genes, as shown. The rest of the notebook will then run on its own.

In [3]:
# URL of the text file that lists the input genes; edit this line to analyse a different gene set.
gene_set_url = 'https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_1_core_complex.txt'
# Read the gene set behind that URL into the list of gene identifiers used by the rest of the notebook.
curated_geneset = GeneCoocurrenceByBiclusterObject.run_getinput(gene_set_url)

In [4]:
# Show the input gene identifiers that were loaded (a list of 'ncbigene:<id>' strings).
curated_geneset

['ncbigene:2175',
 'ncbigene:2187',
 'ncbigene:2176',
 'ncbigene:2178',
 'ncbigene:2188',
 'ncbigene:2189',
 'ncbigene:55120',
 'ncbigene:57697',
 'ncbigene:2177',
 'ncbigene:55215',
 'ncbigene:29089']

In [5]:
# Grab the current asyncio event loop; it is used below to run the async bicluster lookup to completion.
loop = asyncio.get_event_loop()

In [6]:
# Run the async bicluster lookup for the input genes to completion and keep its result.
# `loop.run_until_complete` raises "RuntimeError: This event loop is already running"
# when the Jupyter kernel itself is driving `loop` (ipykernel on tornado >= 5). In that
# case the coroutine is executed by `asyncio.run` on a private event loop in a worker
# thread; otherwise the original blocking call is used unchanged.
# NOTE(review): assumes find_related_biclusters_async returns a plain coroutine that is
# not tied to the kernel's event loop — confirm against biclusterco.
related_biclusters_coroutine = GeneCoocurrenceByBiclusterObject.find_related_biclusters_async(curated_geneset)
if loop.is_running():
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as worker:
        related_biclusters_and_genes_for_each_input_gene = worker.submit(
            asyncio.run, related_biclusters_coroutine
        ).result()
else:
    related_biclusters_and_genes_for_each_input_gene = loop.run_until_complete(
        related_biclusters_coroutine
    )

In [7]:
# Build the bicluster-occurrence mapping from the per-gene lookup results
# (presumably bicluster -> how often it turned up across the input genes; confirm in biclusterco).
bicluster_occurences_dict = GeneCoocurrenceByBiclusterObject.bicluster_occurences_dict(
    related_biclusters_and_genes_for_each_input_gene
)

In [8]:
# Select the "unique" biclusters from the occurrence mapping
# (the selection criterion lives in biclusterco and is not visible in this notebook).
unique_biclusters = GeneCoocurrenceByBiclusterObject.unique_biclusters(
    bicluster_occurences_dict
)

In [9]:
# Collect the genes belonging to the unique biclusters, using the per-gene lookup
# results as the source of bicluster membership.
genes_in_unique_biclusters = GeneCoocurrenceByBiclusterObject.genes_in_unique_biclusters(
    unique_biclusters,
    related_biclusters_and_genes_for_each_input_gene,
)

In [10]:
# Keep only the genes that were not part of the input gene set, together with
# their occurrence counts (gene id -> count, as shown when the result is displayed).
dict_of_genes_in_unique_biclusters_not_in_inputs = (
    GeneCoocurrenceByBiclusterObject.genes_in_unique_biclusters_not_in_input_gene_list(
        curated_geneset,
        genes_in_unique_biclusters,
    )
)

In [11]:
# Display the gene -> occurrence-count mapping (a defaultdict; the rendered output is long and gets cut off).
dict_of_genes_in_unique_biclusters_not_in_inputs

defaultdict(dict,
            {'ncbigene:60496': 1,
             'ncbigene:11057': 1,
             'ncbigene:84448': 1,
             'ncbigene:55902': 1,
             'ncbigene:8165': 2,
             'ncbigene:8852': 1,
             'ncbigene:10327': 1,
             'ncbigene:144245': 3,
             'ncbigene:23452': 1,
             'ncbigene:415': 1,
             'ncbigene:64422': 2,
             'ncbigene:476': 1,
             'ncbigene:27087': 1,
             'ncbigene:283870': 3,
             'ncbigene:686': 2,
             'ncbigene:10950': 1,
             'ncbigene:150590': 2,
             'ncbigene:285668': 1,
             'ncbigene:55262': 1,
             'ncbigene:157657': 1,
             'ncbigene:64753': 1,
             'ncbigene:1184': 1,
             'ncbigene:9075': 1,
             'ncbigene:79827': 1,
             'ncbigene:1690': 3,
             'ncbigene:11151': 1,
             'ncbigene:79958': 3,
             'ncbigene:1742': 1,
             'ncbigene:27000': 4,
   

## The above is a dictionary of the genes that appear in uniquely occurring biclusters for the given set of input genes, excluding the input genes themselves. Its form is {output_gene: number of occurrences of this gene across the relevant biclusters}.

## The dictionary shown above is truncated by Python when displayed... Let's find the genes with the most occurrences.

In [12]:
# Reload biclusterco so that edits to the module are picked up without restarting the kernel.
# NOTE(review): GeneCoocurrenceByBiclusterObject was created before this reload, so it is still
# an instance of the pre-reload class; re-create it if the edited code is meant to be used.
importlib.reload(biclusterco) # this is a nice thing for local module development

<module 'biclusterco' from '/Users/colincurtis/Documents/renci/translator-workflows/WorkFlow9/biclusterco.py'>

In [13]:
# Rank the output genes by occurrence count; the displayed result is a list of
# (count, gene id) tuples with the highest counts first.
sorted_list_of_output_genes = GeneCoocurrenceByBiclusterObject.sorted_list_of_output_genes(
    dict_of_genes_in_unique_biclusters_not_in_inputs
)

In [14]:
# Show the ranked (count, gene id) tuples; the most frequently co-occurring genes come first.
sorted_list_of_output_genes

[(15, 'ncbigene:8558'),
 (15, 'ncbigene:701'),
 (15, 'ncbigene:113220'),
 (14, 'ncbigene:891'),
 (14, 'ncbigene:54892'),
 (13, 'ncbigene:90990'),
 (13, 'ncbigene:8788'),
 (13, 'ncbigene:64946'),
 (13, 'ncbigene:348235'),
 (13, 'ncbigene:29896'),
 (13, 'ncbigene:26747'),
 (12, 'ncbigene:9702'),
 (12, 'ncbigene:9585'),
 (12, 'ncbigene:8879'),
 (12, 'ncbigene:83879'),
 (12, 'ncbigene:55107'),
 (12, 'ncbigene:51072'),
 (12, 'ncbigene:3162'),
 (12, 'ncbigene:29781'),
 (12, 'ncbigene:285643'),
 (12, 'ncbigene:148581'),
 (12, 'ncbigene:11127'),
 (11, 'ncbigene:85417'),
 (11, 'ncbigene:6738'),
 (11, 'ncbigene:5810'),
 (11, 'ncbigene:57479'),
 (11, 'ncbigene:57448'),
 (11, 'ncbigene:55614'),
 (11, 'ncbigene:54829'),
 (11, 'ncbigene:51659'),
 (11, 'ncbigene:4752'),
 (11, 'ncbigene:4750'),
 (11, 'ncbigene:432'),
 (11, 'ncbigene:347240'),
 (11, 'ncbigene:282809'),
 (11, 'ncbigene:25885'),
 (11, 'ncbigene:197342'),
 (11, 'ncbigene:10769'),
 (10, 'ncbigene:9184'),
 (10, 'ncbigene:89839'),
 (10, 'ncb