# This notebook demonstrates Workflow 9

In [1]:
import biclusterco, importlib, asyncio

In [2]:
# Create the helper object whose methods are called for every step of this workflow.
GeneCoocurrenceByBiclusterObject = biclusterco.GeneCoocurrenceByBicluster()

# In the cell below, provide the URL of a text file containing a list of genes, as shown. The rest of the notebook will then run on its own.

In [6]:
# Load the input gene set from the text file at the given URL.
# The result is displayed below as a list of 'ncbigene:<id>' strings.
curated_geneset = GeneCoocurrenceByBiclusterObject.run_getinput('https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_1_core_complex.txt')

In [4]:
# Optional debugging shortcut: set the flag to True to replace the gene set loaded
# above with a single gene. It is left False so that a top-to-bottom run
# (Restart & Run All) analyses the full gene set, which is what the outputs
# recorded in this notebook correspond to.
USE_SINGLE_GENE_INPUT = False
if USE_SINGLE_GENE_INPUT:
    curated_geneset = ['ncbigene:2187']

In [7]:
# Show the gene identifiers that will be used as input.
curated_geneset

['ncbigene:2175',
 'ncbigene:2187',
 'ncbigene:2176',
 'ncbigene:2178',
 'ncbigene:2188',
 'ncbigene:2189',
 'ncbigene:55120',
 'ncbigene:57697',
 'ncbigene:2177',
 'ncbigene:55215',
 'ncbigene:29089']

In [8]:
# Get the asyncio event loop used to drive the asynchronous bicluster lookup in the next cell.
# NOTE(review): on recent Jupyter kernels an event loop is typically already running, in which
# case loop.run_until_complete(...) raises RuntimeError — confirm against the target
# environment (a top-level `await` may be needed instead).
loop = asyncio.get_event_loop()

In [9]:
# Block until the awaitable returned by find_related_biclusters_async(...) has finished
# for the input gene set. The result feeds bicluster_occurences_dict and
# genes_in_unique_biclusters in the cells below.
related_biclusters_and_genes_for_each_input_gene = loop.run_until_complete(GeneCoocurrenceByBiclusterObject.find_related_biclusters_async(curated_geneset))

In [10]:
# Build the bicluster occurrence dict from the per-gene lookup results.
# Presumably maps each bicluster to how often it occurs across the input genes —
# TODO confirm against biclusterco.
bicluster_occurences_dict = GeneCoocurrenceByBiclusterObject.bicluster_occurences_dict(related_biclusters_and_genes_for_each_input_gene)

In [11]:
# Reduce the occurrence dict to what the library calls the unique biclusters
# (displayed below as a list of bicluster ID strings).
unique_biclusters = GeneCoocurrenceByBiclusterObject.unique_biclusters(bicluster_occurences_dict)

In [12]:
# Number of unique biclusters found.
len(unique_biclusters)

58

In [10]:
# Show the IDs of the unique biclusters.
unique_biclusters

['480',
 '653',
 '1085',
 '169',
 '604',
 '626',
 '643',
 '775',
 '799',
 '921',
 '452',
 '456',
 '679',
 '686',
 '764',
 '1313',
 '251',
 '615',
 '698',
 '18',
 '204',
 '323',
 '482',
 '555',
 '1221',
 '176',
 '569',
 '717',
 '864',
 '1303',
 '168',
 '610',
 '660',
 '680',
 '740',
 '748',
 '1116',
 '1342',
 '196',
 '320',
 '365',
 '1073',
 '1329',
 '1355',
 '268',
 '611',
 '47',
 '158',
 '518',
 '720',
 '25',
 '331',
 '471',
 '504',
 '663',
 '809',
 '912',
 '964']

In [11]:
# Collect the genes belonging to the unique biclusters, using the per-gene lookup results.
# NOTE(review): the exact structure of the return value is not visible here — check biclusterco.
genes_in_unique_biclusters = GeneCoocurrenceByBiclusterObject.genes_in_unique_biclusters(unique_biclusters, related_biclusters_and_genes_for_each_input_gene)

In [12]:
# Count occurrences of the genes found in the unique biclusters, leaving out the
# input genes themselves (per the method name). The result, displayed below,
# is a dict of {gene: occurrence count}.
dict_of_genes_in_unique_biclusters_not_in_inputs = GeneCoocurrenceByBiclusterObject.genes_in_unique_biclusters_not_in_input_gene_list(curated_geneset, genes_in_unique_biclusters)

In [13]:
# Display the {gene: occurrence count} mapping (long output is cut off in the rendered notebook).
dict_of_genes_in_unique_biclusters_not_in_inputs

defaultdict(dict,
            {'ncbigene:9582': 4,
             'ncbigene:9824': 7,
             'ncbigene:79915': 7,
             'ncbigene:6790': 7,
             'ncbigene:9212': 8,
             'ncbigene:332': 5,
             'ncbigene:641': 3,
             'ncbigene:699': 10,
             'ncbigene:701': 6,
             'ncbigene:80178': 4,
             'ncbigene:57082': 4,
             'ncbigene:9133': 7,
             'ncbigene:991': 4,
             'ncbigene:995': 5,
             'ncbigene:157313': 4,
             'ncbigene:113130': 8,
             'ncbigene:55143': 7,
             'ncbigene:983': 7,
             'ncbigene:81620': 4,
             'ncbigene:1058': 5,
             'ncbigene:1063': 9,
             'ncbigene:64946': 4,
             'ncbigene:79019': 4,
             'ncbigene:79172': 4,
             'ncbigene:55165': 6,
             'ncbigene:150468': 8,
             'ncbigene:63967': 4,
             'ncbigene:55789': 5,
             'ncbigene:81624': 4,
             

## The dictionary above lists the genes found in the unique biclusters for the given set of input genes, excluding the input genes themselves. Its form is {output_gene: number of occurrences of this gene across the relevant biclusters}.

## The dictionary output above is truncated by the notebook display. Let's sort the genes to find those with the most occurrences.

In [35]:
# Development convenience: reload biclusterco so that edits to the module are
# picked up without restarting the kernel (handy for local module development).
# NOTE(review): the object created at the top of the notebook is not re-created
# here, so it presumably keeps using the class as it was when instantiated — confirm.
importlib.reload(biclusterco)

<module 'biclusterco' from '/Users/colincurtis/Documents/renci/translator-workflows/WorkFlow9/biclusterco.py'>

In [14]:
# Order the output genes by occurrence count. The result, displayed below,
# is a list of (count, gene) tuples with the highest counts first.
sorted_list_of_output_genes = GeneCoocurrenceByBiclusterObject.sorted_list_of_output_genes(dict_of_genes_in_unique_biclusters_not_in_inputs)

In [15]:
# Display the ranked (count, gene) tuples; the most frequently occurring genes come first.
sorted_list_of_output_genes

[(31, 'ncbigene:1'),
 (10, 'ncbigene:699'),
 (10, 'ncbigene:3833'),
 (9, 'ncbigene:9787'),
 (9, 'ncbigene:64151'),
 (9, 'ncbigene:4751'),
 (9, 'ncbigene:24137'),
 (9, 'ncbigene:1063'),
 (8, 'ncbigene:9833'),
 (8, 'ncbigene:9212'),
 (8, 'ncbigene:890'),
 (8, 'ncbigene:83540'),
 (8, 'ncbigene:56992'),
 (8, 'ncbigene:4998'),
 (8, 'ncbigene:3832'),
 (8, 'ncbigene:150468'),
 (8, 'ncbigene:113130'),
 (7, 'ncbigene:9837'),
 (7, 'ncbigene:983'),
 (7, 'ncbigene:9824'),
 (7, 'ncbigene:9493'),
 (7, 'ncbigene:9133'),
 (7, 'ncbigene:79915'),
 (7, 'ncbigene:7272'),
 (7, 'ncbigene:6790'),
 (7, 'ncbigene:55723'),
 (7, 'ncbigene:55247'),
 (7, 'ncbigene:55143'),
 (7, 'ncbigene:5347'),
 (7, 'ncbigene:51203'),
 (7, 'ncbigene:29127'),
 (7, 'ncbigene:22974'),
 (7, 'ncbigene:220134'),
 (7, 'ncbigene:157570'),
 (7, 'ncbigene:11065'),
 (7, 'ncbigene:10460'),
 (7, 'ncbigene:10112'),
 (7, 'ncbigene:10024'),
 (6, 'ncbigene:9928'),
 (6, 'ncbigene:9700'),
 (6, 'ncbigene:83990'),
 (6, 'ncbigene:79801'),
 (6, 'ncbige

In [16]:
# Total number of output genes in the ranked list.
len(sorted_list_of_output_genes)

4486