# Workflow 8 - FA

This notebook implements all three modules of Workflow 8 for Fanconi Anemia. See <a href="https://docs.google.com/presentation/d/1IkAzjSrOMzOLx5z8GqRVKmVd1GKrpIEb_xF4g4RlI1U/edit?usp=sharing">here</a> for an overview of Workflow 8.  

Notebook written by: Samson Fong, John Earls, Theo Knijnenburg, Chris Churas and Aaron Gary. 

## Libraries and such

In [13]:
%load_ext autoreload 
%autoreload 2

import json
from pprint import pprint
from wf8_module1 import doid_to_genes_and_tissues, doid_to_genes_direct
from wf8_module2 import call_biggim
import numpy as np
import pandas as pd
import time
import sys
sys.path.insert(0, "wf8")
from wf8_module3 import DDOT_Client

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Module 1 - Getting FA genes and tissues

In [14]:
# '13636' is the Disease Ontology ID (DOID) this notebook uses for Fanconi Anemia.
# Look up the genes linked to that DOID; the recorded output reports 22 genes.
genes = doid_to_genes_direct('13636')

INFO:root:Geting HP ids from DOID


http://biothings.io/explorer/api/v2/directinput2output?input_prefix=doid&output_prefix=ncbigene&input_value=13636&format=translator
Returned 22 genes


In [5]:
# Fetch both the genes and the tissues for DOID 13636 (Fanconi Anemia).
# This call returns 20 tissues and takes a while to run.
# NOTE(review): `tissues` is overwritten by the hard-coded list in the next cell,
# so this slow call looks optional on re-runs; confirm before skipping it.
genes,tissues = doid_to_genes_and_tissues('13636',direct=True,N=1000)

INFO:root:Geting HP ids from DOID


http://biothings.io/explorer/api/v2/directinput2output?input_prefix=doid&output_prefix=ncbigene&input_value=13636&format=translator
Returned 22 genes


In [15]:
# The 20 candidate tissues returned for Fanconi Anemia by Module 1, hard-coded
# here so the slow tissue lookup does not have to be re-run.
candidate_tissues = ['animal', 'aorta', 'artery', 'bone', 'brain_ventricle', 'cardiovascular_system', 'cornea', 'duodenum', 'esophagus', 'gonad', 'head', 'heart', 'hematopoietic_system', 'lens', 'liver', 'neck', 'testis', 'trachea', 'umbilical_cord', 'uterus']
print(candidate_tissues)
print(len(candidate_tissues))

# Maureen selected 4 of those candidates; these are the tissues used downstream.
tissues = ['esophagus','hematopoietic_system', 'neck', 'trachea']

# Guard against typos in the hand-picked list: every selected tissue must be
# one of the candidates above.
unknown_tissues = sorted(set(tissues) - set(candidate_tissues))
assert not unknown_tissues, "Selected tissues not among the candidates: {}".format(unknown_tissues)
print(tissues)

['animal', 'aorta', 'artery', 'bone', 'brain_ventricle', 'cardiovascular_system', 'cornea', 'duodenum', 'esophagus', 'gonad', 'head', 'heart', 'hematopoietic_system', 'lens', 'liver', 'neck', 'testis', 'trachea', 'umbilical_cord', 'uterus']
20
['esophagus', 'hematopoietic_system', 'neck', 'trachea']


## Module 2 - Getting FA interacting genes
We ask for N=200 new genes.

In [16]:
# First Big GIM round: query with the FA genes and the selected tissues,
# asking for N=200 genes back. With return_genes=True the result (printed in
# the next cell) is a list of gene ID strings.
# NOTE(review): average_columns=True presumably averages the returned Big GIM
# columns into a single score; confirm in wf8_module2.
newgenes = call_biggim(genes, tissues, average_columns=True, return_genes=True, N=200)

Sent: GET http://biggim.ncats.io/api/metadata/tissue/esophagus?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/hematopoietic_system?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/neck?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/trachea?None
Returned 56 Big GIM columns
['GIANT_trachea_KnownFunctionalInteraction', 'GIANT_esophagus_ProbabilityOfFunctionalInteraction', 'GIANT_thyroid_gland_KnownFunctionalInteraction', 'GIANT_spleen_KnownFunctionalInteraction', 'GIANT_leukocyte_KnownFunctionalInteraction', 'GIANT_serum_KnownFunctionalInteraction', 'GIANT_macrophage_ProbabilityOfFunctionalInteraction', 'GIANT_tonsil_ProbabilityOfFunctionalInteraction', 'GIANT_b_lymphocyte_KnownFunctionalInteraction', 'GIANT_hematopoietic_stem_cell_KnownFunctionalInteraction', 'GIANT_thymocyte_KnownFunctionalInteraction', 'GIANT_megakaryocyte_ProbabilityOfFunctionalInteraction', 'GIANT_granulocyte_ProbabilityOfFunctionalInteraction', 'GIANT_mononuclear_phagocyte_ProbabilityO

In [17]:
# Show the gene IDs returned by Big GIM, followed by how many there are
# (each value on its own line).
print(newgenes, len(newgenes), sep="\n")

['10036', '10038', '10051', '10112', '10198', '1033', '10346', '10403', '10459', '10459', '10561', '1058', '10592', '10615', '1062', '1063', '1070', '10733', '10926', '10964', '11004', '11065', '11073', '1111', '11130', '11169', '11200', '11339', '1164', '1434', '146909', '1479', '1633', '1719', '1786', '1854', '2072', '2146', '2175', '2175', '2176', '2176', '2177', '2177', '2178', '2178', '2187', '2188', '2189', '2189', '2237', '22974', '23234', '23262', '23310', '23586', '24137', '2537', '25788', '25842', '259266', '26271', '2633', '27338', '29028', '29089', '29089', '29127', '3014', '3070', '3109', '3123', '3148', '3161', '3383', '3429', '3430', '3431', '3433', '3434', '3437', '3553', '3627', '3659', '3832', '3838', '3965', '4001', '4061', '4085', '4171', '4172', '4173', '4174', '4175', '4176', '4288', '4436', '4599', '4599', '4600', '4605', '4678', '4938', '4939', '4998', '5001', '51001', '51053', '5111', '51203', '51512', '51514', '51659', '5359', '5378', '5427', '5437', '54739', 

## Module 2 - Running Big GIM for the second round, now producing the interaction matrix (df) that serves as input for DDOT

In [18]:
# Second Big GIM round: query the expanded gene list against itself
# (query_id2=newgenes). The result printed below is a table with columns
# Gene1, Gene2 and mean, i.e. one row per gene pair.
# NOTE(review): the printed `newgenes` list contains repeated IDs
# (e.g. '10459', '2175'); confirm call_biggim tolerates duplicates or
# de-duplicate the list before this call.
df = call_biggim(newgenes, tissues, average_columns=True, query_id2=newgenes)

Sent: GET http://biggim.ncats.io/api/metadata/tissue/esophagus?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/hematopoietic_system?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/neck?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/trachea?None
Returned 56 Big GIM columns
['GIANT_trachea_KnownFunctionalInteraction', 'GIANT_esophagus_ProbabilityOfFunctionalInteraction', 'GIANT_thyroid_gland_KnownFunctionalInteraction', 'GIANT_spleen_KnownFunctionalInteraction', 'GIANT_leukocyte_KnownFunctionalInteraction', 'GIANT_serum_KnownFunctionalInteraction', 'GIANT_macrophage_ProbabilityOfFunctionalInteraction', 'GIANT_tonsil_ProbabilityOfFunctionalInteraction', 'GIANT_b_lymphocyte_KnownFunctionalInteraction', 'GIANT_hematopoietic_stem_cell_KnownFunctionalInteraction', 'GIANT_thymocyte_KnownFunctionalInteraction', 'GIANT_megakaryocyte_ProbabilityOfFunctionalInteraction', 'GIANT_granulocyte_ProbabilityOfFunctionalInteraction', 'GIANT_mononuclear_phagocyte_ProbabilityO

In [19]:
# Print the gene-pair interaction table as plain text; for a long frame the
# text output elides the middle rows (shown as "...").
# NOTE(review): consider `df.head()` as the last expression for a compact, rich display.
print(df)


        Gene1  Gene2      mean
0       11004   9232  0.368221
1        6672   4061  0.129451
2        6119   1062  0.234955
3       90417   7468  0.211236
4        4938   3627  0.319159
5        9700   9212  0.345341
6       79017  11200  0.138107
7        3832   2178  0.111251
8       51001   7398  0.220075
9       55723  10926  0.180632
10       7516   6240  0.067369
11      11339   4436  0.261415
12       4288   4175  0.337594
13      29028   9787  0.331672
14     146909   6240  0.237392
15      51053   5427  0.335962
16       7072     86  0.215559
17       2176    990  0.055694
18      29089   4288  0.157703
19      54809   1164  0.058256
20     259266   7112  0.266163
21      54892   4061  0.037102
22     146909   9700  0.270933
23       4176    672  0.336263
24      10733   9055  0.287064
25      10038   4171  0.252080
26      10112   5984  0.330004
27       8624   5111  0.275214
28      81620  22974  0.319053
29      55215   4436  0.291501
...       ...    ...       ...
15262   

### Create DDOT

In [20]:
# Build a DDOT client from the Big GIM interaction table (columns Gene1, Gene2, mean).
# NOTE(review): which column DDOT_Client treats as the edge weight is not visible
# here; confirm in wf8_module3.
ddot = DDOT_Client.from_dataframe(df)

In [21]:
# Start the DDOT run with alpha=0.03 and beta=0.8.
# NOTE(review): the meaning of alpha/beta is not visible in this notebook;
# see wf8_module3 / the DDOT documentation.
ddot.call(alpha=0.03, beta=0.8)
# Last expression of the cell, so its return value is displayed; in the recorded
# run this is a HiView URL string. Presumably waits until the result is ready; confirm.
ddot.wait_for_hiview_url()

'http://hiview-test.ucsd.edu/26316667-410e-11e9-9fc6-0660b7976219?type=test&server=http://dev2.ndexbio.org'