# Workflow 8 - FA

This notebook implements all three modeuls of Workflow 8 for Fanconi Anemia. See <a href="https://docs.google.com/presentation/d/1IkAzjSrOMzOLx5z8GqRVKmVd1GKrpIEb_xF4g4RlI1U/edit?usp=sharing">here</a> for an overview of Workflow 8.  

Notebook written by: Samson Fong, John Earls, Theo Knijnenburg, Chris Churas and Aaron Gary. 

## Libraries and such

In [1]:
%load_ext autoreload
%autoreload 2

import json
from pprint import pprint
from wf8_module1 import doid_to_genes_and_tissues, doid_to_genes_direct
from wf8_module2 import call_biggim
import numpy as np
import pandas as pd
import time
import sys
sys.path.insert(0, "wf8")
from wf8_module3 import DDOT_Client

ModuleNotFoundError: No module named 'autoreload '

## Module 1 - Getting FA genes and tissues

In [2]:
genes = doid_to_genes_direct('13636')

INFO:root:Geting HP ids from DOID


http://biothings.io/explorer/api/v2/directinput2output?input_prefix=doid&output_prefix=ncbigene&input_value=13636&format=translator
Returned 22 genes


In [5]:
genes,tissues = doid_to_genes_and_tissues('13636',direct=True,N=1000)
#will return 20 tissues, this will take a while

INFO:root:Geting HP ids from DOID


http://biothings.io/explorer/api/v2/directinput2output?input_prefix=doid&output_prefix=ncbigene&input_value=13636&format=translator
Returned 22 genes


In [3]:
#here are the 20 tissues; Maureen selected 4 from thos
tissues = ['animal', 'aorta', 'artery', 'bone', 'brain_ventricle', 'cardiovascular_system', 'cornea', 'duodenum', 'esophagus', 'gonad', 'head', 'heart', 'hematopoietic_system', 'lens', 'liver', 'neck', 'testis', 'trachea', 'umbilical_cord', 'uterus']
print(tissues)
print(len(tissues))
tissues = ['esophagus','hematopoietic_system', 'neck', 'trachea']
print(tissues)

['animal', 'aorta', 'artery', 'bone', 'brain_ventricle', 'cardiovascular_system', 'cornea', 'duodenum', 'esophagus', 'gonad', 'head', 'heart', 'hematopoietic_system', 'lens', 'liver', 'neck', 'testis', 'trachea', 'umbilical_cord', 'uterus']
20
['esophagus', 'hematopoietic_system', 'neck', 'trachea']


## Module 2 - Getting FA interacting genes
We ask for N=200 new genes.

In [4]:
newgenes = call_biggim(genes, tissues, average_columns=True, return_genes=True, N=200)

Sent: GET http://biggim.ncats.io/api/metadata/tissue/esophagus?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/hematopoietic_system?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/neck?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/trachea?None
Returned 56 Big GIM columns
['GIANT_eosinophil_KnownFunctionalInteraction', 'GIANT_trachea_KnownFunctionalInteraction', 'GIANT_hematopoietic_stem_cell_ProbabilityOfFunctionalInteraction', 'GIANT_blood_plasma_ProbabilityOfFunctionalInteraction', 'GIANT_esophagus_KnownFunctionalInteraction', 'GTEx_Esophagus_Correlation', 'GTEx_Spleen_Correlation', 'GIANT_leukocyte_ProbabilityOfFunctionalInteraction', 'GIANT_basophil_KnownFunctionalInteraction', 'GIANT_dendritic_cell_ProbabilityOfFunctionalInteraction', 'GIANT_lymphocyte_KnownFunctionalInteraction', 'GIANT_tonsil_KnownFunctionalInteraction', 'GIANT_spleen_ProbabilityOfFunctionalInteraction', 'GIANT_dendritic_cell_KnownFunctionalInteraction', 'GIANT_lymphocyte_Probabil

In [5]:
print(newgenes)
print(len(newgenes))

['10036', '10038', '10051', '10112', '10198', '1033', '10346', '10403', '10459', '10459', '10561', '1058', '10592', '10615', '1062', '1063', '1070', '10733', '10926', '10964', '11004', '11065', '11073', '1111', '11130', '11169', '11200', '11339', '1164', '1434', '146909', '1479', '1633', '1719', '1786', '1854', '2072', '2146', '2175', '2175', '2176', '2176', '2177', '2177', '2178', '2178', '2187', '2188', '2189', '2189', '2237', '22974', '23234', '23262', '23310', '23586', '24137', '2537', '25788', '25842', '259266', '26271', '2633', '27338', '29028', '29089', '29089', '29127', '3014', '3070', '3109', '3123', '3148', '3161', '3383', '3429', '3430', '3431', '3433', '3434', '3437', '3553', '3627', '3659', '3832', '3838', '3965', '4001', '4061', '4085', '4171', '4172', '4173', '4174', '4175', '4176', '4288', '4436', '4599', '4599', '4600', '4605', '4678', '4938', '4939', '4998', '5001', '51001', '51053', '5111', '51203', '51512', '51514', '51659', '5359', '5378', '5427', '5437', '54739', 

## Module 2- Running Big GIM for the second round, now producing the interaction matrix (df) that can serve as an input for DDOT

In [6]:
df = call_biggim(newgenes, tissues, average_columns=True, query_id2=newgenes)

Sent: GET http://biggim.ncats.io/api/metadata/tissue/esophagus?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/hematopoietic_system?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/neck?None
Sent: GET http://biggim.ncats.io/api/metadata/tissue/trachea?None
Returned 56 Big GIM columns
['GIANT_eosinophil_KnownFunctionalInteraction', 'GIANT_trachea_KnownFunctionalInteraction', 'GIANT_hematopoietic_stem_cell_ProbabilityOfFunctionalInteraction', 'GIANT_blood_plasma_ProbabilityOfFunctionalInteraction', 'GIANT_esophagus_KnownFunctionalInteraction', 'GTEx_Esophagus_Correlation', 'GTEx_Spleen_Correlation', 'GIANT_leukocyte_ProbabilityOfFunctionalInteraction', 'GIANT_basophil_KnownFunctionalInteraction', 'GIANT_dendritic_cell_ProbabilityOfFunctionalInteraction', 'GIANT_lymphocyte_KnownFunctionalInteraction', 'GIANT_tonsil_KnownFunctionalInteraction', 'GIANT_spleen_ProbabilityOfFunctionalInteraction', 'GIANT_dendritic_cell_KnownFunctionalInteraction', 'GIANT_lymphocyte_Probabil

In [7]:
print(df)


        Gene1  Gene2      mean
0       83990   9055  0.172713
1        8638   5359  0.235809
2       55055  24137  0.210224
3        4436   2177  0.142053
4        8726   6772  0.153772
5       23262    891  0.066324
6        7157   3109  0.072104
7        9232    991  0.379232
8       25788   6241  0.255296
9         899    890  0.355783
10       9246   6672  0.348175
11       9700   1062  0.248130
12       4176   2237  0.277309
13      25842    891  0.179842
14       9232    890  0.368362
15      64151  10733  0.333228
16      55143    990  0.313073
17       2176   1479  0.050917
18      10561   1070  0.064060
19      24137   5427  0.327218
20      51512   5557  0.288119
21      55723   2188  0.035606
22       6790   3965  0.041469
23     146909   4998  0.220369
24       3014   2237  0.278706
25      10561  10346  0.371767
26       4172    991  0.320819
27       8624   5932  0.245307
28     146909   5437  0.084050
29       9700   8638  0.054235
...       ...    ...       ...
15262   

### Create DDOT

In [8]:
ddot = DDOT_Client.from_dataframe(df)

In [12]:
ddot.call(alpha=0.03, beta=0.8)
ddot.wait_for_hiview_url()

'http://hiview-test.ucsd.edu/64e86f23-410f-11e9-9fc6-0660b7976219?type=test&server=http://dev2.ndexbio.org'