In [1]:
import os

from main import init_system
from api.apiutils import Relation
# Location of the stored model to load. Set the DISCOVERY_MODEL_PATH
# environment variable to point at a different file; the default is the
# original hard-coded location, so existing setups keep working unchanged.
# NOTE(review): the file has a .pickle extension, so it is presumably
# unpickled by init_system -- only load model files from a trusted source.
MODEL_PATH = os.environ.get(
    "DISCOVERY_MODEL_PATH",
    "/Users/ra-mit/development/discovery_proto/test/all_chemical.pickle",
)

# init_system returns the API object used by every query below, together
# with a reporting object used by the final cells.
api, reporting = init_system(MODEL_PATH)

Loading: */Users/ra-mit/development/discovery_proto/test/all_chemical.pickle*

### Help Menu

You can use the system through an **API** object. API objects are returned by the *init_system* function, so you can get one by doing:

***your_api_object = init_system('path_to_stored_model')***

Once you have access to an API object there are a few concepts that are useful to use the API. **content** refers to actual values of a given field. For example, if you have a table with an attribute called __Name__ and values *Olu, Mike, Sam*, content refers to the actual values, e.g. Mike, Sam, Olu.

**schema** refers to the name of a given field. In the previous example, schema refers to the word __Name__, as that is what the field is called.

Finally, **entity** refers to the *semantic type* of the content. This is in experimental state. For the previous example it would return *'person'* as that's what those names refer to.

Certain functions require a *field* as input. In general a field is specified by the source name (e.g. table name) and the field name (e.g. attribute name). For example, if we are interested in finding content similar to the one of the attribute *year* in the table *Employee* we can provide the field in the following way:

field = ('Employee', 'year') # field = (<source_name>, <field_name>)


### 1.  Find columns with schema names similar to X

In [3]:
# Search field (column) names for the term and list the matching columns.
ligand_term = "ligand"
ligand_columns = api.schema_name_search(ligand_term)
ligand_columns.pretty_print_columns()

SOURCE: KiDatabase.csv			 FIELD  Ligand Name
SOURCE: KiDatabase.csv			 FIELD Ligand ID


### 2.  Find columns that contain value X

In [4]:
# Search the stored column contents for the keyword and list each hit.
caffeine_hits = api.keyword_search("caffeine")
caffeine_hits.print_columns()

Hit(nid='-1158539825', source_name='KiDatabase.csv', field_name=' Ligand Name', score=0.40522423)
Hit(nid='1165321500', source_name='drug_interactions', field_name='description', score=0.2963111)
Hit(nid='147483850', source_name='formulations', field_name='ingredient', score=0.27028728)
Hit(nid='1984994667', source_name='drug_interactions', field_name='name', score=0.26401526)
Hit(nid='1829363994', source_name='BindingDB_All_clean.csv', field_name='BindingDB Ligand Name', score=0.26348928)
Hit(nid='-1899566326', source_name='reference', field_name='reference_name', score=0.25669906)


### 3.  Find columns whose content is similar to column X

In [None]:
# A field is identified by a (source_name, field_name) pair.
reference_field = ('drug_interactions', 'drug_id1')

# Turn the raw field into the DRS object the API expects, then list the
# columns whose content is similar to it.
reference_drs = api.drs_from_raw_field(reference_field)
content_matches = api.similar_content_to(reference_drs)
content_matches.print_columns()

### 4.  Find tables that contain a column with the name X

In [None]:
# Same schema-name search as in section 1, but report the owning tables
# instead of the individual columns.
wanted_column = "ingredient"
ingredient_matches = api.schema_name_search(wanted_column)
ingredient_matches.print_tables()

### 5.  Find a virtual schema

In [7]:
# Virtual schema: SMILES, CAS, name
# For each attribute of the virtual schema, find the columns whose name
# matches it and pass the result through api.table().
res_smiles = api.schema_name_search("SMILES", max_results=100)
res_smiles = api.table(res_smiles)

res_CAS = api.schema_name_search("CAS", max_results=100)
res_CAS = api.table(res_CAS)

res_name = api.schema_name_search("name", max_results=100)
res_name = api.table(res_name)

# Tables matching both SMILES and CAS.
res1 = api.intersection(res_smiles, res_CAS)
# Rebind the result of api.table(), as done for the three searches above;
# the original call discarded its return value.
res1 = api.table(res1)
# Tables matching SMILES, CAS and name.
res2 = api.intersection(res1, res_name)

# NOTE(review): res2 is derived from res1 by intersection, so this union
# presumably adds nothing beyond res1 -- confirm whether res2 alone (all
# three attributes) was the intended final result.
real_res = api.union(res1, res2)

real_res.print_tables()


KiDatabase.csv


### 7.  Find tables that are similar to table T

In [9]:
# Resolve the table to a DRS object and list the tables with similar content.
reference_table = "drug_indication"
reference_table_drs = api.drs_from_table(reference_table)
similar_tables = api.similar_content_to(reference_table_drs)
similar_tables.print_tables()

compound_properties


### 8.  Find columns on which tables X and Y join

#### Example 1

In [10]:
# Resolve both tables to DRS objects, then ask for the PKFK paths between them.
source_drs = api.drs_from_table("molecule_dictionary")
target_drs = api.drs_from_table("ontology")

# `res` keeps its name on purpose: the why_id / how_id cells that follow
# this example read it.
res = api.paths_between(source_drs, target_drs, Relation.PKFK)
res.print_columns()

Hit(nid=1699088556, source_name='ontology', field_name='id', score=1.0)


In [None]:
# nid of the ontology:id hit printed by the PKFK search in Example 1.
ontology_id_nid = 1699088556
res.why_id(ontology_id_nid)

In [11]:
# nid of the ontology:id hit printed by the PKFK search in Example 1.
ontology_id_nid = 1699088556
res.how_id(ontology_id_nid)

['molecule_dictionary:inorganic_flag -> OP.PKFK , -> ontology:id\n',
 'molecule_dictionary:first_in_class -> OP.PKFK , -> ontology:id\n',
 'molecule_dictionary:availability_type -> OP.PKFK , -> ontology:id\n',
 'molecule_dictionary:natural_product -> OP.PKFK , -> ontology:id\n',
 'molecule_dictionary:chirality -> OP.PKFK , -> ontology:id\n',
 'molecule_dictionary:prodrug -> OP.PKFK , -> ontology:id\n']

#### Example 2

In [None]:
# Same PKFK path query as Example 1, for a different pair of tables.
companies_drs = api.drs_from_table("research_companies")
stem_drs = api.drs_from_table("research_stem")
company_stem_paths = api.paths_between(companies_drs, stem_drs, Relation.PKFK)
company_stem_paths.print_columns()

### 9.  Find join paths between X and Y

In [12]:
# Query the PKFK paths between the two tables and display them as the
# cell's result (the trailing expression is what the notebook renders).
stem_drs = api.drs_from_table("research_stem")
usan_drs = api.drs_from_table("usan_stems")
stem_to_usan = api.paths_between(stem_drs, usan_drs, Relation.PKFK)
stem_to_usan.paths()

[[Hit(nid=1008445607, source_name='research_stem', field_name='res_stem_id', score=-1),
  Hit(nid=1901999977, source_name='research_companies', field_name='res_stem_id', score=1.0)],
 [Hit(nid=1008445607, source_name='research_stem', field_name='res_stem_id', score=-1),
  Hit(nid=1279896639, source_name='usan_stems', field_name='usan_stem_id', score=1.0)]]

### 10. Find all tables that join on table T

#### Example 1

In [13]:
# List the tables that pkfk_of reports for research_stem.
stem_drs = api.drs_from_table("research_stem")
stem_joinable = api.pkfk_of(stem_drs)
stem_joinable.print_tables()

usan_stems
research_companies


#### Example 2

In [15]:
# Same pkfk_of query for the drugs table, reported at column level.
drugs_drs = api.drs_from_table("drugs")
drugs_joinable = api.pkfk_of(drugs_drs)
drugs_joinable.print_columns()

Hit(nid=3221333167, source_name='drugs_vw', field_name='name', score=1.0)


#### Example 3

In [None]:
# Same pkfk_of query for drug_interactions, reported at table level.
interactions_drs = api.drs_from_table("drug_interactions")
interactions_joinable = api.pkfk_of(interactions_drs)
interactions_joinable.print_tables()

### 11. Find all columns that can be added to table T

In [None]:
# TODO: this cell is a placeholder. It runs the same pkfk_of query on "drugs"
# as Example 2 of section 10 and prints the resulting columns; it does not
# yet implement "find all columns that can be added to table T".
table1 = "drugs"
drs_t1 = api.drs_from_table(table1)
res = api.pkfk_of(drs_t1)
res.print_columns()

In [16]:
# Print every PKFK relation in the loaded model, one "Hit - Hit" pair per line.
# NOTE(review): the output is long; consider clearing it before sharing the notebook.
reporting.print_pkfk_relations()

Hit(nid=2728516608, source_name='molecule_dictionary', field_name='inorganic_flag', score=-1) - Hit(nid=1699088556, source_name='ontology', field_name='id', score=1.0)
Hit(nid=1699088556, source_name='ontology', field_name='id', score=-1) - Hit(nid=2728516608, source_name='molecule_dictionary', field_name='inorganic_flag', score=1.0)
Hit(nid=1699088556, source_name='ontology', field_name='id', score=-1) - Hit(nid=690794884, source_name='assays', field_name='src_id', score=1.0)
Hit(nid=1699088556, source_name='ontology', field_name='id', score=-1) - Hit(nid=1469816517, source_name='activities', field_name='standard_flag', score=1.0)
Hit(nid=1699088556, source_name='ontology', field_name='id', score=-1) - Hit(nid=2866335884, source_name='metabolism', field_name='pathway_id', score=1.0)
Hit(nid=1699088556, source_name='ontology', field_name='id', score=-1) - Hit(nid=1811291602, source_name='vertice', field_name='ontology_id', score=1.0)
Hit(nid=1699088556, source_name='ontology', field_na

In [17]:
# Print every content-similarity relation in the loaded model, one "Hit - Hit" pair per line.
# NOTE(review): the output is long; consider clearing it before sharing the notebook.
reporting.print_content_sim_relations()

Hit(nid=2728516608, source_name='molecule_dictionary', field_name='inorganic_flag', score=-1) - Hit(nid=690794884, source_name='assays', field_name='src_id', score=1)
Hit(nid=2728516608, source_name='molecule_dictionary', field_name='inorganic_flag', score=-1) - Hit(nid=1469816517, source_name='activities', field_name='standard_flag', score=1)
Hit(nid=2728516608, source_name='molecule_dictionary', field_name='inorganic_flag', score=-1) - Hit(nid=760549000, source_name='molecule_dictionary', field_name='first_in_class', score=1)
Hit(nid=2728516608, source_name='molecule_dictionary', field_name='inorganic_flag', score=-1) - Hit(nid=1783483454, source_name='molecule_dictionary', field_name='prodrug', score=1)
Hit(nid=2728516608, source_name='molecule_dictionary', field_name='inorganic_flag', score=-1) - Hit(nid=53008014, source_name='molecule_dictionary', field_name='availability_type', score=1)
Hit(nid=2728516608, source_name='molecule_dictionary', field_name='inorganic_flag', score=-1) 