In [1]:
from main import init_system
# Location of the stored model handed to init_system.
# NOTE(review): this is an absolute, machine-specific path; adjust it for
# your environment before running the notebook.
MODEL_PATH = "/Users/ra-mit/development/discovery_proto/test/network.pickle"

# Every later cell talks to the system through this API object.
api = init_system(MODEL_PATH)

Loading: */Users/ra-mit/development/discovery_proto/test/network.pickle*

### Help Menu

You can use the system through an **API** object. API objects are returned by the *init_system* function, so you can get one by doing:

***your_api_object = init_system(<path_to_stored_model>)***

Once you have access to an API object, there are a few concepts that are useful to know when using the API. **content** refers to the actual values of a given field. For example, if you have a table with an attribute called __Name__ and values *Mike, Sam, Olu*, content refers to the actual values, e.g. Mike, Sam, Olu.

**schema** refers to the name of a given field. In the previous example, schema refers to the word __Name__, as that is what the field is called.

Finally, **entity** refers to the *semantic type* of the content. This is in an experimental state. For the previous example it would return *'person'*, as that's what those names refer to.

Certain functions require a *field* as input. In general, a field is specified by the source name (e.g. table name) and the field name (e.g. attribute name). For example, if we are interested in finding content similar to that of the attribute *year* in the table *Employee*, we can provide the field in the following way:

field = ('Employee', 'year') # field = (<source_name>, <field_name>)


# Discovery Primitives

### Keyword, schema and entity search

In [3]:
# Keyword search for "Madden"; api.output prints one "source / field" line
# per hit.
# Presumably the keyword is matched against field content (values) - confirm
# against the API implementation.
res = api.keyword_search("Madden")
api.output(res)

source: Se_person.csv					 field: Last Name
source: Drupal_employee_directory.csv					 field: Personal Url
source: Employee_directory.csv					 field: Personal Url
source: Library_subject_offered.csv					 field: Responsible Faculty Name
source: Employee_directory.csv					 field: Last Name
source: Se_person.csv					 field: Last Name
source: Warehouse_users.csv					 field: Last Name
source: Library_course_instructor.csv					 field: Instructor Name
source: Drupal_employee_directory.csv					 field: Last Name
source: Employee_directory.csv					 field: Preferred Last Name Upper


In [4]:
# Schema search for "department"; the hits recorded for this cell are all
# fields whose name contains "Department".
res = api.schema_search("department")
api.output(res)

source: Mit_student_directory.csv					 field: Department
source: Student_degree_program.csv					 field: Department
source: Library_course_instructor.csv					 field: Department
source: Sis_course_description.csv					 field: Department
source: short_cis_course_catalog.csv					 field: Department Code
source: short_cis_course_catalog.csv					 field: Department Name
source: subject_grouping_slice.csv					 field: Department Code
source: Sis_department.csv					 field: Department Name
source: Sis_course_description.csv					 field: Department Name
source: short_subject_summary.csv					 field: Department Code


In [5]:
# Entity search for "organization". Per the help menu, an entity is the
# semantic type of a field's content, and this feature is experimental.
res = api.entity_search("organization")
api.output(res)

source: Ctx_object_attribute_lov.csv					 field: Oal Label
source: Ctx_object_attribute_lov.csv					 field: Oal Object
source: Fclt_organization.csv					 field: Organization
source: Fclt_organization.csv					 field: Dlc Key
source: Fclt_organization.csv					 field: Hr Department Name
source: Master_dept_hierarchy.csv					 field: Dlc Name
source: Master_dept_hierarchy.csv					 field: Master Dept Hier Level 2 Name
source: Master_dept_hierarchy.csv					 field: Master Dept Hier Level 4 Name
source: Sdo_coord_op_methods.csv					 field: Legacy Name
source: Sdo_coord_op_params.csv					 field: Parameter Name


### Content, schema, entity similarities

In [6]:
# A field is given as a (source name, field name) pair.
field = ("Mit_student_directory.csv", "Full Name")
# Ask for fields reported as similar in content to the field above, then
# print the hits.
res = api.similar_content_fields(field)
api.output(res)

source: Mit_student_directory.csv					 field: Full Name Uppercase


In [7]:
# Same (source name, field name) pair as the content-similarity cell, this
# time queried for schema similarity; the recorded hits are "Full Name"
# fields in other sources.
field = ("Mit_student_directory.csv", "Full Name")
res = api.similar_schema_fields(field)
api.output(res)

source: Employee_directory.csv					 field: Full Name
source: Se_person.csv					 field: Full Name
source: Drupal_employee_directory.csv					 field: Full Name


In [8]:
# Query for fields with a similar entity (semantic type) to "Full Name" in
# Se_person.csv.
# NOTE(review): the recorded hits (e.g. "Meet Time", "Subject Id") do not
# look person-related; the help menu describes entity support as
# experimental - confirm whether this output is expected.
field = ("Se_person.csv", "Full Name")
res = api.similar_entities_fields(field)
api.output(res)

source: short_cis_course_catalog.csv					 field: Subject Description
source: short_course_catalog_subject_offered.csv					 field: Meet Time
source: Warehouse_users.csv					 field: Middle Name
source: short_subject_summary.csv					 field: Subject Id
source: Tip_material.csv					 field: Tip Material Key
source: short_subject_summary.csv					 field: Cluster List
source: Drupal_employee_directory.csv					 field: Office Location
source: Sis_course_description.csv					 field: From Term
source: Master_dept_hierarchy_links.csv					 field: Dlc Key
source: Sis_course_description.csv					 field: Course Option


# Combining Primitives

### AND

In [9]:
# Run two independent searches, each with max_results=50.
r1 = api.schema_search("department", max_results=50)
r2 = api.entity_search("organization", max_results=50)
# Combine the two result sets with the API's AND primitive.
# Presumably this keeps only hits present in both inputs - confirm.
res = api.and_conjunctive(r1, r2)
api.output(res)

source: Fclt_organization.csv					 field: Hr Department Name
source: Hr_org_unit_new.csv					 field: Hr Department Code
source: Hr_org_unit_new.csv					 field: Hr Department Name
source: Hr_org_unit_new.csv					 field: Hr Department Name Long


### OR

In [10]:
# Two keyword searches, each with max_results=50.
r1 = api.keyword_search("Madden", max_results=50)
r2 = api.keyword_search("Stonebraker", max_results=50)
# Combine the two result sets with the API's OR primitive.
# Presumably this returns hits found by either search - confirm.
res = api.or_conjunctive(r1, r2)
api.output(res)

source: Employee_directory.csv					 field: Last Name
source: Moira_list.csv					 field: Moira List Description
source: Warehouse_users.csv					 field: Last Name
source: Employee_directory.csv					 field: Directory Full Name
source: Drupal_employee_directory.csv					 field: Last Name
source: Library_course_instructor.csv					 field: Instructor Name
source: Employee_directory.csv					 field: Email Address
source: Se_person.csv					 field: Full Name
source: Employee_directory.csv					 field: Personal Url
source: Employee_directory.csv					 field: Preferred Last Name Upper
source: Zip_usa.csv					 field: City Name
source: Moira_list.csv					 field: Moira List Name
source: Moira_list.csv					 field: Moira List Key
source: Employee_directory.csv					 field: Full Name
source: Warehouse_users.csv					 field: Email Address
source: Library_subject_offered.csv					 field: Responsible Faculty Name
source: Drupal_employee_directory.csv					 field: Personal Url
source: Employee_directory.csv					 fi

# Discovery Functions

### Join path

In [11]:
# The two endpoints, each given as a (source name, field name) pair.
field1 = ("Fclt_rooms.csv", "Building Room")
field2 = ("Fac_rooms.csv", "Room")
# Ask the API for a join path between the two fields and print the result.
# NOTE(review): no output is recorded for this cell in the notebook -
# confirm whether an empty result is expected here.
res = api.join_path(field1, field2)
api.output(res)

### Find matching schema

In [12]:
# Source (table) name passed to schema_complement.
sn = "Fclt_organization.csv"
res = api.schema_complement(sn)
# output_raw prints the raw Hit records (nid, source_name, field_name, score)
# instead of the "source / field" lines produced by api.output.
api.output_raw(res)

Hit(nid=355897770, source_name='Fclt_org_dlc_key.csv', field_name='Fclt Organization Key', score=1.0)
Hit(nid=2799362288, source_name='Fclt_organization.csv', field_name='Fclt Organization Key', score=0.9879518072289156)
Hit(nid=2280816136, source_name='All_hversion_view.csv', field_name='Parent Version', score=1.0)
Hit(nid=1266149968, source_name='All_version_hview_wdepth.csv', field_name='Parent Version', score=1.0)
Hit(nid=2299711430, source_name='Fclt_organization.csv', field_name='Organization Id', score=0.9879518072289156)
Hit(nid=355897770, source_name='Fclt_org_dlc_key.csv', field_name='Fclt Organization Key', score=1.0)
Hit(nid=3003106695, source_name='Student_department.csv', field_name='Department Code', score=1.0)
Hit(nid=3931661923, source_name='Fac_organization.csv', field_name='Organization Level', score=1)
Hit(nid=759833849, source_name='Fclt_organization_hist.csv', field_name='Organization Level', score=1)
Hit(nid=3348719160, source_name='Fac_organization.csv', field_n

### Find tables matching schema

In [13]:
# Look for tables matching the attribute list "name, department".
# The second argument (10) is presumably a result limit - confirm.
# NOTE(review): the recorded output pairs each table with a single-character
# string ('d') rather than a whole attribute name such as 'department';
# check whether this call expects a list of attribute names instead of one
# comma-separated string.
res = api.find_tables_matching_schema("name, department", 10)
api.output_raw(res)

('Master_dept_dcode_parent.csv', [('d', 3.8367095)])
('Fac_organization.csv', [('d', 3.8181076)])


In [14]:
# Look for tables matching the attribute list "course, department".
# The second argument (30) is presumably a result limit - confirm.
# NOTE(review): the recorded output pairs each table with a single-character
# string ('d', 'c') rather than a whole attribute name; check whether this
# call expects a list of attribute names instead of one comma-separated
# string.
res = api.find_tables_matching_schema("course, department", 30)
api.output_raw(res)

('Fac_organization.csv', [('d', 3.8181076)])
('Sdo_units_of_measure.csv', [('c', 3.8199923)])
('Master_dept_dcode_parent.csv', [('d', 3.8367095)])


In [15]:
# Look for tables matching the attribute list "Table, type".
# The second argument (40) is presumably a result limit - confirm.
# NOTE(review): the recorded output pairs each table with a single-character
# string ('y', 'b') rather than a whole attribute name; check whether this
# call expects a list of attribute names instead of one comma-separated
# string.
res = api.find_tables_matching_schema("Table, type", 40)
api.output_raw(res)

('Sdo_datums.csv', [('y', 3.3867753)])
('Sdo_datums_old_format.csv', [('y', 3.1314743)])
('Fclt_building.csv', [('y', 2.7094202)])
('Sdo_datum_geodetic.csv', [('y', 3.3867753)])
('Fclt_building_list.csv', [('y', 2.611222)])
('Fclt_building_hist_1.csv', [('y', 2.611222)])
('Fac_building.csv', [('y', 2.8472006)])
('Sdo_datum_engineering.csv', [('y', 3.6137006)])
('Sdo_datums_old_snapshot.csv', [('y', 3.1314743)])
('Sdo_units_of_measure.csv', [('b', 3.8671162)])
('Sdo_datum_vertical.csv', [('y', 3.6137006)])
