# PubMed Parser - Examples

In [1]:
import os
import sys
import uuid

In [2]:
# Put the parent of the current working directory at the front of the import
# path (presumably where the `pubmed_querier` package lives - confirm against
# the repository layout).
PROJECT_ROOT = os.path.dirname(os.getcwd())
# Rebuild the path with exactly one copy of PROJECT_ROOT, in first position, so
# that re-running this cell never stacks duplicate entries on sys.path.
sys.path[:] = [PROJECT_ROOT] + [entry for entry in sys.path if entry != PROJECT_ROOT]

In [3]:
from pubmed_querier.pubmed_parser import PubMedParser
from pubmed_querier.field import StringField, NumericField, ListField

## A collection of dictionaries

A collection of dictionaries describing some animals.

In [18]:
# Example records: each animal has a name, a list of colours, and a count of
# legs, arms and tails. The rows below are (name, colours, legs, arms, tails).
animals = [
    {
        "name": name,
        "color": colors,
        "appendages": {"leg": legs, "arm": arms, "tail": tails},
    }
    for name, colors, legs, arms, tails in (
        ("zebra", ["black", "white"], 4, 0, 1),
        ("octopus", ["orange", "red"], 0, 8, 0),
        ("chimp", ["brown", "white"], 2, 2, 0),
        ("gibbon", ["black"], 2, 2, 1),
        ("frog", ["green"], 4, 0, 0),
        ("caterpillar", ["green", "yellow"], 10, 0, 1),
    )
]

Create a database of these animals: a mapping from unique identifiers to each animal's properties.

In [19]:
# Key every animal record by a freshly generated random UUID.
database = dict((uuid.uuid4(), animal) for animal in animals)

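Create a field instance for each property you want to query. Each field takes a full name, an abbreviated name, and a function that extracts the field's value from a database entry.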

In [24]:
# One field per queryable property. Every field is given a full name, an
# abbreviation, and a function that pulls the field's value out of an animal
# record.
fields = [
    StringField(
        full_name="name",
        abbr_name="NM",
        entry_mapping=lambda animal: animal["name"],
    ),
    ListField(
        full_name="color",
        abbr_name="CL",
        entry_mapping=lambda animal: animal["color"],
    ),
    NumericField(
        full_name="arms",
        abbr_name="AM",
        entry_mapping=lambda animal: animal["appendages"]["arm"],
    ),
    NumericField(
        full_name="tails",
        abbr_name="TL",
        entry_mapping=lambda animal: animal["appendages"]["tail"],
    ),
]

Link database entries to the fields.

In [25]:
# Hand the database mapping to every field via reset_entries(), linking each
# field to the entries it will be queried against.
for field in fields:
    field.reset_entries(database)

Instantiate a parser with these fields.

In [26]:
# Build the parser from the list of field instances; the queries in the cells
# that follow are evaluated through this object.
parser = PubMedParser(fields)

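Make some queries. Each search term is followed by a field tag in square brackets; the tag can be either the field's full name (e.g. `[name]`) or its abbreviation (e.g. `[CL]`).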

In [27]:
# Two terms joined with "or": a wildcard on the name field and a term on the
# colour field (addressed by its abbreviation, CL). Per the recorded output,
# this selects zebra plus the animals with "green" in their colour list.
query = "z*[name] or ?green[CL]"
# get_object_set() yields database keys, so look each record up before printing.
matching_ids = parser.get_object_set(query)
for entry in matching_ids:
    print(database[entry])

{'name': 'caterpillar', 'color': ['green', 'yellow'], 'appendages': {'leg': 10, 'arm': 0, 'tail': 1}}
{'name': 'zebra', 'color': ['black', 'white'], 'appendages': {'leg': 4, 'arm': 0, 'tail': 1}}
{'name': 'frog', 'color': ['green'], 'appendages': {'leg': 4, 'arm': 0, 'tail': 0}}


In [32]:
# Numeric comparisons: more than 4 arms (field addressed by its abbreviation,
# AM) or exactly 1 tail (field addressed by its full name, tails).
query = ">4[AM] or =1[tails]"
# get_object_set() yields database keys, so look each record up before printing.
matching_ids = parser.get_object_set(query)
for entry in matching_ids:
    print(database[entry])

{'name': 'gibbon', 'color': ['black'], 'appendages': {'leg': 2, 'arm': 2, 'tail': 1}}
{'name': 'caterpillar', 'color': ['green', 'yellow'], 'appendages': {'leg': 10, 'arm': 0, 'tail': 1}}
{'name': 'octopus', 'color': ['orange', 'red'], 'appendages': {'leg': 0, 'arm': 8, 'tail': 0}}
{'name': 'zebra', 'color': ['black', 'white'], 'appendages': {'leg': 4, 'arm': 0, 'tail': 1}}


## SQL queries

Suppose the fields defined above were columns in a SQL table rather than values in dictionary entries. In that case, we can translate the PubMed-like query into a SQL query.

In [35]:
# Translate the numeric query into SQL text. get_sql_query() returns a pair;
# only the SQL string is displayed here.
field_names, sql = parser.get_sql_query(
    table_name="animal.db",
    query=">4[AM] or =1[tails]",
)
print(sql)

SELECT * FROM animal.db WHERE ("arms" > 4 OR "tails" = 1)


In [37]:
# Translate the name/colour query into SQL text.
# NOTE(review): in the recorded output the `?green[CL]` term renders as an
# empty clause ("... OR )"), so the statement shown is not valid SQL. It looks
# like the list field has no SQL translation - confirm in the library.
field_names, sql = parser.get_sql_query(
    table_name="animal.db",
    query="z*[name] or ?green[CL]",
)
print(sql)

SELECT * FROM animal.db WHERE ("name" LIKE 'z%' OR )
