### Overview





#### Notebook setup

In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import os

import httpx

# Connection settings can be overridden through environment variables so that
# real credentials never have to be saved in the notebook. The fallbacks are
# the demo values this notebook has always used, so behaviour is unchanged
# when the variables are not set.
SIREN_URL = os.environ.get("SIREN_URL", "https://siren:9220")
SIREN_USER = os.environ.get("SIREN_USER", "sirenadmin")
SIREN_PASSWORD = os.environ.get("SIREN_PASSWORD", "password")

# One shared client for every request below: relative endpoints are resolved
# against `base_url`, and basic auth plus the JSON content type are sent on
# each call.
client = httpx.Client(
    base_url=SIREN_URL,
    # NOTE(review): TLS certificate verification is switched off, presumably
    # because the target uses a self-signed certificate -- confirm, and do not
    # reuse this setting against a production cluster.
    verify=False,
    auth=(SIREN_USER, SIREN_PASSWORD),
    headers={"Content-Type": "application/json"},
)
client

<httpx.Client at 0x7ff21c250190>

<IPython.core.display.Javascript object>

### Working queries

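Each query works on its own: a `query_string` search for `city:Rockville` returns 73 companies, and a `join` query against the `article` index for companies that have been reported on by the New York Times (NYT) returns 921.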

In [3]:
# Search the `company` endpoint for a single hit whose `city` field matches
# "Rockville", using a plain `query_string` clause.
endpoint = "/siren/company/_search"

city_clause = {"query_string": {"query": "city:Rockville"}}
body = {"size": 1, "query": city_clause}

response = client.post(endpoint, json=body)
response.json()

{'pit_id': 'd6299123-99d0-43ab-b46b-432517a659a9',
 'took': 3,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 73, 'relation': 'eq'},
  'max_score': 6.9030094,
  'hits': [{'_index': 'company',
    '_type': 'Company',
    '_id': 'Oiy32XcBu9D2HElYT786',
    '_score': 6.9030094,
    '_ignored': ['deadpooled_date'],
    '_source': {'webpage': '',
     'phone_number': '240-453-6331',
     'location': '44.163276, -69.0915779',
     'email_address': 'custsupport@appliedcellsci.com',
     'city': 'Rockville',
     'description': 'drug discovery research',
     'homepage_url': 'http://www.appliedcellsci.com',
     'founded_month': 0,
     'revenuecurrency': '',
     'hasstatus': 'acquired',
     'one_competitor': None,
     'statecode': 'MD',
     'deadpooled_date': '',
     'url': 'http://www.crunchbase.com/company/applied-cell-sciences',
     'freebaseid': '',
     'number_of_employees': None,
     'revenue': '',
     'pe

<IPython.core.display.Javascript object>

In [4]:
endpoint = "/siren/company/_search"

# Sub-request evaluated on the `article` index: keep only the articles whose
# source is the New York Times.
nyt_articles_request = {
    "query": {"query_string": {"query": 'article.source:"New York Times"'}},
}

# Restrict companies to those referenced by the matching articles.
# NOTE(review): presumably `on` pairs the company `id` field with the article
# `companies` field -- confirm against the Siren Federate join documentation.
join_clause = {
    "join": {
        "indices": ["article"],
        "on": ["id", "companies"],
        "request": nyt_articles_request,
    },
}

body = {"size": 1, "query": join_clause}

response = client.post(endpoint, json=body)
response.json()

{'pit_id': '2420c59b-82b7-441a-b787-d4eb910bc4ff',
 'took': 3,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 921, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'company',
    '_type': 'Company',
    '_id': '6Sy32XcBu9D2HElYQ6bx',
    '_score': 1.0,
    '_ignored': ['deadpooled_date'],
    '_source': {'webpage': '',
     'phone_number': '',
     'location': None,
     'email_address': 'info@favorites.bz',
     'city': None,
     'description': 'Visual Web Directory',
     'homepage_url': 'http://www.favorites.bz',
     'founded_month': 10,
     'revenuecurrency': '',
     'hasstatus': '',
     'one_competitor': 'company/allmyfaves',
     'statecode': None,
     'deadpooled_date': '',
     'url': 'http://www.crunchbase.com/company/favorites',
     'freebaseid': '',
     'number_of_employees': 2,
     'revenue': '',
     'permalink': 'favorites',
     'founded_year': 2012,
     'id': 'company/favorite

<IPython.core.display.Javascript object>

### Bug

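Trying to query for `city:Rockville` AND companies reported on by NYT in a single request, by placing the `query_string` and `join` clauses side by side directly under `query`, returns a `parsing_exception` (HTTP 400). Note that the error says the parser expected the object to end right after `query_string`, which suggests that `query` accepts only one top-level clause; wrapping both clauses in a `bool` query is the usual way to combine them and is worth trying before treating this as a server bug.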

In [5]:
endpoint = "/siren/company/_search"

# Reproduction of the failure: the city filter and the NYT join are placed
# side by side, as sibling keys of the same `query` object. The server answers
# with a `parsing_exception` (HTTP 400).
# NOTE(review): the error text says the parser expected the object to end
# after `query_string`, so `query` probably accepts a single clause only;
# combining the two clauses under a `bool` query is likely the supported
# form -- confirm against the Elasticsearch / Siren Federate documentation.
city_clause = {"query_string": {"query": "city:Rockville"}}

join_clause = {
    "join": {
        "indices": ["article"],
        "on": ["id", "companies"],
        "request": {
            "query": {"query_string": {"query": 'article.source:"New York Times"'}},
        },
    },
}

# Key order is kept (`query_string` first, then `join`) so the request body,
# and therefore the reported error position, matches the original repro.
body = {"size": 1, "query": {**city_clause, **join_clause}}

response = client.post(endpoint, json=body)
response.json()

{'error': {'root_cause': [{'type': 'parsing_exception',
    'reason': '[query_string] malformed query, expected [END_OBJECT] but found [FIELD_NAME]',
    'line': 1,
    'col': 68}],
  'type': 'parsing_exception',
  'reason': '[query_string] malformed query, expected [END_OBJECT] but found [FIELD_NAME]',
  'line': 1,
  'col': 68},
 'status': 400}

<IPython.core.display.Javascript object>