In [4]:
import datetime
import json
import os
import urllib.parse

import dateutil.parser
import pysolr
import requests
import spacy
# Shared spaCy pipeline, loaded once here and reused by every entity-extraction helper below.
nlp = spacy.load('en_core_web_lg')

### Make a request to the MapQuest service

In [5]:
#MapQuest Geocode service.  More info: https://developer.mapquest.com/documentation/geocoding-api/
# The API key is taken from the MAPQUEST_API_KEY environment variable when it is set.
# FIXME(security): the literal fallback below is a credential committed to the notebook.
# Rotate it and delete the fallback once every environment exports MAPQUEST_API_KEY.
mapquest_key = os.environ.get('MAPQUEST_API_KEY', 'AuqdPFEWYhm7rZRN5hX5HeWSKgaO2u7d')
mapquest_url = 'https://www.mapquestapi.com/geocoding/v1/address?key=' + mapquest_key + '&location='
def geocode(text):
    """Look up free-form location text with the MapQuest geocoding API.

    Args:
        text: the place description to geocode (e.g. "Raleigh, NC").

    Returns:
        The first entry of ``results[0]["locations"]`` from the JSON response.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
        KeyError / IndexError: when the response holds no results or locations.
    """
    # Percent-encode the text so characters such as '&', '#' or '+' cannot
    # truncate or corrupt the "location" query parameter.
    req = requests.get(mapquest_url + urllib.parse.quote(text, safe=""), timeout=10)
    # Surface HTTP failures (bad key, quota exceeded) as themselves instead of
    # as a confusing JSON/KeyError further down.
    req.raise_for_status()
    jsn = req.json()
    return jsn["results"][0]["locations"][0]

In [17]:
#us-map-with-latitude-longitude-united-states-latitude-longitude.jpg

#Disambiguate to the nearest lat/lng whole numbers:
def near(loc):
    """Snap a geocoded location to whole-number coordinates.

    Reads ``loc["latLng"]["lat"]`` and ``loc["latLng"]["lng"]`` and returns
    them as a ``(lat, lng)`` tuple, each passed through the built-in ``round``.
    """
    coords = loc["latLng"]
    lat, lng = (round(coords[axis]) for axis in ("lat", "lng"))
    return lat, lng

#Normalize to City|State|Country:
def norm(loc):
    """Build a ``City|State|Country`` label from a geocoded location.

    Scans ``adminArea0Type`` .. ``adminArea7Type`` in ``loc`` to find which
    ``adminAreaN`` field holds the city, state and country (a later N
    overrides an earlier one of the same type). The non-empty values are
    joined with ``|`` in City, State, Country order and spaces become
    underscores. Returns ``"_UNKNOWN_"`` when none of the three is present.
    """
    wanted = ("City", "State", "Country")
    field_for = dict.fromkeys(wanted, "")
    for level in range(8):
        area_key = "adminArea%d" % level
        type_key = area_key + "Type"
        if type_key in loc and loc[type_key] in field_for:
            field_for[loc[type_key]] = area_key

    # Keep only the kinds that resolved to a field with a non-empty value.
    parts = [
        loc[field_for[kind]]
        for kind in wanted
        if field_for[kind] in loc and len(loc[field_for[kind]])
    ]
    if not parts:
        return "_UNKNOWN_"
    return "|".join(parts).replace(" ", "_")

### Try a couple of examples

In [20]:
#Normalize to City|State|Country
# One geocoding request per place; each normalized label is printed on its own line.
for place in ("Raleigh, NC", "Raleigh", "Chapel Hill", "Durham", "RTP, NC", "Charlotte, NC", "Bangkok"):
    print(norm(geocode(place)))

Raleigh|NC|US
Raleigh|NC|US
Chapel_Hill|NC|US
Durham|NC|US
Durham|NC|US
Charlotte|NC|US
Bangkok|TH


In [21]:
# The first five queries are expected to snap to (36, -79) and
# "Charlotte, NC" to (35, -81).
for place in ("Raleigh, NC", "Raleigh", "Chapel Hill", "Durham", "RTP, NC", "Charlotte, NC", "Bangkok"):
    print(near(geocode(place)))

(36, -79)
(36, -79)
(36, -79)
(36, -79)
(36, -79)
(35, -81)
(14, 100)


### Get entities with spaCy

In [22]:
def recognize_location(text):
    """Print every token of ``text`` that spaCy tags as a location.

    Runs the shared ``nlp`` pipeline over ``text`` and prints one line per
    token whose entity type is ``GPE`` or ``LOC``. Matching is per token, so
    an entity spanning several tokens produces several lines.

    Returns None; the result is only written to stdout.
    """
    for token in nlp(text):
        if token.ent_type_ in ('GPE', 'LOC'):
            print("Text:",text,"\t|\tLocation:",token)

In [23]:
# Run the recognizer over a few sample queries; each match is printed by the function itself.
for sample in ("Kevin alone in NYC",
               "Indiana Jones in India",
               "action hero movie in LA",
               "Boxing Revenge in Moscow"):
    recognize_location(sample)

Text: Kevin alone in NYC 	|	Location: NYC
Text: Indiana Jones in India 	|	Location: India
Text: action hero movie in LA 	|	Location: LA
Text: Boxing Revenge in Moscow 	|	Location: Moscow


### Recognizing location entities with SpaCy

In [25]:
def extract_location_entities(text):
    """Find location entities in ``text`` and the word each one relates to.

    Args:
        text: the raw query string.

    Returns:
        A list of ``(related, location)`` tuples of spaCy tokens, one per
        token tagged ``GPE`` or ``LOC`` after entities have been merged:

        * location is an ``attr``/``dobj``: ``related`` is the first ``nsubj``
          to the left of its head; the location is skipped if there is none.
        * location is the object of a preposition: ``related`` is the token
          the preposition attaches to.
        * anything else: ``related`` is ``None``.
    """
    #debug here:
    # https://explosion.ai/demos/displacy?text=Kevin%20McAllister%20in%20New%20York%20NY&model=en_core_web_lg&cpu=1&cph=1

    doc = nlp(text)

    # Merge each named entity into a single token so a multi-word place is
    # handled as one unit below. (Noun chunks are NOT merged.)
    spans = list(doc.ents)
    if hasattr(doc, "retokenize"):
        # Span.merge() is deprecated in newer spaCy releases and removed in
        # spaCy 3; the retokenizer context manager is its replacement.
        with doc.retokenize() as retokenizer:
            for span in spans:
                retokenizer.merge(span)
    else:
        # Older spaCy without Doc.retokenize: keep the original call.
        for span in spans:
            span.merge()

    relations = []
    for gpe in doc:
        if gpe.ent_type_ not in ('GPE', 'LOC'):
            continue
        if gpe.dep_ in ('attr', 'dobj'):
            subjects = [w for w in gpe.head.lefts if w.dep_ == 'nsubj']
            if subjects:
                relations.append((subjects[0], gpe))
        elif gpe.dep_ == 'pobj' and gpe.head.dep_ == 'prep':
            # e.g. "<head> in <place>": relate the place to the preposition's head.
            relations.append((gpe.head.head, gpe))
        else:
            relations.append((None, gpe))

    return relations

### Subject/Object dependency examples

In [26]:
# Print the (related word, location) pairs found in a handful of sample queries.
for sample in ("Kevin McAllister in New York NY",
               "Kevin alone in NYC",
               "Indiana Jones in India",
               "California action hero",
               "boxing revenge in moscow",
               "Riddick in the underverse"):
    print(extract_location_entities(sample))

[(Kevin McAllister, New York NY)]
[(alone, NYC)]
[(Indiana Jones, india)]
[(None, California)]
[(revenge, moscow)]
[]


### Enrich a query with a location using spaCy and MapQuest

In [32]:
def enrich_query_location(q):
    """Turn a location mentioned in the query into a lat/lng filter.

    Args:
        q: the raw user query.

    Returns:
        A dict that always has ``"q"``. When a location entity is found and
        geocoded, it also has ``"fq"`` (``"lat:<int> lng:<int>"``) and ``"q"``
        has the location text removed. If removing it would leave nothing to
        search for, the original ``q`` is kept. With no entity, or no geocoding
        match, the result is just ``{"q": q}``.
    """
    enrichment = {"q":q}
    entities = extract_location_entities(q)
    if not entities:
        return enrichment

    #found a location - look it up in mapquest and disambiguate
    entity = entities[0][1].text
    try:
        latlng = near(geocode(entity))
    except LookupError:
        # The geocoder returned no usable result (missing key / empty list);
        # enrichment is optional, so fall back to the plain query.
        return enrichment

    # It's a valid location!
    # ...add the filter query and remove the text from the query:
    # NOTE(review): the two clauses are only space-separated, so whether both
    # must match depends on Solr's default operator for this core - confirm.
    enrichment["fq"] = "lat:"+str(latlng[0]) + " lng:"+str(latlng[1])
    remaining = q.replace(entity,"")
    if remaining.strip():
        enrichment["q"] = remaining
    # else: the query was nothing but the location; keep the original text so
    # the downstream Solr query is not built around an empty "()" clause.
    return enrichment
# Demo: the cell's last expression, so the returned enrichment dict is shown as the cell output.
enrich_query_location("Kevin alone in NYC")

{'q': 'Kevin alone in ', 'fq': 'lat:41 lng:-74'}

In [9]:
#Solr Client
# Module-level pysolr client used by search() below; points at the "tmdb" core of a Solr
# instance on localhost:8983.
solr = pysolr.Solr('http://localhost:8983/solr/tmdb')

#Print the title, whichever of release date / lat / lng were returned, and the overview from TMDB
def printresults(res):
    """Pretty-print search hits, one block per document.

    Args:
        res: an iterable of result documents (dict-like), e.g. what
            ``search`` returns.

    For each document prints a ``---`` separator, then the title, then any of
    ``release_date``, ``lat`` and ``lng`` that the document carries, then the
    overview, each on its own tab-indented line. Fields the query did not
    return are skipped rather than raising ``KeyError``, and non-string values
    (e.g. numeric coordinates) are converted with ``str``.
    """
    def first(value):
        # title/overview come back list-valued (the original indexed them
        # with [0]); show only the first value of any list-valued field.
        if isinstance(value, (list, tuple)):
            return value[0] if value else ""
        return value

    for r in res:
        lines = [first(r.get("title", ""))]
        lines.extend(first(r[name]) for name in ("release_date", "lat", "lng") if name in r)
        lines.append(first(r.get("overview", "")))
        print('\n---')
        print('\n\t'.join(str(line) for line in lines))

def solrquery(q):
    """Wrap ``q`` in a Solr query over title_en (boosted by 1.2) and overview_en.

    ``q`` is inserted verbatim inside both parenthesised clauses.
    """
    pieces = ["title_en:(", q, ")^1.2 overview_en:(", q, ")"]
    return "".join(pieces)
        
#Enrich and Search a text query
def search(query):
    """Enrich ``query`` with a location filter and run it against Solr.

    Args:
        query: the raw user query text.

    Returns:
        The pysolr results for the top 3 matches. The query and any filter
        applied are echoed to stdout before the request is made.
    """
    enriched = enrich_query_location(query)
    q = solrquery(enriched["q"])
    print("Searching for `" + query + "` ...")
    print("  q = " + q)

    # Request the same field list whether or not a filter is applied, so the
    # lat/lng fields that printresults reads are fetched on the un-enriched
    # path as well.
    params = {"fl": "title,overview,release_date,lat,lng", "rows": 3}
    fq = enriched.get("fq")
    if isinstance(fq, str):
        print("  fq = " + fq)
        params["fq"] = fq
    else:
        print("  ...no enrichments")

    return solr.search(q=q, **params)

### Let's try it with some good examples

In [1]:
# NOTE(review): the recorded output is a NameError at execution count 1, i.e. this cell was last
# run before the cell that defines printresults. Re-run the notebook top to bottom.
printresults(search("harry potter last 5 years"))

NameError: name 'printresults' is not defined

In [11]:
# NOTE(review): the recorded output shows an fq on release_date, but enrich_query_location in this
# notebook only ever builds a "lat:... lng:..." filter - the output looks stale; re-run to confirm.
printresults(search("indiana jones 1/1/1980 to 12/31/1987"))

Searching for `indiana jones 1/1/1980 to 12/31/1987` ...
  q = title_en:(indiana jones )^1.2 overview_en:(indiana jones )
  fq = release_date:[1980-01-01T00:00:00Z TO 1988-01-01T00:00:00Z]

---
Indiana Jones and the Temple of Doom
	1984-05-23T00:00:00Z
	After arriving in India, Indiana Jones is asked by a desperate village to find a mystical stone. He agrees – and stumbles upon a secret cult plotting a terrible plan in the catacombs of an ancient palace.

---
Raiders of the Lost Ark
	1981-06-12T00:00:00Z
	When Dr. Indiana Jones – the tweed-suited professor who just happens to be a celebrated archaeologist – is hired by the government to locate the legendary Ark of the Covenant, he finds himself up against the entire Nazi regime.

---
Guyana Tragedy: The Story of Jim Jones
	1980-04-15T00:00:00Z
	The story of the Peoples Temple cult led by Jim Jones and the events leading up to one of the largest mass suicides in history.


In [12]:
# Query with no location in it; the recorded output shows the "...no enrichments" path.
# NOTE(review): the recorded output lists release dates, which printresults as defined in this
# notebook does not print - the output looks stale; re-run to confirm.
printresults(search("harry potter goblet"))

Searching for `harry potter goblet` ...
  q = title_en:(harry potter goblet)^1.2 overview_en:(harry potter goblet)
  ...no enrichments

---
Harry Potter and the Goblet of Fire
	2005-11-05T00:00:00Z
	Harry starts his fourth year at Hogwarts, competes in the treacherous Triwizard Tournament and faces the evil Lord Voldemort. Ron and Hermione help Harry manage the pressure – but Voldemort lurks, awaiting his chance to destroy Harry and all that he stands for.

---
Harry Potter and the Philosopher's Stone
	2001-11-16T00:00:00Z
	Harry Potter has lived under the stairs at his aunt and uncle's house his whole life. But on his 11th birthday, he learns he's a powerful wizard -- with a place waiting for him at the Hogwarts School of Witchcraft and Wizardry. As he learns to harness his newfound powers with the help of the school's kindly headmaster, Harry uncovers the truth about his parents' deaths -- and about the villain who's to blame.

---
A Very Potter Musical
	2009-04-09T00:00:00Z
	In Apri