In [1]:
import pysolr
import spacy
import requests
import datetime
import json
import dateutil.parser
nlp = spacy.load('en_core_web_lg')

### Make a request to the Mapquest service

In [4]:
#MapQuest Geocode service.  More info: https://developer.mapquest.com/documentation/geocoding-api/
mapquest_url='https://www.mapquestapi.com/geocoding/v1/address?key=AuqdPFEWYhm7rZRN5hX5HeWSKgaO2u7d&location='
def mapquest(text):
    req = requests.get(mapquest_url+text)
    jsn = req.json()
    loc = jsn["results"][0]["locations"][0]
    return loc

In [5]:
#us-map-with-latitude-longitude-united-states-latitude-longitude.jpg
def near(loc):
    latlng = loc["latLng"]
    return round(latlng["lat"]),round(latlng["lng"])

def norm(loc):
    loctypes = {"City":"adminArea4","County":"adminArea3","State":"adminArea2","Country":"adminArea1"}
    location = None
    for fld in loctypes:
        if len(loc[fld]) and not location:
            location = loc[fld]
    return location

### Try a couple examples

In [7]:
print(near(mapquest("Raleigh, NC")))
print(near(mapquest("Raleigh")))
print(near(mapquest("Chapel Hill")))
print(near(mapquest("Durham")))
print(near(mapquest("Rochester")))
'''
(36, -79)
(36, -79)
(36, -79)
(36, -79)
(43, -78)
'''

(36, -79)
(36, -79)
(36, -79)
(36, -79)
(43, -78)


'\n(36, -79)\n(36, -79)\n(36, -79)\n(36, -79)\n(43, -78)\n'

### Get entities with SpaCy

In [8]:
def recognize_location(text):
    doc = nlp(text)
    locs = []
    for gpe in filter(lambda w: w.ent_type_ in ['GPE','LOC'], doc):
        print("Text:",text,"\t|\tLocation:",gpe)

In [9]:
recognize_entities("Kevin alone in NYC")
recognize_entities("Indiana Jones in India")
recognize_entities("action hero movie in LA")
recognize_entities("Boxing Revenge in Moscow")

NameError: name 'recognize_entities' is not defined

### Recognizing location entities with SpaCy

In [112]:
def extract_location_entities(text):
    #debug here:
    # https://explosion.ai/demos/displacy?text=Kevin%20McAllister%20in%20New%20York%20NY&model=en_core_web_lg&cpu=1&cph=1
    # merge entities and noun chunks into one token
    doc = nlp(text)
    spans = list(doc.ents)# + list(doc.noun_chunks)
    for span in spans:
        span.merge()

    relations = []
    for gpe in filter(lambda w: w.ent_type_ in ['GPE','LOC'], doc):
        if gpe.dep_ in ('attr', 'dobj'):
            subject = [w for w in gpe.head.lefts if w.dep_ == 'nsubj']
            if subject:
                subject = subject[0]
                relations.append((subject, gpe))
        elif gpe.dep_ == 'pobj' and gpe.head.dep_ == 'prep':
            relations.append((gpe.head.head, gpe))
        else:
            relations.append((None,gpe))

    return relations

### Dependency examples

In [113]:
print(extract_location_entities("Kevin McAllister in New York NY"))
print(extract_location_entities("Kevin alone in NYC"))
print(extract_location_entities("Indiana Jones in india"))
print(extract_location_entities("California action hero"))
print(extract_location_entities("boxing revenge in moscow"))

[(Kevin McAllister, New York NY)]
[(alone, NYC)]
[(Indiana Jones, india)]
[(None, California)]
[(revenge, moscow)]


### Enrich a query with a location using SpaCy and Mapquest

In [8]:
def enrich_query_location(q):
    
    entities = duckling(q)
    enrichment = {"q":q}
    query = ""
    cursor = 0
    for e in entities:
        dim = e["dim"]
        val = e["value"]
        typ = val["type"]
        if dim=="time" and typ=="interval":

            values = val["values"]

            #Remove the interval text from the query
            query += q[cursor:e["start"]]
            query += q[e["end"]:]
            cursor = e["end"]

            #Add the filter, convert to q and fq solr queries
            fromdt = solrdate(values[0]["from"]["value"])
            todt = solrdate(values[0]["to"]["value"])
            enrichment["fq"] = "release_date:[" + fromdt + " TO " + todt + "]"
            enrichment["q"]=query
            
    return enrichment

In [9]:
#Solr Client
solr = pysolr.Solr('http://localhost:8983/solr/tmdb')

#Print the Title, Release Date, and Overview from TMDB
def printresults(res):
    for r in res:
        print('\n---')
        print('\n\t'.join([r["title"][0],r["release_date"],r["overview"][0]]))

def solrquery(q):
    return "title_en:("+q+")^1.2 overview_en:("+q+")"
        
#Enrich and Search a text query
def search(query):
    enriched = enrich_date_interval(query)
    q = solrquery(enriched["q"])
    print("Searching for `" + query + "` ...")
    print("  q = " + q)
    if "fq" in enriched and isinstance(enriched["fq"], str):
        print("  fq = " + enriched["fq"])
        res = solr.search(q=q,fq=enriched["fq"],fl="title,overview,release_date",rows=3)
    else:
        print("  ...no enrichments")
        res = solr.search(q=q,fl="title,overview,release_date",rows=3)

    return res

### Let's try it with some good examples

In [1]:
printresults(search("harry potter last 5 years"))

NameError: name 'printresults' is not defined

In [11]:
printresults(search("indiana jones 1/1/1980 to 12/31/1987"))

Searching for `indiana jones 1/1/1980 to 12/31/1987` ...
  q = title_en:(indiana jones )^1.2 overview_en:(indiana jones )
  fq = release_date:[1980-01-01T00:00:00Z TO 1988-01-01T00:00:00Z]

---
Indiana Jones and the Temple of Doom
	1984-05-23T00:00:00Z
	After arriving in India, Indiana Jones is asked by a desperate village to find a mystical stone. He agrees – and stumbles upon a secret cult plotting a terrible plan in the catacombs of an ancient palace.

---
Raiders of the Lost Ark
	1981-06-12T00:00:00Z
	When Dr. Indiana Jones – the tweed-suited professor who just happens to be a celebrated archaeologist – is hired by the government to locate the legendary Ark of the Covenant, he finds himself up against the entire Nazi regime.

---
Guyana Tragedy: The Story of Jim Jones
	1980-04-15T00:00:00Z
	The story of the Peoples Temple cult led by Jim Jones and the events leading up to one of the largest mass suicides in history.


In [12]:
printresults(search("harry potter goblet"))

Searching for `harry potter goblet` ...
  q = title_en:(harry potter goblet)^1.2 overview_en:(harry potter goblet)
  ...no enrichments

---
Harry Potter and the Goblet of Fire
	2005-11-05T00:00:00Z
	Harry starts his fourth year at Hogwarts, competes in the treacherous Triwizard Tournament and faces the evil Lord Voldemort. Ron and Hermione help Harry manage the pressure – but Voldemort lurks, awaiting his chance to destroy Harry and all that he stands for.

---
Harry Potter and the Philosopher's Stone
	2001-11-16T00:00:00Z
	Harry Potter has lived under the stairs at his aunt and uncle's house his whole life. But on his 11th birthday, he learns he's a powerful wizard -- with a place waiting for him at the Hogwarts School of Witchcraft and Wizardry. As he learns to harness his newfound powers with the help of the school's kindly headmaster, Harry uncovers the truth about his parents' deaths -- and about the villain who's to blame.

---
A Very Potter Musical
	2009-04-09T00:00:00Z
	In Apri