In [1]:
import datetime
import json
from urllib.parse import quote

import dateutil.parser
import pysolr
import requests
import spacy
nlp = spacy.load('en_core_web_lg')

### Make a request to the MapQuest geocoding service

In [2]:
#MapQuest Geocode service.  More info: https://developer.mapquest.com/documentation/geocoding-api/
# NOTE(review): the API key is hardcoded in the URL below; it should be moved to
# configuration (e.g. an environment variable) and rotated before sharing the notebook.
mapquest_url='https://www.mapquestapi.com/geocoding/v1/address?key=FVfj6GGCXVEGUjXvokn4IbXdNAHIbbH0&location='
def geocode(text):
    """Look up free-text `text` with the MapQuest geocoding service.

    Args:
        text: Location string to geocode (e.g. "New York NY").

    Returns:
        The first entry of the first result's "locations" list from the JSON
        response, or None when `text` is blank or the response holds no location.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    # Nothing to look up; avoid sending an empty `location=` parameter.
    if not text or not text.strip():
        return None
    # Percent-encode the text so characters such as '&', '#' or '+' cannot
    # truncate or alter the `location` query parameter.
    req = requests.get(mapquest_url + quote(text, safe=''), timeout=10)
    jsn = req.json()
    results = jsn.get("results") if isinstance(jsn, dict) else None
    if not results:
        return None
    locations = results[0].get("locations")
    return locations[0] if locations else None

In [3]:
#Convert a Mapquest location into the "lat,lng" text form used for a Solr geofilt point
def latlng_point(loc):
    """Return the "latLng" entry of `loc` formatted as "<lat>,<lng>"."""
    coords = loc["latLng"]
    return ','.join(str(coords[axis]) for axis in ("lat", "lng"))

### Extract location entities with spaCy

In [4]:
def extract_chunked_locations(text):
    """Find location mentions in `text` and chunk adjacent ones together.

    Uses a basic finite state machine over each sentence: consecutive tokens
    that are proper nouns tagged as GPE entities are merged into one location,
    and commas between them are skipped (so "New York, NY" is one chunk).
    Character positions are kept so the location can be cut out of the query.

    Args:
        text: The raw query string.

    Returns:
        A list of dicts with keys "text" (chunk tokens joined by single
        spaces, commas dropped), "start" and "end" (character offsets of the
        chunk in `text`, end exclusive).
    """
    doc = nlp(text)
    gpes = []

    def flush(tokens):
        # Token.idx is the token's character offset within the document text.
        first, last = tokens[0], tokens[-1]
        gpes.append({"text":' '.join(tok.text for tok in tokens),
                     "start":first.idx,
                     "end":last.idx + len(last)})

    for sent in doc.sents:
        chunk = []       # GPE tokens collected for the current location
        in_gpe = False   # True while we are inside a location chunk
        for tok in sent:
            is_comma = tok.text == ','
            if tok.pos_ == 'PROPN' and tok.ent_type_ == 'GPE' and not is_comma:
                chunk.append(tok)
                in_gpe = True
            elif in_gpe and is_comma:
                # A comma inside a location keeps the chunk open but is not
                # part of its text.
                continue
            else:
                # Any other token ends the current chunk, if there is one.
                if in_gpe:
                    flush(chunk)
                    chunk = []
                in_gpe = False
        # The sentence ended while a chunk was still open.
        if chunk:
            flush(chunk)
    return gpes

In [5]:
# Run the chunker over a handful of sample queries, one printed result per query.
for sample_query in (
    "Kevin McCallister in New York NY",
    "Kevin alone in NYC",
    "Indiana Jones India",
    "Los Angeles escape",
    "boxing revenge in moscow",
    "boxing revenge in Moscow",
    "Riddick in the underverse",
):
    print(extract_chunked_locations(sample_query))

[{'text': 'New York NY', 'start': 21, 'end': 32}]
[{'text': 'NYC', 'start': 15, 'end': 18}]
[{'text': 'India', 'start': 17, 'end': 22}]
[{'text': 'California', 'start': 0, 'end': 10}]
[]
[{'text': 'Moscow', 'start': 18, 'end': 24}]
[]


### Enrich a query with a location using spaCy and MapQuest

In [6]:
def enrich_query_location(q, d=20):
    """Turn a location mentioned in `q` into Solr geo-filter parameters.

    The first location chunk found in the query is geocoded; when that
    succeeds, the location text is cut out of the query and replaced by a
    geofilt filter around the geocoded point.

    Target request shape:
    q=*:*&fq={!geofilt%20sfield=location}&pt=18.36336,-66.07684&d=50&fl=title,overview,location*

    Args:
        q: The raw query string.
        d: Search radius sent as Solr's `d` parameter (kilometres according to
            the Solr spatial search documentation). Defaults to 20, the value
            that was previously hard-coded.

    Returns:
        A dict that always has "q". When a location was found and geocoded it
        also has "fq", "pt" and "d", and "q" no longer contains the location
        text (surrounding whitespace and words such as "in" are left as-is).
    """
    enrichment = {"q":q}
    entities = extract_chunked_locations(q)
    if not entities:
        return enrichment

    #found a location - look it up in mapquest and disambiguate
    entity = entities[0]
    geocoded = geocode(entity["text"])
    if not geocoded:
        return enrichment

    # It's a valid location!
    # ...add the filter query and remove the text from the query:
    pt = latlng_point(geocoded)
    enrichment["fq"] = "{!geofilt sfield=location}"
    enrichment["pt"] = pt
    enrichment["d"] = d
    enrichment["q"] = q[:entity["start"]] + q[entity["end"]:]
    return enrichment

#Test it out: the comma-separated "New York, NY" should be treated as one
#location, geocoded, and removed from the free-text part of the query.
enrich_query_location("Kevin alone in New York, NY")

{'q': 'Kevin alone in ',
 'fq': '{!geofilt sfield=location}',
 'pt': '40.713054,-74.007228',
 'd': 20}

In [7]:
#Solr Client
# Module-level client for the `tmdb` index on a local Solr instance (port 8983);
# search() below sends its requests through this object.
solr = pysolr.Solr('http://localhost:8983/solr/tmdb')

#Print the Title, Location, and Overview from TMDB
def _first_or(doc, field, default):
    """Return the first value of multi-valued `field` in `doc`, or `default` if absent/empty."""
    values = doc.get(field)
    return values[0] if values else default

def printresults(res):
    """Print each result as a `---` separator followed by title, location and overview.

    Fields are read as multi-valued and only the first value is shown.
    Documents that lack a field get a placeholder instead of raising KeyError.

    Args:
        res: Iterable of result documents (dict-like), e.g. pysolr search results.
    """
    for r in res:
        print('\n---')
        print('\n\t'.join([
            _first_or(r, "title", "NO TITLE DATA"),
            _first_or(r, "location", "NO LOCATION DATA"),
            _first_or(r, "overview", "NO OVERVIEW DATA"),
        ]))

def solrquery(q):
    """Build the Solr query string that searches title (boosted 1.2) and overview for `q`.

    Args:
        q: Free-text part of the query, possibly with the location removed.

    Returns:
        "title_en:(<q>)^1.2 overview_en:(<q>)", or the match-all query "*:*"
        when `q` is blank. A blank `q` happens when the whole input was a
        location; empty parentheses would be a Solr syntax error.
    """
    if not q or not q.strip():
        return "*:*"
    return "title_en:("+q+")^1.2 overview_en:("+q+")"
        
#Run a text query through location enrichment, then send it to Solr
def search(query):
    """Enrich `query` with a location filter when possible and return the top 3 Solr results.

    Prints the query and any geo-filter parameters before searching.
    """
    enrichment = enrich_query_location(query)
    solr_q = solrquery(enrichment["q"])
    print("Searching for `" + query + "` ...")
    print("  q = " + solr_q)

    # No usable filter query: plain text search only.
    if not isinstance(enrichment.get("fq"), str):
        print("  ...no enrichments")
        return solr.search(q=solr_q,fl="title,overview,release_date",rows=3)

    geo_fq = enrichment["fq"]
    geo_pt = enrichment["pt"]
    geo_d = enrichment["d"]
    for label, value in (("fq", geo_fq), ("pt", geo_pt)):
        print("  " + label + " = " + value)
    print("  d = " + str(geo_d))
    return solr.search(
        q=solr_q,
        fq=geo_fq,
        pt=geo_pt,
        d=geo_d,
        fl="title,overview,location,location_city,location_state,location_country",
        rows=3,
    )

### Let's try it with some good examples

In [8]:
# Person name followed by a city and state: in the recorded run "New York NY" is
# removed from the text query and becomes the geofilt point.
printresults(search("Kevin McCallister in New York NY"))

Searching for `Kevin McCallister in New York NY` ...
  q = title_en:(Kevin McCallister in )^1.2 overview_en:(Kevin McCallister in )
  fq = {!geofilt sfield=location}
  pt = 40.713054,-74.007228
  d = 20

---
Home Alone 2: Lost In New York
	28.260731,-82.420978
	Instead of flying to Florida with his folks, Kevin ends up alone in New York, where he gets a hotel room with his dad's credit card—despite problems from a clerk and meddling bellboy. But when Kevin runs into his old nemeses, the Wet Bandits, he's determined to foil their plans to rob a toy store on Christmas eve.


In [13]:
# NOTE(review): in the recorded run "Indiana" (not "India") was taken as the
# location - the text query became " Jones India" - so this example actually
# demonstrates an ambiguity; confirm that is the intended illustration.
printresults(search("Indiana Jones India"))

Searching for `Indiana Jones India` ...
  q = title_en:( Jones India)^1.2 overview_en:( Jones India)
  fq = {!geofilt sfield=location}
  pt = 39.919991,-86.2818
  d = 20

---
Indiana Jones and the Temple of Doom
	39.919991,-86.2818
	After arriving in India, Indiana Jones is asked by a desperate village to find a mystical stone. He agrees – and stumbles upon a secret cult plotting a terrible plan in the catacombs of an ancient palace.

---
Indiana Jones and the Last Crusade
	39.919991,-86.2818
	When Dr. Henry Jones Sr. suddenly goes missing while pursuing the Holy Grail, eminent archaeologist Indiana must team up with Marcus Brody, Sallah and Elsa Schneider to follow in his father's footsteps and stop the Nazis from recovering the power of eternal life.

---
Armour of God
	39.919991,-86.2818
	Jackie Chan stars as Asian Hawk, an Indiana Jones-style adventurer looking to make a fortune in exotic antiquities. After Hawk discovers a mysterious sword in Africa, a band of Satan-worshipping mo

In [18]:
# Location at the start of the query: the recorded run leaves only " escape"
# as the text query and filters around the geocoded city point.
printresults(search("Los Angeles escape"))

Searching for `Los Angeles escape` ...
  q = title_en:( escape)^1.2 overview_en:( escape)
  fq = {!geofilt sfield=location}
  pt = 34.052238,-118.243344
  d = 20

---
Escape from L.A.
	34.052238,-118.243344
	This time, a cataclysmic temblor hits Los Angeles, turning it into an island. The president views the quake as a sign from above, expels Los Angeles from the country and makes it a penal colony for those found guilty of moral crimes. When his daughter, part of a resistance movement, steals the control unit for a doomsday weapon, Snake again gets tapped to save the day.

---
The Snow Creature
	36.827085,139.890341
	A botanical expedition to the Himalayas captures a Yeti and brings it back alive to Los Angeles, where it escapes and runs amok, seeking food.

---
Blade Runner
	34.052238,-118.243344
	In the smog-choked dystopian Los Angeles of 2019, blade runner Rick Deckard is called out of retirement to terminate a quartet of replicants who have escaped to Earth seeking their creator 

In [58]:
# Capitalised city at the end of the query (the lower-case "moscow" variant
# produced no location in the chunking test earlier in this notebook).
printresults(search("boxing revenge in Moscow"))

Searching for `boxing revenge in Moscow` ...
  q = title_en:(boxing revenge in )^1.2 overview_en:(boxing revenge in )
  fq = {!geofilt sfield=location}
  pt = 55.751634,37.618704
  d = 20

---
Rocky IV
	55.751634,37.618704
	Rocky must come out of retirement to battle a gargantuan Soviet fighter named Drago, who brutally punished Rocky's friend and former rival, Apollo Creed. Seeking revenge in the name of his fallen comrade and his country, Rocky agrees to fight Drago in Moscow on Christmas, and the bout changes both fighters -- and the world.


In [22]:
# Country-level location.
# NOTE(review): the recorded run still filters with d = 20 around the single
# geocoded point - confirm that radius is intended for an area as large as a country.
printresults(search("Midnight in France"))

Searching for `Midnight in France` ...
  q = title_en:(Midnight in )^1.2 overview_en:(Midnight in )
  fq = {!geofilt sfield=location}
  pt = 46.623974,2.462247
  d = 20

---
A Midnight Clear
	46.623974,2.462247
	Set in 1944 France, an American Intelligence Squad locates a German Platoon wishing to surrender rather than die in Germany's final war offensive. The two groups of men, isolated from the war at present, put aside their differences and spend Christmas together before the surrender plan turns bad and both sides are forced to fight the other.
