In [1]:
import pysolr
import requests
import datetime
import json
import dateutil.parser

In [2]:
#Format dates for Solr in ISO-8601 Zulu
def solrdate(text):
    dt = dateutil.parser.parse(text)
    return dt.strftime('%Y-%m-%dT%H:%M:%SZ')

#Pretty print JSON
def pretty(data):
    print(json.dumps(data, indent=4, sort_keys=True))

### Make a request to the Duckling service

In [3]:
#Duckling service.  More info: https://github.com/facebook/duckling
duckling_url='http://localhost:8000/parse'
def duckling(text):
    req = requests.post(duckling_url, data = {"locale":"en_US","text":text})
    return req.json()

### Try a couple examples

In [4]:
pretty(duckling("13 out of 50 of the United States"))

[
    {
        "body": "13",
        "dim": "number",
        "end": 2,
        "latent": false,
        "start": 0,
        "value": {
            "type": "value",
            "value": 13
        }
    },
    {
        "body": "50",
        "dim": "number",
        "end": 12,
        "latent": false,
        "start": 10,
        "value": {
            "type": "value",
            "value": 50
        }
    }
]


In [5]:
pretty(duckling("50 cents"))

[
    {
        "body": "50 cents",
        "dim": "amount-of-money",
        "end": 8,
        "latent": false,
        "start": 0,
        "value": {
            "type": "value",
            "unit": "cent",
            "value": 50
        }
    }
]


In [6]:
pretty(duckling("Dec 1990 to March 1987"))

[
    {
        "body": "Dec 1990 to March",
        "dim": "time",
        "end": 17,
        "latent": false,
        "start": 0,
        "value": {
            "from": {
                "grain": "month",
                "value": "1990-12-01T00:00:00.000-08:00"
            },
            "to": {
                "grain": "month",
                "value": "1991-04-01T00:00:00.000-08:00"
            },
            "type": "interval",
            "values": [
                {
                    "from": {
                        "grain": "month",
                        "value": "1990-12-01T00:00:00.000-08:00"
                    },
                    "to": {
                        "grain": "month",
                        "value": "1991-04-01T00:00:00.000-08:00"
                    },
                    "type": "interval"
                }
            ]
        }
    },
    {
        "body": "March 1987",
        "dim": "time",
        "end": 22,
        "latent": false,
        "sta

In [8]:
pretty(duckling("last 5 years"))

[
    {
        "body": "last 5 years",
        "dim": "time",
        "end": 12,
        "latent": false,
        "start": 0,
        "value": {
            "from": {
                "grain": "year",
                "value": "2014-01-01T00:00:00.000-08:00"
            },
            "to": {
                "grain": "year",
                "value": "2019-01-01T00:00:00.000-08:00"
            },
            "type": "interval",
            "values": [
                {
                    "from": {
                        "grain": "year",
                        "value": "2014-01-01T00:00:00.000-08:00"
                    },
                    "to": {
                        "grain": "year",
                        "value": "2019-01-01T00:00:00.000-08:00"
                    },
                    "type": "interval"
                }
            ]
        }
    }
]


In [9]:
pretty(duckling("1992"))

[
    {
        "body": "1992",
        "dim": "number",
        "end": 4,
        "latent": false,
        "start": 0,
        "value": {
            "type": "value",
            "value": 1992
        }
    }
]


In [23]:
pretty(duckling("1990s"))

[
    {
        "body": "1990",
        "dim": "number",
        "end": 4,
        "latent": false,
        "start": 0,
        "value": {
            "type": "value",
            "value": 1990
        }
    }
]


### Enrich a query with a year interval using Duckling and Solr

In [10]:
def enrich_date_interval(q):
    entities = duckling(q)
    enrichment = {"q":q}
    query = ""
    cursor = 0
    for e in entities:
        dim = e["dim"]
        val = e["value"]
        typ = val["type"]
        values = None
        
        if dim=="time" and typ=="interval":
            values = val["values"]
            
        if values:
            #Remove the interval text from the query
            query += q[cursor:e["start"]]
            query += q[e["end"]:]
            cursor = e["end"]

            #Add the filter, convert to q and fq solr queries
            fromdt = solrdate(values[0]["from"]["value"])
            todt = solrdate(values[0]["to"]["value"])
            enrichment["fq"] = "release_date:[" + fromdt + " TO " + todt + "]"
            enrichment["q"]=query
            
    return enrichment

In [11]:
#Solr Client
solr = pysolr.Solr('http://localhost:8983/solr/tmdb')

#Print the Title, Release Date, and Overview from TMDB
def printresults(res):
    for r in res:
        print('\n---')
        print('\n\t'.join([r["title"][0],r["release_date"],r["overview"][0]]))

def solrquery(q):
    return "title_en:("+q+")^1.2 overview_en:("+q+")"
        
#Enrich and Search a text query
def search(query,func=enrich_date_interval):
    enriched = func(query)
    q = solrquery(enriched["q"])
    print("Searching for `" + query + "` ...")
    print("  q = " + q)
    if "fq" in enriched and isinstance(enriched["fq"], str):
        print("  fq = " + enriched["fq"])
        res = solr.search(q=q,fq=enriched["fq"],fl="title,overview,release_date",rows=3)
    else:
        print("  ...no enrichments")
        res = solr.search(q=q,fl="title,overview,release_date",rows=3)

    return res

### Let's try it with some good examples

In [13]:
printresults(search("star trek last 6 years"))

Searching for `star trek last 6 years` ...
  q = title_en:(star trek )^1.2 overview_en:(star trek )
  fq = release_date:[2013-01-01T00:00:00Z TO 2019-01-01T00:00:00Z]

---
Star Trek Into Darkness
	2013-05-05T00:00:00Z
	When the crew of the Enterprise is called back home, they find an unstoppable force of terror from within their own organization has detonated the fleet and everything it stands for, leaving our world in a state of crisis.  With a personal score to settle, Captain Kirk leads a manhunt to a war-zone world to capture a one man weapon of mass destruction. As our heroes are propelled into an epic chess game of life and death, love will be challenged, friendships will be torn apart, and sacrifices must be made for the only family Kirk has left: his crew.

---
Star Trek Beyond
	2016-07-07T00:00:00Z
	The USS Enterprise crew explores the furthest reaches of uncharted space, where they encounter a mysterious new enemy who puts them and everything the Federation stands for to the 

In [17]:
printresults(search("indiana jones 1/1/1980 to 12/31/1987"))

Searching for `indiana jones 1/1/1980 to 12/31/1987` ...
  q = title_en:(indiana jones )^1.2 overview_en:(indiana jones )
  fq = release_date:[1980-01-01T00:00:00Z TO 1988-01-01T00:00:00Z]

---
Indiana Jones and the Temple of Doom
	1984-05-23T00:00:00Z
	After arriving in India, Indiana Jones is asked by a desperate village to find a mystical stone. He agrees – and stumbles upon a secret cult plotting a terrible plan in the catacombs of an ancient palace.

---
Raiders of the Lost Ark
	1981-06-12T00:00:00Z
	When Dr. Indiana Jones – the tweed-suited professor who just happens to be a celebrated archaeologist – is hired by the government to locate the legendary Ark of the Covenant, he finds himself up against the entire Nazi regime.

---
Guyana Tragedy: The Story of Jim Jones
	1980-04-15T00:00:00Z
	The story of the Peoples Temple cult led by Jim Jones and the events leading up to one of the largest mass suicides in history.


In [19]:
printresults(search("harry potter goblet"))

Searching for `harry potter goblet` ...
  q = title_en:(harry potter goblet)^1.2 overview_en:(harry potter goblet)
  ...no enrichments

---
Harry Potter and the Goblet of Fire
	2005-11-05T00:00:00Z
	Harry starts his fourth year at Hogwarts, competes in the treacherous Triwizard Tournament and faces the evil Lord Voldemort. Ron and Hermione help Harry manage the pressure – but Voldemort lurks, awaiting his chance to destroy Harry and all that he stands for.

---
Harry Potter and the Philosopher's Stone
	2001-11-16T00:00:00Z
	Harry Potter has lived under the stairs at his aunt and uncle's house his whole life. But on his 11th birthday, he learns he's a powerful wizard -- with a place waiting for him at the Hogwarts School of Witchcraft and Wizardry. As he learns to harness his newfound powers with the help of the school's kindly headmaster, Harry uncovers the truth about his parents' deaths -- and about the villain who's to blame.

---
A Very Potter Musical
	2009-04-09T00:00:00Z
	In Apri

In [20]:
printresults(search("1992 comedies"))

Searching for `1992 comedies` ...
  q = title_en:(1992 comedies)^1.2 overview_en:(1992 comedies)
  ...no enrichments

---
The Comedians of Comedy
	2005-03-01T00:00:00Z
	The Comedians of Comedy is an occasional stand-up comedy tour featuring Patton Oswalt, Zach Galifianakis, Brian Posehn and Maria Bamford that was documented in a 2005 film and 2005 Comedy Central television series of the same name, both directed by Michael Blieden.

---
The Original Kings of Comedy
	2000-08-18T00:00:00Z
	The Original Kings of Comedy achieves the seemingly impossible task of capturing the rollicking and sly comedy routines of stand-up and sitcom vets Steve Harvey, D.L. Hughley, Cedric the Entertainer, and Bernie Mac and the magic of experiencing a live concert show. Director Spike Lee and his crew plant a multitude of cameras in a packed stadium and onstage (as well as backstage, as they follow the comedians) to catch the vivid immediacy of the show, which is as much about the audience as it is about the

In [21]:
def enrich_date_interval_extended(q):
    entities = duckling(q)
    enrichment = {"q":q}
    query = ""
    cursor = 0
    for e in entities:
        dim = e["dim"]
        val = e["value"]
        typ = val["type"]
        values = None
        
        if dim=="time" and typ=="interval":

            values = val["values"]

        elif dim=="number" and isinstance(val["value"],int) and val["value"]>=1900 and val["value"]<=2019:
            
            values = [{
                    "from": {
                        "grain": "year",
                        "value": str(val["value"]) + "-01-01T00:00:00.000-08:00"
                    },
                    "to": {
                        "grain": "year",
                        "value": str(val["value"]) + "-12-31T23:59:59.999-08:00"
                    },
                    "type": "interval"
                }]
            
        if values:
            #Remove the interval text from the query
            query += q[cursor:e["start"]]
            query += q[e["end"]:]
            cursor = e["end"]

            #Add the filter, convert to q and fq solr queries
            fromdt = solrdate(values[0]["from"]["value"])
            todt = solrdate(values[0]["to"]["value"])
            enrichment["fq"] = "release_date:[" + fromdt + " TO " + todt + "]"
            enrichment["q"]=query
            
    return enrichment

In [22]:
printresults(search("1992 comedies",func=enrich_date_interval_extended))

Searching for `1992 comedies` ...
  q = title_en:( comedies)^1.2 overview_en:( comedies)
  fq = release_date:[1992-01-01T00:00:00Z TO 1992-12-31T23:59:59Z]

---
Year of the Comet
	1992-04-24T00:00:00Z
	Year of the Comet is a 1992 romantic comedy adventure film about the pursuit of the most valuable bottle of wine in history. The title refers to the year it was bottled, 1811, which was known for the Great Comet of 1811, and also as one of the best years in history for European wine.

---
Singles
	1992-09-18T00:00:00Z
	Romantic comedy about six of Seattle's young people, most of whom live in the same apartment building and whose lives revolve around the city's ever-expanding music scene. The interrelated stories about each character's progress through the singles scene are intriguing and often very funny, and the soundtrack is a grunge fanatic's dream, with the likes of Soundgarden, Pearl Jam and Mudhoney.

---
Mistress
	1992-07-24T00:00:00Z
	A comedy about a screenwriter (Wuhl) whose ol