In [7]:
from datetime import datetime
from elasticsearch_dsl import DocType, Date, Integer, Keyword, Text, connections
import pandas as pd


In [2]:
# Define a default Elasticsearch client
connections.create_connection(hosts=['localhost'])

<Elasticsearch([{'host': 'localhost'}])>

In [21]:
# create the mappings in elasticsearch
Article.init()

q_a_df = pd.read_csv('../minerva/data/q_a_all.csv')

q_a_df.tail()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Q,A,SubjectOfBusinessTitle,FloorLanguage,date,personSpeaking
27367,27367,27367,"BOS Mr. Speaker, yesterday, an American giant ...","BOS Mr. Speaker, I would like to thank the hon...",International Trade,FR,2016-02-04,"Ms. Brigitte Sansoucy (Saint-Hyacinthe—Bagot, ..."
27368,27368,27368,"BOS Mr. Speaker, the government has agreed to ...","BOS Mr. Speaker, that could not be further fro...",Employment Insurance,FR,2013-02-01,Hon. Stéphane Dion (Saint-Laurent—Cartierville...
27369,27369,27369,"BOS Mr. Speaker, while SMEs like the businesse...","BOS Mr. Speaker, we know that it is very impor...",Taxation,FR,2017-02-24,Mr. Alexandre Boulerice (Rosemont—La Petite-Pa...
27370,27370,27370,"BOS Mr. Speaker, as researcher Alain Deneault ...","BOS Mr. Speaker, our government is committed t...",Taxation,FR,2017-02-24,"Hon. Bill Morneau (Minister of Finance, Lib.)"
27371,27371,27371,"BOS Mr. Speaker, Canadians are tired of the cu...","BOS Mr. Speaker, I was very pleased last year ...",Government Accountability,EN,2017-02-24,"Ms. Cheryl Hardcastle (Windsor—Tecumseh, NDP)"


In [31]:

index = 'hansard'

class Article(DocType):
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    body = Text(analyzer='snowball')
#     answer = Text(analyzer='snowball')
    personSpeaking = Text()
    floorLanguage = Text()
    tags = Keyword()
    published_from = Date()
    lines = Integer()

    class Meta:
        index = 'hansard'

    def save(self, ** kwargs):
        self.lines = len(self.body.split())
        return super(Article, self).save(** kwargs)

    def is_published(self):
        return datetime.now() > self.published_from



In [53]:
from elasticsearch_dsl import Index, DocType, Text, analyzer


articles = Index(index)

# delete the index, ignore if it doesn't exist
articles.delete(ignore=404)

# # create the index in elasticsearch
# blogs.create()

# register a doc_type with the index
articles.doc_type(Article)


# You can attach custom analyzers to the index
analyzer = analyzer('customer_analyzer',
    tokenizer="standard",
    filter=["standard", "lowercase", "stop", "snowball"],
    char_filter=["html_strip"]
)

articles.analyzer(analyzer)

In [54]:
%%time


# create and save and article
for index, row in q_a_df.iterrows():
#     print (row['Q'], row['A'])
    article = Article(title=row['SubjectOfBusinessTitle'], tags=['test'])
    article.body = row['Q'] + ' | ' + row['A']
    article.floorLanguage = row['FloorLanguage']
    article.personSpeaking = row['personSpeaking']
    article.published_from = row['date']
    article.save()
#     break
    


CPU times: user 29.8 s, sys: 1.33 s, total: 31.1 s
Wall time: 3min 32s


In [74]:
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch()

def search(term):
    fields = ['title', 'body']

    s = Search().using(client).query('match',  body=term)
    response = s.execute()
    print('Total %d hits found.' % response.hits.total)
    for hit in response:
        print('/%s/%s/%s returned with score %f' % (hit.meta.index, 
                                                    hit.meta.doc_type, 
                                                    hit.meta.id, 
                                                    hit.meta.score))
        print(hit.title, hit.body)

search('highest standard of openness')

# Display cluster health
print(connections.get_connection().cluster.health())

Total 26387 hits found.
/hansard/doc/m0bG32EBTk4QDs3iyorS returned with score 14.441756
Foreign Affairs BOS Mr. Speaker, in 2014, the Prime Minister had this to say about the deployment of troops to fight ISIS, “Canadians expect the highest standard of openness and honesty from a leader who wants to send our forces to war. EOS BOS ” He asked the government to allow the House of Commons to debate and vote on the issue, and that is what the previous government did. EOS BOS Will the Prime Minister walk the talk and allow debate and a vote in the House? EOS | BOS Mr. Speaker, our government is committed to actively contributing to greater security and peace in the world, and we welcome a healthy debate both in the House and with Canadians. EOS BOS During my consultations with the defence policy review, we heard from Canadians, and peace operations was at the top there. EOS BOS Members should keep in mind that peace operations are just one small part. EOS BOS We have to look at conflict med