In [None]:
# Uncomment and run once to install the required packages into the kernel's environment:
# %pip install transformers[torch]
# %pip install sentence_transformers

In [None]:
from transformers import AutoTokenizer
from sentence_transformers import SentenceTransformer
import json
import sys
import torch
import requests

In [None]:
# Device on which every encoder below is loaded. Previously only `usenc`
# received this setting; the other two models fell back to the library's
# automatic device selection, so the three encoders could end up on
# different devices.
device = 'cpu'

# Load the three sentence-embedding models that are compared in this notebook.
mpnet = SentenceTransformer('stsb-mpnet-base-v2', device=device)
distilroberta = SentenceTransformer('stsb-distilroberta-base-v2', device=device)
usenc = SentenceTransformer('distiluse-base-multilingual-cased-v1', device=device)

# Set the same 512-token sequence limit on every encoder
# (presumably so long articles are truncated as late as possible — TODO confirm).
mpnet.max_seq_length = 512
distilroberta.max_seq_length = 512
usenc.max_seq_length = 512

In [None]:
# Sample news article used as the query text for every embedding model below.
# The literal is assembled from adjacent string pieces inside parentheses so
# that it never spans a raw physical line break (which a single-quoted Python
# string cannot contain). Double spaces inside the text are intentional: they
# are part of the scraped article and are kept verbatim.
seq = (
    "13 cases of measles, 2 new cases of measles were contracted from abroad "
    "Published Sunday, February 24, 2019 12:57PM PST  "
    "Last Updated Sunday, February 24, 2019 8:06PM PST  "
    "There are now 13 people with the measles, according to health officials in Vancouver. "
    "Vancouver Coastal Health said the two new cases are not related to the outbreak involving two French language schools. "
    "The health authority said the two people were infected while they were travelling, but has only specified that one of them went to the Philippines. "
    "A spokesperson with the Ministry of Health confirmed to CTV News the second person had also travelled to the island nation.  "
    "VCH said the first measles patient arrived at the YVR on Feb. 11 on Philippine Airlines flight PR 116 from Manila and had a one-night layover. "
    "The patient left Vancouver to Edmonton on Air Canada Flight AC236 on Feb. 12. "
    "\"The safety and security of our passengers, employees and community is our top priority. "
    "We recommend anyone who was on the flights or in the terminal at YVR on those days and may be experiencing symptoms to contact 811 to talk to a nurse,\" "
    "YVR spokesperson Chris Devauld said in an e-mail. "
    "The second measles patient is a resident of the Lower Mainland and was infectious when they went to a restaurant and mall in Richmond, according to the health authority. "
    "VCH identified several places where people may have been exposed to the highly contagious virus, including: "
    "YVR, international arrivals and customs area between 4 and 7 p.m. on Feb. 11 "
    "Shuttle bus from YVR to La Quinta Inn in Richmond between 5 to 7 p.m. on Feb. 11 "
    "Shuttle bus from YVR to La Quinta Inn in Richmond between 8 and 10:30 a.m. on Feb. 12 "
    "YVR, Air Canada domestic departures area between 9 a.m. and noon on Feb. 12 "
    "Little Onpin Restaurant in Richmond between 6 and 10 p.m. on Feb. 16 "
    "Toys R Us at Lansdowne Centre between 11 a.m. and 9 p.m. on Feb. 17 "
    "Toys R Us at Lansdowne Centre between 11 a.m. and 9 p.m. on Feb. 18 "
    "'We can do better'  "
    "Health officials said those who have never had the measles or did not have the two doses of the vaccine are at the highest risk of contracting the virus. "
    "They encourage people to get fully immunized, adding the shot is free for everyone. "
    "Health Minister Adrian Dix assures all British Columbians that there is enough vaccine to deal with the demand. "
    "\"Frankly, people shouldn't be getting measles in the 21st century in British Columbia,\" he said. "
    "\"While our healthcare system is doing very well, I think can we do better and we will do better.\" "
    "Health Minister Dix says BC is in communications with other provinces and Washington State about the measles outbreak. "
    "Says 12 cases is too many and again implores people to get vaccinated @CTVVancouverpic.twitter.com/c1NjsbpeNi "
    "Other cases linked to French schools  "
    "On Friday, Vancouver Coastal Health announced there was a total of 10 cases that were \"very clearly associated\" with the schools, Ecole Secondaire Jules-Verne and Ecole Rose-Des-Vents. "
    "Officials said 33 students at the two schools have been ordered to stay home because they haven't provided proof of measles vaccination.  "
    "The other three cases were contracted while travelling and one unrelated case was acquired locally, according to VCH. "
    "Health officials are reminding people they can get vaccinations by visiting their family doctor, a walk-in clinic or urgent primary care centre, or a pharmacy for adults or children over five years old. "
    "Calling ahead is also recommended."
)

In [None]:
# Embed the full article text once with each of the three encoders,
# in the same order as before: mpnet, distilroberta, usenc.
mpnet_embedding, distilroberta_embedding, use_embedding = (
    encoder.encode(seq) for encoder in (mpnet, distilroberta, usenc)
)

In [None]:
# Elasticsearch request body: score every document (match_all) by
# 1.0 + cosine similarity between the query vector and the document's
# 'mpnet_embedding' field, returning the top 10 hits with only `content_t`.
# The +1.0 offset presumably keeps the script score non-negative — TODO confirm.
query_mpnet = {
    "size": 10,
    "_source": "content_t",
    "query": {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "source": "(1.0+cosineSimilarity(params.queryVector, 'mpnet_embedding'))",
                "params": {"queryVector": mpnet_embedding.tolist()},
            },
        },
    },
}

In [None]:
# Elasticsearch request body: restrict candidates to documents whose
# `content_t` matches "Vancouver" (bool filter), then score them by
# 1.0 + cosine similarity against the 'distilroberta_embedding' field.
# Returns the top 10 hits with only `content_t` in `_source`.
query_distilroberta = {
    "size": 10,
    "_source": "content_t",
    "query": {
        "script_score": {
            "query": {
                "bool": {
                    "filter": {
                        "match": {"content_t": "Vancouver"},
                    },
                },
            },
            "script": {
                "source": "(1.0+cosineSimilarity(params.queryVector, 'distilroberta_embedding'))",
                "params": {"queryVector": distilroberta_embedding.tolist()},
            },
        },
    },
}

In [None]:
# Elasticsearch request body: score every document (match_all) by
# 1.0 + cosine similarity between the query vector and the document's
# 'use_embedding' field, returning the top 10 hits with only `content_t`.
query_usenc = {
    "size": 10,
    "_source": "content_t",
    "query": {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "source": "(1.0+cosineSimilarity(params.queryVector, 'use_embedding'))",
                "params": {"queryVector": use_embedding.tolist()},
            },
        },
    },
}

In [None]:
# Run the mpnet similarity search against the local `test` index and print
# the JSON response with one-space indentation.
# The `?pretty` URL flag was dropped: the body is parsed and re-serialised
# below, so server-side pretty-printing had no effect on the output.
response = requests.get(
    'http://localhost:9200/test/_search',
    json=query_mpnet,
    timeout=60,  # requests applies no timeout by default; avoid hanging the kernel on a stalled server
)
print(json.dumps(response.json(), indent=1))

In [None]:
# Run the distilroberta similarity search against the local `test` index and
# print the JSON response with one-space indentation.
response = requests.get(
    'http://localhost:9200/test/_search',
    json=query_distilroberta,
    timeout=60,  # requests applies no timeout by default; avoid hanging the kernel on a stalled server
)
print(json.dumps(response.json(), indent=1))

In [None]:
# Run the multilingual USE similarity search against the local `test` index
# and print the JSON response with one-space indentation.
response = requests.get(
    'http://localhost:9200/test/_search',
    json=query_usenc,
    timeout=60,  # requests applies no timeout by default; avoid hanging the kernel on a stalled server
)
print(json.dumps(response.json(), indent=1))

In [None]:
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer as Summarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

# Summarization settings: language used for the tokenizer, stemmer and
# stop-word list, and the number of sentences the summarizer should return.
LANGUAGE = "english"
SENTENCES_COUNT = 7

# Download NLTK's 'punkt' resource
# (presumably required by the sumy tokenizer — TODO confirm).
import nltk
nltk.download('punkt')

In [None]:
# Parse the article text and configure a LexRank summarizer for LANGUAGE.
parser = PlaintextParser.from_string(seq, Tokenizer(LANGUAGE))
stemmer = Stemmer(LANGUAGE)
summarizer = Summarizer(stemmer)
summarizer.stop_words = get_stop_words(LANGUAGE)

# Concatenate the selected sentences; each one is followed by a single space,
# so the resulting string ends with a trailing space.
top_sentences = summarizer(parser.document, SENTENCES_COUNT)
summarization = "".join(str(sentence) + " " for sentence in top_sentences)

In [None]:
# Display the extracted summary so it can be inspected before it is embedded.
summarization

In [None]:
# Embed the extractive summary with the mpnet and distilroberta encoders
# (in that order).
mpnet_embedding_extract, distilroberta_embedding_extract = (
    encoder.encode(summarization) for encoder in (mpnet, distilroberta)
)

In [None]:
# Elasticsearch request body: score every document (match_all) by
# 1.0 + cosine similarity between the summary's mpnet vector and the
# document's 'mpnet_embedding_extract' field; top 10 hits, `content_t` only.
query_mpnet_extract = {
    "size": 10,
    "_source": "content_t",
    "query": {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "source": "(1.0+cosineSimilarity(params.queryVector, 'mpnet_embedding_extract'))",
                "params": {"queryVector": mpnet_embedding_extract.tolist()},
            },
        },
    },
}

In [None]:
# Elasticsearch request body: score every document (match_all) by
# 1.0 + cosine similarity between the summary's distilroberta vector and the
# document's 'distilroberta_embedding_extract' field; top 10 hits,
# `content_t` only.
query_distilroberta_extract = {
    "size": 10,
    "_source": "content_t",
    "query": {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "source": "(1.0+cosineSimilarity(params.queryVector, 'distilroberta_embedding_extract'))",
                "params": {"queryVector": distilroberta_embedding_extract.tolist()},
            },
        },
    },
}

In [None]:
# Run the summary-based mpnet similarity search against the local `test`
# index and print the JSON response with one-space indentation.
response = requests.get(
    'http://localhost:9200/test/_search',
    json=query_mpnet_extract,
    timeout=60,  # requests applies no timeout by default; avoid hanging the kernel on a stalled server
)
print(json.dumps(response.json(), indent=1))

In [None]:
# Run the summary-based distilroberta similarity search against the local
# `test` index and print the JSON response with one-space indentation.
response = requests.get(
    'http://localhost:9200/test/_search',
    json=query_distilroberta_extract,
    timeout=60,  # requests applies no timeout by default; avoid hanging the kernel on a stalled server
)
print(json.dumps(response.json(), indent=1))