# Imports

In [1]:
import os
import json
import numpy as np
from pathlib import Path
from qbsum.corpora import NewsCorpus
from qbsum.summarizers import MMR

# Setup

In [2]:
# Resolve the corpus directories listed in directories.json relative to the
# current working directory, then build the corpus object from them.
# The JSON file is read as UTF-8 explicitly so that parsing does not depend
# on the platform's default locale encoding.
with open('directories.json', encoding='utf-8') as f:
    directories = json.load(f)

baseDir = Path.cwd()

# Key into directories.json. It has its own name so that `corpus` only ever
# refers to the loaded NewsCorpus object, never to this string.
corpusName = 'news'
documentsDir = baseDir / directories[corpusName]['documents']
queriesDir = baseDir / directories[corpusName]['queries']
referencesDir = baseDir / directories[corpusName]['references']

# Load the test corpus:
corpus = NewsCorpus(documentsDir, queriesDir, referencesDir)

In [3]:
# Summarizer under test, constructed with no arguments.
mmr = MMR()

# Only the first document set is used in the tests below.
documentSets = corpus.document_sets()
docs = documentSets[0]

# Element [0][0] of corpus.references(); presumably the first reference
# summary for the first document set -- confirm against NewsCorpus.
# It is later joined with spaces, so it is an iterable of strings.
referenceGroups = corpus.references()
reference = referenceGroups[0][0]

# Test 1

How different can the query be from the reference's lead sentence before that sentence is no longer the first one chosen?

In [4]:
# Test 1 settings: lambda value, maximum summary length, and the query.
# NOTE(review): `lda` is only printed here and is not passed to
# mmr.summarize(...) in the following cell -- confirm the summarizer
# actually runs with this value.
lda, L = 1.0, 50
query01 = "Bombardier Inc (BBDb.TO) said would sell two businesses million cut jobs it expands divisions company shares fell as much as percent disappointing cash flow forecast"

# Echo the settings next to the reference text for comparison with the summary.
for label, value in (
    ("LAMBDA:", lda),
    ("QUERY:", query01),
    ("REFERENCE:", " ".join(reference)),
):
    print(label, value)

LAMBDA: 1.0
QUERY: Bombardier Inc (BBDb.TO) said would sell two businesses million cut jobs it expands divisions company shares fell as much as percent disappointing cash flow forecast
REFERENCE: Bombardier Inc (BBDb.TO) said on Thursday it would sell two businesses for around $900 million (£689 million) and cut 5,000 jobs and as it expands its stronger corporate jet and rail divisions, but the company's shares fell as much as 26 percent on a disappointing free cash flow forecast. "2018 free cash flow was lower than my expectations," said Morningstar analyst Chris Higgins. Jamie Koutsoukis, a senior analyst at Moody's, said by email she believes Bombardier has "an uncertain ability to generate positive free cash flow in 2019."


In [5]:
# Summarize the document set for query01, with max_length set to L.
# NOTE(review): `lda` is not passed here; confirm which lambda value
# mmr.summarize actually uses.
summary = mmr.summarize(docs, query01, max_length=L)
summaryText = " ".join(summary)
print("SUMMARY:", summaryText)

SUMMARY: (Reuters) - Bombardier Inc (BBDb.TO) said on Thursday it would sell two businesses for around $900 million (£689 million) and cut 5,000 jobs and as it expands its stronger corporate jet and rail divisions, but the company's shares fell as much as 26 percent on a disappointing free cash flow forecast.


# Test 2

How long must the summary be before it includes the correct answer when using a normal query?

In [9]:
# Test 2 settings: same lambda value, a larger maximum summary length,
# and a short query.
# NOTE(review): `lda` is only printed here and is not passed to
# mmr.summarize(...) in the following cell -- confirm the summarizer
# actually runs with this value.
lda, L = 1.0, 500
query02 = "Bombardier (BBDb.TO) shares fell disappointing"

# Echo the settings next to the reference text for comparison with the summary.
for label, value in (
    ("LAMBDA:", lda),
    ("QUERY:", query02),
    ("REFERENCE:", " ".join(reference)),
):
    print(label, value)

LAMBDA: 1.0
QUERY: Bombardier (BBDb.TO) shares fell disappointing
REFERENCE: Bombardier Inc (BBDb.TO) said on Thursday it would sell two businesses for around $900 million (£689 million) and cut 5,000 jobs and as it expands its stronger corporate jet and rail divisions, but the company's shares fell as much as 26 percent on a disappointing free cash flow forecast. "2018 free cash flow was lower than my expectations," said Morningstar analyst Chris Higgins. Jamie Koutsoukis, a senior analyst at Moody's, said by email she believes Bombardier has "an uncertain ability to generate positive free cash flow in 2019."


In [10]:
# Summarize the document set for query02, with max_length set to L.
# NOTE(review): `lda` is not passed here; confirm which lambda value
# mmr.summarize actually uses.
summary = mmr.summarize(docs, query02, max_length=L)
summaryText = " ".join(summary)
print("SUMMARY:", summaryText)

SUMMARY: (9)	Ratio of new orders received over aircraft deliveries, in units, excluding C Series aircraft orders and deliveries. Reference to generally accepted accounting principles (GAAP) means IFRS, unless indicated otherwise. 2018 guidance(3) updated: Revenues ~$16.5B; EBIT(1) ~$1B; and free cash flow(2) breakeven ±$150M, including net proceeds from Downsview sale (1)	Earnings and EBIT refer to EBIT before special items. Non-GAAP financial measures. * Questions raised over absence of C919 narrowbody jet (Updates with analyst comment) MONTRÉAL, Nov. 08, 2018 (GLOBE NEWSWIRE) -- Bombardier (BBD-B.TO) today reported its third quarter 2018 results marked by strong earnings growth. Group communications coordinator ZHUHAI, China, Nov 8 (Reuters) - China paraded industrial and military clout and sent deliberate signals to Western rivals that its aerospace and arms industries aim to catch them up on world markets, while playing down trade tensions at the nation's largest air show this week