In [1]:
from __future__ import unicode_literals, print_function
import spacy
from spacy import displacy

def load_doc(path):
    with open(path, 'r') as doc:
        return doc.read()

def write_to_file(filename, sentences):
    output = open(filename, 'w')
    for s in sentences:
        output.write(s)
    output.close()

nlp = spacy.load('en_core_web_lg')

In [2]:
# Use regex to clean useless lines
import re

text = load_doc('data/zoom_mda.txt')

# 1. Remove newlines that are truncations of a sentence
text = re.sub(r'([^\n])(\n)([^\n])', r'\1 \3', text)

# 2. Remove page breaks
text = re.sub(r'[0-9\n]+[\* ]+[\n]+\#\#\#\#\# Table of Contents', '', text)

# 3. Remove multiple linebreaks
text = re.sub(r'[\n]+', '\n', text)

# 4. Convert single linebreak into whitespace (this removes paragraph boundaries)
text = re.sub('\n', ' ', text)

# print(text)

In [3]:
# Load text into spacy
doc = nlp(text)

In [4]:
def has_subject(sent):
    # Returns true if complete sentence
    for tok in sent:
        if tok.dep_ in ('subj', 'nsubj'):
            return True
    return False
    
def has_metric(sent):
    # Returns true of sentence contains metric
    for ent in sent.ents:
        if ent.label_ in ('CARDINAL', 'PERCENT', 'MONEY', 'DATE', 'TIME'):
            return True
    return False

s = []

for sent in doc.sents:       
    if not has_subject(sent):    
        continue
        
    if has_metric(sent):
        s.append(sent)
        print(sent)

The last day of our fiscal year is January 31.
Our fiscal quarters end on April 30, July 31, October 31 and January 31.
We connect people through frictionless video, voice, chat and content sharing and enable face-to-face video experiences for thousands of people in a single meeting across disparate devices and locations.
The positive impact we have on our customers and hosts is reflected in our average customer Net Promoter Score (NPS), which was over 70 in 2018.
NPS, which can range from a low of -100 to a high of +100, measures the willingness of customers to recommend a companys products or services to others and is used as a proxy for gauging customers overall satisfaction with a companys product or service and the customers loyalty to the brand.
Our architecture can support tens of thousands of video participants in a single meeting.
Our 13 co-located data centers located worldwide enable us to provide both high-quality and high- definition, real-time video to our customers e

During the fiscal years ended January 31, 2017 and 2018, we repurchased 4,000,000 and 1,365,800 shares of our Series A convertible preferred stock from certain stockholders for an aggregate amount of $15.0 million and $4.6 million, respectively, and subsequently retired such shares.
We believe our existing cash, cash equivalents and marketable securities, together with cash provided by operations, will be sufficient to meet our needs for at least the next 12 months.
**Year Ended January  31,
Net cash provided by operating activities of $9.4 million for the fiscal year ended January 31, 2017 was primarily due to non-cash charges for depreciation and amortization of $1.2 million, amortization of deferred contract acquisition costs, primarily commissions, of $3.1 million, stock-based compensation of $1.0 million and provision for accounts receivable allowances of $0.3 million.
Changes in operating assets and liabilities were favorable to cash flows from operations by $3.7 million primaril

In [5]:
# Let's see how the NER engine does
for i in range(50):
    displacy.render(s[i], style="ent", jupyter=True)