In [2]:
import os

from IPython.display import display, Markdown
from pprint import pprint
import pandas as pd
from fuzzywuzzy import fuzz

from src.urls import cbc_urls
from src.scraping import extract_cbc_article_info
from src.nlp import nlp_analysis

In [3]:
# Show the list of CBC article URLs imported from src.urls; these are the
# inputs to the scraping step below.
cbc_urls

['https://www.cbc.ca/news/business/starbucks-greener-cup-1.5063861',
 'https://www.cbc.ca/news/business/rogers-media-magazines-1.5064054',
 'https://www.cbc.ca/news/business/budget-cmhc-home-buyers-1.5063204',
 'https://www.cbc.ca/news/business/eu-regulators-fine-google-online-ads-1.5063806',
 'https://www.cbc.ca/news/canada/nova-scotia/air-canada-max-8s-grounded-july-1-1.5062354',
 'https://www.cbc.ca/news/business/shoppers-drug-mart-superstore-self-checkout-loblaw-1.5056800',
 'https://www.cbc.ca/news/business/volkswagen-charged-with-defrauding-investors-1.5058925',
 'https://www.cbc.ca/news/technology/facebook-instagram-outage-cause-1.5056807']

### Scrape CBC Website for Articles and Perform NLP

In [4]:
# Build one flat record per CBC URL: a positional id, the scraped fields and
# the NLP results. Later keys overwrite earlier ones on a name clash, so the
# NLP output wins over the scraped fields, which win over 'article_id'.
articles = []
for position, url in enumerate(cbc_urls):
    scraped_fields = extract_cbc_article_info(url)
    # Run the analysis before assembling the record so both mappings are
    # merged only after the NLP step has finished.
    nlp_fields = nlp_analysis(scraped_fields)

    record = {'article_id': position}
    record.update(scraped_fields)
    record.update(nlp_fields)
    articles.append(record)


In [5]:
# Pretty-print every article record (including the full scraped text) to
# inspect what the scraping and NLP steps produced; the output is long.
pprint(articles)

[{'article': ' Starbucks\xa0announced Wednesday\xa0it will pilot\xa0new '
             'greener to-go cups\xa0this year in Vancouver that will be both '
             'recyclable and compostable.\xa0 . Vancouver will join New York, '
             'San Francisco, Seattle and London to trial different cup options '
             'that will be\xa0chosen from the NextGen Cup Challenge winners '
             'that were announced earlier this month. . "We know how important '
             'this issue is to Canadians," said Michael Conway, executive vice '
             'president and president of Starbucks Canada in a media release. '
             '"We\'re committed to being a part of the solution.\xa0I\'m '
             'excited and proud that our customers in Vancouver will be among '
             'the first to sip coffee from a greener to-go cup." . In addition '
             'to the greener cups, the coffee company will roll out new '
             'recyclable strawless lids to stores across

### Correlate with Investment Advisor (IA) Holdings

In [6]:
# Load the investment advisor holdings table from the project's internal data
# folder. The path is assembled with os.path.join so it is relative to the
# current working directory and uses the platform's path separator.
holdings_csv_path = os.path.join('data', 'internal', 'investment_advisor_holdings.csv')
investment_advisor_holdings = pd.read_csv(holdings_csv_path)

In [7]:
# Display the loaded holdings table (investment_advisor, security_name,
# security_id, market_value) as a sanity check before matching.
investment_advisor_holdings

Unnamed: 0,investment_advisor,security_name,security_id,market_value
0,Greg Warren,Alphabet Inc,GOOGL,1429000
1,Greg Warren,"Netflix, Inc.",NFLX,2353000
2,Greg Warren,Air Canada,AC,154000
3,Greg Warren,Royal Bank of Canada,RY,4649000
4,Greg Warren,Suncor Energy Inc.,SU,1999000
5,Greg Warren,Volkswagen AG,VOW3,991000
6,Greg Warren,"Amazon.com, Inc.",AMZN,743000
7,Greg Warren,Toronto-Dominion Bank,TD,3703000
8,Greg Warren,Kraft Heinz Co,KHC,4135000
9,Michelle Burns,"Facebook, Inc.",FB,928000


In [8]:
# Minimum fuzz.token_set_ratio score at which an organization mentioned in an
# article is treated as referring to one of the advisor's holdings.
ORG_MATCH_THRESHOLD = 80

# For every investment advisor, collect the ids of the articles that mention
# at least one organization fuzzily matching one of the advisor's securities.
# Result: a list of {'investment_advisor': name, 'matched_articles': [ids]}.
IA_articles = []

investment_advisors = investment_advisor_holdings['investment_advisor'].unique()
for ia in investment_advisors:
    ia_holdings = investment_advisor_holdings[
        investment_advisor_holdings['investment_advisor'] == ia]
    # The advisor's distinct security names do not change per article, so
    # compute them once here instead of once per organization of every article.
    security_names = ia_holdings['security_name'].unique()

    # An article is recommended at most once per advisor: any() stops at the
    # first organization/security pair that reaches the threshold.
    # Each entry of article['organizations'] is a sequence whose first element
    # is the organization name; only that element is compared.
    articles_matched = [
        article['article_id']
        for article in articles
        if any(
            fuzz.token_set_ratio(org_entry[0], security_name) >= ORG_MATCH_THRESHOLD
            for org_entry in article['organizations']
            for security_name in security_names
        )
    ]

    IA_articles.append({
        'investment_advisor': ia,
        'matched_articles': articles_matched
    })

In [9]:
# Inspect the matching result: one entry per advisor with the ids of the
# articles that matched at least one of their holdings.
IA_articles

[{'investment_advisor': 'Greg Warren', 'matched_articles': [2, 3, 4, 6, 7]},
 {'investment_advisor': 'Michelle Burns', 'matched_articles': [1, 5, 6, 7]}]

### Generate Markdown Recommendations per Investment Advisor

In [10]:
# Render a markdown digest for each advisor: a heading, the advisor's holdings,
# and then title / summary / organizations / link for every matched article,
# in the order the articles were scraped.
for ia_article in IA_articles:
    ia_name = ia_article['investment_advisor']
    advisor_rows = investment_advisor_holdings['investment_advisor'] == ia_name
    ia_holdings = investment_advisor_holdings[advisor_rows]

    display(Markdown(f"# Daily recommended articles for {ia_name}"))
    holdings_text = ", ".join(ia_holdings["security_name"].tolist())
    display(Markdown(f'**Holdings**: *{holdings_text}*'))

    matched_articles = ia_article['matched_articles']
    for article in articles:
        # Skip articles that were not matched to this advisor.
        if article['article_id'] not in matched_articles:
            continue

        # Keep only the name from each (organization, rank) pair.
        orgs = [name for name, _rank in article['organizations']]
        orgs_text = ', '.join(orgs)

        display(Markdown(f"### {article['title']}"))
        summary_text = ' '.join(article['pagerank_summary'])
        display(Markdown(f"**Summary**: {summary_text}"))
        display(Markdown(f"**Organizations**: {orgs_text}"))
        display(Markdown(f"**Read more**: {article['url']}"))
  

# Daily recommended articles for Greg Warren

**Holdings**: *Alphabet Inc, Netflix, Inc., Air Canada, Royal Bank of Canada, Suncor Energy Inc., Volkswagen AG, Amazon.com, Inc., Toronto-Dominion Bank, Kraft Heinz Co*

### Buying a home? CMHC could soon kick in 10% of the cost — for a price

**Summary**: Worse still, he says if it's done poorly Wright said the program has the potential to undo some of the sensible market cooling measures Ottawa has implemented in recent years: capping loan terms, setting minimum down payment levels and introducing mortgage 'stress tests' last year. The budget is far from clear on how much the buyer would owe; is it the same dollar amount the CMHC provided up front, or does the bill go up based on how much the house has appreciated in value? In addition to those stipulations, the program caps out at four times the applicant's annual income, which means it can only help homeowners looking to buy properties where the mortgage value plus the CMHC loan don't exceed $480,000.

**Organizations**: CMHC, RRSP, the First Time Home Buyer Incentive, the Canada Mortgage and Housing Corporation, Crown, Deloitte, the Royal Bank of Canada, the Canadian Centre for Policy Alternatives, Options For Homes, the Home Buyer's Plan

**Read more**: https://www.cbc.ca/news/business/budget-cmhc-home-buyers-1.5063204

### EU regulators fine Google $1.68B US for abusing online ads market

**Summary**: Microsoft filed an EU antitrust complaint about the service in 2009 and the EU Commission formally launched its probe in 2016, although it said at the time that Google had already made some changes to allow affected customers more freedom to show competing ads. "Today's decision is about how Google abused its dominance to stop websites using brokers other than the AdSense platform," Vestager said. The commission found Google and its parent company, Alphabet, breached EU antitrust rules by imposing restrictive clauses in contracts with websites that used AdSense, preventing Google rivals from placing their ads on these sites.

**Organizations**: Google, EU, European Union, Alphabet, Microsoft, the EU Commission, Android

**Read more**: https://www.cbc.ca/news/business/eu-regulators-fine-google-online-ads-1.5063806

### Air Canada grounds Boeing Max 8s until at least July 1

**Summary**: On Tuesday, Transport Canada said it would send a team to assist the U.S. Federal Aviation Administration in evaluating proposed design changes to update the software on the grounded Max jets. Originally, she was told she'd have to pay a fee to change her flight, but after the planes were grounded, she was offered the change for free, though her flight will now include a stopover in Toronto. The changes come on the heels of Transport Canada's decision to close Canadian airspace to the aircraft after a Max 8 jet operated by Ethiopian Airlines crashed on March 10, killing all 157 people on board, including 18 Canadians.

**Organizations**: Air Canada, FAA, WestJet, Transport Canada, Boeing, Transport Canada's, Ethiopian Airlines, Calif. Customers, Air Transat, Ann de Ste Croix

**Read more**: https://www.cbc.ca/news/canada/nova-scotia/air-canada-max-8s-grounded-july-1-1.5062354

### U.S. regulators charge Volkswagen, former CEO with defrauding investors

**Summary**: The charges from the U.S. Securities and Exchange Commission come two years after the German automaker settled with the U.S. over criminal and civil charges, as the company tries to distance itself from one if its darkest eras. In 2016 the Justice Department sued Volkswagen over the emissions-cheating software and the Federal Trade Commission sued the company, saying it made false claims in commercials promoting its "Clean Diesel" vehicles as environmentally friendly. The SEC said that between April 2014 and May 2015, Volkswagen issued more than $13 billion US in bonds and asset-backed securities in U.S. markets when senior executives knew that more than 500,000 vehicles in the country grossly exceeded legal vehicle emissions limits.

**Organizations**: Volkswagen, SEC, Winterkorn, the U.S. Securities and Exchange Commission, the Environmental Protection Agency, the Justice Department, the Federal Trade Commission, the Department of Justice, District Court, Volkswagen AG

**Read more**: https://www.cbc.ca/news/business/volkswagen-charged-with-defrauding-investors-1.5058925

### Facebook blames long outage on 'server configuration change'

**Summary**: It also said it was considering whether to refund advertisers for lost exposure due to the problems, which internet outage trackers showed affected users in Europe, Japan, and North and South America.  Facebook Inc. said on Thursday it had restored service to its main app and Instagram, after the world's largest social network suffered a major outage that frustrated users across the globe for about 24 hours. Media reports earlier said millions of users were affected, and thousands took to Twitter on Wednesday and Thursday to complain under the hashtag #facebookdown.

**Organizations**: Facebook, Facebook Inc., Twitter, DownDetector, BBC, Reuters, the New York Times, Amazon.com Inc., Apple Inc., the U.S. Federal Trade Commission

**Read more**: https://www.cbc.ca/news/technology/facebook-instagram-outage-cause-1.5056807

# Daily recommended articles for Michelle Burns

**Holdings**: *Facebook, Inc. , Loblaw Companies Ltd, Rogers Communications Inc, Freshii Inc, Tesla Inc, Restaurant Brands International Inc, Volkswagen AG*

### Rogers Media sells Maclean's, Chatelaine and other magazines to Toronto Life publisher

**Summary**: St Joseph's media calls itself Canada's biggest privately owned print, media and communications company, and their best known product is likely the magazine Toronto Life.  Rogers Media has struck a deal with the company that publishes Toronto Life to sell the company's remaining magazine brands for an undisclosed sum. The company says it plans to develop and grow the magazine brands "that Canadians have come to know and love."

**Organizations**: Toronto Life, Rogers Media, Rogers, St. Joseph Communications, Maclean's, Rogers Media Publishing, Maclean-Hunter, MoneySense, Ratehub Inc., FASHION Magazine

**Read more**: https://www.cbc.ca/news/business/rogers-media-magazines-1.5064054

### Superstore, Shoppers Drug Mart customers say they were forced to use self-checkout

**Summary**: "They're forcing me to use it and I don't think that's fair," said Linda Chaikowski, who was directed to self-checkout last week at a Shoppers in Winnipeg. "I find it distasteful that I can't even get that basic level of customer service," said Kaye, who grudgingly used the self-checkout machine to pay for his purchase. The two Walmart customers said their store now appears to have backtracked on not providing cashiers at certain hours.

**Organizations**: Superstore, Shoppers Drug Mart, Real Canadian Superstore, CBC News, Walmart, Loblaw Co., Loblaws, Walmart Canada, Winder, the Retail Advisors Network

**Read more**: https://www.cbc.ca/news/business/shoppers-drug-mart-superstore-self-checkout-loblaw-1.5056800

### U.S. regulators charge Volkswagen, former CEO with defrauding investors

**Summary**: The charges from the U.S. Securities and Exchange Commission come two years after the German automaker settled with the U.S. over criminal and civil charges, as the company tries to distance itself from one if its darkest eras. In 2016 the Justice Department sued Volkswagen over the emissions-cheating software and the Federal Trade Commission sued the company, saying it made false claims in commercials promoting its "Clean Diesel" vehicles as environmentally friendly. The SEC said that between April 2014 and May 2015, Volkswagen issued more than $13 billion US in bonds and asset-backed securities in U.S. markets when senior executives knew that more than 500,000 vehicles in the country grossly exceeded legal vehicle emissions limits.

**Organizations**: Volkswagen, SEC, Winterkorn, the U.S. Securities and Exchange Commission, the Environmental Protection Agency, the Justice Department, the Federal Trade Commission, the Department of Justice, District Court, Volkswagen AG

**Read more**: https://www.cbc.ca/news/business/volkswagen-charged-with-defrauding-investors-1.5058925

### Facebook blames long outage on 'server configuration change'

**Summary**: It also said it was considering whether to refund advertisers for lost exposure due to the problems, which internet outage trackers showed affected users in Europe, Japan, and North and South America.  Facebook Inc. said on Thursday it had restored service to its main app and Instagram, after the world's largest social network suffered a major outage that frustrated users across the globe for about 24 hours. Media reports earlier said millions of users were affected, and thousands took to Twitter on Wednesday and Thursday to complain under the hashtag #facebookdown.

**Organizations**: Facebook, Facebook Inc., Twitter, DownDetector, BBC, Reuters, the New York Times, Amazon.com Inc., Apple Inc., the U.S. Federal Trade Commission

**Read more**: https://www.cbc.ca/news/technology/facebook-instagram-outage-cause-1.5056807