In [1]:
import os

import pandas as pd
from fuzzywuzzy import fuzz

from src.scraping import extract_cnbc_article_info
from src.nlp import nlp_analysis
from src.urls import article_urls

### Get Scrape CNIB Website for Articles

In [30]:
article_list = []
for article_id, article_url in enumerate(article_urls):
    article_text = extract_cnbc_article_info(article_url)
    article_nlp = nlp_analysis(article_text)

    article_list.append({
        **article_text, 
        **article_nlp, 
        'url': article_url, 
        'article_id': article_id
    })


In [10]:
article_list[2]

{'title': "We'll go after big tech if necessary because DC is doing nothing: Arizona attorney general",
 'reporter': 'Michelle Fox',
 'summary': ['Arizona Attorney General Mark Brnovich says he and other state AGs are willing to go after tech giants who "dominate market share."',
  '"We as state AGs we are taking a look at maybe whether we should do something and if so what should be done," he says.',
  'They are doing so because of the "inaction or inability" of Washington D.C. to do anything, he says.'],
 'article': '\nArizona Attorney General Mark Brnovich told CNBC on Friday he is prepared to go after big tech companies.\n\nAnd he\'s not alone.\n"When you have these tech companies dominate the market share, they essentially are akin to the monopolies of old," Brnovich said on "Closing Bell." \n\n"We as state AGs we are taking a look at maybe whether we should do something and if so what should be done."\nBrnovich is one of several state attorneys general who spoke recently to the W

### Correlate with IA Holdings

In [11]:
investment_advisor_holdings = pd.read_csv(os.path.join('data', 'internal', 'investment_advisor_holdings.csv'))

In [12]:
investment_advisor_holdings

Unnamed: 0,investment_advisor,security_name,security_id,market_value
0,Greg Warren,Alphabet Inc,GOOGL,1429000
1,Greg Warren,"Netflix, Inc.",NFLX,2353000
2,Greg Warren,Enbridge Inc,ENB,154000
3,Greg Warren,Royal Bank of Canada,RY,4649000
4,Greg Warren,Suncor Energy Inc.,SU,1999000
5,Greg Warren,Volkswagen AG,VOW3,991000
6,Greg Warren,"Amazon.com, Inc.",AMZN,743000
7,Greg Warren,Toronto-Dominion Bank,TD,3703000
8,Greg Warren,Kraft Heinz Co,KHC,4135000
9,Michelle Burns,"Facebook, Inc.",FB,928000


In [13]:
IA_articles = []

investment_advisors = investment_advisor_holdings['investment_advisor'].unique()
for ia in investment_advisors:
    articles_matched = []
    
    ia_holdings = investment_advisor_holdings[investment_advisor_holdings['investment_advisor']==ia]

    for article in article_list:
        article_match = False
        article_id = article['article_id']
        organizations = [orgs[0] for orgs in article['organizations']]
        for org in organizations:
            for security_name in ia_holdings['security_name'].unique():
                org_sim = fuzz.token_set_ratio(org, security_name)
                if org_sim > 80:
                    articles_matched.append(article_id)
                    article_match = True
                    break
            if article_match:
                break
    IA_articles.append({
        'investment_advisor': ia,
        'matched_articles': articles_matched
    })
     
IA_articles

[{'investment_advisor': 'Greg Warren', 'matched_articles': [0, 3, 4, 5, 6]},
 {'investment_advisor': 'Michelle Burns', 'matched_articles': [1, 2, 3, 6]}]

In [36]:
for ia_article in IA_articles:
    print('Investment Advisor:', ia_article['investment_advisor'])
    print()
    print('# Daily articles')
    matched_articles = ia_article['matched_articles']
    for article in article_list:
        if article['article_id'] in matched_articles:
            print()
            print("##", article['title'])
            print()
            print('Summary:', ' '.join(article['pagerank_summary']))
            print()
            print("Read more:", article['url'])
    print('\n---\n')

Investment Advisor: Greg Warren

# Daily articles

## Steven Spielberg reportedly met with Netflix's Ted Sarandos, sparking hopes for Academy Awards' truce

Summary: Earlier this week, media mogul Jeffrey Katzenberg said that Spielberg had no plan to campaign against Netflix, saying at the South By Southwest conference in Texas that "he is not going to the academy in April with some sort of plan," according to a report by the Hollywood Reporter. Those comments have led some to believe that Spielberg was ready to wage war on platforms like Netflix, especially after the streaming service's film "Roma" took home three Oscars last month. However, Spielberg reportedly met with Netflix's Chief Content Officer Ted Sarandos earlier this week, a sign that, perhaps, these rumors of discord are just that, rumors.

Read more: https://www.cnbc.com/2019/03/15/spielberg-and-netflixs-sarandos-meet-sparking-hope-for-oscars-truce.html

## Facebook stock falls after the executive in charge of all its pro