# 1. Install and Import Baseline Dependencies

In [3]:
#%pip install transformers



In [4]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
from bs4 import BeautifulSoup
import requests

# 2. Setup Summarization Model

In [5]:
# Checkpoint name handed to from_pretrained for both the tokenizer and the model.
model_name = "human-centered-summarization/financial-summarization-pegasus"
# Tokenizer and model are loaded from the same checkpoint name; both are reused
# as notebook-level globals by the summarization cells further down.
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

# 3. Summarize a Single Article

In [6]:
# Download one Yahoo Finance article and collect every <p> element from it.
url = "https://au.finance.yahoo.com/news/china-restricting-tesla-use-uncovers-a-significant-challenge-for-elon-musk-expert-161921664.html"
# Without a timeout requests waits indefinitely on an unresponsive server,
# which would freeze the notebook on this cell.
r = requests.get(url, timeout=30)
soup = BeautifulSoup(r.text, 'html.parser')
paragraphs = soup.find_all('p')

In [13]:
# Peek at the text of the first <p> element to see what the scrape returned.
paragraphs[0].text

"Stay in the loop every day with Yahoo Finance's free Fully Briefed newsletter."

In [26]:
# Text content of every scraped paragraph, in page order.
text = [paragraph.text for paragraph in paragraphs]
# Join into one string, split on single spaces, and keep the first 400 pieces
# (presumably to keep the model input short - confirm against the model limit).
words = ' '.join(text).split(' ')[:400]
# Re-assemble the capped word list into the string that gets summarized.
ARTICLE = ' '.join(words)

In [27]:
# Display the word-capped article text that will be fed to the summarizer.
ARTICLE

'Stay in the loop every day with Yahoo Finance\'s free Fully Briefed newsletter. Renewed political tensions between the U.S. and China — which came to light this week as the Biden administration sat down with their Chinese counterparts for the first time to discuss a range of issues — could ensnarl vehicle maker Tesla (TSLA), which has pushed successfully into China in recent years. In fact, the heightened rhetoric between the two super economic superpowers may have already had blowback on Elon Musk\'s electric car company. The Chinese government is restricting the use of Tesla vehicles by military staff and employees of vital state-owned companies, The Wall Street Journal reported Friday. Chinese officials reportedly have concerns that Tesla\'s cars — outfitted with various data collecting capabilities — could serve as leakers of national security secrets. "I have been saying for months now that Tesla\'s level of integration of their business, of their research, of their sort of geo-t

In [28]:
# Encode the article as a PyTorch tensor of token ids. The 400-word cap above
# counts words, not tokens, so the token sequence is additionally truncated to
# the number of positions the model is configured for.
input_ids = tokenizer.encode(
    ARTICLE,
    return_tensors='pt',
    truncation=True,
    max_length=model.config.max_position_embeddings,
)
# Beam search with 5 beams, summary capped at 55 tokens.
output = model.generate(input_ids, max_length=55, num_beams=5, early_stopping=True)
# Decode the first returned sequence back to text, dropping special tokens.
summary = tokenizer.decode(output[0], skip_special_tokens=True)

In [29]:
# Display the generated summary for the single article.
summary

'Tensions between U.S. and China may have already had blowback. Tesla has been successful in China in recent years'

# 4. Building a News and Sentiment Pipeline

In [30]:
# Symbols the pipeline runs for; each one is searched, scraped, summarized and scored.
monitored_tickers = ['GME', 'TSLA', 'BTC']

## 4.1. Search for Stock News using Google and Yahoo Finance

In [43]:
def search_for_stock_news_urls(ticker):
    """Run a Google News search for a ticker and return every link on the page.

    The query is "yahoo finance <ticker>" restricted to the news tab
    (``tbm=nws``). The result is the raw, unfiltered list of href values.

    Args:
        ticker: Symbol to search for, e.g. 'GME'.

    Returns:
        List of href strings, in document order, for every <a> tag that
        actually carries an href attribute.
    """
    # Passing the query through `params` lets requests URL-encode it, so
    # tickers containing characters such as '^' or '&' still form a valid URL.
    r = requests.get(
        "https://www.google.com/search",
        params={"q": "yahoo finance {}".format(ticker), "tbm": "nws"},
        timeout=30,  # do not hang the notebook on an unresponsive server
    )
    soup = BeautifulSoup(r.text, 'html.parser')
    # href=True skips anchors without an href; indexing those with
    # link['href'] would raise KeyError.
    atags = soup.find_all('a', href=True)
    return [link['href'] for link in atags]

In [46]:
# One Google News lookup per monitored ticker: {ticker: [href, ...]}.
raw_urls = {
    symbol: search_for_stock_news_urls(symbol)
    for symbol in monitored_tickers
}
raw_urls

{'GME': ['/?sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQOwgC',
  '/?output=search&ie=UTF-8&tbm=nws&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQPAgE',
  '/search?q=yahoo+finance+GME&tbm=nws&ie=UTF-8&gbv=1&sei=c7NWYK2LGYPSxgPu4aOYDQ',
  '/search?q=yahoo+finance+GME&ie=UTF-8&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUIBygA',
  '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=shop&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUICSgC',
  '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=vid&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUICigD',
  '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=isch&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUICygE',
  'https://maps.google.com/maps?q=yahoo+finance+GME&um=1&ie=UTF-8&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUIDCgF',
  '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=bks&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUIDSgG',
  '/advanced_search',
  '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=nws&source=lnt&tbs=qdr:h&sa

In [52]:
# Inspect the unfiltered hrefs collected for GME.
raw_urls['GME']

['/?sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQOwgC',
 '/?output=search&ie=UTF-8&tbm=nws&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQPAgE',
 '/search?q=yahoo+finance+GME&tbm=nws&ie=UTF-8&gbv=1&sei=c7NWYK2LGYPSxgPu4aOYDQ',
 '/search?q=yahoo+finance+GME&ie=UTF-8&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUIBygA',
 '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=shop&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUICSgC',
 '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=vid&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUICigD',
 '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=isch&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUICygE',
 'https://maps.google.com/maps?q=yahoo+finance+GME&um=1&ie=UTF-8&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUIDCgF',
 '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=bks&source=lnms&sa=X&ved=0ahUKEwjtkM-dr8DvAhUDqXEKHe7wCNMQ_AUIDSgG',
 '/advanced_search',
 '/search?q=yahoo+finance+GME&ie=UTF-8&tbm=nws&source=lnt&tbs=qdr:h&sa=X&ved=0ahUKEwjtkM

## 4.2. Strip out unwanted URLs

In [53]:
import re

In [54]:
# Substrings that mark a search-result href as navigation or boilerplate rather
# than an article. The last entry was misspelled 'googel.com' and so never
# matched any link.
exclude_list = ['maps', 'policies', 'preferences', 'accounts', 'support', 'google.com']

In [56]:
def strip_unwanted_urls(urls, exclude_list):
    """Extract clean article URLs from raw search-result hrefs.

    An href is kept only if it contains 'https://' and none of the substrings
    in ``exclude_list``. The embedded http(s) URL is then pulled out and cut at
    the first '&' to drop trailing query parameters.

    Args:
        urls: Iterable of href strings.
        exclude_list: Substrings; any href containing one of them is dropped.

    Returns:
        List of unique URLs in order of first appearance. The order is
        deterministic, so positions stay stable between runs.
    """
    url_pattern = re.compile(r'(https?://\S+)')
    unique_urls = {}  # dict keys give de-duplication while keeping insertion order
    for url in urls:
        if 'https://' not in url:
            continue
        if any(exclude_word in url for exclude_word in exclude_list):
            continue
        match = url_pattern.search(url)
        if match is None:
            # e.g. a bare 'https://' with nothing after it; nothing to extract
            continue
        unique_urls[match.group(1).split('&')[0]] = None
    return list(unique_urls)

In [59]:
# Filter each ticker's raw hrefs down to article URLs: {ticker: [url, ...]}.
cleaned_urls = {
    symbol: strip_unwanted_urls(raw_urls[symbol], exclude_list)
    for symbol in monitored_tickers
}
cleaned_urls

{'GME': ['https://finance.yahoo.com/news/gamestop-gme-dips-more-broader-224510835.html',
  'https://finance.yahoo.com/news/gamestop-2-460-roller-coaster-131733513.html',
  'https://au.finance.yahoo.com/news/gme-gamestop-surge-230431651.html',
  'https://finance.yahoo.com/news/gamestop-gme-stock-sinks-market-224510746.html',
  'https://au.finance.yahoo.com/news/we-should-see-the-gme-short-squeeze-continuing-s-3-partners-174542296.html',
  'https://au.finance.yahoo.com/news/gamestop-gme-earnings-expected-grow-163004341.html',
  'https://finance.yahoo.com/news/redditors-ryan-cohen-needs-help-183202679.html',
  'https://finance.yahoo.com/news/gamestop-bull-roaring-kitty-isnt-060454056.html',
  'https://finance.yahoo.com/news/bill-gross-says-made-10-184652396.html',
  'https://finance.yahoo.com/news/gamestop-gme-outpaces-stock-market-214509325.html'],
 'TSLA': ['https://au.finance.yahoo.com/news/china-restricting-tesla-use-uncovers-a-significant-challenge-for-elon-musk-expert-161921664.html

## 4.3. Search and Scrape Cleaned URLs

In [63]:
def scrape_and_process(URLs, max_words=350, timeout=30):
    """Download each URL and return its paragraph text, capped by word count.

    For every URL the page is fetched, the text of all <p> elements is joined
    with spaces, split on single spaces, and cut to the first ``max_words``
    pieces.

    Args:
        URLs: Iterable of page URLs to fetch.
        max_words: Maximum number of space-separated words kept per article.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        List of article strings, one per input URL, in the same order.

    Raises:
        requests.exceptions.RequestException: if a request fails or times out.
    """
    ARTICLES = []
    for url in URLs:
        # The timeout stops one unresponsive host from stalling the whole batch.
        r = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(r.text, 'html.parser')
        text = [paragraph.text for paragraph in soup.find_all('p')]
        words = ' '.join(text).split(' ')[:max_words]
        ARTICLES.append(' '.join(words))
    return ARTICLES

In [64]:
# Scrape every cleaned URL for every ticker: {ticker: [article_text, ...]}.
articles = {
    symbol: scrape_and_process(cleaned_urls[symbol])
    for symbol in monitored_tickers
}
articles

{'GME': ["GameStop (GME) closed at $118.18 in the latest trading session, marking a -1.84% move from the prior day. This move lagged the S&P 500's daily loss of 0.81%. At the same time, the Dow lost 0.46%, and the tech-heavy Nasdaq lost 1.69%. Heading into today, shares of the video game retailer had lost 68.7% over the past month, lagging the Retail-Wholesale sector's loss of 1.84% and the S&P 500's gain of 3.56% in that time. Investors will be hoping for strength from GME as it approaches its next earnings release. The company is expected to report EPS of $1.46, up 14.96% from the prior-year quarter. Our most recent consensus estimate is calling for quarterly revenue of $2.24 billion, up 2.19% from the year-ago period. Investors might also notice recent changes to analyst estimates for GME. These revisions typically reflect the latest short-term business trends, which can change frequently. With this in mind, we can consider positive estimate revisions a sign of optimism about the co

In [73]:
# Spot-check the third scraped TSLA article.
articles['TSLA'][2]

'Delta Electronics Inc., a producer of power components for Tesla Inc. (NASDAQ: TSLA) and Apple Inc. (NASDAQ: AAPL), has cut its workforce in China by about half, the Financial Times reported Thursday. What Happened: Yancey Hai, Delta’s chairman, told Financial Times in an interview that the company’s target is to cut its direct labor force in China by 90 percent. Delta is a Taiwanese electronics company that manufactures power components such as cooling fans for notebook computers, solar inverters and motors for electric cars. In 2019, Delta relocated the production of telecom power equipment to Thailand and Taiwan after the U.S. imposed a 25% tax on such goods made in China as part of the trade war, according to FT. The company is now reportedly building four large factories in India to make photovoltaic inverters, industrial automation equipment, and information technology and communications gear. Why It Matters: The fallout from the U.S.-China trade war and soaring production costs

## 4.4. Summarize All Articles

In [69]:
def summarize(articles):
    """Generate one summary per article with the notebook's Pegasus model.

    Uses the notebook-level ``tokenizer`` and ``model``. Each article is
    encoded, summarized with 5-beam search capped at 55 output tokens, and
    decoded with special tokens removed.

    Args:
        articles: Iterable of article strings.

    Returns:
        List of summary strings, one per article, in input order.
    """
    # Scraped articles are capped by words, not tokens, so cap the encoded
    # length at the number of positions the model is configured for.
    max_input_tokens = model.config.max_position_embeddings
    summaries = []
    for article in articles:
        input_ids = tokenizer.encode(
            article,
            return_tensors='pt',
            truncation=True,
            max_length=max_input_tokens,
        )
        output = model.generate(input_ids, max_length=55, num_beams=5, early_stopping=True)
        summaries.append(tokenizer.decode(output[0], skip_special_tokens=True))
    return summaries

In [74]:
# Summarize every scraped article for every ticker: {ticker: [summary, ...]}.
summaries = {
    symbol: summarize(articles[symbol])
    for symbol in monitored_tickers
}
summaries

{'GME': ['Video game retailer has lost 68.7% over the past month. Heading into today, shares of the video game retailer had lost 68.7% over the past month.',
  'Video-game retailer to report first-quarter results next week. Options market indicates volatility ahead of earnings',
  'Video game retailer’s shares surge again, but not as much as in January',
  'Video game retailer has gained 417.58% over the past month.',
  'Short interest fell to $2.76 billion on Wednesday. Dusaniwsky says short sellers ‘are waiting in the wings’',
  'Video game retailer is expected to report quarterly earnings of $1.46 per share.',
  'Video game retailer’s stock has lost more than half its value in a decade. Investors need to finish the revolution they started.',
  'Redditor ‘Roaring Kitty’ has been piling into the video game retailer.',
  'Bill Gross says he made about $10 million shorting shares. He’s still selling call options at $250 and $300',
  'Video game retailer has moved 320.11% in the past mon

In [77]:
# Inspect the generated summaries for BTC.
summaries['BTC']

['Bitcoin falls 6% after hitting new record high over the weekend. Ethereum drops 4.5% amid reports India could ban cryptocurrencies',
 'What is Bitcoin, is it a company stock, or is it a crypto asset?',
 '‘It’s not a great climate thing,’ he says. Coin uses more electricity per transaction than any other method',
 'Fed sees rates close to zero at least through 2023. Morgan Stanley sees cryptocurrency becoming an investable asset',
 'FundStrat sees crypto mining stocks outperforming Bitcoin. ‘Modern-age digital gold rush’ could continue, analyst says',
 'Bank of America says Bitcoin price can be moved 1% for just $93 million. Ethereum may Steal Bitcoin’s thunder before the bull run',
 'Analysts say stimulus cheques helped boost value. Bitcoin has tripled in value over last three months',
 'Bitcoin’s MVRV ratio is not at an extreme: Coin Metrics. Thai police use water cannon to break up protest',
 'Patrick Heusser sees rising wedge as sign of trend fatigue. Bitcoin has climbed from lows

# 5. Adding Sentiment Analysis

In [78]:
from transformers import pipeline
# Name the checkpoint explicitly instead of relying on the task's implicit
# default, so the labels stay reproducible if the library changes its default.
# NOTE(review): this is the SST-2 DistilBERT checkpoint the 'sentiment-analysis'
# task has defaulted to - confirm against the installed transformers version.
sentiment = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)

In [80]:
# Try the sentiment pipeline on the BTC summaries; it returns one
# {'label', 'score'} dict per input string.
sentiment(summaries['BTC'])

[{'label': 'NEGATIVE', 'score': 0.9995866417884827},
 {'label': 'NEGATIVE', 'score': 0.998802125453949},
 {'label': 'NEGATIVE', 'score': 0.9996466040611267},
 {'label': 'NEGATIVE', 'score': 0.9548832178115845},
 {'label': 'NEGATIVE', 'score': 0.9773658514022827},
 {'label': 'NEGATIVE', 'score': 0.9890510439872742},
 {'label': 'POSITIVE', 'score': 0.9615785479545593},
 {'label': 'NEGATIVE', 'score': 0.9898933172225952},
 {'label': 'NEGATIVE', 'score': 0.9990748763084412},
 {'label': 'NEGATIVE', 'score': 0.9585254192352295}]

In [97]:
# Score every summary for every ticker: {ticker: [{'label', 'score'}, ...]}.
scores = {
    symbol: sentiment(summaries[symbol])
    for symbol in monitored_tickers
}
scores

{'GME': [{'label': 'NEGATIVE', 'score': 0.9995605945587158},
  {'label': 'NEGATIVE', 'score': 0.9946243166923523},
  {'label': 'NEGATIVE', 'score': 0.9979138970375061},
  {'label': 'NEGATIVE', 'score': 0.5935250520706177},
  {'label': 'NEGATIVE', 'score': 0.9984415173530579},
  {'label': 'NEGATIVE', 'score': 0.9877721071243286},
  {'label': 'NEGATIVE', 'score': 0.9997414350509644},
  {'label': 'NEGATIVE', 'score': 0.9411824345588684},
  {'label': 'NEGATIVE', 'score': 0.9984729886054993},
  {'label': 'POSITIVE', 'score': 0.8099527359008789}],
 'TSLA': [{'label': 'POSITIVE', 'score': 0.9909717440605164},
  {'label': 'NEGATIVE', 'score': 0.9971327185630798},
  {'label': 'NEGATIVE', 'score': 0.9996765851974487},
  {'label': 'POSITIVE', 'score': 0.9839766025543213},
  {'label': 'POSITIVE', 'score': 0.9760032892227173},
  {'label': 'NEGATIVE', 'score': 0.9987760186195374},
  {'label': 'NEGATIVE', 'score': 0.9867169260978699},
  {'label': 'POSITIVE', 'score': 0.9996755123138428},
  {'label': 

In [95]:
# Spot-check one GME summary side by side with its predicted label and score.
print(summaries['GME'][3], scores['GME'][3]['label'], scores['GME'][3]['score'])

Video game retailer has gained 417.58% over the past month. NEGATIVE 0.5935250520706177


In [88]:
# Access pattern for a single confidence value: ticker -> index -> 'score'.
scores['BTC'][0]['score']

0.9995866417884827

# 6. Exporting Results to CSV

In [98]:
# Review the summaries dict before assembling the export rows.
summaries

{'GME': ['Video game retailer has lost 68.7% over the past month. Heading into today, shares of the video game retailer had lost 68.7% over the past month.',
  'Video-game retailer to report first-quarter results next week. Options market indicates volatility ahead of earnings',
  'Video game retailer’s shares surge again, but not as much as in January',
  'Video game retailer has gained 417.58% over the past month.',
  'Short interest fell to $2.76 billion on Wednesday. Dusaniwsky says short sellers ‘are waiting in the wings’',
  'Video game retailer is expected to report quarterly earnings of $1.46 per share.',
  'Video game retailer’s stock has lost more than half its value in a decade. Investors need to finish the revolution they started.',
  'Redditor ‘Roaring Kitty’ has been piling into the video game retailer.',
  'Bill Gross says he made about $10 million shorting shares. He’s still selling call options at $250 and $300',
  'Video game retailer has moved 320.11% in the past mon

In [99]:
# Review the sentiment results dict before assembling the export rows.
scores

{'GME': [{'label': 'NEGATIVE', 'score': 0.9995605945587158},
  {'label': 'NEGATIVE', 'score': 0.9946243166923523},
  {'label': 'NEGATIVE', 'score': 0.9979138970375061},
  {'label': 'NEGATIVE', 'score': 0.5935250520706177},
  {'label': 'NEGATIVE', 'score': 0.9984415173530579},
  {'label': 'NEGATIVE', 'score': 0.9877721071243286},
  {'label': 'NEGATIVE', 'score': 0.9997414350509644},
  {'label': 'NEGATIVE', 'score': 0.9411824345588684},
  {'label': 'NEGATIVE', 'score': 0.9984729886054993},
  {'label': 'POSITIVE', 'score': 0.8099527359008789}],
 'TSLA': [{'label': 'POSITIVE', 'score': 0.9909717440605164},
  {'label': 'NEGATIVE', 'score': 0.9971327185630798},
  {'label': 'NEGATIVE', 'score': 0.9996765851974487},
  {'label': 'POSITIVE', 'score': 0.9839766025543213},
  {'label': 'POSITIVE', 'score': 0.9760032892227173},
  {'label': 'NEGATIVE', 'score': 0.9987760186195374},
  {'label': 'NEGATIVE', 'score': 0.9867169260978699},
  {'label': 'POSITIVE', 'score': 0.9996755123138428},
  {'label': 

In [100]:
# Review the article URLs dict before assembling the export rows.
cleaned_urls

{'GME': ['https://finance.yahoo.com/news/gamestop-gme-dips-more-broader-224510835.html',
  'https://finance.yahoo.com/news/gamestop-2-460-roller-coaster-131733513.html',
  'https://au.finance.yahoo.com/news/gme-gamestop-surge-230431651.html',
  'https://finance.yahoo.com/news/gamestop-gme-stock-sinks-market-224510746.html',
  'https://au.finance.yahoo.com/news/we-should-see-the-gme-short-squeeze-continuing-s-3-partners-174542296.html',
  'https://au.finance.yahoo.com/news/gamestop-gme-earnings-expected-grow-163004341.html',
  'https://finance.yahoo.com/news/redditors-ryan-cohen-needs-help-183202679.html',
  'https://finance.yahoo.com/news/gamestop-bull-roaring-kitty-isnt-060454056.html',
  'https://finance.yahoo.com/news/bill-gross-says-made-10-184652396.html',
  'https://finance.yahoo.com/news/gamestop-gme-outpaces-stock-market-214509325.html'],
 'TSLA': ['https://au.finance.yahoo.com/news/china-restricting-tesla-use-uncovers-a-significant-challenge-for-elon-musk-expert-161921664.html

In [105]:
# Index range covering the GME summaries (one index per summary).
range(len(summaries['GME']))

range(0, 10)

In [109]:
# Look up a single GME summary by position.
summaries['GME'][3]

'Video game retailer has gained 417.58% over the past month.'

In [112]:
def create_output_array(summaries, scores, urls):
    """Flatten per-ticker summaries, sentiment results and URLs into rows.

    The tickers are taken from the keys of ``summaries`` rather than from a
    notebook-level variable, so the function depends only on its arguments.

    Args:
        summaries: Dict mapping ticker -> list of summary strings.
        scores: Dict mapping ticker -> list of dicts with 'label' and 'score'
            keys, positionally aligned with ``summaries[ticker]``.
        urls: Dict mapping ticker -> list of source URLs, positionally aligned
            with ``summaries[ticker]``.

    Returns:
        List of ``[ticker, summary, label, score, url]`` rows, grouped by
        ticker in the key order of ``summaries``.

    Raises:
        KeyError / IndexError: if ``scores`` or ``urls`` lacks an entry that
            matches a summary.
    """
    output = []
    for ticker, ticker_summaries in summaries.items():
        for counter, summary in enumerate(ticker_summaries):
            output.append([
                ticker,
                summary,
                scores[ticker][counter]['label'],
                scores[ticker][counter]['score'],
                urls[ticker][counter],
            ])
    return output

In [113]:
# Build the flat list of export rows from the three per-ticker dicts.
final_output = create_output_array(
    summaries,
    scores,
    cleaned_urls,
)
final_output

[['GME',
  'Video game retailer has lost 68.7% over the past month. Heading into today, shares of the video game retailer had lost 68.7% over the past month.',
  'NEGATIVE',
  0.9995605945587158,
  'https://finance.yahoo.com/news/gamestop-gme-dips-more-broader-224510835.html'],
 ['GME',
  'Video-game retailer to report first-quarter results next week. Options market indicates volatility ahead of earnings',
  'NEGATIVE',
  0.9946243166923523,
  'https://finance.yahoo.com/news/gamestop-2-460-roller-coaster-131733513.html'],
 ['GME',
  'Video game retailer’s shares surge again, but not as much as in January',
  'NEGATIVE',
  0.9979138970375061,
  'https://au.finance.yahoo.com/news/gme-gamestop-surge-230431651.html'],
 ['GME',
  'Video game retailer has gained 417.58% over the past month.',
  'NEGATIVE',
  0.5935250520706177,
  'https://finance.yahoo.com/news/gamestop-gme-stock-sinks-market-224510746.html'],
 ['GME',
  'Short interest fell to $2.76 billion on Wednesday. Dusaniwsky says sho

In [118]:
# Prepend the CSV header row only if it is not already the first row, so
# re-running this cell does not stack duplicate headers onto final_output.
header_row = ['Ticker', 'Summary', 'Label', 'Confidence', 'URL']
if not final_output or final_output[0] != header_row:
    final_output.insert(0, header_row)

In [119]:
# Confirm the header row now leads the export rows.
final_output

[['Ticker', 'Summary', 'Label', 'Confidence', 'URL'],
 ['GME',
  'Video game retailer has lost 68.7% over the past month. Heading into today, shares of the video game retailer had lost 68.7% over the past month.',
  'NEGATIVE',
  0.9995605945587158,
  'https://finance.yahoo.com/news/gamestop-gme-dips-more-broader-224510835.html'],
 ['GME',
  'Video-game retailer to report first-quarter results next week. Options market indicates volatility ahead of earnings',
  'NEGATIVE',
  0.9946243166923523,
  'https://finance.yahoo.com/news/gamestop-2-460-roller-coaster-131733513.html'],
 ['GME',
  'Video game retailer’s shares surge again, but not as much as in January',
  'NEGATIVE',
  0.9979138970375061,
  'https://au.finance.yahoo.com/news/gme-gamestop-surge-230431651.html'],
 ['GME',
  'Video game retailer has gained 417.58% over the past month.',
  'NEGATIVE',
  0.5935250520706177,
  'https://finance.yahoo.com/news/gamestop-gme-stock-sinks-market-224510746.html'],
 ['GME',
  'Short interest f

In [121]:
import csv
# Write header + rows to assetsummaries.csv. newline='' lets the csv module
# control line endings. The encoding is set explicitly because the summaries
# contain non-ASCII punctuation (curly quotes) and the platform default
# encoding is not guaranteed to handle it.
with open('assetsummaries.csv', mode='w', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerows(final_output)