# Imports

In [51]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration, pipeline
from bs4 import BeautifulSoup
import requests
import re
import csv

# Summarization Model

In [5]:
# Load the summarization checkpoint and its matching tokenizer by name.
# The recorded output of this cell shows a ~2.28G download for the weights,
# so the first run can take a while.
model_name = "human-centered-summarization/financial-summarization-pegasus"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

Downloading:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

# Summarizing a Single Article

In [8]:
# Fetch one Yahoo Finance article and collect its <p> elements.
url = "https://finance.yahoo.com/news/ftx-creditors-1-million-bankruptcy-142549111.html?guccounter=1&guce_referrer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8&guce_referrer_sig=AQAAABj5_2d_pwiCQiOBAoEIUi_AbR4oGf5PSY39uc9K4tARJnZWzFIftaqrk7Fc7ZnGnk9gfZ2ek80FuAU3EEZWA0KKROOWeh962FZWUlJM-9IfcRpM_eBahEGjRRsIhMXLTW5TU-BbvPa1OrusN3Prfzz0k5kAN5nSXz4b6UF3ax0G"
# requests has no default timeout, so pass one to keep the cell from hanging
# on a stalled connection.
r = requests.get(url, timeout=10)
# Fail here on an HTTP error instead of parsing (and later summarizing) an
# error page as if it were the article.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
paragraphs = soup.find_all('p')

In [11]:
# Spot-check the scrape: show the text of the first <p> element found.
paragraphs[0].text

"FTX may have more than 1 million creditors, a new court document in the crypto exchange's bankruptcy case shows, ten times more than what the company said it had when it filed for Chapter 11 on Friday."

In [10]:
# Join all paragraph texts and keep only the first MAX_WORDS words as the
# text to summarize.
MAX_WORDS = 400
text = [paragraph.text for paragraph in paragraphs]
# split() with no argument breaks on any run of whitespace, so newlines, tabs
# and doubled spaces inside a paragraph no longer skew the word count the way
# split(' ') did (it only recognised single spaces as separators).
words = ' '.join(text).split()[:MAX_WORDS]
ARTICLE = ' '.join(words)

In [13]:
# Inspect the truncated article text that is passed to the summarizer.
ARTICLE

'FTX may have more than 1 million creditors, a new court document in the crypto exchange\'s bankruptcy case shows, ten times more than what the company said it had when it filed for Chapter 11 on Friday. "As set forth in the Debtors\' petitions, there are over one hundred thousand creditors in these Chapter 11 cases. In fact, there could be more than one million creditors in these Chapter 11 Cases," FTX legal counsel said in the document filed Tuesday. The revelation underscores concerns among analysts and industry watchers over how wide and far FTX\'s troubles could spread in the crypto industry. "This is a huge black mark for the industry," Devin Ryan, director of financial technology research with JMP Securities, told Yahoo Finance. "And there\'s still a heck of a lot of uncertainty in terms of where we go from here, where the money trail goes." Since the FTX saga began over the past week, bitcoin has sold off by more than 14% from $19,717 to $16,847, while the total market capitali

In [16]:
# Tokenize the article, generate a summary with beam search, and decode it
# back to text.
# truncation=True clips the token sequence to the tokenizer's configured
# maximum length; the word cap applied earlier does not bound the number of
# tokens, so an unusually token-dense article could otherwise be too long.
input_ids = tokenizer.encode(ARTICLE, return_tensors='pt', truncation=True)
output = model.generate(input_ids, max_length=100, num_beams=5, early_stopping=True)
summary = tokenizer.decode(output[0], skip_special_tokens=True)

In [17]:
# Display the generated summary of the single article.
summary

'FTX may have more than 1 million creditors, court document shows. Bitcoin has fallen by more than 14% since FTX filed for bankruptcy'

# Build a News and Sentiment Pipeline

In [18]:
# Ticker symbols that the search, scrape, summarize and sentiment cells below
# iterate over.
monitored_tickers = ['ETH', 'FTX', 'BTC']

In [19]:
def search_for_stock_news_urls(ticker):
    """Return every link target on the Google News results page for a ticker.

    Args:
        ticker: Symbol appended to the "yahoo finance" search query.

    Returns:
        list of str: The ``href`` value of each ``<a>`` tag that has one, in
        page order and unfiltered (relative Google links are included).
    """
    # Passing the query through ``params`` lets requests URL-encode it, so a
    # ticker containing characters such as '&', '^' or spaces cannot corrupt
    # the query string. Spaces are encoded as '+', which yields the same URL
    # as before for plain symbols.
    r = requests.get(
        "https://www.google.com/search",
        params={"q": "yahoo finance {}".format(ticker), "tbm": "nws"},
        timeout=10,  # requests has no default timeout; avoid hanging forever
    )
    soup = BeautifulSoup(r.text, 'html.parser')
    # href=True keeps only anchors that actually carry an href attribute;
    # indexing link['href'] on an anchor without one raises KeyError.
    atags = soup.find_all('a', href=True)
    return [link['href'] for link in atags]

In [20]:
# Collect, per monitored ticker, every raw href found on its news search page.
raw_urls = {
    symbol: search_for_stock_news_urls(symbol)
    for symbol in monitored_tickers
}

In [21]:
# Inspect the unfiltered hrefs for all tickers. As the output shows, most are
# relative Google navigation links rather than article URLs.
raw_urls

{'ETH': ['/?sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQOwgC',
  '/search?q=yahoo+finance+ETH&tbm=nws&ie=UTF-8&gbv=1&sei=DOFzY-CYFM31juMPs4q9mAM',
  '/search?q=yahoo+finance+ETH&ie=UTF-8&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUIBSgA',
  '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=vid&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUIBygC',
  '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=isch&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUICCgD',
  'https://maps.google.com/maps?q=yahoo+finance+ETH&um=1&ie=UTF-8&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUICSgE',
  '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=shop&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUICigF',
  '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=bks&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUICygG',
  '/advanced_search',
  '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=nws&source=lnt&tbs=qdr:h&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQpwUIDQ',
  '/search?q=yahoo+finance+ETH&ie=U

In [22]:
# Same inspection narrowed to a single ticker.
raw_urls['ETH']

['/?sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQOwgC',
 '/search?q=yahoo+finance+ETH&tbm=nws&ie=UTF-8&gbv=1&sei=DOFzY-CYFM31juMPs4q9mAM',
 '/search?q=yahoo+finance+ETH&ie=UTF-8&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUIBSgA',
 '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=vid&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUIBygC',
 '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=isch&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUICCgD',
 'https://maps.google.com/maps?q=yahoo+finance+ETH&um=1&ie=UTF-8&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUICSgE',
 '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=shop&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUICigF',
 '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=bks&source=lnms&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQ_AUICygG',
 '/advanced_search',
 '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=nws&source=lnt&tbs=qdr:h&sa=X&ved=0ahUKEwjgztnl7rD7AhXNumMGHTNFDzMQpwUIDQ',
 '/search?q=yahoo+finance+ETH&ie=UTF-8&tbm=nws&sourc

### Getting rid of unwanted URLs

In [24]:
# Substrings used to reject links: strip_unwanted_urls skips any href that
# contains one of these words.
exclude_list = ['maps', 'policies', 'preferences', 'accounts', 'support']

In [25]:
def strip_unwanted_urls(urls, exclude_list):
    """Extract the unique https URLs embedded in a list of raw hrefs.

    Args:
        urls: Iterable of href strings, e.g. '/url?q=https://site/x.html&sa=U'.
        exclude_list: Substrings; any href containing one of them is skipped.

    Returns:
        list of str: Each extracted URL once, in first-seen order. The URL is
        cut at the first '&', which drops query parameters appended after it.
    """
    # A dict is used as an insertion-ordered set: list(set(...)) returned the
    # URLs in an arbitrary order that could differ from run to run.
    val = {}
    for url in urls:
        if 'https://' not in url or any(word in url for word in exclude_list):
            continue
        match = re.search(r'(https?://\S+)', url)
        if match is None:
            # e.g. an href that ends right after "https://"; indexing the
            # empty findall() result used to raise IndexError here.
            continue
        val[match.group(1).split('&')[0]] = None
    return list(val)

In [26]:
# Reduce each ticker's raw hrefs to its de-duplicated article links, then
# display the mapping.
cleaned_urls = {
    symbol: strip_unwanted_urls(raw_urls[symbol], exclude_list)
    for symbol in monitored_tickers
}
cleaned_urls

{'ETH': ['https://uk.finance.yahoo.com/news/defi-exchange-overtakes-coinbase-for-eth-trades-ftx-161934796.html',
  'https://finance.yahoo.com/news/ftx-hacker-panicked-still-holds-233057265.html',
  'https://finance.yahoo.com/news/crypto-fund-alamedas-ethereum-wallet-105636445.html',
  'https://finance.yahoo.com/news/ether-capital-corporation-maintains-highest-173500016.html',
  'https://finance.yahoo.com/news/first-mover-americas-ftxs-hacked-133244491.html',
  'https://finance.yahoo.com/news/retransmission-hive-announces-quarterly-revenue-110000288.html',
  'https://finance.yahoo.com/news/one-prdt-finance-might-roll-130000713.html',
  'https://finance.yahoo.com/news/ether-turns-deflationary-amount-eth-095751397.html',
  'https://finance.yahoo.com/news/ether-sees-biggest-weekly-gain-082926717.html',
  'https://finance.yahoo.com/news/crypto-com-accidentally-sent-400m-145924881.html'],
 'FTX': ['https://finance.yahoo.com/news/ft-xs-bust-and-crypto-crash-come-with-two-silver-linings-103045

### Searching and Scraping Cleaned URLs

In [27]:
def scrape_and_process(URLs, max_words=350):
    """Download each URL and return its paragraph text cut to ``max_words``.

    Args:
        URLs: Iterable of page URLs to fetch.
        max_words: Maximum number of whitespace-separated words kept per
            page. Defaults to 350, the limit that was previously hard-coded.

    Returns:
        list of str: Exactly one text per URL, in the same order as ``URLs``.
        A page is never skipped (one without ``<p>`` tags yields ''), so the
        results can be matched to their source URL by position.
    """
    ARTICLES = []
    for url in URLs:
        # requests has no default timeout; without one a single stalled
        # connection would block the whole loop.
        r = requests.get(url, timeout=10)
        soup = BeautifulSoup(r.text, 'html.parser')
        text = ' '.join(paragraph.text for paragraph in soup.find_all('p'))
        # split() with no argument breaks on any run of whitespace, so
        # newlines, tabs and doubled spaces no longer skew the word count the
        # way split(' ') did.
        words = text.split()[:max_words]
        ARTICLES.append(' '.join(words))
    return ARTICLES

In [28]:
# Download and truncate the text behind every cleaned URL, per ticker, then
# display the mapping.
articles = {
    symbol: scrape_and_process(cleaned_urls[symbol])
    for symbol in monitored_tickers
}
articles

{'ETH': ['Billions of dollars lost as FTX crypto exchange implodes Decentralised cryptocurrency exchange Uniswap flipped Coinbase (COIN) to become the second-largest platform for trading ethereum after Binance, in the wake of the FTX implosion. In the last 24 hours, Uniswap has eclipsed major centralised exchange Coinbase for ethereum (ETH-USD) trades according to data from Traders are flocking to decentralised exchanges (DEXs) in the wake of FTX’s sudden, catastrophic implosion. In the wake of the FTX crisis, bitcoin (BTC-USD) is standing at $17,042 (£14,330) as of the time of writing, up 2.6% in the past 24 hours and ethereum jumped over 1.8% in the past day to $1,270. Check: Crypto live prices The news of a possible mass movement to decentralised exchanges was shared on Twitter today by the inventor of Uniswap. Uniswap inventor Hayden Adams tweeted: "DEX starting to replace CEX? "Total ETH/USD (or stables) volume:Binance: ~$1.9b, Uniswap: ~$1.1b, Coinbase: ~$0.6b." According to data

### Summarizing all Articles

In [29]:
def summarize(articles, max_length=55, num_beams=5):
    """Summarize each article with the notebook-level ``tokenizer`` and ``model``.

    Args:
        articles: Iterable of article texts.
        max_length: ``max_length`` passed to ``model.generate``. Defaults to
            55, the value that was previously hard-coded.
        num_beams: Beam count passed to ``model.generate``. Defaults to 5,
            the value that was previously hard-coded.

    Returns:
        list of str: One decoded summary per article, in input order.
    """
    summaries = []
    for article in articles:
        # truncation=True clips the token sequence to the tokenizer's
        # configured maximum length; a word cap on the article text does not
        # bound the number of tokens it produces.
        input_ids = tokenizer.encode(article, return_tensors='pt', truncation=True)
        output = model.generate(
            input_ids,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
        )
        summaries.append(tokenizer.decode(output[0], skip_special_tokens=True))
    return summaries

In [31]:
# Summarize every scraped article, per ticker, then display the mapping.
summaries = {
    symbol: summarize(articles[symbol])
    for symbol in monitored_tickers
}
summaries

{'ETH': ['Uniswap overtakes Coinbase as second-largest platform for ethereum trades. Decentralised cryptocurrency exchanges have been in the news recently',
  'Security firm Arkham finds $339 million in Ether, DAI, Maker’s stablecoin. Outflows at bankrupt FTX were caused by insider attack',
  'Fund held more than $500 million in crypto as of Oct. 1. Ether has fallen more than 8% in the past two weeks',
  'Ether Capital has no exposure to FTX or its affiliated businesses. Figment has confirmed there will be no direct or immediate impact to its business',
  'We are aware of the issue and are working to resolve it.',
  'Revenue for the second quarter was $29.6 million, up $44.2 million over previous quarter.',
  'PRDT Finance is the first fully cross-chain prediction platform. The platform supports Bitcoin, Ethereum, BNB, Polygon',
  "Ethereum network usage has increased in recent days. Ether's supply has declined by 5,598 since the Merge upgrade",
  'Ether rose over 16% last week, oversh

In [32]:
# Inspect the summaries for one ticker. The recorded output includes
# 'We are aware of the issue and are working to resolve it.', which looks like
# a site notice rather than article content — NOTE(review): confirm those
# pages scraped correctly.
summaries['FTX']

['FTX-driven crypto rout is a wake-up call for investors. S&P 500 is up 5.9% in past five trading sessions',
 'FTX may have more than 1 million creditors, court document shows. Bitcoin has fallen by more than 14% since FTX filed for bankruptcy',
 'Saylor compares Bankman-Fried to Jordan Belfort. FTX filed for Chapter 11 bankruptcy following a failed takeover',
 'FTX is ‘one of the most damaging events in crypto history,’ ARK says. Regulators should ‘take draconian measures,’ firm says',
 'FTX, once one of the world’s largest, filed for bankruptcy. Analysts say hundreds of millions of dollars may have vanished',
 'Ikigai says it had a ‘large majority’ of its assets on FTX. Crypto lenders BlockFi, Berkeley Research also provide updates',
 'We are aware of the issue and are working to resolve it.',
 'FTX says ‘unauthorized access’ to its accounts. Analysts say hundreds of millions of dollars are missing',
 'FTX has collapsed, tanking his fortune by 94%. Bankman-Fried told The New York Tim

# Sentiment Analysis of Articles

In [34]:
# Name the checkpoint and revision explicitly. The unpinned call
# pipeline('sentiment-analysis') reported that it defaulted to exactly this
# model and revision, and warned that relying on the default is not
# recommended; pinning keeps results reproducible.
sentiment = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    revision='af0f99b',
)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [35]:
# Classify all FTX summaries in one call. As the output shows, the pipeline
# returns one {'label', 'score'} dict per input string.
sentiment(summaries['FTX'])

[{'label': 'NEGATIVE', 'score': 0.9955804944038391},
 {'label': 'NEGATIVE', 'score': 0.9996664524078369},
 {'label': 'NEGATIVE', 'score': 0.9976012110710144},
 {'label': 'NEGATIVE', 'score': 0.9986016154289246},
 {'label': 'NEGATIVE', 'score': 0.9996944665908813},
 {'label': 'NEGATIVE', 'score': 0.9852508306503296},
 {'label': 'POSITIVE', 'score': 0.9979088306427002},
 {'label': 'NEGATIVE', 'score': 0.9996572732925415},
 {'label': 'NEGATIVE', 'score': 0.999707043170929},
 {'label': 'POSITIVE', 'score': 0.9979088306427002}]

In [36]:
# Run sentiment classification over each ticker's summaries, then display
# the mapping.
scores = {
    symbol: sentiment(summaries[symbol])
    for symbol in monitored_tickers
}
scores

{'ETH': [{'label': 'NEGATIVE', 'score': 0.9465776681900024},
  {'label': 'NEGATIVE', 'score': 0.9990807771682739},
  {'label': 'NEGATIVE', 'score': 0.9995892643928528},
  {'label': 'NEGATIVE', 'score': 0.9995774626731873},
  {'label': 'POSITIVE', 'score': 0.9979088306427002},
  {'label': 'POSITIVE', 'score': 0.9823259711265564},
  {'label': 'POSITIVE', 'score': 0.9737576842308044},
  {'label': 'NEGATIVE', 'score': 0.9983112812042236},
  {'label': 'POSITIVE', 'score': 0.8470471501350403},
  {'label': 'NEGATIVE', 'score': 0.9993730187416077}],
 'FTX': [{'label': 'NEGATIVE', 'score': 0.9955804944038391},
  {'label': 'NEGATIVE', 'score': 0.9996664524078369},
  {'label': 'NEGATIVE', 'score': 0.9976012110710144},
  {'label': 'NEGATIVE', 'score': 0.9986016154289246},
  {'label': 'NEGATIVE', 'score': 0.9996944665908813},
  {'label': 'NEGATIVE', 'score': 0.9852508306503296},
  {'label': 'POSITIVE', 'score': 0.9979088306427002},
  {'label': 'NEGATIVE', 'score': 0.9996572732925415},
  {'label': '

In [38]:
# Spot check: print one FTX summary next to its predicted label and score.
print(summaries['FTX'][3], scores['FTX'][3]['label'], scores['FTX'][3]['score'])

FTX is ‘one of the most damaging events in crypto history,’ ARK says. Regulators should ‘take draconian measures,’ firm says NEGATIVE 0.9986016154289246


In [41]:
# Spot check for ETH. NOTE(review): the recorded output labels a
# "no exposure to FTX" summary NEGATIVE with ~0.9996 confidence, so the labels
# may not reflect financial sentiment — verify before relying on them.
print(summaries['ETH'][3], scores['ETH'][3]['label'], scores['ETH'][3]['score'])

Ether Capital has no exposure to FTX or its affiliated businesses. Figment has confirmed there will be no direct or immediate impact to its business NEGATIVE 0.9995774626731873


In [39]:
# Show how a single confidence value is addressed:
# ticker -> position in that ticker's list -> 'score'.
scores['ETH'][0]['score']

0.9465776681900024

# Exporting to CSV

In [42]:
# Re-inspect the summaries, the first of the three inputs to the export rows.
summaries

{'ETH': ['Uniswap overtakes Coinbase as second-largest platform for ethereum trades. Decentralised cryptocurrency exchanges have been in the news recently',
  'Security firm Arkham finds $339 million in Ether, DAI, Maker’s stablecoin. Outflows at bankrupt FTX were caused by insider attack',
  'Fund held more than $500 million in crypto as of Oct. 1. Ether has fallen more than 8% in the past two weeks',
  'Ether Capital has no exposure to FTX or its affiliated businesses. Figment has confirmed there will be no direct or immediate impact to its business',
  'We are aware of the issue and are working to resolve it.',
  'Revenue for the second quarter was $29.6 million, up $44.2 million over previous quarter.',
  'PRDT Finance is the first fully cross-chain prediction platform. The platform supports Bitcoin, Ethereum, BNB, Polygon',
  "Ethereum network usage has increased in recent days. Ether's supply has declined by 5,598 since the Merge upgrade",
  'Ether rose over 16% last week, oversh

In [43]:
# Re-inspect the sentiment results, the second input to the export rows.
scores

{'ETH': [{'label': 'NEGATIVE', 'score': 0.9465776681900024},
  {'label': 'NEGATIVE', 'score': 0.9990807771682739},
  {'label': 'NEGATIVE', 'score': 0.9995892643928528},
  {'label': 'NEGATIVE', 'score': 0.9995774626731873},
  {'label': 'POSITIVE', 'score': 0.9979088306427002},
  {'label': 'POSITIVE', 'score': 0.9823259711265564},
  {'label': 'POSITIVE', 'score': 0.9737576842308044},
  {'label': 'NEGATIVE', 'score': 0.9983112812042236},
  {'label': 'POSITIVE', 'score': 0.8470471501350403},
  {'label': 'NEGATIVE', 'score': 0.9993730187416077}],
 'FTX': [{'label': 'NEGATIVE', 'score': 0.9955804944038391},
  {'label': 'NEGATIVE', 'score': 0.9996664524078369},
  {'label': 'NEGATIVE', 'score': 0.9976012110710144},
  {'label': 'NEGATIVE', 'score': 0.9986016154289246},
  {'label': 'NEGATIVE', 'score': 0.9996944665908813},
  {'label': 'NEGATIVE', 'score': 0.9852508306503296},
  {'label': 'POSITIVE', 'score': 0.9979088306427002},
  {'label': 'NEGATIVE', 'score': 0.9996572732925415},
  {'label': '

In [44]:
# Re-inspect the source URLs, the third input to the export rows.
cleaned_urls

{'ETH': ['https://uk.finance.yahoo.com/news/defi-exchange-overtakes-coinbase-for-eth-trades-ftx-161934796.html',
  'https://finance.yahoo.com/news/ftx-hacker-panicked-still-holds-233057265.html',
  'https://finance.yahoo.com/news/crypto-fund-alamedas-ethereum-wallet-105636445.html',
  'https://finance.yahoo.com/news/ether-capital-corporation-maintains-highest-173500016.html',
  'https://finance.yahoo.com/news/first-mover-americas-ftxs-hacked-133244491.html',
  'https://finance.yahoo.com/news/retransmission-hive-announces-quarterly-revenue-110000288.html',
  'https://finance.yahoo.com/news/one-prdt-finance-might-roll-130000713.html',
  'https://finance.yahoo.com/news/ether-turns-deflationary-amount-eth-095751397.html',
  'https://finance.yahoo.com/news/ether-sees-biggest-weekly-gain-082926717.html',
  'https://finance.yahoo.com/news/crypto-com-accidentally-sent-400m-145924881.html'],
 'FTX': ['https://finance.yahoo.com/news/ft-xs-bust-and-crypto-crash-come-with-two-silver-linings-103045

In [46]:
# Show how a single summary is addressed: ticker -> position in its list.
summaries['ETH'][3]

'Ether Capital has no exposure to FTX or its affiliated businesses. Figment has confirmed there will be no direct or immediate impact to its business'

In [47]:
def create_output_array(summaries, scores, urls):
    """Flatten per-ticker summaries, sentiment results and URLs into rows.

    Args:
        summaries: dict mapping ticker -> list of summary strings.
        scores: dict mapping ticker -> list of dicts with 'label' and 'score'
            keys, aligned by position with ``summaries[ticker]``.
        urls: dict mapping ticker -> list of URLs, aligned the same way.

    Returns:
        list of list: One ``[ticker, summary, label, score, url]`` row per
        summary, grouped by ticker in the iteration order of ``summaries``.

    Raises:
        KeyError: If a ticker that has summaries is missing from ``scores``
            or ``urls``.
        IndexError: If ``scores[ticker]`` or ``urls[ticker]`` is shorter than
            ``summaries[ticker]``.
    """
    output = []
    # Iterate the tickers of the ``summaries`` argument rather than the
    # notebook-level ``monitored_tickers`` list, so the result depends only on
    # what was passed in and the function also works outside this notebook.
    for ticker, ticker_summaries in summaries.items():
        for counter, summary in enumerate(ticker_summaries):
            result = scores[ticker][counter]
            output.append([
                ticker,
                summary,
                result['label'],
                result['score'],
                urls[ticker][counter],
            ])
    return output

In [48]:
# Build the flat list of [ticker, summary, label, score, url] rows and
# display it.
final_output = create_output_array(summaries, scores, cleaned_urls)
final_output

[['ETH',
  'Uniswap overtakes Coinbase as second-largest platform for ethereum trades. Decentralised cryptocurrency exchanges have been in the news recently',
  'NEGATIVE',
  0.9465776681900024,
  'https://uk.finance.yahoo.com/news/defi-exchange-overtakes-coinbase-for-eth-trades-ftx-161934796.html'],
 ['ETH',
  'Security firm Arkham finds $339 million in Ether, DAI, Maker’s stablecoin. Outflows at bankrupt FTX were caused by insider attack',
  'NEGATIVE',
  0.9990807771682739,
  'https://finance.yahoo.com/news/ftx-hacker-panicked-still-holds-233057265.html'],
 ['ETH',
  'Fund held more than $500 million in crypto as of Oct. 1. Ether has fallen more than 8% in the past two weeks',
  'NEGATIVE',
  0.9995892643928528,
  'https://finance.yahoo.com/news/crypto-fund-alamedas-ethereum-wallet-105636445.html'],
 ['ETH',
  'Ether Capital has no exposure to FTX or its affiliated businesses. Figment has confirmed there will be no direct or immediate impact to its business',
  'NEGATIVE',
  0.99957

In [49]:
# Prepend the CSV header row. The guard makes this cell safe to re-run:
# an unconditional insert adds one more header row on every execution.
header_row = ['Ticker', 'Summary', 'Label', 'Confidence', 'URL']
if not final_output or final_output[0] != header_row:
    final_output.insert(0, header_row)

In [50]:
# Confirm the header row now sits at the top of the rows.
final_output

[['Ticker', 'Summary', 'Label', 'Confidence', 'URL'],
 ['ETH',
  'Uniswap overtakes Coinbase as second-largest platform for ethereum trades. Decentralised cryptocurrency exchanges have been in the news recently',
  'NEGATIVE',
  0.9465776681900024,
  'https://uk.finance.yahoo.com/news/defi-exchange-overtakes-coinbase-for-eth-trades-ftx-161934796.html'],
 ['ETH',
  'Security firm Arkham finds $339 million in Ether, DAI, Maker’s stablecoin. Outflows at bankrupt FTX were caused by insider attack',
  'NEGATIVE',
  0.9990807771682739,
  'https://finance.yahoo.com/news/ftx-hacker-panicked-still-holds-233057265.html'],
 ['ETH',
  'Fund held more than $500 million in crypto as of Oct. 1. Ether has fallen more than 8% in the past two weeks',
  'NEGATIVE',
  0.9995892643928528,
  'https://finance.yahoo.com/news/crypto-fund-alamedas-ethereum-wallet-105636445.html'],
 ['ETH',
  'Ether Capital has no exposure to FTX or its affiliated businesses. Figment has confirmed there will be no direct or imme

In [53]:
# Write the header and data rows to summaries.csv.
# The encoding is stated explicitly because the summaries contain non-ASCII
# characters (curly quotes, for example). Without it open() falls back to the
# platform's default encoding, which can fail or garble those characters on
# some systems. newline='' is what the csv module expects for files it writes.
with open('summaries.csv', mode='w', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerows(final_output)