In [1]:
! pip install transformers



In [2]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
from bs4 import BeautifulSoup
import requests

In [3]:
# Identifier of the pretrained Pegasus checkpoint passed to from_pretrained below.
model_name = "human-centered-summarization/financial-summarization-pegasus"
# Load the tokenizer and the model from the same checkpoint; summarize() reads
# both of these as module-level globals.
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

In [38]:
# Search terms to pull news for. The per-ticker dicts built in later cells
# (raw_urls, cleaned_urls, articles, summaries, scores) are keyed by these values.
monitored_tickers = ['reliance', 'grasim', 'hindalco', 'TCS']

In [39]:
def search_for_stock_news_urls(ticker, timeout=10):
    """Return every link target found on the Google News results page for a ticker.

    Args:
        ticker: Search term substituted into the "mint+<ticker>" news query.
        timeout: Seconds to wait for the response before `requests` raises a
            timeout error. Optional; without a timeout the request could
            block indefinitely.

    Returns:
        list[str]: The raw ``href`` value of each anchor on the page, in
        document order. The values are unfiltered, so navigation links and
        relative paths are included alongside article links.
    """
    search_url = "https://www.google.com/search?q=mint+{}&tbm=nws".format(ticker)
    r = requests.get(search_url, timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')
    # href=True restricts the match to anchors that actually carry an href;
    # indexing link['href'] on an anchor without one raises KeyError.
    atags = soup.find_all('a', href=True)
    return [link['href'] for link in atags]

In [40]:
# Preview the query URL produced by the same format string that
# search_for_stock_news_urls uses, here for the 'TCS' ticker.
search_url = "https://www.google.com/search?q=mint+{}&tbm=nws".format('TCS')
search_url

'https://www.google.com/search?q=mint+TCS&tbm=nws'

In [51]:
# Spot-check the raw hrefs returned for a single ticker.
search_for_stock_news_urls('grasim')

['/?sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQOwgC',
 '/?output=search&ie=UTF-8&tbm=nws&sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQPAgE',
 '/search?q=mint+grasim&tbm=nws&ie=UTF-8&gbv=1&sei=w9ywYfm-PJmD4-EP0tqpkAo',
 '/search?q=mint+grasim&ie=UTF-8&source=lnms&sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQ_AUIBygA',
 '/search?q=mint+grasim&ie=UTF-8&tbm=shop&source=lnms&sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQ_AUICCgB',
 '/search?q=mint+grasim&ie=UTF-8&tbm=isch&source=lnms&sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQ_AUICigD',
 'https://maps.google.com/maps?q=mint+grasim&um=1&ie=UTF-8&sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQ_AUICygE',
 '/search?q=mint+grasim&ie=UTF-8&tbm=vid&source=lnms&sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQ_AUIDCgF',
 '/search?q=mint+grasim&ie=UTF-8&tbm=bks&source=lnms&sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQ_AUIDSgG',
 '/advanced_search',
 '/search?q=mint+grasim&ie=UTF-8&tbm=nws&source=lnt&tbs=qdr:h&sa=X&ved=0ahUKEwi5rPakz9T0AhWZwTgGHVJtCqIQpwUIDw',
 '/search?q=mint

In [55]:
# Fetch the raw search-result hrefs for every monitored ticker, keyed by ticker.
raw_urls = {ticker:search_for_stock_news_urls(ticker) for ticker in monitored_tickers}
raw_urls

{'reliance': ['/?sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQOwgC',
  '/?output=search&ie=UTF-8&tbm=nws&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQPAgE',
  '/search?q=mint+reliance&tbm=nws&ie=UTF-8&gbv=1&sei=Pd2wYYrnM7uf4-EP8PyZgAo',
  '/search?q=mint+reliance&ie=UTF-8&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUIBygA',
  '/search?q=mint+reliance&ie=UTF-8&tbm=isch&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUICSgC',
  '/search?q=mint+reliance&ie=UTF-8&tbm=shop&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUICigD',
  'https://maps.google.com/maps?q=mint+reliance&um=1&ie=UTF-8&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUICygE',
  '/search?q=mint+reliance&ie=UTF-8&tbm=vid&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUIDCgF',
  '/search?q=mint+reliance&ie=UTF-8&tbm=bks&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUIDSgG',
  '/advanced_search',
  '/search?q=mint+reliance&ie=UTF-8&tbm=nws&source=lnt&tbs=qdr:h&sa=X&ved=0ahUKEwjK-YPfz9T0Ah

In [56]:
# List the tickers that have an entry in raw_urls.
raw_urls.keys()

dict_keys(['reliance', 'grasim', 'hindalco', 'TCS'])

In [57]:
# Inspect the per-ticker href lists themselves.
raw_urls.values()

dict_values([['/?sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQOwgC', '/?output=search&ie=UTF-8&tbm=nws&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQPAgE', '/search?q=mint+reliance&tbm=nws&ie=UTF-8&gbv=1&sei=Pd2wYYrnM7uf4-EP8PyZgAo', '/search?q=mint+reliance&ie=UTF-8&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUIBygA', '/search?q=mint+reliance&ie=UTF-8&tbm=isch&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUICSgC', '/search?q=mint+reliance&ie=UTF-8&tbm=shop&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUICigD', 'https://maps.google.com/maps?q=mint+reliance&um=1&ie=UTF-8&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUICygE', '/search?q=mint+reliance&ie=UTF-8&tbm=vid&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUIDCgF', '/search?q=mint+reliance&ie=UTF-8&tbm=bks&source=lnms&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQ_AUIDSgG', '/advanced_search', '/search?q=mint+reliance&ie=UTF-8&tbm=nws&source=lnt&tbs=qdr:h&sa=X&ved=0ahUKEwjK-YPfz9T0AhW7zzgGHXB-BqAQpwUIDw

In [58]:
import re

In [59]:
# Substrings that disqualify a URL in strip_unwanted_urls.
# NOTE(review): presumably these match Google's own maps/policy/account/support
# links rather than news articles — confirm against the raw hrefs.
exclude_list = ['maps' ,'policies' ,'preferences', 'accounts', 'support']

In [60]:
def strip_unwanted_urls(urls, exclude_list):
    """Extract the distinct article URLs embedded in raw search-result hrefs.

    An href is kept only if it contains ``https://`` and none of the
    substrings in ``exclude_list``. From each kept href the first
    ``http(s)://...`` run is extracted and cut at the first ``&``, which
    drops the tracking parameters appended after the target URL.

    Args:
        urls: Iterable of raw href strings.
        exclude_list: Iterable of substrings; an href containing any of them
            is skipped.

    Returns:
        list[str]: Unique cleaned URLs in order of first appearance, so the
        result is the same from run to run.
    """
    val = []
    for url in urls:
        if 'https://' not in url:
            continue
        if any(exclude_word in url for exclude_word in exclude_list):
            continue
        match = re.search(r'https?://\S+', url)
        if match is None:
            # e.g. a bare "https://" with nothing after it: no URL to extract
            continue
        val.append(match.group(0).split('&')[0])
    # dict.fromkeys removes duplicates while preserving insertion order,
    # unlike set(), whose iteration order for strings varies between runs.
    return list(dict.fromkeys(val))

In [61]:
# Try the filter on one ticker's raw hrefs before applying it to all of them.
strip_unwanted_urls(raw_urls['reliance'],exclude_list)

['https://www.livemint.com/companies/news/ril-forms-2-billion-production-jv-with-abu-dhabi-chemical-company-11638879989312.html',
 'https://www.livemint.com/market/stock-market-news/stocks-to-watch-reliance-industries-l-t-hindustan-zinc-11638926012677.html',
 'https://www.livemint.com/market/stock-market-news/stocks-to-watch-ril-tech-mahindra-sbi-auto-shares-future-retail-11638752620978.html',
 'https://www.livemint.com/industry/telecom/telcos-seek-further-concessions-from-government-11638970971831.html',
 'https://www.livemint.com/companies/news/reliance-takes-736-million-green-loan-to-finance-rec-solar-buy-11638809326903.html',
 'https://www.livemint.com/market/stock-market-news/reliance-industries-ril-shares-find-favour-after-jio-tariff-hike-what-brokerages-say-on-stock-11638161379724.html',
 'https://www.livemint.com/market/mark-to-market/aramco-deal-drop-does-not-sway-ril-s-investors-11638121115388.html',
 'https://www.livemint.com/companies/news/rbi-retains-three-member-advisory-

In [62]:
# Apply the filter to every ticker's raw hrefs, keyed by ticker.
cleaned_urls = {ticker:strip_unwanted_urls(raw_urls[ticker], exclude_list) for ticker in monitored_tickers}
cleaned_urls

{'reliance': ['https://www.livemint.com/companies/news/ril-forms-2-billion-production-jv-with-abu-dhabi-chemical-company-11638879989312.html',
  'https://www.livemint.com/market/stock-market-news/stocks-to-watch-reliance-industries-l-t-hindustan-zinc-11638926012677.html',
  'https://www.livemint.com/market/stock-market-news/stocks-to-watch-ril-tech-mahindra-sbi-auto-shares-future-retail-11638752620978.html',
  'https://www.livemint.com/industry/telecom/telcos-seek-further-concessions-from-government-11638970971831.html',
  'https://www.livemint.com/companies/news/reliance-takes-736-million-green-loan-to-finance-rec-solar-buy-11638809326903.html',
  'https://www.livemint.com/market/stock-market-news/reliance-industries-ril-shares-find-favour-after-jio-tariff-hike-what-brokerages-say-on-stock-11638161379724.html',
  'https://www.livemint.com/market/mark-to-market/aramco-deal-drop-does-not-sway-ril-s-investors-11638121115388.html',
  'https://www.livemint.com/companies/news/rbi-retains-th

In [68]:
def scrape_and_process(URLs, max_words=300, timeout=10):
    """Download each page and return the leading text of its ``<p>`` elements.

    Args:
        URLs: Iterable of page URLs to fetch.
        max_words: Maximum number of space-separated chunks kept per page.
            Defaults to 300, the value that was previously hard-coded.
        timeout: Seconds to wait for each response before `requests` raises
            a timeout error. Optional; without a timeout a single slow host
            could block the whole run.

    Returns:
        list[str]: One trimmed text string per URL, in the same order as
        ``URLs``.
    """
    articles = []
    for url in URLs:
        r = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(r.text, 'html.parser')
        paragraphs = soup.find_all('p')
        text = [paragraph.text for paragraph in paragraphs]
        # Split on single spaces only, so newlines/tabs inside a chunk are
        # kept as-is when the chunks are joined back together.
        words = ' '.join(text).split(' ')[:max_words]
        articles.append(' '.join(words))
    return articles

In [69]:
# Scrape and trim the page text for every cleaned URL, keyed by ticker.
articles = {ticker:scrape_and_process(cleaned_urls[ticker]) for ticker in monitored_tickers}
articles

{'reliance': ['The project will enable the substitution of imports and the creation of new local value chains, RIL has said \n\n\tPetchem major Reliance Industries Ltd (RIL) has formed a $2 billion partnership with Abu Dhabi Chemicals Derivatives Company RSC Ltd (Ta’ziz) for chemical production, the company said on Tuesday. The joint venture will construct and operate a chlor-alkali, ethylene dichloride (EDC) and polyvinyl chloride (PVC) production facility, with an investment of more than $2 billion. \n\n\t“Representing the first production of these chemicals in the UAE, the project will enable the substitution of imports and the creation of new local value chains, while also meeting growing demand for these chemicals globally," RIL said. \n\n\tThe Ta’ziz Industrial Chemicals Zone is a joint venture between Abu Dhabi National Oil Company (ADNOC) and ADQ. The project builds on ADNOC and Reliance’s long-standing strategic partnership and is Reliance’s first investment in the Middle East

In [70]:
# Inspect the scraped text for one ticker.
articles['reliance']

['The project will enable the substitution of imports and the creation of new local value chains, RIL has said \n\n\tPetchem major Reliance Industries Ltd (RIL) has formed a $2 billion partnership with Abu Dhabi Chemicals Derivatives Company RSC Ltd (Ta’ziz) for chemical production, the company said on Tuesday. The joint venture will construct and operate a chlor-alkali, ethylene dichloride (EDC) and polyvinyl chloride (PVC) production facility, with an investment of more than $2 billion. \n\n\t“Representing the first production of these chemicals in the UAE, the project will enable the substitution of imports and the creation of new local value chains, while also meeting growing demand for these chemicals globally," RIL said. \n\n\tThe Ta’ziz Industrial Chemicals Zone is a joint venture between Abu Dhabi National Oil Company (ADNOC) and ADQ. The project builds on ADNOC and Reliance’s long-standing strategic partnership and is Reliance’s first investment in the Middle East and North Af

In [71]:
def summarize(articles):
    """Generate one summary string per article.

    Uses the module-level ``tokenizer`` and ``model`` objects.

    Args:
        articles: Iterable of article text strings.

    Returns:
        list[str]: Decoded summaries, in the same order as ``articles``.
    """
    summaries = []
    for article in articles:
        # truncation=True clips over-long text to the tokenizer's configured
        # maximum length, so an unusually long article cannot exceed what the
        # model accepts. Shorter inputs are encoded exactly as before.
        input_ids = tokenizer.encode(article, return_tensors='pt', truncation=True)
        # Beam search with 5 beams; generated output is capped at 55 tokens.
        output = model.generate(input_ids, max_length=55, num_beams=5, early_stopping=True)
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)
    return summaries

In [72]:
# Summarize every scraped article, keyed by ticker.
summaries = {ticker:summarize(articles[ticker]) for ticker in monitored_tickers}
summaries

{'reliance': ['‘Representing the first production of these chemicals in the UAE,’ RIL says. Ta’ziz is a joint venture between ADNOC and ADQ',
  'Poll shows 60% of respondents expect RBI to keep key rates unchanged. Reliance, Hindustan Zinc among top 10 stocks to watch on Wednesday',
  'Economists at Nomura and Barclays expect a hike in reverse repo rate. Tech Mahindra acquired US remote customer experience provider Activus Connect',
  'Mukesh Ambani says India will contain future Covid waves. Bharti Enterprises chairman calls for lower levies on carriers',
  'Term loan pays interest margin of 120bp-125bp over Libor. REC Solar is borrower on the loan, while RIL is the guarantor',
  'Analysts see tariff hike as positive for Bharti, Jio. Reliance Jio has not given official word on it: Motilal Oswal',
  'Telecom vertical ‘in a sweet spot,’ says Motilal Oswal. Analysts at Motilal Oswal expect 22% upside on RJio, 7% upside on consolidated Ebitda.',
  'Central bank has retained advisory panel

In [73]:
# Inspect the generated summaries for one ticker.
summaries['reliance']

['‘Representing the first production of these chemicals in the UAE,’ RIL says. Ta’ziz is a joint venture between ADNOC and ADQ',
 'Poll shows 60% of respondents expect RBI to keep key rates unchanged. Reliance, Hindustan Zinc among top 10 stocks to watch on Wednesday',
 'Economists at Nomura and Barclays expect a hike in reverse repo rate. Tech Mahindra acquired US remote customer experience provider Activus Connect',
 'Mukesh Ambani says India will contain future Covid waves. Bharti Enterprises chairman calls for lower levies on carriers',
 'Term loan pays interest margin of 120bp-125bp over Libor. REC Solar is borrower on the loan, while RIL is the guarantor',
 'Analysts see tariff hike as positive for Bharti, Jio. Reliance Jio has not given official word on it: Motilal Oswal',
 'Telecom vertical ‘in a sweet spot,’ says Motilal Oswal. Analysts at Motilal Oswal expect 22% upside on RJio, 7% upside on consolidated Ebitda.',
 'Central bank has retained advisory panel. Reliance Capital b

In [74]:
from transformers import pipeline
# Name the checkpoint explicitly. This is the model the pipeline fell back to
# when none was supplied, so results are unchanged, but the notebook no longer
# depends on the library's default, which may change between versions.
sentiment = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


In [75]:
# Run sentiment analysis over each ticker's summaries. Each result is a dict
# with 'label' and 'score' keys, in the same order as the summaries.
scores = {ticker:sentiment(summaries[ticker]) for ticker in monitored_tickers}
scores



{'reliance': [{'label': 'POSITIVE', 'score': 0.9855557680130005},
  {'label': 'NEGATIVE', 'score': 0.9692246317863464},
  {'label': 'NEGATIVE', 'score': 0.9891399145126343},
  {'label': 'POSITIVE', 'score': 0.6046360731124878},
  {'label': 'NEGATIVE', 'score': 0.9304536581039429},
  {'label': 'NEGATIVE', 'score': 0.985382080078125},
  {'label': 'POSITIVE', 'score': 0.7685961723327637},
  {'label': 'POSITIVE', 'score': 0.9713582396507263},
  {'label': 'NEGATIVE', 'score': 0.9967558979988098},
  {'label': 'NEGATIVE', 'score': 0.99619460105896}],
 'grasim': [{'label': 'NEGATIVE', 'score': 0.9996169805526733},
  {'label': 'NEGATIVE', 'score': 0.9950636029243469},
  {'label': 'NEGATIVE', 'score': 0.9409163594245911},
  {'label': 'NEGATIVE', 'score': 0.997526228427887},
  {'label': 'POSITIVE', 'score': 0.993603527545929},
  {'label': 'NEGATIVE', 'score': 0.9173575043678284},
  {'label': 'POSITIVE', 'score': 0.9822432994842529},
  {'label': 'POSITIVE', 'score': 0.9716947674751282},
  {'label'

In [76]:
# Show the first 'reliance' summary next to its sentiment label and score.
print(summaries['reliance'][0], scores['reliance'][0]['label'], scores['reliance'][0]['score'])

‘Representing the first production of these chemicals in the UAE,’ RIL says. Ta’ziz is a joint venture between ADNOC and ADQ POSITIVE 0.9855557680130005


In [77]:
# Show the first 'grasim' summary next to its sentiment label and score.
print(summaries['grasim'][0], scores['grasim'][0]['label'], scores['grasim'][0]['score'])

Asian Paints, Berger Paints have fallen since Grasim venture announcement. Niche players like Kansai Nerolac and Akzo Nobel have seen declines NEGATIVE 0.9996169805526733


In [84]:
def create_output_array(summaries, scores, urls):
    """Flatten the per-ticker results into a list of rows.

    Walks the module-level ``monitored_tickers`` in order and, for each
    summary of a ticker, pairs it by position with the matching entry in
    ``scores`` and ``urls``.

    Args:
        summaries: Mapping of ticker -> list of summary strings.
        scores: Mapping of ticker -> list of dicts with 'label' and 'score'.
        urls: Mapping of ticker -> list of URLs.

    Returns:
        list[list]: Rows of ``[ticker, summary, label, score, url]``.
    """
    rows = []
    for ticker in monitored_tickers:
        for position, summary_text in enumerate(summaries[ticker]):
            result = scores[ticker][position]
            rows.append([
                ticker,
                summary_text,
                result['label'],
                result['score'],
                urls[ticker][position],
            ])
    return rows

In [85]:
# Build the flat row list (ticker, summary, label, score, url) from the per-ticker dicts.
final_output = create_output_array(summaries, scores, cleaned_urls)
final_output

[['reliance',
  '‘Representing the first production of these chemicals in the UAE,’ RIL says. Ta’ziz is a joint venture between ADNOC and ADQ',
  'POSITIVE',
  0.9855557680130005,
  'https://www.livemint.com/companies/news/ril-forms-2-billion-production-jv-with-abu-dhabi-chemical-company-11638879989312.html'],
 ['reliance',
  'Poll shows 60% of respondents expect RBI to keep key rates unchanged. Reliance, Hindustan Zinc among top 10 stocks to watch on Wednesday',
  'NEGATIVE',
  0.9692246317863464,
  'https://www.livemint.com/market/stock-market-news/stocks-to-watch-reliance-industries-l-t-hindustan-zinc-11638926012677.html'],
 ['reliance',
  'Economists at Nomura and Barclays expect a hike in reverse repo rate. Tech Mahindra acquired US remote customer experience provider Activus Connect',
  'NEGATIVE',
  0.9891399145126343,
  'https://www.livemint.com/market/stock-market-news/stocks-to-watch-ril-tech-mahindra-sbi-auto-shares-future-retail-11638752620978.html'],
 ['reliance',
  'Mukes

In [86]:
# Prepend the CSV header row. The guard keeps this cell idempotent: running it
# again does not stack a second header on top of the first.
header_row = ['Ticker','Summary', 'Label', 'Confidence', 'URL']
if not final_output or final_output[0] != header_row:
    final_output.insert(0, header_row)

In [87]:
# Confirm the header row is now the first entry.
final_output

[['Ticker', 'Summary', 'Label', 'Confidence', 'URL'],
 ['reliance',
  '‘Representing the first production of these chemicals in the UAE,’ RIL says. Ta’ziz is a joint venture between ADNOC and ADQ',
  'POSITIVE',
  0.9855557680130005,
  'https://www.livemint.com/companies/news/ril-forms-2-billion-production-jv-with-abu-dhabi-chemical-company-11638879989312.html'],
 ['reliance',
  'Poll shows 60% of respondents expect RBI to keep key rates unchanged. Reliance, Hindustan Zinc among top 10 stocks to watch on Wednesday',
  'NEGATIVE',
  0.9692246317863464,
  'https://www.livemint.com/market/stock-market-news/stocks-to-watch-reliance-industries-l-t-hindustan-zinc-11638926012677.html'],
 ['reliance',
  'Economists at Nomura and Barclays expect a hike in reverse repo rate. Tech Mahindra acquired US remote customer experience provider Activus Connect',
  'NEGATIVE',
  0.9891399145126343,
  'https://www.livemint.com/market/stock-market-news/stocks-to-watch-ril-tech-mahindra-sbi-auto-shares-futur

In [88]:
import csv
# Write all rows (header first) to a CSV file. The encoding is set explicitly
# because the summaries contain non-ASCII punctuation such as curly quotes;
# relying on the platform default can fail or produce a file that other tools
# decode incorrectly. newline='' lets the csv module control line endings.
with open('stocksummaries.csv', mode='w', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerows(final_output)