In [29]:
!pip install transformers
!pip install sentencepiece




In [30]:
# "The Pegasus Tokenizer will preprocess your text in the way that the Pegasus model was originally trained on, including lowercasing text, splitting it into subwords, and mapping these subwords to their corresponding IDs in the model's vocabulary."
# "The "conditional" part of the name refers to the fact that the generated text is conditioned on the input text. In the case of Pegasus, this model is specialized for abstractive text summarization tasks, where the goal is to generate a summary of the input text."

from transformers import PegasusTokenizer, PegasusForConditionalGeneration 
import requests
from bs4 import BeautifulSoup

In [31]:
model_name = "human-centered-summarization/financial-summarization-pegasus"
tokenizer = PegasusTokenizer.from_pretrained(model_name) #what encodes and decodes the text
model = PegasusForConditionalGeneration.from_pretrained(model_name)

In [32]:
#!pip install lxml

url = "https://sg.news.yahoo.com/tesla-stock-hit-two-downgrades-151408762.html?guccounter=1&guce_referrer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8&guce_referrer_sig=AQAAADlllwEDx1bUlJiCpaZ-JAnMEsHaGkkZNXAxG8pzGgUq6SMFPs37f0kNluMJyG0zBgDzmiI8xSdRTXVenjiOphi6sp03rLyYK4n8RdZpi0PVRJScz3YmxEUVubZwI4Ehn207e88gp2iHy4DT5BdMAbYS8w_fx99DyFq9EETJ5sM5"
document_text = requests.get(url).text
soup = BeautifulSoup(document_text, 'lxml')
header = soup.findAll('h1')[1].text
print(header)
paragraphs = soup.findAll('p')
print(paragraphs)

Tesla stock hit with two downgrades in two days: What analysts are saying
[<p>Analysts at both Morgan Stanley and Barclays have downgraded Tesla shares to "equal weight." Yahoo Finance Automotive Reporter Pras Subramanian explains why.</p>, <p><span class="speaker">SEANA SMITH:</span> Well, investors are unplugging from Tesla with shares sinking lower today, off just over 3%. The EV maker facing two downgrades in just two days this week. One from Morgan Stanley, the analyst there, Adam Jonas, and the other from Barclays. So where does the company really stand? Yahoo Finance's Pras Subramanian joining us now. Pras, we know Tesla obviously a massive favorite here among investors since the start of the year with this huge run up. But a downgrade, particularly from Jonas, that's pretty big.</p>, <p><span class="speaker">PRAS SUBRAMANIAN:</span> Yeah. I mean, he's a pretty longtime bull on Tesla. And Dan Levy, day before, also not a bearish analyst on Tesla. Both downgrading-- both basicall

In [40]:
print(tokenizer.model_max_length)

text_array = []
for paragraph in paragraphs:
    text_array.append(paragraph.text)
    
words = ' '.join(text_array).split(' ')[:400] # model accepts max input length of 512 tokens so ill pass in less to be safe
text = ' '.join(words)
print(text)

512
Analysts at both Morgan Stanley and Barclays have downgraded Tesla shares to "equal weight." Yahoo Finance Automotive Reporter Pras Subramanian explains why. SEANA SMITH: Well, investors are unplugging from Tesla with shares sinking lower today, off just over 3%. The EV maker facing two downgrades in just two days this week. One from Morgan Stanley, the analyst there, Adam Jonas, and the other from Barclays. So where does the company really stand? Yahoo Finance's Pras Subramanian joining us now. Pras, we know Tesla obviously a massive favorite here among investors since the start of the year with this huge run up. But a downgrade, particularly from Jonas, that's pretty big. PRAS SUBRAMANIAN: Yeah. I mean, he's a pretty longtime bull on Tesla. And Dan Levy, day before, also not a bearish analyst on Tesla. Both downgrading-- both basically saying that the stock is not tied to fundamentals right now. You know, not really much news other than some charging deals here and there that are

In [47]:
input_ids = tokenizer.encode(text, return_tensors='pt')
#print(input_ids)

#num beams refers to beam width in beam search algorithm

#for future reference - https://www.youtube.com/watch?v=RLWuzLLSIgw&ab_channel=DeepLearningAI
output = model.generate(input_ids, max_length=60, num_beams=6, early_stopping=True) # early stopping - doesnt have to be 60 token
print(output)
summary = tokenizer.decode(output[0], skip_special_tokens=True)
print(summary)

tensor([[    0, 14796,  5921,  2931,   228, 38913,   116,   115,   188,   228,
           390,   107,     1]])
EV maker facing two downgrades in just two days.


In [112]:
ticker = input()
monitored_tickers = ['GME', 'TSLA', 'BTC']

 BTC, TSLA, GME


['BTC', ' TSLA', ' GME']


In [130]:
def search_ticker(ticker):
    url = "https://www.google.com/search?q=yahoo+finance+{}&tbm=nws".format(ticker)
    get_request = requests.get(url)
    soup = BeautifulSoup(get_request.text, 'lxml')
    links = soup.find_all('a')
    array = []
    for link in links:
        array.append(link.get('href'))
    return array #returns array of yahoo finance urls
    
def filter_links(arr):
    exclude_list = ['maps', 'policies', 'preferences', 'accounts', 'support']
    result = []
    for link in arr:
        if ("https" in link) and not any(exclude_word in link for exclude_word in exclude_list):
            start_index = link.find("https://")
            if start_index != -1:
                link = link[start_index:].split('&')[0]
            result.append(link)
    return result #removes unwanted urls
            
raw_urls = search_ticker(ticker)
print(raw_urls, "\n")
filtered_links = filter_links(raw_urls)
print(filtered_links)

['/?sa=X&ved=0ahUKEwj3l-iMj-H_AhWDHrkGHTv_AUMQOwgC', '/search?q=yahoo+finance+TSLA&tbm=nws&ie=UTF-8&gbv=1&sei=2ZyZZLfHL4O95OUPu_6HmAQ', '/search?q=yahoo+finance+TSLA&ie=UTF-8&source=lnms&sa=X&ved=0ahUKEwj3l-iMj-H_AhWDHrkGHTv_AUMQ_AUIBSgA', '/search?q=yahoo+finance+TSLA&ie=UTF-8&tbm=vid&source=lnms&sa=X&ved=0ahUKEwj3l-iMj-H_AhWDHrkGHTv_AUMQ_AUIBygC', '/search?q=yahoo+finance+TSLA&ie=UTF-8&tbm=isch&source=lnms&sa=X&ved=0ahUKEwj3l-iMj-H_AhWDHrkGHTv_AUMQ_AUICCgD', 'https://maps.google.com/maps?q=yahoo+finance+TSLA&um=1&ie=UTF-8&sa=X&ved=0ahUKEwj3l-iMj-H_AhWDHrkGHTv_AUMQ_AUICSgE', '/search?q=yahoo+finance+TSLA&ie=UTF-8&tbm=shop&source=lnms&sa=X&ved=0ahUKEwj3l-iMj-H_AhWDHrkGHTv_AUMQ_AUICigF', '/search?q=yahoo+finance+TSLA&ie=UTF-8&tbm=bks&source=lnms&sa=X&ved=0ahUKEwj3l-iMj-H_AhWDHrkGHTv_AUMQ_AUICygG', '/advanced_search', '/search?q=yahoo+finance+TSLA&ie=UTF-8&tbm=nws&source=lnt&tbs=qdr:h&sa=X&ved=0ahUKEwj3l-iMj-H_AhWDHrkGHTv_AUMQpwUIDQ', '/search?q=yahoo+finance+TSLA&ie=UTF-8&tbm=nws&source

In [207]:
def scrape_articles(url_array):
    articles = []
    for url in url_array:
        if ("https://www.google.com") not in url:
            get_request = requests.get(url)
            if get_request.status_code == 200:
                soup = BeautifulSoup(get_request.text, 'lxml')
                paragraphs = soup.find_all('p')
                text = [paragraph.text for paragraph in paragraphs]
                words = ' '.join(text).split(' ')[:350]
                article = ' '.join(words)
                articles.append(article)
                print(article, "\n")
    return articles

articles = scrape_articles(filtered_links)

Tesla (TSLA) stock sank 5.5% on Wednesday, its steepest loss in two months, after one analyst on Wall Street cautioned it was time to take some money off the table. In a note to investors on Wednesday, Barclays analyst Dan Levy downgraded Tesla shares to Equal Weight from Overweight, claiming the recent rally ignored near-term questions about the stock’s fundamentals. Though Tesla doesn’t exactly trade on fundamentals — it currently trades at a forward P/E of 80 — Levy does make some interesting points about where Tesla stock sits compared to only a month ago. “We believe the stock’s recent rally can be best explained by the market’s current AI-driven thematic trade, as well as excitement over recent announcements to open the TSLA Supercharger network to other brands,” Levy said in his note. “Yet while we aren’t surprised that the stock has participated in the rally, we believe it is prudent to move to the sidelines.” Levy’s main thesis for the downgrade rests on three main points: the

In [208]:
def summarise(articles):
    result = []
    for article in articles:
        input_ids = tokenizer.encode(article, return_tensors='pt')
        output = model.generate(input_ids, max_length=60, num_beams=5, early_stopping=True)
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        result.append(summary)
    return result
        
summarised_articles = summarise(articles)

In [209]:
print(summarised_articles)

['Barclays downgrades Tesla to Equal Weight from Overweight. Near-term fundamentals haven’t changed, Levy says', '‘It’s kind of like the markets are finally recognizing the full value of the software,’ Gerber says.', 'Shares have been on the rise for a couple weeks now. Tesla engineers had concerns about the upcoming Cybertruck', 'EVs will be able to charge within Tesla’s Supercharger network in 2024.', 'Tesla’s new truck to be more than a ‘niche product,’ firm says. ARK sees EV market share hitting over 70% by 2027', 'Well-known critic of Tesla, Elon Musk suspended. Greenspan’s personal Twitter account was also suspended', 'All versions of the Model 3 now qualify for full federal EV tax credit. Tesla Model 3 RWD version now costs $32,740 with full credit.', "Tesla, Apple, Alphabet all fall on Monday. Investors consider what the weekend's short-lived challenge to Putin means"]


In [210]:
from transformers import pipeline

In [211]:
analyzer = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [212]:
analysis = analyzer(summarised_articles)
print(analysis)

[{'label': 'NEGATIVE', 'score': 0.9703496694564819}, {'label': 'NEGATIVE', 'score': 0.6549932956695557}, {'label': 'NEGATIVE', 'score': 0.9748253226280212}, {'label': 'POSITIVE', 'score': 0.8688324093818665}, {'label': 'POSITIVE', 'score': 0.9890111684799194}, {'label': 'NEGATIVE', 'score': 0.9994245767593384}, {'label': 'NEGATIVE', 'score': 0.9905866980552673}, {'label': 'NEGATIVE', 'score': 0.9728940725326538}]


In [213]:
print(summarised_articles[0])
print(analysis[0])

Barclays downgrades Tesla to Equal Weight from Overweight. Near-term fundamentals haven’t changed, Levy says
{'label': 'NEGATIVE', 'score': 0.9703496694564819}


In [217]:
def make_output(ticker, summarised_articles, analysis, filtered_links):
    output = []
    for i in range(len(summarised_articles)):
        result = [ticker, summarised_articles[i], analysis[i]['label'], analysis[i]['score'], filtered_links[i]]
        output.append(result)
    return output

output = make_output(ticker, summarised_articles, analysis, filtered_links)

In [218]:
output.insert(0, ['Ticker', 'Summary', 'Sentiment', 'Confidence', 'URL'])

In [219]:
import csv
with open('assetsummaries.csv', mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerows(output)

In [220]:
!pip freeze > requirements.txt