# Project

## Packages

In [1]:
from bs4 import BeautifulSoup # to parse external data
import yfinance as yf
import pandas as pd # to read CSV files
import requests # to get data
import spacy # to extract entities
import streamlit as st

## RSS feeds

In [2]:
# Check that the Yahoo Finance RSS feed is downloadable (the displayed response shows the HTTP status).
# A timeout is passed because requests does not time out on its own, so an unresponsive
# server would otherwise block this cell indefinitely.
yahoo = requests.get("https://finance.yahoo.com/news/rssindex", timeout=10)
yahoo

<Response [200]>

In [3]:
# Get headlines: parse the downloaded feed as XML and collect every <title> element.
Tyahoo = BeautifulSoup(yahoo.content, features='xml')
# find_all is the current Beautiful Soup name; findAll is only kept as a deprecated alias.
TY = Tyahoo.find_all('title')
TY # list of titles

[<title>Yahoo Finance</title>,
 <title>Yahoo Finance</title>,
 <title>Why a Wave of Mergers Next Year Could Lift Drug and Biotech Stocks</title>,
 <title>It's no stimulus check, but millions are passing up $2,000 in free government money</title>,
 <title>Cathie Wood says stocks have corrected into ‘deep value territory’ and won’t let benchmarks ‘hold our strategies hostage’</title>,
 <title>Retirees Aren’t Spending Enough of Their Nest Eggs. Here’s Why.</title>,
 <title>Tech Stocks Face a Tough Road in 2022. Here Are the Likely Exceptions.</title>,
 <title>I just got a huge dividend payout from a mutual fund and my investment value dropped — what’s going on?</title>,
 <title>Bill Ackman says inflation could actually be a ‘raging’ 10% — here are 3 creative ways to hedge</title>,
 <title>Insiders Sense a Bottom in These 2 Stocks; Analysts Say ‘Buy’</title>,
 <title>REITs Are Poised to Outperform Stocks Next Year. Here Are 3 Picks.</title>,
 <title>You could be a landlord for Amazon, FedE

In [4]:
# Download the WSJ Markets RSS feed; the timeout keeps the cell from hanging if the server never answers.
# NOTE(review): the fbclid query parameter looks like a leftover click-tracking id — presumably the
# feed works without it; confirm before removing.
wsj = requests.get("https://feeds.a.dj.com/rss/RSSMarketsMain.xml?fbclid=IwAR17gY8vV2SdoTLP_35v7zGYmPireg5xIX_y1VEgPYRoXVd5jVouoKRlXAc", timeout=10)
wsj

<Response [200]>

In [5]:
# Parse the WSJ feed as XML and collect every <title> element.
Twsj = BeautifulSoup(wsj.content, features='xml')
# find_all is the current Beautiful Soup name; findAll is only kept as a deprecated alias.
TW = Twsj.find_all('title')
TW

[<title>WSJ.com: Markets</title>,
 <title>WSJ.com: Markets</title>,
 <title>Inflation Adds to Cost of Clean Energy Transition</title>,
 <title>Equifax to Add More 'Buy Now, Pay Later' Plans to Credit Reports</title>,
 <title>Sky-High Lumber Prices Are Back</title>,
 <title>U.S. Gas Exports Likely More Trickle Than Flood</title>,
 <title>Investors Balk at Plan to Buy Coal Mines and Close Them</title>,
 <title>Crypto and Its Many Fees: What to Know About the Hidden Costs</title>,
 <title>Johnson &amp; Johnson, McDonald's, Toyota: Stocks That Defined the Week</title>,
 <title>Stocks Fall Amid Rate-Rise Worries</title>,
 <title>U.S. Regulators Raise Concern With Stablecoin Digital Currency</title>,
 <title>Turkish Lira, Stocks Sink Amid Inflation Concerns</title>,
 <title>What's Not to Like About a Fund With a 7% Yield</title>,
 <title>Holiday Tipping Guide for 2021</title>,
 <title>Chinese Broker to Conduct Review Into Short-Seller's Fraud Allegations</title>,
 <title>Rivian, FedEx, Cerne

In [6]:
# Download the CNBC RSS feed; the timeout keeps the cell from hanging if the server never answers.
# NOTE(review): the fbclid query parameter looks like a leftover click-tracking id — presumably the
# feed works without it; confirm before removing.
cnbc = requests.get("https://www.cnbc.com/id/15839135/device/rss/rss.html?fbclid=IwAR2o0zeWtmgEwZob45_F6e02pkTVo9uBGL0VI1GQv8mPyScEFY-hn9t089Y", timeout=10)
cnbc

<Response [200]>

In [7]:
# Parse the CNBC feed as XML and collect every <title> element.
Tcnbc = BeautifulSoup(cnbc.content, features='xml')
# find_all is the current Beautiful Soup name; findAll is only kept as a deprecated alias.
TC = Tcnbc.find_all('title')
TC

[<title>Earnings</title>,
 <title>Darden shares fall as CEO announces plans to retire; Olive Garden parent raises forecast despite planned wage hike</title>,
 <title>Adobe plunges 10% and has second-worst day in past decade on weak guidance</title>,
 <title>Lowe's says pandemic-fueled home improvement demand could cool in year ahead</title>,
 <title>Lululemon earnings top estimates, but shares fall after retailer cuts forecast for Mirror sales</title>,
 <title>Oracle swings to loss because of payment tied to dispute over former CEO Hurd's employment</title>,
 <title>GameStop shares fall as video game retailer reports widening losses in third quarter</title>,
 <title>Rent the Runway posts widening losses, as subscribers have yet to return to pre-pandemic levels</title>,
 <title>Stitch Fix shares crater as retailer cuts forecast, despite topping earnings estimates</title>,
 <title>MongoDB shares jump as revenue growth accelerates</title>,
 <title>DocuSign plunges almost 30% after e-signa

## Extract entities

In [8]:
# Load the spaCy pipeline that the following cells use to process the headline text.
# The model has to be installed first, e.g. from a shell: python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm") # en_core_web_sm - basic NLP tasks (to process the extracted text data)

In [9]:
# TC[1] is used as the first headline (TC[0] is the feed's own title in the captured output).
first_headline = TC[1]
processed_hline = nlp(first_headline.text)
print(first_headline)

# One line per token: its text, the explanation of its part-of-speech tag,
# and the explanation of its dependency label.
for token in processed_hline:
    pos_label = spacy.explain(token.pos_)
    dep_label = spacy.explain(token.dep_)
    print(f"{token.text} ----- {pos_label} ----- {dep_label}")

<title>Darden shares fall as CEO announces plans to retire; Olive Garden parent raises forecast despite planned wage hike</title>
Darden ----- adjective ----- adjectival modifier
shares ----- noun ----- nominal subject
fall ----- verb ----- clausal complement
as ----- subordinating conjunction ----- marker
CEO ----- noun ----- nominal subject
announces ----- verb ----- adverbial clause modifier
plans ----- noun ----- direct object
to ----- particle ----- auxiliary
retire ----- verb ----- open clausal complement
; ----- punctuation ----- punctuation
Olive ----- proper noun ----- compound
Garden ----- proper noun ----- compound
parent ----- noun ----- nominal subject
raises ----- verb ----- None
forecast ----- noun ----- direct object
despite ----- subordinating conjunction ----- prepositional modifier
planned ----- verb ----- adjectival modifier
wage ----- noun ----- compound
hike ----- noun ----- object of preposition


In [10]:
# Render the dependency parse of the processed headline inline in the notebook.
spacy.displacy.render(
    processed_hline,
    style="dep",
    jupyter=True,
    options={"distance": 110},
)

In [11]:
# Show the named entities found in the processed headline; the ORG tag marks companies.
# No 'distance' option is passed: it is a dependency-visualizer setting and has no
# effect when rendering with style='ent'.
spacy.displacy.render(processed_hline, style='ent', jupyter=True)

In [12]:
# Extract ORG entities from the headlines: run every title through the spaCy pipeline
# and keep the text of each entity labelled 'ORG'.
# Duplicates are kept, so an organisation found in several titles appears several times.
companies = []
for title in TC:
    doc = nlp(title.text)
    # doc.ents holds the recognised entities of this title; non-ORG entities are skipped.
    companies.extend(ent.text for ent in doc.ents if ent.label_ == 'ORG')

companies

['Darden',
 'Olive Garden',
 'Lowe',
 'Mirror',
 'Runway',
 'Kay Jewelers',
 'Nordstrom',
 "Dick's Sporting Goods",
 'Macy',
 'Lowe',
 'Target',
 'Home Depot',
 'EV']

In [13]:
# Which data will be displayed for the stock: one key per field, each starting
# with its own empty list.
Stock_info = {
    field: []
    for field in (
        'Org',
        'Symbol',
        'currentPrice',
        'dayHigh',
        'dayLow',
        'forwardPE',
        'dividendYield',
    )
}