# Extracting Stock Data from Yahoo Finance

### Project Outline
- Scraping data from https://finance.yahoo.com/
- I will first extract the stock data.
- I will then find the Top 10 most active stocks.
- Lastly, I will find the Top 10 gainers and losers.

In [3]:
import requests

In [4]:
# Yahoo Finance "most active" screener page that the rest of the notebook scrapes.
home_url = "https://finance.yahoo.com/most-active"

In [6]:
# Download the screener page.
# - timeout: without it requests waits indefinitely on a stalled connection,
#   which would hang the whole notebook run.
# - raise_for_status(): fail here on a 4xx/5xx response instead of silently
#   parsing an error page in the cells below.
response = requests.get(home_url, timeout=30)
response.raise_for_status()

In [7]:
# Show the HTTP status code returned for the request above.
response.status_code

200

In [8]:
# Keep the response body as a string; it is parsed with BeautifulSoup below.
contents = response.text
# Preview only the first 1000 characters rather than dumping the whole page.
contents[:1000]

'<!doctype html><html data-color-theme="light" id="atomic" class="NoJs androidbrowser smartphone failsafe" lang="en-US"><head prefix="og: https://ogp.me/ns#"><script>window.performance && window.performance.mark && window.performance.mark(\'PageStart\');</script><meta charset="utf-8"><title>Most Active Stocks Today - Yahoo Finance</title><meta name="keywords" content="Stock Screener, industry, index membership, share data, stock price, market cap, beta, sales, profitability, valuation ratios, analyst estimates, large cap value, bargain growth, preset stock screens"><meta http-equiv="x-dns-prefetch-control" content="on"><meta property="twitter:dnt" content="on"><meta property="fb:app_id" content="458584288257241"><meta name="theme-color" content="#400090"><meta name="viewport" content="width=device-width, initial-scale=1"><meta name="description" lang="en-US" content="See the list of the most active stocks today, including share price change and percentage, trading volume, intraday high

### Now parse and extract information

In [9]:
from bs4 import BeautifulSoup

# Parse the downloaded HTML using the 'html.parser' backend.
document = BeautifulSoup(contents, 'html.parser')

In [10]:
# Ticker links: <a> tags selected by this exact class string.
active_class = 'Fw(600) C($linkColor)'
most_active_tags = document.find_all('a', attrs={'class': active_class})

In [11]:
# Number of ticker link tags that matched.
len(most_active_tags)

25

In [12]:
# Inspect the first ten matched tags.
most_active_tags[:10]

[<a class="Fw(600) C($linkColor)" data-test="quoteLink" href="/quote/TSLA?p=TSLA" title="Tesla, Inc.">TSLA</a>,
 <a class="Fw(600) C($linkColor)" data-test="quoteLink" href="/quote/INTC?p=INTC" title="Intel Corporation">INTC</a>,
 <a class="Fw(600) C($linkColor)" data-test="quoteLink" href="/quote/SOFI?p=SOFI" title="SoFi Technologies, Inc.">SOFI</a>,
 <a class="Fw(600) C($linkColor)" data-test="quoteLink" href="/quote/AAPL?p=AAPL" title="Apple Inc.">AAPL</a>,
 <a class="Fw(600) C($linkColor)" data-test="quoteLink" href="/quote/RIVN?p=RIVN" title="Rivian Automotive, Inc.">RIVN</a>,
 <a class="Fw(600) C($linkColor)" data-test="quoteLink" href="/quote/PLTR?p=PLTR" title="Palantir Technologies Inc.">PLTR</a>,
 <a class="Fw(600) C($linkColor)" data-test="quoteLink" href="/quote/AMZN?p=AMZN" title="Amazon.com, Inc.">AMZN</a>,
 <a class="Fw(600) C($linkColor)" data-test="quoteLink" href="/quote/AMD?p=AMD" title="Advanced Micro Devices, Inc.">AMD</a>,
 <a class="Fw(600) C($linkColor)" data-te

In [13]:
# The text content of a matched tag is the value collected in the next cell.
most_active_tags[0].text

'TSLA'

In [14]:
# Collect the text content of every matched ticker link, in page order.
most_active_symbol = [tag.text for tag in most_active_tags]

print(most_active_symbol)

['TSLA', 'INTC', 'SOFI', 'AAPL', 'RIVN', 'PLTR', 'AMZN', 'AMD', 'NIO', 'DISH', 'F', 'NVDA', 'T', 'PFE', 'ZI', 'SIRI', 'GOOG', 'PANW', 'GPK', 'BAC', 'CCL', 'DBX', 'MSFT', 'WBD', 'GOOGL']


In [15]:
# Company-name cells: <td> elements selected by this exact class string.
name_class = 'Va(m) Ta(start) Px(10px) Fz(s)'
most_active_names = document.find_all('td', attrs={'class': name_class})

In [16]:
# Number of company-name cells that matched.
len(most_active_names)

25

In [17]:
# Text content of each company-name cell, in page order.
most_active_name = [cell.text for cell in most_active_names]

print(most_active_name)

['Tesla, Inc.', 'Intel Corporation', 'SoFi Technologies, Inc.', 'Apple Inc.', 'Rivian Automotive, Inc.', 'Palantir Technologies Inc.', 'Amazon.com, Inc.', 'Advanced Micro Devices, Inc.', 'NIO Inc.', 'DISH Network Corporation', 'Ford Motor Company', 'NVIDIA Corporation', 'AT&T Inc.', 'Pfizer Inc.', 'ZoomInfo Technologies Inc.', 'Sirius XM Holdings Inc.', 'Alphabet Inc.', 'Palo Alto Networks, Inc.', 'Graphic Packaging Holding Company', 'Bank of America Corporation', 'Carnival Corporation & plc', 'Dropbox, Inc.', 'Microsoft Corporation', 'Warner Bros. Discovery, Inc.', 'Alphabet Inc.']


In [18]:

# Price elements: <fin-streamer> tags selected by class "" and
# data-field 'regularMarketPrice'.
price_attrs = {'class': "", 'data-field': 'regularMarketPrice'}
most_active_price = document.find_all('fin-streamer', attrs=price_attrs)
len(most_active_price)

25

In [19]:
# Spot-check: text content of the first matched price element.
print(most_active_price[0].text)

260.54


In [20]:
# Text content of each price element, in page order (values stay strings).
most_active_prices = [element.text for element in most_active_price]

print(most_active_prices)

['260.54', '36.37', '8.60', '184.92', '14.88', '16.30', '125.49', '120.08', '9.40', '6.47', '14.42', '426.92', '16.06', '40.06', '26.82', '3.9400', '124.06', '246.53', '26.01', '29.19', '15.80', '25.97', '346.62', '12.80', '123.53']


In [21]:
# Absolute price-change elements: <fin-streamer> tags selected by class
# "Fw(600)" and data-field 'regularMarketChange'.
change_attrs = {'class': "Fw(600)", 'data-field': 'regularMarketChange'}
most_active_pricechng = document.find_all('fin-streamer', attrs=change_attrs)
len(most_active_pricechng)

25

In [22]:
# Spot-check: text content of the element at index 1 (the second match).
print(most_active_pricechng[1].text)

+0.55


In [23]:
# Text content of each price-change element, in page order (signed strings).
most_active_pricechngs = [element.text for element in most_active_pricechng]

print(most_active_pricechngs)

['+4.64', '+0.55', '-0.95', '-1.09', '-0.36', '-0.30', '-1.62', '-4.16', '-0.39', '+0.23', '-0.03', '+0.39', '+0.03', '+0.31', '-1.58', '+0.1100', '-1.73', '+2.73', '-0.16', '-0.18', '-0.32', '+0.14', '-1.48', '-0.33', '-1.56']


In [24]:
# Percentage-change elements: <fin-streamer> tags selected by class "Fw(600)"
# and data-field 'regularMarketChangePercent'.
percent_attrs = {'class': "Fw(600)", 'data-field': 'regularMarketChangePercent'}
most_active_pricechngp = document.find_all('fin-streamer', attrs=percent_attrs)
len(most_active_pricechngp)

25

In [25]:
# Text content of each percentage-change element, in page order.
most_active_pricechngspp = [element.text for element in most_active_pricechngp]

print(most_active_pricechngspp)

['+1.81%', '+1.54%', '-9.95%', '-0.59%', '-2.36%', '-1.81%', '-1.27%', '-3.35%', '-3.98%', '+3.69%', '-0.21%', '+0.09%', '+0.19%', '+0.78%', '-5.56%', '+2.87%', '-1.38%', '+1.12%', '-0.61%', '-0.61%', '-1.99%', '+0.54%', '-0.43%', '-2.51%', '-1.25%']


In [26]:

# Volume elements: <fin-streamer> tags selected by class "" and
# data-field "regularMarketVolume".
volume_attrs = {'class': "", 'data-field': "regularMarketVolume"}
most_active_vol = document.find_all('fin-streamer', attrs=volume_attrs)
len(most_active_vol)

25

In [27]:
# NOTE(review): this cell rebuilds most_active_pricechngs from
# most_active_pricechng exactly as an earlier cell already does; it looks like
# a copy-paste leftover and is a candidate for removal.
most_active_pricechngs = [element.text for element in most_active_pricechng]

print(most_active_pricechngs)

['+4.64', '+0.55', '-0.95', '-1.09', '-0.36', '-0.30', '-1.62', '-4.16', '-0.39', '+0.23', '-0.03', '+0.39', '+0.03', '+0.31', '-1.58', '+0.1100', '-1.73', '+2.73', '-0.16', '-0.18', '-0.32', '+0.14', '-1.48', '-0.33', '-1.56']


In [28]:
# Text content of each volume element, in page order (values stay strings).
most_active_vols = [element.text for element in most_active_vol]

print(most_active_vols)

['167.916M', '110.24M', '107.557M', '101.256M', '92.23M', '90.696M', '84.247M', '82.007M', '81.4M', '79.289M', '75.398M', '65.571M', '64.149M', '60.339M', '58.349M', '57.974M', '56.699M', '54.823M', '53.235M', '53.05M', '51.112M', '47.223M', '46.552M', '46.121M', '45.536M']


In [29]:

# Market-cap elements: <fin-streamer> tags selected by class "" and
# data-field "marketCap".
cap_attrs = {'class': "", 'data-field': "marketCap"}
most_active_cap = document.find_all('fin-streamer', attrs=cap_attrs)
len(most_active_cap)

25

In [30]:
# Text content of each market-cap element, in page order (values stay strings).
most_active_caps = [element.text for element in most_active_cap]

print(most_active_caps)

['825.782B', '151.699B', '8.092B', '2.909T', '13.977B', '34.531B', '1.288T', '193.372B', '16.785B', '3.446B', '57.691B', '1.054T', '114.813B', '226.151B', '11.338B', '15.241B', '1.571T', '75.402B', '7.989B', '232.619B', '20.585B', '9.081B', '2.577T', '31.182B', '1.572T']


### Summarizing the findings into a CSV

In [31]:
import pandas as pd

In [32]:
# Column labels and the scraped lists that fill them, paired by position.
# The order here is the column order of the resulting frame.
column_labels = [
    'Stocks_Symbol',
    'Stocks_Name',
    'Stocks_Price',
    'Stocks_Price_Change',
    'Stocks_Percent_Change',
    'Stocks_Volume',
    'Stocks_Market_Capital',
]
column_values = [
    most_active_symbol,
    most_active_name,
    most_active_prices,
    most_active_pricechngs,
    most_active_pricechngspp,
    most_active_vols,
    most_active_caps,
]
table_dict = dict(zip(column_labels, column_values))

# One row per scraped stock; every column still holds the raw scraped strings.
topics_df = pd.DataFrame(table_dict)
topics_df

Unnamed: 0,Stocks_Symbol,Stocks_Name,Stocks_Price,Stocks_Price_Change,Stocks_Percent_Change,Stocks_Volume,Stocks_Market_Capital
0,TSLA,"Tesla, Inc.",260.54,4.64,+1.81%,167.916M,825.782B
1,INTC,Intel Corporation,36.37,0.55,+1.54%,110.24M,151.699B
2,SOFI,"SoFi Technologies, Inc.",8.6,-0.95,-9.95%,107.557M,8.092B
3,AAPL,Apple Inc.,184.92,-1.09,-0.59%,101.256M,2.909T
4,RIVN,"Rivian Automotive, Inc.",14.88,-0.36,-2.36%,92.23M,13.977B
5,PLTR,Palantir Technologies Inc.,16.3,-0.3,-1.81%,90.696M,34.531B
6,AMZN,"Amazon.com, Inc.",125.49,-1.62,-1.27%,84.247M,1.288T
7,AMD,"Advanced Micro Devices, Inc.",120.08,-4.16,-3.35%,82.007M,193.372B
8,NIO,NIO Inc.,9.4,-0.39,-3.98%,81.4M,16.785B
9,DISH,DISH Network Corporation,6.47,0.23,+3.69%,79.289M,3.446B


In [34]:
# Write the table to disk. index=False leaves out the default 0..N-1 row index,
# which would otherwise be written as an unnamed first column and come back as
# 'Unnamed: 0' when the file is read again. Row order is preserved either way.
topics_df.to_csv('Most_Active_Stocks.csv', index=False)