In [1]:
# imports 
import requests
import csv
from bs4 import BeautifulSoup

In [2]:
# This function takes the parsed HTML page and processes its tags to extract the stocks.
# From the page, class - "wsod_dataTable wsod_dataTableBigAlt" has the data we need.
# First get all the tables, then from each table extract the table-rows and finally,
# from the rows, extract the table-data (stock names)
# 
# Since we already know that the page lists 30 stocks -- 10 Most Actives, followed by
# 10 Gainers and finally 10 Losers -- we can split the stocks into three groups of
# 10 stocks each.
def process_hot_stocks(hot_stocks_html, group_size=10, group_count=3):
  """Group the scraped stock names under their category headings.

  The first ``<td>`` of every row in each
  ``wsod_dataTable wsod_dataTableBigAlt`` table is collected, in document
  order, into one flat list. That list is then cut into ``group_count``
  consecutive slices of ``group_size`` entries, and the i-th slice is stored
  under the text of the i-th ``<h3>`` heading.

  Args:
    hot_stocks_html: Parsed element wrapping the hot-stocks section. It must
      provide ``find_all`` (a BeautifulSoup tag in this notebook).
    group_size: Number of stocks per category. Defaults to 10.
    group_count: Number of categories to read. Defaults to 3.

  Returns:
    dict mapping each heading text to the set of stock strings in its slice.

  Raises:
    ValueError: If ``hot_stocks_html`` is None, or if fewer than
      ``group_count`` ``<h3>`` headings are present.
  """
  if hot_stocks_html is None:
    raise ValueError("Hot-stocks section is missing from the parsed page")

  stocks = []
  tables = hot_stocks_html.find_all("table", {"class": "wsod_dataTable wsod_dataTableBigAlt"})
  for table in tables:
    for table_row in table.find_all("tr"):
      # Rows without any <td> (e.g. header rows) carry no stock name.
      table_data = table_row.find("td")
      if table_data is not None:
        stocks.append(table_data.text)

  # Look the headings up once instead of re-scanning the tree per category.
  headings = hot_stocks_html.find_all("h3")
  if len(headings) < group_count:
    raise ValueError(
      f"Expected at least {group_count} category headings, found {len(headings)}"
    )

  hot_stocks_dict = {}
  for index, heading in enumerate(headings[:group_count]):
    boundary_left, boundary_right = index * group_size, (index + 1) * group_size
    hot_stocks_dict[heading.text] = set(stocks[boundary_left:boundary_right])

  return hot_stocks_dict


In [3]:
# Reference: https://www.digitalocean.com/community/tutorials/how-to-work-with-web-data-using-requests-and-beautiful-soup-with-python-3
# This function sends a GET request to the URL -- "https://money.cnn.com/data/hotstocks/",
# parses the page using BeautifulSoup and calls the process_hot_stocks() function to build
# a dictionary that maps each category to its set of 10 stocks.
def fetch_hotstocks(timeout=10):
  """Download the CNN hot-stocks page and return its stocks grouped by category.

  Args:
    timeout: Seconds to wait for the server before giving up. Without a
      timeout, ``requests.get`` can block indefinitely.

  Returns:
    The dictionary produced by ``process_hot_stocks`` for the
    ``wsod_hotStocks`` section of the page.

  Raises:
    requests.RequestException: On connection problems, timeouts, or an HTTP
      error status.
    ValueError: If the page has no ``<div id="wsod_hotStocks">`` element.
  """
  hot_stocks_url = "https://money.cnn.com/data/hotstocks/"
  page = requests.get(hot_stocks_url, timeout=timeout)
  # Fail here on 4xx/5xx instead of trying to scrape an error page.
  page.raise_for_status()
  parsed_page = BeautifulSoup(page.text, 'lxml')

  hot_stocks_html = parsed_page.find("div", {"id": "wsod_hotStocks"})
  if hot_stocks_html is None:
    raise ValueError(f'No <div id="wsod_hotStocks"> found at {hot_stocks_url}')

  return process_hot_stocks(hot_stocks_html)

In [4]:
# This function simply prints the data fetched from the URL -- "https://money.cnn.com/data/hotstocks/"
# under the category in which each stock was originally listed.
def print_hot_stocks(hot_stocks_dict):
  """Print the intro banner, then every category heading with its stocks.

  Args:
    hot_stocks_dict: Mapping of category name to an iterable of stock strings.
  """
  banner = (
    "This is a program to scrape data from the https://money.cnn.com/data/hotstocks/  for a class project.  \n"
    "  \n"
    "Which stock are you interested in: \n"
  )
  print(banner)

  for category in hot_stocks_dict:
    print(f"{category}:")
    for entry in hot_stocks_dict[category]:
      print(entry)

    # Blank-line separator between categories.
    print("\n")

In [5]:
# This function helps parse the span tag from the page to extract the 4 values we 
# are interested in about the stock.
def parse_values_from_page(parsed_page):
  """Extract the Open, Prev Close, Volume and Market Cap values of a stock.

  Each value is read from the ``<span>`` whose ``data-reactid`` attribute
  matches the id noted for it on https://finance.yahoo.com/quote/.

  Args:
    parsed_page: Parsed quote page. It must provide ``find`` (a BeautifulSoup
      object in this notebook).

  Returns:
    list of four strings: [open, prev close, volume, market cap].

  Raises:
    ValueError: If any of the four spans is missing from the page.
  """
  # (label, data-reactid) pairs, in the order the values are returned.
  span_fields = (
    ("Open", "103"),
    ("Prev Close", "98"),
    ("Volume", "126"),
    ("Market Cap", "139"),
  )

  values = []
  for label, span_id in span_fields:
    # Single lookup per field; a missing span is reported by name rather than
    # by dumping the whole page and failing on ``None.text``.
    span = parsed_page.find("span", {"data-reactid": span_id})
    if span is None:
      raise ValueError(
        f'{label} value not found: no <span data-reactid="{span_id}"> on the page'
      )
    values.append(span.text)

  return values

In [6]:
# For each stock, this function builds the URL and gets information about the stock
# under consideration. It then parses the page using BeautifulSoup and calls the
# parse_values_from_page() function to extract the values we are interested in.
def get_stocks_info(stocks, timeout=10):
  """Fetch the Yahoo Finance quote page of every stock and extract its values.

  Args:
    stocks: Iterable of stock strings whose first whitespace-separated token
      is the ticker symbol.
    timeout: Seconds to wait for each response before giving up. Without a
      timeout, ``requests.get`` can block indefinitely.

  Returns:
    dict mapping each stock string to the list returned by
    ``parse_values_from_page`` for its quote page.

  Raises:
    requests.RequestException: On connection problems, timeouts, or an HTTP
      error status.
  """
  stocks_dict = {}
  for stock in stocks:
    ticker_symbol = stock.split()[0]
    stock_url = f"https://finance.yahoo.com/quote/{ticker_symbol}"

    page = requests.get(stock_url, timeout=timeout)
    # Fail here on 4xx/5xx instead of trying to scrape an error page.
    page.raise_for_status()
    parsed_page = BeautifulSoup(page.text, 'lxml')
    stocks_dict[stock] = parse_values_from_page(parsed_page)

  return stocks_dict

In [7]:
# As the name suggests, this function uses the two dictionaries to construct the rows of
# values to write to the CSV file in the order specified in the description of the project
def write_stocks_to_csv(classifier_dict, stocks_dict, csv_path='stocks.csv'):
  """Write one CSV row per stock: category, ticker, company name, then values.

  Args:
    classifier_dict: Mapping of stock string to its category name.
    stocks_dict: Mapping of stock string to an iterable of its scraped values.
    csv_path: Destination file. Defaults to ``stocks.csv``.

  Raises:
    KeyError: If a stock in ``stocks_dict`` has no entry in
      ``classifier_dict``.
  """
  # newline='' is what the csv module requires of its file object; without it
  # Windows output gets an extra blank line after every row.
  with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)

    for stock, values in stocks_dict.items():
      classifier = classifier_dict[stock]
      # partition() leaves the company name empty when the stock string has
      # no space, instead of failing to unpack.
      ticker_symbol, _, company_name = stock.partition(" ")
      writer.writerow([classifier, ticker_symbol, company_name, *values])

In [10]:
# This function prints the information about the stock that the user is interested in.
# It first extracts the values from the stocks_dict (last four) and the name, and then 
# prints it out one-by-one.
def print_stock_info(ticker_symbol, stocks_dict):
  """Print the scraped values of the stock whose ticker matches the request.

  Args:
    ticker_symbol: Ticker to look for; compared against the text before the
      first space of every key in ``stocks_dict``.
    stocks_dict: Mapping of stock string to its list of scraped values
      (open, prev close, volume, market cap).
  """
  # Flatten every match into [stock, value, value, ...] in dictionary order.
  fields = [
    item
    for name, details in stocks_dict.items()
    if name.split(" ", 1)[0] == ticker_symbol
    for item in (name, *details)
  ]

  if not fields:
    print("No such ticker-symbol noted while processing hot-stocks")
    return

  print(f"The data for {fields[0]} is the following: ")
  print(fields[0])
  labels = ("OPEN:", "PREV CLOSE:", "VOLUME:", "MARKET CAP:")
  for position, label in enumerate(labels, start=1):
    print(label, fields[position])

In [None]:
# Driver cell: scrape the hot stocks, fetch the details of each one, save them
# to stocks.csv, then show the details of the stock the user asks for.
try:
  # make dictionary of the hot-stocks under their stock-type (Most Active, Gainers, Losers)
  # for faster access
  hot_stocks_dict = fetch_hotstocks()

  # make dictionary of stock to stock-type for the use of writing to csv
  classifier_dict = {
    stock: category
    for category, category_stocks in hot_stocks_dict.items()
    for stock in category_stocks
  }

  # make a list of all stocks, to collectively get information about each stock.
  # Taking the keys of classifier_dict keeps the first-seen order while fetching
  # a stock listed under several categories only once.
  stocks = list(classifier_dict)
  stocks_dict = get_stocks_info(stocks)

  # using the data from the two dictionaries, make rows of values and write to csv file
  write_stocks_to_csv(classifier_dict, stocks_dict)

  # Print hot stocks information
  print_hot_stocks(hot_stocks_dict)

  # finally prompt user to input ticker symbol for a stock and print information about
  # the stock if it is present in the hot-stocks list.
  ticker_symbol = input("User inputs:").strip().upper()
  print_stock_info(ticker_symbol, stocks_dict)

except Exception as ex:
  # This handler covers the whole pipeline (fetching, parsing, writing and the
  # prompt), so the message must not single out the ticker-symbol prompt.
  print(f"Exception encountered while processing hot stocks: {ex}")