# Reading Stock Screeners based on Industry 

What we aim to do here is to get a list of all the companies and their technicals/fundamentals. 

## Steps followed
1. Import necessary libraries and setup chromedriver
2. Define a function that will get the Yahoo Finance screener table
    1. Load the page
    2. Find the table in the page
    3. Extract the table
    4. Read the table as a dataframe 
3. Save the dataframe

In [1]:
import os

# Importing the necessary modules
import pandas as pd
import time
import warnings
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By

warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'pandas'

In [4]:
# Build the Chrome driver used by the rest of the notebook.
# The driver binary is resolved through webdriver-manager, and Chrome's
# experimental "detach" option is switched on
# (presumably so the browser window stays open afterwards - confirm).
chrome_service = Service(ChromeDriverManager().install())
options = Options()
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=chrome_service, options=options)

In [5]:
def get_yahoo_table(
    yahoo_link,
    xpath,
    sort_header_xpath='//*[@id="scr-res-table"]/div[1]/table/thead/tr/th[6]',
    wait_seconds=5,
):
    """Load a Yahoo Finance page and extract a table as a pandas DataFrame.

    The page is opened with the module-level ``driver``, scrolled down by
    500 pixels, and the header cell at ``sort_header_xpath`` is clicked twice
    (presumably to toggle the sort order of that column - confirm against the
    live page) before the table is read.

    :param yahoo_link: Yahoo Finance link to the page
    :param xpath: XPath of the table element
    :param sort_header_xpath: XPath of the header cell that is clicked twice
        before the table is read (defaults to the sixth header cell of the
        screener results table)
    :param wait_seconds: seconds to sleep after loading the page and after
        each header click, giving the page time to render

    :return: pandas DataFrame containing the table data, or ``None`` when no
        element matches ``xpath``
    :raises selenium.common.exceptions.NoSuchElementException: if the header
        cell at ``sort_header_xpath`` is not present on the page
    """
    # Local imports keep this cell runnable without touching the import cell.
    from io import StringIO

    from selenium.common.exceptions import NoSuchElementException

    # Open a web browser to the provided Yahoo Finance link
    driver.get(yahoo_link)

    # Wait for the page to load (adjust wait_seconds for slow connections)
    time.sleep(wait_seconds)
    driver.execute_script("window.scrollBy(0, 500);")

    # Click the header cell twice, pausing after each click so the table can
    # re-render before the next interaction.
    for _ in range(2):
        driver.find_element("xpath", sort_header_xpath).click()
        time.sleep(wait_seconds)

    # Only a missing element is an expected failure here; any other error
    # (invalid selector, dead browser session, Ctrl-C) should surface instead
    # of being reported as "not found".
    try:
        table_element = driver.find_element("xpath", xpath)
    except NoSuchElementException:
        print("Table element not found.")
        return None

    # Raw HTML of the <table> element, including the element itself
    table_html = table_element.get_attribute("outerHTML")

    # pandas deprecated passing literal HTML strings to read_html; wrapping
    # the markup in StringIO works on both old and new pandas versions.
    return pd.read_html(StringIO(table_html))[0]

In [6]:
# XPath of the screener results table. It is expected to be the same for
# everyone; if it does not work for you, drop a WhatsApp message in the group.
table_xpath = '//*[@id="scr-res-table"]/div[1]/table'
# Screener URL: create a screener on Yahoo Finance and copy its link here.
yahoo_link = "https://finance.yahoo.com/screener/unsaved/6c46aa22-e5e2-42b1-a272-ab6fdd5a4aa3?offset=0&count=100"

# Scrape the screener results table from the page
table_data = get_yahoo_table(yahoo_link=yahoo_link, xpath=table_xpath)


In [7]:
# Display the scraped screener table as the cell output
table_data

Unnamed: 0,Symbol,Name,Price (Intraday),Change,% Change,Volume,Avg Vol (3 month),Market Cap,PE Ratio (TTM),52 Week Range
0,INTC,Intel Corporation,36.34,-1.65,-4.34%,53.18M,38.242M,152.192B,,
1,AAPL,Apple Inc.,179.07,1.10,+0.62%,50.448M,57.543M,2.8T,30.05,
2,AMD,"Advanced Micro Devices, Inc.",101.61,-0.76,-0.74%,41.536M,61.521M,164.168B,,
3,NVDA,NVIDIA Corporation,435.20,-4.46,-1.01%,37.136M,49.922M,1.075T,105.38,
4,PLTR,Palantir Technologies Inc.,15.15,-0.31,-2.01%,37.498M,63.544M,32.6B,,
...,...,...,...,...,...,...,...,...,...,...
95,SSNC,"SS&C Technologies Holdings, Inc.",55.32,-0.18,-0.32%,870771,1.008M,13.724B,22.86,
96,NOW,"ServiceNow, Inc.",572.33,-6.18,-1.07%,829523,1.085M,116.755B,82.47,
97,KLAC,KLA Corporation,457.31,0.80,+0.18%,663463,975574,62.523B,18.94,
98,MSI,"Motorola Solutions, Inc.",285.80,-1.60,-0.56%,466822,747666,47.734B,32.40,


In [8]:
# Inspect the shape of the scraped table
table_data.shape

# Transform an abbreviated number string (e.g. "870771", "53.18M") to a pure integer
def volume_string_to_int(str):
    """Convert an abbreviated number string to an ``int``.

    Strings ending in a digit are parsed as plain integers. Strings ending in
    one of the suffixes ``k``/``K`` (thousand), ``M`` (million), ``B``
    (billion) or ``T`` (trillion) are scaled accordingly. Surrounding
    whitespace and thousands separators (``,``) are ignored.

    The scaling is done with ``Decimal`` rather than ``float`` so that values
    such as ``"1.005k"`` become 1005 instead of being truncated to 1004 by
    binary floating-point error. Any fractional remainder after scaling is
    truncated toward zero.

    :param str: the text to convert (the name shadows the builtin ``str``; it
        is kept unchanged so existing keyword callers keep working)

    :return: the value as an ``int``
    :raises IndexError: if the text is empty
    :raises KeyError: if the text ends in an unsupported suffix
    :raises ValueError: if the numeric part cannot be parsed
    """
    # Local import keeps this cell runnable without touching the import cell.
    from decimal import Decimal, InvalidOperation

    multipliers = {
        'k': 10 ** 3,
        'K': 10 ** 3,
        'M': 10 ** 6,
        'B': 10 ** 9,
        'T': 10 ** 12,
    }

    text = str.strip().replace(',', '')
    if text[-1].isdigit():
        return int(text)

    mult = multipliers[text[-1]]
    try:
        mantissa = Decimal(text[:-1])
    except InvalidOperation as err:
        # Keep the exception type the float-based implementation raised.
        raise ValueError(f"could not convert string to number: {str!r}") from err
    return int(mantissa * mult)

In [9]:
# Make sure the output directory exists. Attempting the creation and
# tolerating "already exists" avoids the check-then-create race that
# os.path.exists() followed by os.mkdir() is prone to.
try:
    os.mkdir("data/")
except FileExistsError:
    # The path is already present; nothing to create.
    pass
else:
    print("Data Directory Created!")

In [10]:
# Write the scraped table to a CSV file inside the data/ directory
file_name = "utilities.csv"
table_data.to_csv(f"data/{file_name}")

In [11]:
# List the column labels of the scraped table
table_data.columns

Index(['Symbol', 'Name', 'Price (Intraday)', 'Change', '% Change', 'Volume',
       'Avg Vol (3 month)', 'Market Cap', 'PE Ratio (TTM)', '52 Week Range'],
      dtype='object')