## Custom Stock Screener
### Medium article  
Import Libraries, ignore warnings and set working directories

In [1]:
import pandas as pd
import yfinance as yf
import os
from datetime import datetime

#dont show warnings
import warnings

# Silence every warning category for the rest of the notebook session.
warnings.filterwarnings(action="ignore")

# All input and output CSV files live in a "data" folder under the current working directory.
data_directory = f"{os.getcwd()}/data/"



### Read the csv file with the stocks

You can add any stock you want to this list by appending a new line to the csv file (the line may contain only the symbol).

In [2]:
# Load the list of stocks to screen from the data folder.
stocks_csv_path = f"{data_directory}sp500.csv"
stocks = pd.read_csv(stocks_csv_path)
stocks.head()

Unnamed: 0,Symbol,Name,Sector
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ABMD,Abiomed,Health Care


### Get information from Yahoo Finance

Loop through the stocks and get the information for each one from Yahoo Finance.
During the loop, the information for each stock is stored in a list.
If there is an error, we just print it. The most probable cause is that the symbol cannot be found in Yahoo Finance, at least not in the form it has in our list.

In [3]:
# Collect one info dictionary per stock; symbols that fail are counted, not retried.
yf_info = []
err_counter = 0

# Only the ticker symbol is needed from each row, so iterate the "Symbol"
# column directly instead of building a full row Series with iterrows().
for symbol in stocks["Symbol"]:
    try:
        ticker = yf.Ticker(symbol)
        yf_info.append(ticker.info)
    except Exception as e:
        # Best effort: report the failing symbol and continue with the next one,
        # so a single bad symbol does not abort the whole download.
        print(f'Something went wrong with symbol {symbol} with error {e}')
        err_counter += 1

# One row per successfully fetched stock; columns are the keys of the info dictionaries.
yf_info_df = pd.DataFrame(yf_info)
print(f'There were {err_counter} errors')
print(f'Got the information for {len(yf_info_df)} stocks')

Something went wrong with symbol ATVI with error 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ATVI?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true&crumb=qf4ObopbTZ9
Something went wrong with symbol ABC with error 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ABC?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true&crumb=qf4ObopbTZ9
Something went wrong with symbol RE with error 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/RE?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true&crumb=qf4ObopbTZ9
There were 3 errors
Got the information for 502 stocks


## Work with the dataframe

First we check whether there are any missing values and list the columns that have the most of them

In [4]:
# Number of missing entries in each column.
missing_values = yf_info_df.isna().sum()
# Keep only the columns that have at least one gap, worst offenders first.
has_missing = missing_values > 0
sorted_missing_values = missing_values[has_missing].sort_values(ascending=False)
print(sorted_missing_values.head(10))


industrySymbol              501
fax                         422
address2                    382
trailingPegRatio            141
lastSplitDate               140
lastSplitFactor             140
fiveYearAvgDividendYield    120
dividendYield               116
dividendRate                116
earningsGrowth               94
dtype: int64





### We drop all the columns that mostly contain missing values

In [ ]:
# Columns that are empty for most of the stocks; these three lead the
# missing-value counts printed by the previous cell.
cols_to_drop = ['industrySymbol', 'fax', 'address2']
# errors='ignore' skips any listed column that is not present in the dataframe,
# so re-running the cell does not raise.
yf_info_df = yf_info_df.drop(columns=cols_to_drop, errors='ignore')
yf_info_df.head()

### Now we examine the columns that exist in the dataframe

In [5]:
# Show every column name as a plain Python list so the output is not abbreviated.
list(yf_info_df.columns)

['address1',
 'city',
 'state',
 'zip',
 'country',
 'phone',
 'website',
 'industry',
 'industryKey',
 'industryDisp',
 'sector',
 'sectorKey',
 'sectorDisp',
 'longBusinessSummary',
 'fullTimeEmployees',
 'companyOfficers',
 'auditRisk',
 'boardRisk',
 'compensationRisk',
 'shareHolderRightsRisk',
 'overallRisk',
 'governanceEpochDate',
 'compensationAsOfEpochDate',
 'maxAge',
 'priceHint',
 'previousClose',
 'open',
 'dayLow',
 'dayHigh',
 'regularMarketPreviousClose',
 'regularMarketOpen',
 'regularMarketDayLow',
 'regularMarketDayHigh',
 'dividendRate',
 'dividendYield',
 'exDividendDate',
 'payoutRatio',
 'fiveYearAvgDividendYield',
 'beta',
 'forwardPE',
 'volume',
 'regularMarketVolume',
 'averageVolume',
 'averageVolume10days',
 'averageDailyVolume10Day',
 'bid',
 'ask',
 'bidSize',
 'askSize',
 'marketCap',
 'fiftyTwoWeekLow',
 'fiftyTwoWeekHigh',
 'priceToSalesTrailing12Months',
 'fiftyDayAverage',
 'twoHundredDayAverage',
 'trailingAnnualDividendRate',
 'trailingAnnualDivid

## Following the review, we can drop the columns that we believe are unnecessary for a stock screener

In [7]:
# Contact details that are not needed in a stock screener.
cols_to_drop = ['zip', 'phone', 'address1', 'state']
# errors='ignore' skips any listed column that is not present in the dataframe.
yf_info_df = yf_info_df.drop(columns=cols_to_drop, errors='ignore')

## Because yfinance does not provide a meaningful column order for the dataframe, we are going to move to the beginning the columns that make sense to be first


In [9]:
# Identifying columns that should lead the dataframe, in this order.
cols_to_move_beginning = ['symbol', 'shortName', 'sector', 'industry']
# Every other column keeps its current relative order after the leading ones.
remaining_cols = [name for name in yf_info_df.columns if name not in cols_to_move_beginning]
yf_info_df = yf_info_df[cols_to_move_beginning + remaining_cols]
yf_info_df.head()


Unnamed: 0,symbol,shortName,sector,industry,city,country,website,industryKey,industryDisp,sectorKey,...,pegRatio,targetHighPrice,targetLowPrice,targetMeanPrice,targetMedianPrice,recommendationMean,numberOfAnalystOpinions,earningsGrowth,fax,industrySymbol
0,MMM,3M Company,Industrials,Conglomerates,Saint Paul,United States,https://www.3m.com,conglomerates,Conglomerates,industrials,...,,,,,,,,,,
1,AOS,A.O. Smith Corporation,Industrials,Specialty Industrial Machinery,Milwaukee,United States,https://www.aosmith.com,specialty-industrial-machinery,Specialty Industrial Machinery,industrials,...,2.72,90.0,64.0,79.1,81.0,2.7,10.0,0.268,,
2,ABT,Abbott Laboratories,Healthcare,Medical Devices,North Chicago,United States,https://www.abbott.com,medical-devices,Medical Devices,healthcare,...,-12.43,133.0,100.0,117.65,117.0,2.0,20.0,0.012,,
3,ABBV,AbbVie Inc.,Healthcare,Drug Manufacturers - General,North Chicago,United States,https://www.abbvie.com,drug-manufacturers-general,Drug Manufacturers - General,healthcare,...,-3.36,205.0,135.0,169.54,170.0,2.3,24.0,-0.548,,
4,ABMD,"ABIOMED, Inc.",,,,,,,,,...,,,,,,2.8,,,,


## Lastly, we save the dataframe in csv format so that we can open it in Excel and examine it further

The csv file is saved with a prefix of the current date and time - this way you can keep track of all the information you have acquired in one place in the past


In [10]:
# Timestamp (year, month, day, hour, minute) used as the file-name prefix,
# so each run writes its own file instead of overwriting an earlier one.
formatted_datetime = datetime.now().strftime('%Y%m%d%H%M')
output_path = data_directory + formatted_datetime + ' Custom Screener.csv'
yf_info_df.to_csv(output_path)