## Custom Stock Screener
### Medium article  
Import the libraries, suppress warnings, and set the data directory.

In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import os
from datetime import datetime


# Silence all warnings so they do not clutter the notebook output
import warnings

warnings.filterwarnings("ignore")

# Folder under the current working directory that the notebook reads from and writes to
data_directory = f"{os.getcwd()}/data/"



### Read the csv file with the stocks

You can add any stock you want by appending a new line to the CSV file (the symbol alone is enough).

In [2]:
# Load the list of stocks to screen and preview the first rows
sp500_csv_path = data_directory + "sp500.csv"
stocks = pd.read_csv(sp500_csv_path)
stocks.head()

Unnamed: 0,Symbol,Name,Sector
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ABMD,Abiomed,Health Care


### Get information from Yahoo Finance

Loop through the stocks and get the information for each one from Yahoo Finance.
During the loop, the information for each stock is stored in a list.
If there is an error, we just print it. The most probable cause is that the symbol cannot be found on Yahoo Finance, at least not in the form it appears in our list.

In [3]:
# Collect the Yahoo Finance info dictionary of every symbol in `stocks`.
# A symbol that raises is reported and counted; the loop carries on with the next one.
yf_info = []
err_counter = 0

for symbol in stocks["Symbol"]:
    try:
        info = yf.Ticker(symbol).info
    except Exception as e:
        print(f'Something went wrong with symbol {symbol} with error {e}')
        err_counter += 1
    else:
        yf_info.append(info)

# One row per successfully fetched symbol
yf_info_df = pd.DataFrame(yf_info)
print(f'There were {err_counter} errors')
print(f'Got the information for {len(yf_info_df)} stocks')

Something went wrong with symbol ATVI with error 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ATVI?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true&crumb=qf4ObopbTZ9
Something went wrong with symbol ABC with error 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ABC?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true&crumb=qf4ObopbTZ9
Something went wrong with symbol RE with error 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/RE?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&ssl=true&crumb=qf4ObopbTZ9
There were 3 errors
Got the information for 502 stocks


## Work with the dataframe

First we check whether there are any missing values and list the columns that have the most.

In [4]:
# Null count per column
missing_values = yf_info_df.isna().sum()

# Keep only the columns that have nulls, worst first
sorted_missing_values = (
    missing_values
    .loc[lambda counts: counts > 0]
    .sort_values(ascending=False)
)
print(sorted_missing_values.head(10))


industrySymbol              501
fax                         422
address2                    382
trailingPegRatio            141
lastSplitDate               140
lastSplitFactor             140
fiveYearAvgDividendYield    120
dividendYield               116
dividendRate                116
earningsGrowth               94
dtype: int64





### We drop all the columns that mostly contain missing values

In [ ]:
# Columns that are almost entirely empty (the top entries of the missing-value count)
cols_to_drop = ['industrySymbol', 'fax', 'address2']

# errors='ignore' skips any listed column that is not in the dataframe,
# so the cell can be re-run after the columns are already gone
yf_info_df = yf_info_df.drop(columns=cols_to_drop, errors='ignore')
yf_info_df.head()

### Now we examine the columns that exist in the dataframe

In [5]:
# Show every column name currently in the dataframe as a plain Python list
yf_info_df.columns.to_list()

['address1',
 'city',
 'state',
 'zip',
 'country',
 'phone',
 'website',
 'industry',
 'industryKey',
 'industryDisp',
 'sector',
 'sectorKey',
 'sectorDisp',
 'longBusinessSummary',
 'fullTimeEmployees',
 'companyOfficers',
 'auditRisk',
 'boardRisk',
 'compensationRisk',
 'shareHolderRightsRisk',
 'overallRisk',
 'governanceEpochDate',
 'compensationAsOfEpochDate',
 'maxAge',
 'priceHint',
 'previousClose',
 'open',
 'dayLow',
 'dayHigh',
 'regularMarketPreviousClose',
 'regularMarketOpen',
 'regularMarketDayLow',
 'regularMarketDayHigh',
 'dividendRate',
 'dividendYield',
 'exDividendDate',
 'payoutRatio',
 'fiveYearAvgDividendYield',
 'beta',
 'forwardPE',
 'volume',
 'regularMarketVolume',
 'averageVolume',
 'averageVolume10days',
 'averageDailyVolume10Day',
 'bid',
 'ask',
 'bidSize',
 'askSize',
 'marketCap',
 'fiftyTwoWeekLow',
 'fiftyTwoWeekHigh',
 'priceToSalesTrailing12Months',
 'fiftyDayAverage',
 'twoHundredDayAverage',
 'trailingAnnualDividendRate',
 'trailingAnnualDivid

## Following the review, we can drop columns that we believe are unnecessary for a stock screener

In [7]:
# Contact/address fields to remove; listed columns that are absent are skipped
cols_to_drop = ['zip', 'phone', 'address1', 'state']
yf_info_df = yf_info_df.drop(columns=cols_to_drop, errors='ignore')

## We will convert the epoch variables to datetime format

In [13]:
# Fields that are converted from epoch seconds (unit='s') to pandas datetimes
epoch_columns = ['governanceEpochDate', 'compensationAsOfEpochDate', 'exDividendDate', 'lastSplitDate', 'lastDividendDate', 'firstTradeDateEpochUtc']

# Only convert the columns that are actually present: selecting a missing label
# with a list raises KeyError, which would abort the whole cell
present_epoch_columns = [col for col in epoch_columns if col in yf_info_df.columns]

if present_epoch_columns:
    yf_info_df[present_epoch_columns] = yf_info_df[present_epoch_columns].apply(pd.to_datetime, unit='s')

## Create a category column based on market capitalization

In [17]:
# Define the bins and labels for each capitalization category
bins = [0, 50_000_000, 300_000_000, 2_000_000_000, 10_000_000_000, 200_000_000_000, float('inf')]
labels = ['nano', 'micro', 'small', 'mid', 'large', 'mega']

# Create a new column with the categorized values
yf_info_df['CapCategory'] = pd.cut(yf_info_df['marketCap'], bins=bins, labels=labels, right=False)

yf_info_df['CapCategory'].value_counts()

CapCategory
large    397
mid       44
mega      35
small      2
nano       0
micro      0
Name: count, dtype: int64

## Calculate how close the current price is to the 52-week high and low

In [81]:
# Position of the current price inside the 52-week range:
# 0 means the price sits at the 52-week low, 1 means it sits at the 52-week high
week52_low = yf_info_df['fiftyTwoWeekLow']
week52_range = yf_info_df['fiftyTwoWeekHigh'] - week52_low
yf_info_df['52WeeksCurrentPosition'] = (yf_info_df['currentPrice'] - week52_low) / week52_range

## Calculate some metrics on the C Level executives of the company

In [76]:
def get_company_officers(row):
    """Return ``(officer_count, average_officer_age)`` for one row of company info.

    Parameters
    ----------
    row : mapping
        Any object indexable by ``'companyOfficers'`` (a dict or a DataFrame row).
        The value is expected to be a list of dicts, some of which carry an
        ``'age'`` key.

    Returns
    -------
    tuple
        ``(None, None)`` when ``row['companyOfficers']`` is not a list.
        Otherwise ``(count, avg_age)`` where ``count`` is the length of the list
        and ``avg_age`` is the mean of the available ages rounded to an integer,
        or ``None`` when no officer has a usable age.
    """
    officers = row['companyOfficers']

    # Not a list (e.g. NaN for a stock without data): report both metrics as missing
    if not isinstance(officers, list):
        return None, None

    # Officers without an age (key absent or set to None) are counted but not averaged
    ages = [officer['age'] for officer in officers
            if 'age' in officer and officer['age'] is not None]

    # The mean of an empty list is NaN, which cannot be rounded to an integer
    if not ages:
        return len(officers), None

    return len(officers), round(np.mean(ages))
    
# Run the helper on every row; wrapping the tuple in a Series yields one column per metric
officer_stats = yf_info_df.apply(lambda row: pd.Series(get_company_officers(row)), axis=1)
yf_info_df[['CountOfCompanyOfficers', 'AvgAgeOfCompanyOfficers']] = officer_stats

# Preview the two new metrics next to the company identifiers
preview_columns = ['symbol', 'shortName', 'CountOfCompanyOfficers', 'AvgAgeOfCompanyOfficers']
yf_info_df[preview_columns].head()

Unnamed: 0,symbol,shortName,CountOfCompanyOfficers,AvgAgeOfCompanyOfficers
0,MMM,3M Company,10.0,56.0
1,AOS,A.O. Smith Corporation,10.0,57.0
2,ABT,Abbott Laboratories,10.0,53.0
3,ABBV,AbbVie Inc.,10.0,58.0
4,ABMD,"ABIOMED, Inc.",,


## Because yfinance does not provide a meaningful column order, we move the columns that make sense to see first to the beginning of the dataframe


In [82]:
# Identification and classification columns that should lead the dataframe
cols_to_move_beginning = ['symbol', 'shortName', 'sector', 'industry', 'CapCategory']

# Every other column keeps its current relative order behind the leading ones
remaining_columns = [col for col in yf_info_df.columns if col not in cols_to_move_beginning]
yf_info_df = yf_info_df[cols_to_move_beginning + remaining_columns]
yf_info_df.head()

Unnamed: 0,symbol,shortName,sector,industry,CapCategory,city,country,website,industryKey,industryDisp,...,targetMeanPrice,targetMedianPrice,recommendationMean,numberOfAnalystOpinions,earningsGrowth,fax,industrySymbol,CountOfCompanyOfficers,AvgAgeOfCompanyOfficers,52WeeksCurrentPosition
0,MMM,3M Company,Industrials,Conglomerates,large,Saint Paul,United States,https://www.3m.com,conglomerates,Conglomerates,...,,,,,,,,10.0,56.0,0.538047
1,AOS,A.O. Smith Corporation,Industrials,Specialty Industrial Machinery,large,Milwaukee,United States,https://www.aosmith.com,specialty-industrial-machinery,Specialty Industrial Machinery,...,79.1,81.0,2.7,10.0,0.268,,,10.0,57.0,0.986967
2,ABT,Abbott Laboratories,Healthcare,Medical Devices,large,North Chicago,United States,https://www.abbott.com,medical-devices,Medical Devices,...,117.65,117.0,2.0,20.0,0.012,,,10.0,53.0,0.779817
3,ABBV,AbbVie Inc.,Healthcare,Drug Manufacturers - General,mega,North Chicago,United States,https://www.abbvie.com,drug-manufacturers-general,Drug Manufacturers - General,...,169.54,170.0,2.3,24.0,-0.548,,,10.0,58.0,0.646299
4,ABMD,"ABIOMED, Inc.",,,large,,,,,,...,,,2.8,,,,,,,0.994018


## Lastly, we save the dataframe as a CSV file that can be opened in Excel and examined further

The CSV file name is prefixed with the current date and time, so you can keep all the information you have acquired in the past in one place.


In [78]:
# Timestamp (year, month, day, hour, minute) used as the file-name prefix
formatted_datetime = datetime.now().strftime('%Y%m%d%H%M')

# Write the screener into the data directory under the timestamped name
output_path = f'{data_directory}{formatted_datetime} Custom Screener.csv'
yf_info_df.to_csv(output_path)