In [1]:
import os
import glob
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
yf.pdr_override() # <== that's all it takes :-)

from concurrent import futures
from dateutil.relativedelta import relativedelta
from pandas_datareader import data as pdr
from scipy.stats import gaussian_kde

""" set home directory """
# Remember the directory the kernel started in exactly once, so that
# re-running this cell always resolves the relative paths below from the
# same place instead of from wherever a previous run left the cwd.
if "home_dir" not in globals():
    home_dir = os.getcwd()
os.chdir(home_dir)

# Move into the project sub-folder.
working_dir = "./most_attractive_stocks"
os.chdir(working_dir)

# Folder for historical data, relative to the working directory; created on demand.
data_dir = "../data/historical"
os.makedirs(data_dir, exist_ok=True)

# Report where we ended up.
print("Current working directory: {0}".format(os.getcwd()))
#print("Current data directory: {0}".format(os.listdir('../data')))

Current working directory: C:\work\p-canslim_pyramid\most_attractive_stocks


In [2]:
""" datetime util """
# All reference dates are derived from a single "now" snapshot.
now = dt.datetime.now()
_month_start = dt.datetime(now.year, now.month, 1)  # midnight on the 1st of the current month

lastday = now + relativedelta(days=-1)  # 24 hours ago (keeps the time of day)
firstday_of_this_month = _month_start
lastday_of_this_month = _month_start + relativedelta(months=1, days=-1)
firstday_of_last_month = _month_start + relativedelta(months=-1)
lastday_of_last_month = _month_start + relativedelta(days=-1)

In [3]:
""" set the download window """
# The window opens on a fixed date and closes on `lastday` (yesterday),
# rendered as YYYY-MM-DD.
start_date = "2017-01-01"
end_date = format(lastday, '%Y-%m-%d')

# Download Tickers

In [4]:
# Read every HTML table on the Wikipedia page; the first one is used as the
# S&P 500 company list, the second one is only kept under its own name.
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
sp500_df, second_table = tables[0], tables[1]
print(sp500_df.shape)

# Replace "." with "-" in every symbol to avoid symbol errors later on.
sp500_df["Symbol"] = [symbol.replace(".", "-") for symbol in sp500_df["Symbol"]]

# Write a dated snapshot and read it straight back, so the frame used from
# here on is exactly what is stored on disk.
sp500_csv = f"{data_dir}/SP500_{end_date}.csv"
sp500_df.to_csv(sp500_csv, index=False)
sp500_df = pd.read_csv(sp500_csv)
print(sp500_df.shape)

sp500_tickers = sp500_df["Symbol"].tolist()
print(sp500_df.head())
#print(sp500_tickers)

(503, 9)
(503, 9)
  Symbol     Security SEC filings             GICS Sector  \
0    MMM           3M     reports             Industrials   
1    AOS  A. O. Smith     reports             Industrials   
2    ABT       Abbott     reports             Health Care   
3   ABBV       AbbVie     reports             Health Care   
4    ACN    Accenture     reports  Information Technology   

                GICS Sub-Industry    Headquarters Location  Date added  \
0        Industrial Conglomerates    Saint Paul, Minnesota  1976-08-09   
1               Building Products     Milwaukee, Wisconsin  2017-07-26   
2           Health Care Equipment  North Chicago, Illinois  1964-03-31   
3                 Pharmaceuticals  North Chicago, Illinois  2012-12-31   
4  IT Consulting & Other Services          Dublin, Ireland  2011-07-06   

       CIK      Founded  
0    66740         1902  
1    91142         1916  
2     1800         1888  
3  1551152  2013 (1888)  
4  1467373         1989  


# Find Most Attractive Stocks

In [5]:
# Path of the dated S&P 500 CSV; handed to the findStocks scripts below via --stock_list.
stock_list = f"{data_dir}/SP500_{end_date}.csv"

In [6]:
# Echo the path through the shell to confirm that IPython expands $stock_list as expected.
!echo $stock_list

../data/historical/SP500_2023-01-18.csv


In [7]:
!python findStocks.py --stock_list $stock_list --key "P6QVC3IFRGHVKGAN" --data_folder "../data/historical" --downloaded True

>>> GM

[*********************100%***********************]  1 of 1 completed


Traceback (most recent call last):
  File "C:\work\p-canslim_pyramid\most_attractive_stocks\findStocks.py", line 202, in <module>
    record = testStock(s, dataFolder, flexible, record)
  File "C:\work\p-canslim_pyramid\most_attractive_stocks\findStocks.py", line 179, in testStock
    record = saveResults(stock, s, stockFile, record)
  File "C:\work\p-canslim_pyramid\most_attractive_stocks\findStocks.py", line 147, in saveResults
    stock.save()
  File "C:\work\p-canslim_pyramid\most_attractive_stocks\findStocksClasses.py", line 412, in save
    df = pd.concat([self.reports['q'], self.reports['a']], ignore_index=True)
TypeError: list indices must be integers or slices, not str


In [7]:
!python xxx_findStocks.py --stock_list $stock_list --key "P6QVC3IFRGHVKGAN" --data_folder "../data/historical"

^C


In [13]:
import os

# For every entry in data_dir except the first one returned by os.listdir(),
# keep the part of the name that precedes the first space.
# NOTE(review): os.listdir() returns entries in arbitrary order, so which
# entry `[1:]` skips is not guaranteed -- confirm what it is meant to exclude.
filenames = [entry.split(" ")[0] for entry in os.listdir(data_dir)[1:]]


In [18]:
# Drop duplicates. Sorted so that the resulting list has the same order on
# every kernel run (iteration order of a set of strings is not stable).
filenames = sorted(set(filenames))

In [21]:
# Number of entries currently held in `filenames`.
len(filenames)

500

In [8]:
%%writefile findStocks.py
#!/usr/bin/env python3
"""
Script to find new stocks to invest in.

Rules used in these scripts
1. Annual - EPS must increase by 20%
2. Annual - ROE must be over 17%
3. Quarterly - EPS must increase by 20%
4. Quarterly - Sales must increase by 20%
5. Stock price can not be decreasing recently

Additional rules that automatically disqualify a stock
1. Foreign currency
2. Recent negative net income
3. Stock price has a decreasing trend
"""

import argparse
from downloadData import download
from findStocksUtils import (getStockList, findProcessed, saveAll,
                             updateProcessed, makeDirectory, checkDataMatches)
from findStocksClasses import Stock


def getInputs():
    """Read the command-line options and unpack them for the main script.

    Returns:
        A 5-tuple ``(stockFile, dataFolder, apikey, flexible, record)``:
        the ``--stock_list`` path, the ``--data_folder`` value (default
        ``'Data'``), the ``--key`` value (``None`` when omitted), ``True``
        unless ``--not_flexible`` was given, and a fresh empty list.
    """
    option_table = (
        ('--stock_list', dict(type=str, required=True, help=(
            '.csv file with a column of stock symbols, e.g. Example Stock List.csv'))),
        ('--key', dict(type=str, help='API key for Alpha Vantage')),
        ('--data_folder', dict(type=str, default='Data', help=(
            'Folder with data downloaded from Alpha Vantage'))),
        ('--not_flexible', dict(action='store_true', help=(
            'Add argument to manually input data from sec.gov if it is missing'))),
    )

    parser = argparse.ArgumentParser()
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    # argparse always defines `key` (None when the option is absent), so it
    # can be read directly.
    return (args.stock_list, args.data_folder, args.key,
            not args.not_flexible, [])


def getData(stockFile, dataFolder, apikey):
    """Download the data and/or get list of stocks ready for analysis.

    Args:
        stockFile: path of the stock-list file, forwarded to the helpers.
        dataFolder: folder for the stock data; created via makeDirectory().
        apikey: API key. When None the data already in ``dataFolder`` is
            checked against the stock list; otherwise download() is called.

    Returns:
        Whatever findProcessed() returns for the symbols of ``stockFile``.

    Raises:
        AssertionError: if findProcessed() returns an empty list.
    """
    # Create folders for script to work
    makeDirectory(dataFolder)

    if apikey is None:
        # Use predownloaded data
        checkDataMatches(stockFile, dataFolder)
    else:
        # Or download data from Alpha Vantage
        download(stockFile, dataFolder, apikey)

    # Get stock symbols for downloaded stock data
    allSymbols = getStockList(stockFile)

    # Get stock symbols for unprocessed stock data
    symbols = findProcessed(stockFile, allSymbols)

    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if symbols == []:
        raise AssertionError('No stocks were found in *_Processed.csv file')

    return symbols


def preliminaryTests(stock):
    """Run the disqualifying checks in order, stopping at the first failure.

    The checks are ``stock.prelimTests()``, ``stock.checkNegativeIncome()``
    (negative net income) and ``stock.checkSlope()`` (recently decreasing
    price). After each one, ``stock.errorMessage`` must still be
    ``'processed'``; otherwise the function returns immediately. Only when
    all three pass is ``stock.reduceDF()`` called to reduce the income
    statement and balance sheet into one DF.
    """
    for check_name in ('prelimTests', 'checkNegativeIncome', 'checkSlope'):
        getattr(stock, check_name)()

        # Any message other than 'processed' means the stock was disqualified
        if stock.errorMessage != 'processed':
            return

    stock.reduceDF()


def manageBadData(stock, r):
    """Fill in missing values and normalise the report stored under key ``r``.

    Works on ``stock.reports[r]`` and ``stock.miss[r]`` in place:
    records what is missing, fills the gaps (by user input or by copying
    recent data, depending on ``stock.flex``), converts column types and
    finally corrects the shares-outstanding column.
    """
    shares_column = 'commonStockSharesOutstanding'

    # Record which data is missing for this report
    stock.miss[r] = stock.identifyMissing(stock.reports[r])

    if stock.flex:
        # Flexible: copy recent data over to the missing data
        stock.reports[r], stock.miss[r] = stock.copy(stock.reports[r],
                                                     stock.miss[r])
    else:
        # Not flexible: get user input for the missing data
        stock.reports[r] = stock.getData(stock.reports[r], stock.miss[r])

    # Change column type to numeric if is object
    stock.reports[r] = stock.checkType(stock.reports[r])

    # Check if shares are missing trailing zeros
    corrected_shares = stock.checkSmallShares(stock.reports[r][shares_column])
    stock.reports[r][shares_column] = corrected_shares


def analyzeStock(stock, r):
    """Compute the metrics for report ``r`` and run the stock tests on it.

    ``stock.reports[r]`` is replaced after every transformation step, then
    passed to ``stock.test``. For the ``'q'`` report the average of the
    percent changes is calculated as well.
    """
    transformations = (
        # Calculate ROE and EPS
        lambda: stock.calculate(stock.reports[r]),
        # Calculate the percentage change year over year
        lambda: stock.percentChange(stock.reports[r], r),
        # Round DF for pretty saving
        lambda: stock.roundReports(stock.reports[r]),
    )
    for transform in transformations:
        # Store each intermediate result before the next step reads it
        stock.reports[r] = transform()

    # Check the percentage change year over year
    stock.test(stock.reports[r], r)

    if r != 'q':
        return

    # Calculate the average of the percent changes
    stock.averagePercentChange(stock.reports[r])


def saveResults(stock, s, stockFile, record):
    """Persist one processed stock and add its result row to ``record``.

    Calls ``stock.save()``, logs ``stock.errorMessage`` for symbol ``s`` via
    updateProcessed(), appends the values of ``stock.record`` as a list to
    ``record`` (in place) and returns that same ``record`` object.
    """
    # Save stock information
    stock.save()

    # Update the processed file
    updateProcessed(stockFile, s, stock.errorMessage)

    # Keep this stock's values as one row for the later save
    row = list(stock.record.values())
    record.append(row)
    return record


def testStock(s, dataFolder, flexible):
    print('>>>', s)

    # Initialize Stock object
    stock = Stock(s, dataFolder, flexible)

    # Run preliminary tests to check if stock is disqualified
    preliminaryTests(stock)

    # Skip stock if it failed preliminary tests. Update the *Processed.csv
    if stock.errorMessage != 'processed':
        updateProcessed(stockFile, s, stock.errorMessage)

    for r in stock.reports:
        # Manage missing data and other inconsistencies in data
        manageBadData(stock, r)

        # Calculate new metrics and run stock tests
        analyzeStock(stock, r)

    # Save data and update files
    record = saveResults(stock, s, stockFile, record)
    print("returned")
    return record


if __name__ == '__main__':

    # Get user inputs
    stockFile, dataFolder, apikey, flexible, record = getInputs()

    # Get financial report data
    symbols = getData(stockFile, dataFolder, apikey)

    # Loop through all stocks
    for s in symbols:
        record = testStock(s, dataFolder, flexible)

        # Assert can occur when all stocks fail preliminary tests
        assert record != [], 'No procssed stocks to save to *Results.csv'

        # Save the test results for all stocks
        columns = [x for x in record.keys()]
        saveAll(record, columns)

Overwriting findStocks.py
