In [1]:
import os
import glob
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
# NOTE(review): presumably redirects pandas_datareader's Yahoo download calls to
# yfinance; this runs before pandas_datareader is imported below. Confirm the
# installed yfinance version still provides pdr_override().
yf.pdr_override()

from concurrent import futures
from dateutil.relativedelta import relativedelta
from pandas_datareader import data as pdr
from scipy.stats import gaussian_kde

""" datetime util """
now = dt.datetime.now()
lastday = now + relativedelta(days=-1)

# Every month boundary is an offset from midnight on the 1st of the current month.
firstday_of_this_month = dt.datetime(year=now.year, month=now.month, day=1)
# relativedelta applies the month shift before the day shift, so "+1 month, -1 day"
# lands on the final day of the current month.
lastday_of_this_month = firstday_of_this_month + relativedelta(months=1, days=-1)
firstday_of_last_month = firstday_of_this_month + relativedelta(months=-1)
lastday_of_last_month = firstday_of_this_month + relativedelta(days=-1)

def last_working_day(given_date):
    """Return the most recent weekday (Mon-Fri) strictly before ``given_date``.

    Only weekends are skipped; public holidays are not taken into account.

    Args:
        given_date: a ``datetime.date`` / ``datetime.datetime`` (anything that
            offers ``weekday()`` and supports subtracting a ``timedelta``).

    Returns:
        An object of the same type as ``given_date``, moved back 1-3 days.
    """
    # Monday steps back over the weekend to Friday (3 days), Sunday steps back
    # to Friday (2 days); every other day simply steps back to the day before.
    weekend_aware_offset = {0: 3, 6: 2}
    days_back = weekend_aware_offset.get(given_date.weekday(), 1)
    return given_date - dt.timedelta(days=days_back)
        
""" set the download window """
start_date = "2017-01-01"
# The window closes on the latest weekday strictly before `now`, kept as a
# YYYY-MM-DD string because it is also embedded in file names further down.
end_date = f"{last_working_day(now):%Y-%m-%d}"


""" set the data_dir """
data_dir = "./data"
# Create the data directory up front; an already existing directory is fine.
os.makedirs(name=data_dir, exist_ok=True)

# Download Symbols

In [2]:
""" Download Tickers """
# read_html returns one DataFrame per HTML table on the page; the first two are kept.
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')

sp500_df = tables[0]
second_table = tables[1]
print(sp500_df.shape)

""" save symbols into csv file """
# rename symbol to escape symbol error: every "." in a symbol becomes "-"
# (presumably the form the downstream price/financial lookups expect - TODO confirm)
sp500_df["Symbol"] = [symbol.replace(".", "-") for symbol in sp500_df["Symbol"]]

# Write the table out and load it straight back, so the frame used from here on
# is exactly what the dated CSV on disk contains.
sp500_csv = f"{data_dir}/SP500_{end_date}.csv"
sp500_df.to_csv(sp500_csv, index=False)
sp500_df = pd.read_csv(sp500_csv)
print(sp500_df.shape)
sp500_tickers = sp500_df["Symbol"].tolist()
print(sp500_df.head())
#print(sp500_tickers)

(503, 8)
(503, 8)
  Symbol     Security             GICS Sector               GICS Sub-Industry  \
0    MMM           3M             Industrials        Industrial Conglomerates   
1    AOS  A. O. Smith             Industrials               Building Products   
2    ABT       Abbott             Health Care           Health Care Equipment   
3   ABBV       AbbVie             Health Care                 Pharmaceuticals   
4    ACN    Accenture  Information Technology  IT Consulting & Other Services   

     Headquarters Location  Date added      CIK      Founded  
0    Saint Paul, Minnesota  1976-08-09    66740         1902  
1     Milwaukee, Wisconsin  2017-07-26    91142         1916  
2  North Chicago, Illinois  1964-03-31     1800         1888  
3  North Chicago, Illinois  2012-12-31  1551152  2013 (1888)  
4          Dublin, Ireland  2011-07-06  1467373         1989  


# Financial Analysis

In [3]:
# Display the kernel's working directory: the relative paths used in this
# notebook ("./data", "./report", "findStocks/findStocks.py") resolve against it.
os.getcwd()

'/home/ian/work/p-canslim_pyramid'

In [4]:
# Sanity check: print the Python variable `end_date` (expanded by IPython before
# the shell runs) to confirm which dated CSV the following cells will use.
!echo $end_date

2023-02-08


In [5]:
# Run the external findStocks script on the symbol CSV saved for `end_date`
# ({end_date} is expanded by IPython), pointing it at ./data/financial for data
# and ./report for reports.
# NOTE(review): the "Most Attractive Stocks" cell reads
# ./data/SP500_{end_date}_Processed.csv and a dated CSV under ./report -
# presumably both are produced by this script; confirm against findStocks.py.
!python findStocks/findStocks.py --stock_list ./data/SP500_{end_date}.csv --data_folder "./data/financial" --report_folder "./report"

No stocks were found in *_Processed.csv file
>>> MMM
[*********************100%***********************]  1 of 1 completed
>>> AOS
[*********************100%***********************]  1 of 1 completed
>>> ABT
[*********************100%***********************]  1 of 1 completed
>>> ABBV
[*********************100%***********************]  1 of 1 completed
>>> ACN
[*********************100%***********************]  1 of 1 completed
>>> ATVI
[*********************100%***********************]  1 of 1 completed
>>> ADM
>>> ADBE
[*********************100%***********************]  1 of 1 completed
>>> ADP
[*********************100%***********************]  1 of 1 completed
>>> AAP
>>> AES
>>> AFL
[*********************100%***********************]  1 of 1 completed
>>> A
[*********************100%***********************]  1 of 1 completed
>>> APD
[*********************100%***********************]  1 of 1 completed
>>> AKAM
[*********************100%***********************]  1 of 1 completed
>>> A

# Most Attractive Stocks

In [6]:
data_dir = "./data"
input_file = f"{data_dir}/SP500_{end_date}_Processed.csv"

""" Load from CSV """
df = pd.read_csv(input_file)

# Split the stock list into two disjoint groups using ONE mask on the
# "Processed" status column. Previously "failed" was `!= "processed"` while
# "processed" was a substring match, so a "processed_copied" row was counted in
# both groups. The literal substring "processed" matches exactly the same rows
# as the old regex "processed|processed_copied" (the second alternative was
# redundant). Missing statuses become the string "nan" and count as failed.
is_processed = df["Processed"].astype(str).str.contains("processed", regex=False)
df_processed = df[is_processed]
df_failed = df[~is_processed]

report_dir = "./report"
# The report file name is stamped with today's date (`now`), not with `end_date`.
output_file = f"{report_dir}/{now.strftime('%Y-%m-%d')} Financial Analysis Results.csv"

""" Load from CSV """
# From here on `df` is the analysis report, no longer the stock list.
df = pd.read_csv(output_file)

# Candidates: rows with no failed checks (numfailed < 1), highest slope first.
df_candidates = df[df["numfailed"] < 1].sort_values(by=['slope'], ascending=False, axis=0)
df_candidates = df_candidates[["stock", "slope", "avgpc" ]]
df_candidates.to_csv(f"{report_dir}/most_attractives_{end_date}.csv", index=False)

# NOTE(review): "total" is len(df) of the report loaded just above, whereas
# failed/processed are counted on the stock list - confirm this is intended.
print(f"total: {len(df)} \n---------------------\nfailed: {len(df_failed)} \nprocessed: {len(df_processed)}")

print("\n---------------------\nPlease find the report directory")
print(f"total candidates: {len(df_candidates)}")
df_candidates[["stock", "avgpc", "slope",]]

total: 304 
---------------------
failed: 196 
processed: 304

---------------------
Please find the report directory
total candidates: 15


Unnamed: 0,stock,avgpc,slope
43,BKNG,-0.275,0.265
91,DOW,0.0,0.224
47,AVGO,0.462,0.176
55,CAT,0.201,0.173
127,HAL,0.377,0.161
152,JBHT,0.404,0.141
175,MAR,6.575,0.129
134,HLT,1.254,0.128
214,OGN,0.0,0.109
173,MPC,2.361,0.092
