In [1]:
# Necessary Libraries
import glob
import os
import shutil
import time
from pathlib import Path
from statistics import mean

import numpy as np
import pandas as pd
import requests
import yfinance as yf
from get_all_tickers import get_tickers as gt

In [10]:
# Build the list of tickers to analyse from a market-cap window. To analyse a
# hand-picked universe instead, assign your own list here, for example:
# tickers = ["FB", "AMZN", ...]
tickers = gt.get_tickers_filtered(
    mktcap_min=150000,
    mktcap_max=4000000,
)
# Report how many tickers were selected so the number can be checked by eye; it
# should not be more than 2000.
print("The amount of stocks chosen to observe: " + str(len(tickers)))

The amount of stocks chosen to observe: 60


In [15]:
# Start every run from an empty Stocks folder so CSV files left over from a previous
# run cannot end up in the analysis. This also works on the very first run: the old
# folder is only removed when it exists, and missing parent folders are created too.
if os.path.isdir("SMA_Analysis/Stocks/"):
    shutil.rmtree("SMA_Analysis/Stocks/")
os.makedirs("SMA_Analysis/Stocks/", exist_ok=True)

In [16]:
# Download the price history of every ticker and save it as one CSV file per stock.
#
# Yahoo Finance sometimes fails with JSON decode or SSL errors, so a failed call is
# retried. Do not make more than 2,000 calls per hour or 48,000 calls per day or
# Yahoo Finance may block your IP: the loop stops once MAX_API_CALLS requests
# (successful or failed) have been made. Raise the budget to import more tickers, but
# keep it below those limits.
# Prepare for this loop to take some time. It pauses for 2 seconds after every
# request, including failed ones, so retries do not hammer the service.
MAX_API_CALLS = 20
# A ticker is given up on once it has failed more than this many times in a row.
MAX_FAILURES_PER_TICKER = 5

# Holds the amount of API calls we executed
Amount_of_API_Calls = 0
# Consecutive failures of the ticker currently being downloaded. Used to make sure we
# don't waste too many API calls on one ticker that could be having issues.
Stock_Failure = 0
# Number of tickers that were skipped after failing too often
Stocks_Not_Imported = 0

# Used to iterate through our list of tickers
i = 0
while (i < len(tickers)) and (Amount_of_API_Calls < MAX_API_CALLS):
    try:
        print("Iteration = " + str(i))
        stock = tickers[i]  # Gets the current stock ticker
        temp = yf.Ticker(str(stock))
        Hist_data = temp.history(period="5y")  # Request the last five years of price history
        Hist_data.to_csv("SMA_Analysis/Stocks/"+stock+".csv")  # Saves the historical data in csv format for further processing later
        time.sleep(2)  # Pauses the loop for two seconds so we don't cause issues with Yahoo Finance's backend operations
        Amount_of_API_Calls += 1
        Stock_Failure = 0
        i += 1  # Move on to the next ticker
    except (ValueError, requests.exceptions.SSLError) as error:
        # An error occurred on Yahoo Finance's backend. We will attempt to retrieve the data again.
        if isinstance(error, requests.exceptions.SSLError):
            print("Yahoo Finance Backend Error, Attempting to Fix SSL")
        else:
            print("Yahoo Finance Backend Error, Attempting to Fix")
        Amount_of_API_Calls += 1
        Stock_Failure += 1
        if Stock_Failure > MAX_FAILURES_PER_TICKER:
            # Give up on this ticker. The failure counter is reset so the next ticker
            # gets its own full set of retries instead of inheriting this one's count.
            i += 1
            Stocks_Not_Imported += 1
            Stock_Failure = 0
        time.sleep(2)  # Back off before the retry, just like after a successful call
print("The amount of stocks we successfully imported: " + str(i - Stocks_Not_Imported))

Iteration = 0
Iteration = 1
Iteration = 2
Iteration = 3
Iteration = 4
Iteration = 5
Iteration = 6
Iteration = 7
Iteration = 8
Iteration = 9
Iteration = 10
Iteration = 11
Iteration = 12
Iteration = 13
Iteration = 14
Iteration = 15
Iteration = 16
Iteration = 17
Iteration = 18
Iteration = 19
The amount of stocks we successfully imported: 20


In [51]:
# Get the path for each stock file in a list. glob returns paths in an arbitrary,
# file-system dependent order, so they are sorted to process the stocks (and write the
# summary rows) in the same order on every run.
list_files = sorted(glob.glob("SMA_Analysis/Stocks/*.csv"))
# You can use this line to limit the analysis to a portion of the stocks in the "stocks folder"
# list_files = list_files[:100]

In [53]:
# Back-test MACD / signal-line crossovers on every downloaded stock and save one
# summary row per stock to SMA_Analysis/All_Stocks.csv.
#
# For each stock:
#   1. keep the closing prices above MIN_PRICE to reduce outlier calculations,
#   2. build the MACD line (12-span EWM average minus 26-span EWM average) and the
#      signal line (9-span EWM average of the MACD line),
#   3. treat every crossover of the two lines as a prediction and compare the close on
#      the day of the cross with the average close of the next LOOKAHEAD_DAYS days.
#
# Confusion-matrix convention (a "positive" is a predicted price increase):
#   True_Positive  - bullish cross and the price rose
#   False_Positive - bullish cross but the price did not rise
#   True_Negative  - bearish cross and the price fell
#   False_Negative - bearish cross but the price did not fall
LOOKAHEAD_DAYS = 3  # Number of following closes averaged to judge a prediction
MIN_PRICE = 2.00    # Closing prices at or below this value are ignored
CLOSE_COLUMN = 4    # Position of "Close" in the saved CSVs (Date, Open, High, Low, Close, ...)

summary_columns = ["Company", "Days_Observed", "Crosses", "True_Positive", "False_Positive", "True_Negative",
                   "False_Negative", "Sensitivity", "Specificity", "Accuracy", "TPR", "FPR"]
# One dict per stock. The dataframe is built once after the loop, because growing a
# dataframe row by row is slow and DataFrame.append no longer exists in pandas 2.x.
summary_rows = []
for stock in list_files:
    # Dataframe to hold the historical data of the stock we are interested in.
    Hist_data = pd.read_csv(stock)
    Company = os.path.basename(stock).split(".csv")[0]  # Name of the company
    # Per-stock counters, reset for every file
    Days_Observed = 0
    Crosses = 0
    True_Positive = 0
    False_Positive = 0
    True_Negative = 0
    False_Negative = 0
    # Closing prices above MIN_PRICE. A file without a Close column yields no prices.
    if Hist_data.shape[1] > CLOSE_COLUMN:
        closes = Hist_data.iloc[:, CLOSE_COLUMN]
        prices = closes[closes > MIN_PRICE].tolist()
    else:
        prices = []
    prices_df = pd.DataFrame(prices)
    # Calculate exponentially weighted moving averages:
    day12 = prices_df.ewm(span=12).mean()
    day26 = prices_df.ewm(span=26).mean()
    macd_df = day12 - day26  # MACD line: 12-span average minus 26-span average
    signal_df = macd_df.ewm(span=9).mean()  # Signal line: 9-span EW moving average of the MACD line
    # Flat lists of plain floats, so the comparisons below are float against float.
    macd = macd_df.to_numpy().ravel().tolist()
    signal = signal_df.to_numpy().ravel().tolist()
    # Compare the outcome predicted by each cross with what the price actually did.
    # Starts at day 1 because a cross needs the previous day, and stops early enough
    # that the LOOKAHEAD_DAYS closes after Day all exist.
    for Day in range(1, len(macd) - LOOKAHEAD_DAYS):
        Prev_Day = Day - 1
        Avg_Closing_Next_Days = sum(prices[Day + 1:Day + 1 + LOOKAHEAD_DAYS]) / LOOKAHEAD_DAYS
        Days_Observed += 1  # Count how many days were observed
        if signal[Prev_Day] > macd[Prev_Day] and signal[Day] <= macd[Day]:
            # The signal line dipped below the MACD line: an increase is expected.
            Crosses += 1
            if prices[Day] < Avg_Closing_Next_Days:
                True_Positive += 1
            else:
                False_Positive += 1
        elif signal[Prev_Day] < macd[Prev_Day] and signal[Day] >= macd[Day]:
            # The signal line moved above the MACD line: a dip is expected.
            Crosses += 1
            if prices[Day] > Avg_Closing_Next_Days:
                True_Negative += 1
            else:
                False_Negative += 1
    # Rates default to 0 when their denominator is 0 (no crosses of that kind).
    positives = True_Positive + False_Negative
    negatives = True_Negative + False_Positive
    Sensitivity = True_Positive / positives if positives else 0
    Specificity = True_Negative / negatives if negatives else 0
    Accuracy = (True_Positive + True_Negative) / (positives + negatives) if (positives + negatives) else 0
    TPR = Sensitivity  # The true positive rate is the sensitivity
    FPR = 1 - Specificity  # The false positive rate
    summary_rows.append({
        'Company': Company, 'Days_Observed': Days_Observed, 'Crosses': Crosses,
        'True_Positive': True_Positive, 'False_Positive': False_Positive,
        'True_Negative': True_Negative, 'False_Negative': False_Negative,
        'Sensitivity': Sensitivity, 'Specificity': Specificity, 'Accuracy': Accuracy,
        'TPR': TPR, 'FPR': FPR,
    })
# The dataframe holding the final analysis of each stock, one row per stock
Compare_Stocks = pd.DataFrame(summary_rows, columns=summary_columns)
Compare_Stocks.to_csv("SMA_Analysis/All_Stocks.csv", index = False)  # Save the compiled data on each stock to a csv - All_Stocks.csv

In [62]:
# Load the saved AMZN price history on its own for a closer look at a single stock.
AMZN_path = Path('SMA_Analysis/Stocks/AMZN.csv')
# The "Date" column becomes the index and parse_dates=True parses it as dates.
# infer_datetime_format is not passed: it is deprecated since pandas 2.0 and no longer
# needed for the dates to be parsed.
AMZN_df = pd.read_csv(AMZN_path, index_col="Date", parse_dates=True)
AMZN_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-19,577.090027,584.0,566.450012,574.47998,4807200,0,0
2016-01-20,564.359985,578.450012,547.179993,571.77002,7966400,0,0
2016-01-21,573.580017,588.809998,568.219971,575.02002,4952200,0,0
2016-01-22,588.72998,600.099976,584.109985,596.380005,5120100,0,0
2016-01-25,597.98999,608.5,594.559998,596.530029,4396100,0,0


In [64]:
# Exponentially weighted moving averages of the AMZN history over spans of 12, 26 and
# 9 days. All three are computed from AMZN_df so they describe the same stock; the
# averages are taken column by column over every column of AMZN_df.
AMZN_day12 = AMZN_df.ewm(span=12).mean()
AMZN_day26 = AMZN_df.ewm(span=26).mean()
AMZN_day9 = AMZN_df.ewm(span=9).mean()

In [46]:
# Back-test MACD / signal-line crossovers on every stock file in list_files and save
# one summary row per stock to SMA_Analysis/All_Stocks.csv.
#
# NOTE(review): this cell repeats the back-test that an earlier cell of this notebook
# performs and writes to the same CSV file; consider keeping only one of them.
#
# The cell is self-contained: every stock is analysed from its own file, all counters
# are reset per stock, and Compare_Stocks is rebuilt from scratch, so running the cell
# again gives the same result instead of adding duplicate rows.
#
# Confusion-matrix convention (a "positive" is a predicted price increase):
#   True_Positive  - bullish cross and the price rose
#   False_Positive - bullish cross but the price did not rise
#   True_Negative  - bearish cross and the price fell
#   False_Negative - bearish cross but the price did not fall
LOOKAHEAD_DAYS = 3  # Number of following closes averaged to judge a prediction
MIN_PRICE = 2.00    # Closing prices at or below this value are ignored
CLOSE_COLUMN = 4    # Position of "Close" in the saved CSVs (Date, Open, High, Low, Close, ...)

summary_columns = ["Company", "Days_Observed", "Crosses", "True_Positive", "False_Positive", "True_Negative",
                   "False_Negative", "Sensitivity", "Specificity", "Accuracy", "TPR", "FPR"]
# One dict per stock. The dataframe is built once after the loop, because growing a
# dataframe row by row is slow and DataFrame.append no longer exists in pandas 2.x.
summary_rows = []
for stock in list_files:
    # Dataframe to hold the historical data of the stock we are interested in.
    stock_Hist_data = pd.read_csv(stock)
    Company = os.path.basename(stock).split(".csv")[0]  # Name of the company
    # Per-stock counters, reset for every file
    Days_Observed = 0
    Crosses = 0
    True_Positive = 0
    False_Positive = 0
    True_Negative = 0
    False_Negative = 0
    # Closing prices above MIN_PRICE, to reduce outlier calculations. A file without
    # a Close column yields no prices.
    if stock_Hist_data.shape[1] > CLOSE_COLUMN:
        closes = stock_Hist_data.iloc[:, CLOSE_COLUMN]
        prices = closes[closes > MIN_PRICE].tolist()
    else:
        prices = []
    prices_df = pd.DataFrame(prices)
    # Calculate exponentially weighted moving averages:
    day12 = prices_df.ewm(span=12).mean()
    day26 = prices_df.ewm(span=26).mean()
    macd_df = day12 - day26  # MACD line: 12-span average minus 26-span average
    signal_df = macd_df.ewm(span=9).mean()  # Signal line: 9-span EW moving average of the MACD line
    # Flat lists of plain floats, so the comparisons below are float against float.
    macd = macd_df.to_numpy().ravel().tolist()
    signal = signal_df.to_numpy().ravel().tolist()
    # Compare the outcome predicted by each cross with what the price actually did.
    # Starts at day 1 because a cross needs the previous day, and stops early enough
    # that the LOOKAHEAD_DAYS closes after Day all exist.
    for Day in range(1, len(macd) - LOOKAHEAD_DAYS):
        Prev_Day = Day - 1
        Avg_Closing_Next_Days = sum(prices[Day + 1:Day + 1 + LOOKAHEAD_DAYS]) / LOOKAHEAD_DAYS
        Days_Observed += 1  # Count how many days were observed
        if signal[Prev_Day] > macd[Prev_Day] and signal[Day] <= macd[Day]:
            # The signal line dipped below the MACD line: an increase is expected.
            Crosses += 1
            if prices[Day] < Avg_Closing_Next_Days:
                True_Positive += 1
            else:
                False_Positive += 1
        elif signal[Prev_Day] < macd[Prev_Day] and signal[Day] >= macd[Day]:
            # The signal line moved above the MACD line: a dip is expected.
            Crosses += 1
            if prices[Day] > Avg_Closing_Next_Days:
                True_Negative += 1
            else:
                False_Negative += 1
    # Rates default to 0 when their denominator is 0 (no crosses of that kind).
    positives = True_Positive + False_Negative
    negatives = True_Negative + False_Positive
    Sensitivity = True_Positive / positives if positives else 0
    Specificity = True_Negative / negatives if negatives else 0
    Accuracy = (True_Positive + True_Negative) / (positives + negatives) if (positives + negatives) else 0
    TPR = Sensitivity  # The true positive rate is the sensitivity
    FPR = 1 - Specificity  # The false positive rate
    summary_rows.append({
        'Company': Company, 'Days_Observed': Days_Observed, 'Crosses': Crosses,
        'True_Positive': True_Positive, 'False_Positive': False_Positive,
        'True_Negative': True_Negative, 'False_Negative': False_Negative,
        'Sensitivity': Sensitivity, 'Specificity': Specificity, 'Accuracy': Accuracy,
        'TPR': TPR, 'FPR': FPR,
    })
# The dataframe holding the final analysis of each stock, one row per stock
Compare_Stocks = pd.DataFrame(summary_rows, columns=summary_columns)
Compare_Stocks.to_csv("SMA_Analysis/All_Stocks.csv", index = False)  # Save the compiled data on each stock to a csv - All_Stocks.csv

In [47]:
from pathlib import Path
import hvplot as hv