In [621]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import yfinance as yf
import pandas_datareader as pdr

from sklearn.metrics import accuracy_score

import logging

import sys
import os


In [622]:
class SuppressOutput:
    def __enter__(self):
        self.stdout = sys.stdout
        self.stderr = sys.stderr
        self.devnull = open(os.devnull, 'w')
        sys.stdout = self.devnull
        sys.stderr = self.devnull

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = self.stdout
        sys.stderr = self.stderr


In [623]:
def filter_data(ticker_list, start, end):

    data_list = []

    for ticker in ticker_list:
        with SuppressOutput():
            data = yf.download(ticker, start=start, end=end)
        data_list.append(data)

    return dict(zip(ticker_list, data_list))
        

    

In [624]:
def create_data(ticker, ticker_name, l_wind, s_wind, wind):

    df = pd.DataFrame(ticker)
    date = df.index.tolist()
    df['Date'] = pd.to_datetime(date)

    df['Index'] = list(range(0, len(df['Date'])))

    large_window = l_wind  # Moving average window size
    small_window = s_wind
    window = wind

    # Calculate the moving average
    df['LMA'] = df['Open'].rolling(window=large_window).mean()
    df['SMA'] = df['Open'].rolling(window = small_window).mean()
    df['MA'] = df['Open'].rolling(window = window).mean()

    # print(df)

    # Create a plot
    # plt.figure(figsize=(20, 8))
    # plt.plot(df['Date'], df['Open'], label='Original Data')
    # plt.plot(df['Date'], df['LMA'], label=f'Long Moving Average ({large_window} days)')
    # plt.plot(df['Date'], df['SMA'], label=f'Short Moving Average ({small_window} days)')
    # plt.plot(df['Date'], df['MA'], label = f'Moving Average ({window} days)')
    # plt.xlabel('Date')
    # plt.ylabel('Value')
    # plt.title('Moving Average Plot')
    # plt.legend()
    # plt.grid(True)
    # path = "Experimental_Figures/" + ticker_name + ".png"
    # plt.savefig(path)
    # plt.close()

    return df








In [625]:
def get_crossover_data(dataset, window):
    train_data = []
    for i in range(window + 25, len(dataset) - 25):
        if dataset['SMA'][i] > dataset['LMA'][i] and dataset['SMA'][i - 1] <= dataset['LMA'][i - 1]:
            lma_change = (dataset['LMA'][i] - dataset['LMA'][i - 10]) / dataset['LMA'][i]
            ma_change = (dataset['MA'][i] - dataset['MA'][i - 20]) / dataset['MA'][i]
            if(ma_change > .0225):
                future = list(dataset['Close'][i+1:i+21])
                gain = 1 if any(value > dataset['High'][i] * 1.03 for value in future) else 0
                train_data.append((gain, lma_change, ma_change))
    return train_data


In [626]:
def train_model(ticker_list, start_date, end_date, large_window, short_window, window):

    ticker_dict = filter_data(ticker_list, start_date, end_date)

    train_set = []

    for ticker in ticker_dict.keys():
        data = ticker_dict.get(ticker)
        ticker_dict[ticker] = create_data(data, ticker, large_window, short_window, window)
        train_set = train_set + get_crossover_data(ticker_dict[ticker], window)

    target = [target for target, _, _ in train_set]
    features = [(x,y) for _,x,y in train_set]

    print("Valid crossovers:", len(target))
    print("Successful crossovers:", target.count(1))

    model = RandomForestClassifier(n_estimators = 80, max_depth = 5)
    # model = LogisticRegression()
    model.fit(features, target)

    # Make predictions on the test set
    y_pred = model.predict(features)

    preds = list(y_pred)

    print(preds.count(1))

    correct_signals = 0

    trades = 0

    for i in range(0, len(preds)):
        confidence = model.predict_proba(np.array(features[i]).reshape(1, -1))[0, 1]
        if preds[i] == 1 and confidence > .6:
            trades += 1
            if preds[i] == target[i]:
                correct_signals += 1

    print("Amount of trades", trades)
    print("Amount of successful trades", correct_signals)
    # Calculate accuracy
    accuracy = accuracy_score(target, y_pred)
    print("Accuracy:", accuracy)

    return model



In [627]:
def test_model(ticker_list, start_date, end_date, large_window, short_window, window, model):

    ticker_dict = filter_data(ticker_list, start_date, end_date)

    train_set = []

    for ticker in ticker_dict.keys():
        data = ticker_dict.get(ticker)
        ticker_dict[ticker] = create_data(data, ticker, large_window, short_window, window)
        train_set = train_set + get_crossover_data(ticker_dict[ticker], window)

    target = [target for target, _, _ in train_set]
    features = [(x,y) for _,x,y in train_set]

    print("Valid crossovers:", len(target))
    print("Successful crossovers:", target.count(1))

    # Make predictions on the test set
    y_pred = model.predict(features)

    preds = list(y_pred)

    print("Predicted successful crossovers:", preds.count(1))

    correct_signals = 0

    trades = 0

    for i in range(0, len(preds)):
        confidence = model.predict_proba(np.array(features[i]).reshape(1, -1))[0, 1]
        if preds[i] == 1 and confidence > .6:
            trades += 1
            if preds[i] == target[i]:
                correct_signals += 1

    print("Amount of trades", trades)
    print("Amount of successful trades", correct_signals)
    # Calculate accuracy
    accuracy = accuracy_score(target, y_pred)
    print("Accuracy:", accuracy)



In [628]:
second_tickers = [
    "ADBE", "ADP", "AMGN", "AON", "APA", "APD", "APH", "AZO", "BBY", "BDX",
    "BK", "BLK", "BMY", "BRKB", "CARR", "CAT", "CB", "CCI", "CL", "CME",
    "COF", "CPRT", "CSX", "CTAS", "CXO", "D", "DLR", "DLTR", "DOW", "DXCM",
    "EQIX", "ETR", "EXC", "FDX", "FISV", "FOXA", "GD", "GILD", "GOOG", "GOOGL",
    "GPC", "HCA", "HOLX", "HON", "IDXX", "ILMN", "INCY", "INTU", "IP", "IQV",
    "ISRG", "ITW", "KHC", "KLAC", "KMB", "KO", "LBRDK", "LDOS", "LMT", "LRCX",
    "MA", "MAR", "MKTX", "MMC", "MO", "MSCI", "MSI", "MTD", "NDAQ", "NFLX",
    "NOC", "NTRS", "NVDA", "ORCL", "PANW", "PAYX", "PEAK", "PEG", "PGR", "PH",
    "PKG", "PLD", "PPG", "PPL", "PSA", "PSX", "PVH", "REGN", "RMD", "ROK",
    "ROP", "SBUX", "SCHW", "SIVB", "SNPS", "SO", "SPGI", "SRE", "STZ", "SWKS"
]

third_tickers = [
    "SWK", "SYK", "TMUS", "TROW", "TTD", "TTWO", "TWTR", "TYL", "UDR", "UHS",
    "ULTA", "USB", "VFC", "VLO", "VRSN", "VRTX", "WAB", "WAT", "WBA", "WDAY",
    "WDC", "WEC", "WELL", "WFC", "WHR", "WLTW", "WM", "WMB", "WRB", "WST", "WU",
    "WY", "XEL", "XLNX", "XRAY", "XYL", "YUM", "ZBH", "ZBRA", "ZION", "ZTS",
    "ABMD", "ALGN", "ALXN", "ANET", "ATO", "AIZ", "AVB", "AWK", "CBOE", "CDNS",
    "CE", "CFG", "CHD", "CHRW", "CMS", "CPB", "CTVA", "DE", "DG", "DLTR", "DOV",
    "DXCM", "EFX", "ESS", "EW", "FRC", "GPN", "GRMN", "HBAN", "HII", "HWM",
    "KEY", "LDOS", "LNT", "LYB", "MKC", "MKTX", "MTB", "NI", "OKE", "PAYX",
    "PBCT", "PEP", "PKI", "PNW", "POOL", "RJF", "ROL", "RSG", "SNPS", "TFX",
    "TMK", "TSCO", "VTR", "WRB", "WST", "ZION"
]

sp500_tickers_unique = [
    "AAPL", "MSFT", "AMZN", "GOOGL", "FB", "NVDA", "JPM", "JNJ", "V",
    "PG", "UNH", "MA", "BAC", "HD", "INTC", "CMCSA", "VZ", "NFLX", "PYPL", "PFE",
    "KO", "T", "MRK", "XOM", "DIS", "CVX", "PEP", "CSCO", "WMT", "NKE", "CRM",
    "MDT", "MCD", "ORCL", "AMGN", "C", "PM", "QCOM", "IBM", "TMO", "AVGO", "TXN",
    "UNP", "LMT", "AXP", "UPS", "GS", "SCHW", "NEE", "CME", "TGT", "BKNG", "USB",
    "ISRG", "SPGI", "ADP", "DUK", "INTU", "RTX", "CTAS", "AON", "CCI", "AIG",
    "MMC", "AEP", "SO", "GILD", "MO", "FIS", "GM", "ECL", "DD", "DOW", "LRCX",
    "SYK", "MET", "AGN", "SBUX", "ADI", "CPRT", "BDX", "ILMN", "NSC", "ZTS",
    "ATVI", "EQIX", "FDX", "NBL", "NDAQ", "NEE", "NEM", "NLSN", "NOC", "NOV",
    "NOW", "NRG", "NTAP", "NTRS", "NUE", "NVR", "NWL", "NWS", "NWSA", "O",
    "ODFL", "OGN", "OKE", "OMC", "ORCL", "ORLY", "OTIS", "OXY", "PAYC", "PAYX",
    "PBCT", "PCAR", "PEAK", "PEG", "PEP", "PFE", "PFG", "PG", "PGR", "PH", "PHM",
    "PKG", "PKI", "PLD", "PM", "PNC", "PNR", "PNW", "POOL", "PPG", "PPL", "PRGO",
    "PRU", "PSA", "PSX", "PVH", "PWR", "PXD", "PYPL", "QCOM", "QRVO", "RCL", "RE",
    "REG", "REGN", "RF", "RHI", "RJF", "RL", "RMD", "ROK", "ROL", "ROP", "ROST",
    "RSG", "RTX", "SBAC", "SBUX", "SCHW", "SEE", "SHW", "SIVB", "SJM", "SLB",
    "SNA", "SNPS", "SO", "SPG", "SPGI", "SRE", "STE", "STT", "STX", "STZ", "SWK",
    "SWKS", "SYF", "SYK", "SYY", "T", "TAP", "TDG", "TDY", "TEL", "TER", "TFC",
    "TFX", "TGT", "TJX", "TMO", "TMUS", "TPR", "TRV", "TSCO", "TSN", "TT",
    "TTWO", "TWTR", "TXN", "TXT", "TYL", "UA", "UAL", "UDR", "UHS", "ULTA", "UNH",
    "UNM", "UNP", "UPS", "URI", "USB", "VAR", "VFC", "VIAC", "VLO", "VMC", "VNO",
    "VRSK", "VRSN", "VRTX", "VTR", "VZ", "WAB", "WAT", "WBA", "WDC", "WEC", "WELL",
    "WFC", "WHR", "WLTW", "WM", "WMB", "WMT", "WRB", "WRK", "WST", "WU", "WY",
    "WYNN", "XEL", "XLNX", "XOM", "XRAY", "XYL", "YUM", "ZBH", "ZBRA", "ZION",
    "ZTS", "ABMD", "ALGN", "ALXN", "ANET", "ANSS", "APH", "ARE", "ATO", "AVB",
    "AVGO", "AVY", "AWK", "AXP", "AZO", "BAX", "BBY", "BDX", "BEN", "BF-B",
    "BIIB", "BIO", "BLK", "BLL", "BMY", "BR", "BRK-B", "BSX", "BWA", "BXP", "C",
    "CAG", "CAH", "CARR", "CAT", "CB", "CBOE", "CBRE", "CMA", "CMCSA", "CME",
    "CMG", "CMI", "CMS", "CNC", "CNP", "COF", "COG"
]

ticker_list = list(set(sp500_tickers_unique))
print(len(ticker_list))
print(len(list(set(second_tickers + third_tickers))))




270
189


In [629]:
model = train_model(list(set(second_tickers + third_tickers)), train_start_date, train_end_date, large_window, short_window, window)

ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['XLNX']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['DOW']: Exception("%ticker%: Data doesn't exist for startDate = 1293858000, endDate = 1514782800")
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['CARR']: Exception("%ticker%: Data doesn't exist for startDate = 1293858000, endDate = 1514782800")
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['BRKB']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TMK']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['ALXN']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['WLTW']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['PBCT']: Exception('%tic

Valid crossovers: 1484
Successful crossovers: 704
246
Amount of trades 77
Amount of successful trades 75
Accuracy: 0.6293800539083558


In [630]:
test_start_date = "2018-01-01"
test_end_date = "2023-01-01"

test_model(ticker_list, test_start_date, test_end_date, large_window, short_window, window, model)

ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['COG']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['XLNX']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['FB']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['WLTW']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['PBCT']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['ALXN']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['VAR']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TWTR']: Exception('%ticker%: No timezone found, symbol may be delisted')
ERRO

Valid crossovers: 1493
Successful crossovers: 833
Predicted successful crossovers: 369
Amount of trades 173
Amount of successful trades 124
Accuracy: 0.521768251841929
