In [2]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import yfinance as yf
import pandas_datareader as pdr

from sklearn.metrics import accuracy_score

import logging

import sys
import os

from scipy.stats import binom_test
from mpl_toolkits.mplot3d import Axes3D
import math


In [3]:
class SuppressOutput:
    def __enter__(self):
        self.stdout = sys.stdout
        self.stderr = sys.stderr
        self.devnull = open(os.devnull, 'w')
        sys.stdout = self.devnull
        sys.stderr = self.devnull

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = self.stdout
        sys.stderr = self.stderr


In [4]:
def filter_data(ticker_list, start, end):

    data_list = []

    for ticker in ticker_list:
        with SuppressOutput():
            data = yf.download(ticker, start=start, end=end)
        data_list.append(data)

    return dict(zip(ticker_list, data_list))
        

    

In [5]:
def create_data(ticker, ticker_name, l_wind, s_wind, wind):

    df = pd.DataFrame(ticker)
    date = df.index.tolist()
    df['Date'] = pd.to_datetime(date)

    df['Index'] = list(range(0, len(df['Date'])))

    large_window = l_wind
    small_window = s_wind
    window = wind

    # Calculate the moving average
    df['LMA'] = df['Open'].ewm(span=large_window, adjust=False).mean()
    df['SMA'] = df['Open'].ewm(span=small_window, adjust=False).mean()
    df['MA'] = df['Open'].ewm(span=window, adjust=False).mean()

    return df








In [280]:
def create_train_set(ticker_list, start_date, end_date, large_window, short_window, window):
    ticker_dict = filter_data(ticker_list, start_date, end_date)

    for ticker in ticker_dict.keys():
        data = ticker_dict.get(ticker)
        ticker_dict[ticker] = create_data(data, ticker, large_window, short_window, window)

    return ticker_dict


def train_model(ticker_dict, large_window, short_window, window):

    train_set = []

    for ticker in ticker_dict.keys():
        train_set = train_set + get_crossover_data(ticker_dict[ticker], window)

    lma = [lma for _, lma, _ in train_set]
    sma = [sma for _, _, sma in train_set]
    gain = [gain for gain, _, _ in train_set]

    # fig = plt.figure(figsize = (20, 8))
    # plt.scatter(lma, sma, c=gain, cmap='bwr', marker='o')

    # # Add color bar
    # cbar = plt.colorbar()
    # cbar.set_label('Binary Value')

    # plt.title("Scatter plot of winning (red) and losing (blue) trades based on rate of change in moving averages")
    # plt.xlabel("Long-term Moving Average")
    # plt.ylabel("Short-term Moving Average")

    # # Show the plot
    # plt.show()

    target = [target for target, _, _ in train_set]
    features = [(x,y) for _,x,y in train_set]

    # print("Valid crossovers:", len(target))
    # print("Successful crossovers:", target.count(1))

    model = RandomForestClassifier(n_estimators = 20, max_depth = 4)
    # model = LogisticRegression()
    model.fit(features, target)

    # Make predictions on the test set
    y_pred = model.predict(features)

    preds = list(y_pred)

    correct_signals = 0

    trades = 0

    for i in range(0, len(preds)):
        confidence = model.predict_proba(np.array(features[i]).reshape(1, -1))[0, 1]
        if preds[i] == 1 and confidence > .7:
            trades += 1
            if preds[i] == target[i]:
                correct_signals += 1

    print("Amount of trades", trades)
    print("Amount of successful trades", correct_signals)
    # Calculate accuracy
    accuracy = accuracy_score(target, y_pred)
    print("Accuracy:", accuracy)

    return model



In [242]:
def create_test_set(ticker_list, start_date, end_date, large_window, short_window, window):
    ticker_dict = filter_data(ticker_list, start_date, end_date)

    for ticker in ticker_dict.keys():
        data = ticker_dict.get(ticker)
        ticker_dict[ticker] = create_data(data, ticker, large_window, short_window, window)

    return ticker_dict

def test_model(ticker_dict, large_window, short_window, window, model):

    train_set = []

    for ticker in ticker_dict.keys():
        train_set = train_set + get_crossover_data(ticker_dict[ticker], window)

    target = [target for target, _, _ in train_set]
    features = [(x,y) for _,x,y in train_set]

    print("Valid crossovers:", len(target))
    print("Successful crossovers:", target.count(1))

    # Make predictions on the test set
    y_pred = model.predict(features)

    preds = list(y_pred)

    print("Predicted successful crossovers:", preds.count(1))

    correct_signals = 0

    trades = 0

    for i in range(0, len(preds)):
        confidence = model.predict_proba(np.array(features[i]).reshape(1, -1))[0, 1]
        if preds[i] == 1 and confidence > .75:
            trades += 1
            if preds[i] == target[i]:
                correct_signals += 1

    print("Amount of trades", trades)
    print("Amount of successful trades", correct_signals)
    # Calculate accuracy
    accuracy = accuracy_score(target, y_pred)
    print("Accuracy:", accuracy)

    return (correct_signals / trades, target.count(1) / len(target))



In [8]:
window = 200
large_window = 80
small_window = 20


In [283]:

init_start = '2001-09-06'
init_end = '2010-09-06'

start_date = '2020-09-01'
end_date = '2023-09-06'

recent_start = '2022-08-01'
recent_end = '2023-09-06'

long_start = '2013-08-31'
long_end = '2023-09-06'

train_start = '2010-08-31'
train_end = '2018-09-06'

init_start = '2001-08-31'
init_end = '2010-09-06'

common_ticks = [
    "TSLA", "NIO", "AMD", "WBD", "NU", "TLRY", "NVDA", "AAPL", "PLTR", "INTC",
    "AMZN", "BAC", "F", "VALE", "WBA", "PARA", "XPEV", "DELL", "DNA", "DIS",
    "NUVA", "KVUE", "RIVN", "BEKE", "MARA", "GRAB", "GOOGL", "BKI", "LCID", "T",
    "HPE", "RIG", "AFRM", "CMCSA", "SWN", "SNAP", "OPEN", "SOFI", "PFE", "BABA",
    "HZNP", "CCL", "GOOG", "LYFT", "S", "MSFT", "XOM", "AAL", "VZ", "UBER", "KMI",
    "CSCO", "GSAT", "SHOP", "USB", "PDD", "META", "WFC", "C", "BBD", "CSX", "NOK",
    "GOLD", "JD", "LU", "PYPL", "IQ", "KEY", "KO", "PLUG", "FCX", "MU", "DG", "CVNA",
    "PCG", "RYCEY", "SIRI", "TAL", "BCS", "CHWY", "MRO", "IOT", "NTNX", "HPQ", "KGC",
    "XP", "HBAN", "YMM", "SCHW", "DVN", "RBLX", "DKNG", "HOOD", "LYG", "OXY", "ET",
    "GM", "M", "HAL", "JNJ", "OLN", "BKR", "NEE", "CVE", "ABCM", "DISH", "TFC",
    "JPM", "AGNC", "IONQ", "KHC", "RF", "MRVL", "MDLZ", "FOXA", "STLA", "ROKU",
    "TSM", "CVX", "NKE", "CVS", "SBUX", "CRM", "GPS", "INFY", "QCOM", "LULU", "FITB"
]


common_init = create_train_set(common_ticks, init_start, init_end, large_window, small_window, window)
common_mid = create_train_set(common_ticks, start_date, end_date, large_window, small_window, window)
common_short = create_train_set(common_ticks, recent_start, recent_end, large_window, small_window, window)
common_long = create_train_set(common_ticks, train_start, train_end, large_window, small_window, window)
common_decade = create_train_set(common_ticks, long_start, long_end, large_window, small_window, window)

In [284]:


def get_crossover_data(dataset, window):
    train_data = []
    for i in range(20, len(dataset) - 25):
        if dataset['SMA'][i - 6] <= dataset['LMA'][i - 6] and dataset['SMA'][i - 5] > dataset['LMA'][i - 5]:
            lma_change = (dataset['LMA'][i] - dataset['LMA'][i - 10]) / dataset['LMA'][i - 10] / 10
            sma_change = (dataset['SMA'][i] - dataset['SMA'][i - 10]) / dataset['SMA'][i - 10] / 10
            ma_change = (dataset['MA'][i] - dataset['MA'][i - 20]) / dataset['MA'][i - 20]
            if(lma_change > 0.002):
                future = list(dataset['High'][i+2:i+20])
                gain = 1 if any(value > dataset['Open'][i + 1] * 1.05 for value in future) else 0
                train_data.append((gain, lma_change, sma_change))
    return train_data

In [285]:

model = train_model(common_long, large_window, small_window, window)

Amount of trades 40
Amount of successful trades 38
Accuracy: 0.8048780487804879


In [286]:
test_model(common_mid, large_window, small_window, window, model)

Valid crossovers: 171
Successful crossovers: 129
Predicted successful crossovers: 121
Amount of trades 36
Amount of successful trades 31
Accuracy: 0.5906432748538012


(0.8611111111111112, 0.7543859649122807)

In [288]:

def get_signal_data(ticker_dict, window, model):
    trade_data = []
    for ticker in ticker_dict.keys():
        dataset = ticker_dict[ticker]
        for i in range(20, len(dataset)):
            if dataset['SMA'][i - 6] <= dataset['LMA'][i - 6] and dataset['SMA'][i - 5] > dataset['LMA'][i - 5]:
                lma_change = (dataset['LMA'][i] - dataset['LMA'][i - 10]) / dataset['LMA'][i - 10] / 10
                sma_change = (dataset['SMA'][i] - dataset['SMA'][i - 10]) / dataset['SMA'][i - 10] / 10
                ma_change = (dataset['MA'][i] - dataset['MA'][i - 20]) / dataset['MA'][i - 20]

                if(lma_change > .002 and model.predict_proba(np.array([lma_change, sma_change]).reshape(1, -1))[0, 1] > .75):
                    date = dataset['Date'][i]
                    sell = 0
                    # sell = dataset['Open'][i + 1] * 1.1 if any(value > dataset['Open'][i + 1] * 1.1 for value in future) else dataset['Close'][i + 80]
                    trade_data.append((date, ticker))
    return trade_data

def create_trade_signals(ticker_dict, window, initial_capital, trade_amount, model):
    capital = initial_capital
    trade_data = get_signal_data(ticker_dict, window, model)
    # up_data = get_upward_data(ticker_dict, window)
    # down_data = get_downward_data(ticker_dict, window)

    # trade_data += up_data
    # trade_data += down_data

    trade_df = pd.DataFrame(trade_data, columns=['Date', 'Ticker'])
    sorted_df = trade_df.sort_values(by='Date')
    sorted_df = sorted_df.reset_index(drop=True)

    date_range = pd.date_range(start= "2013-09-06", end= "2023-09-06")
    date_list = date_range.tolist()

    # positions = []

    wins = []
    loss = []


    to_buy = []
    positions = []
    to_sell = []

    loss = []
    wins = []

    dates = []
    portfolio = []

    for i in date_list:
        # Buy positions the next day

        for asset in to_buy:
            asset_df = ticker_dict[asset[0]]
            index = list(asset_df['Date']).index(asset[1])
            if(i == (asset_df['Date'][index + 1])):
                print("{} bought on {}: {:.3f}".format(asset[0], i, asset_df['Open'][index + 1]))
                capital -= trade_amount
                positions.append([asset[0], asset_df['Open'][index + 1], 20])
                to_buy.remove(asset)




        # Sell positions

        for asset in to_sell:
            asset_df = ticker_dict[asset[0][0]]
            if i == asset_df['Date'][asset[1]]:
                sell_price = asset_df['Open'][asset[1]]
                buy_price = asset[0][1]
                capital += trade_amount * (sell_price / buy_price)
                print("{} sold on {}: {:.3f}, move: {:.3f}".format(asset[0][0], i, sell_price, sell_price / buy_price - 1))
                to_sell.remove(asset)
                positions.remove(asset[0])
                move = sell_price / buy_price - 1
                if move < 0:
                    loss.append(move)
                if move > 0:
                    wins.append(move)
                


        # Mark position for selling

        for asset in positions:
            asset_df = ticker_dict[asset[0]]
            if i in asset_df['Date']:
                index = list(asset_df['Date']).index(i)
                asset[2]-= 1
                if asset_df['High'][index] > asset[1] * 1.05:
                    to_sell.append([asset, index + 1])
                    print("Sell signal: {} on {}".format(asset[0], i))
                elif asset[2] == 0:
                    to_sell.append([asset, index + 1])
                    print("Hold limit reached for {}".format(asset[0]))
                # elif asset_df['Low'][index] < asset[1] * .85:
                #     to_sell.append([asset, index + 1])
                #     print("Stop loss triggered for {}".format(asset[0]))
                






        # Create buy signal
        for n in range(len(sorted_df['Date'])):
            if sorted_df['Date'][n] == i and capital > trade_amount:
                # capital -= trade_amount
                print("Buy signal: {} on {}".format(sorted_df['Ticker'][n], sorted_df['Date'][n]))
                to_buy.append((sorted_df['Ticker'][n], sorted_df['Date'][n]))

                # positions.append([sorted_df['Buy'][n], sorted_df['Sell'][n], sorted_df['Sell_date'][n], sorted_df['Hold'][n], sorted_df['Ticker'][n]])

        portfolio.append((capital + len(positions) * trade_amount))
        dates.append(i)

    print((capital + len(positions) * trade_amount) / initial_capital)
    print(len(wins))
    print(len(loss))
    print(np.mean(wins))
    print(np.mean(loss))
    # plt.plot(dates, portfolio)

    return (capital + len(positions) * trade_amount) / initial_capital

create_trade_signals(common_mid, window, 7000, 1500, model)

Buy signal: PLUG on 2020-10-12 00:00:00
PLUG bought on 2020-10-13 00:00:00: 17.300
Sell signal: PLUG on 2020-11-05 00:00:00
PLUG sold on 2020-11-06 00:00:00: 18.500, move: 0.069
Buy signal: XPEV on 2020-11-10 00:00:00
XPEV bought on 2020-11-11 00:00:00: 31.260
Sell signal: XPEV on 2020-11-11 00:00:00
Buy signal: TLRY on 2020-11-11 00:00:00
TLRY bought on 2020-11-12 00:00:00: 7.700
XPEV sold on 2020-11-12 00:00:00: 36.480, move: 0.167
Sell signal: TLRY on 2020-11-12 00:00:00
TLRY sold on 2020-11-13 00:00:00: 7.740, move: 0.005
Buy signal: LYFT on 2020-11-18 00:00:00
LYFT bought on 2020-11-19 00:00:00: 39.600
Buy signal: DVN on 2020-11-19 00:00:00
DVN bought on 2020-11-20 00:00:00: 13.500
Sell signal: DVN on 2020-11-23 00:00:00
DVN sold on 2020-11-24 00:00:00: 15.040, move: 0.114
Buy signal: MRO on 2020-11-25 00:00:00
MRO bought on 2020-11-27 00:00:00: 6.460
Buy signal: OXY on 2020-11-27 00:00:00
Buy signal: C on 2020-11-27 00:00:00
Buy signal: RIG on 2020-11-27 00:00:00
OXY bought on 20

1.3211273940748816

In [289]:
short_term = []
mid_term = []

for i in range (10):
    model = train_model(common_long, large_window, small_window, window)
    short_term.append(create_trade_signals(common_short, window, 7000, 1500, model))
    mid_term.append(create_trade_signals(common_mid, window, 7000, 1500, model))

print(np.mean(short_term))
print(np.mean(mid_term))

Amount of trades 38
Amount of successful trades 38
Accuracy: 0.8048780487804879
Buy signal: YMM on 2022-12-08 00:00:00
YMM bought on 2022-12-09 00:00:00: 8.750
Sell signal: YMM on 2022-12-13 00:00:00
YMM sold on 2022-12-14 00:00:00: 8.410, move: -0.039
Buy signal: IQ on 2022-12-15 00:00:00
IQ bought on 2022-12-16 00:00:00: 3.710
Sell signal: IQ on 2022-12-16 00:00:00
IQ sold on 2022-12-19 00:00:00: 3.780, move: 0.019
Buy signal: LYFT on 2023-01-26 00:00:00
LYFT bought on 2023-01-27 00:00:00: 15.230
Sell signal: LYFT on 2023-01-27 00:00:00
LYFT sold on 2023-01-30 00:00:00: 15.590, move: 0.024
Buy signal: CVNA on 2023-05-19 00:00:00
CVNA bought on 2023-05-22 00:00:00: 10.765
Sell signal: CVNA on 2023-05-22 00:00:00
Buy signal: OPEN on 2023-05-22 00:00:00
OPEN bought on 2023-05-23 00:00:00: 2.500
CVNA sold on 2023-05-23 00:00:00: 11.580, move: 0.076
Sell signal: OPEN on 2023-05-23 00:00:00
OPEN sold on 2023-05-24 00:00:00: 2.370, move: -0.052
Buy signal: SOFI on 2023-06-12 00:00:00
SOFI b