In [None]:
import glob
import os
import time
import urllib.parse
from datetime import datetime, date

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pytz
import tensorflow as tf
from astral import LocationInfo
from astral.sun import sun
from keras import layers, models
from prophet import Prophet
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier

import strat_defs # custom functions
import prep_data

In [None]:
def gen_powerset(some_list):
    """Return every subset of ``some_list`` as a list of lists.

    The empty subset comes first. Each element of ``some_list`` is processed in
    order: every subset collected so far is duplicated with that element
    appended, and the duplicates are added after the existing subsets.
    """
    subsets = [[]]

    for element in some_list:
        # Build the extensions from a snapshot of the subsets seen so far.
        extended = []
        for subset in subsets:
            extended.append(subset + [element])
        subsets.extend(extended)

    return subsets

In [None]:
# Load the 'sp_df_*.csv' file in the working directory with the largest getctime.
sp_df_files = glob.glob('sp_df_*.csv')
if not sp_df_files:
    # Without this guard max() fails with "max() arg is an empty sequence",
    # which says nothing about the missing input file.
    raise FileNotFoundError(f"No file matching 'sp_df_*.csv' found in {os.getcwd()}")
# NOTE: os.path.getctime is creation time on Windows but metadata-change time on Unix.
sp_df_latest = max(sp_df_files, key=os.path.getctime)
sp_df_raw = pd.read_csv(sp_df_latest)

In [None]:
# Distinct values found in the 'GICS Sector' column of the loaded file.
set(sp_df_raw['GICS Sector'])

In [None]:
# Display the loaded frame for a quick visual check.
sp_df_raw

### Set configuration and parameters for model testing

In [None]:
# Indicator Configuration
# NOTE(review): none of the three indicator configs below is referenced by another
# cell visible in this notebook (IndicatorConfig is built with only ticker=...).
# Confirm whether they are meant to be passed to prep_data.IndicatorConfig.
moving_average_config = prep_data.MovingAverageConfig(short_window=10, long_window=50)
bollinger_config = prep_data.BollingerConfig(window=90, num_std=3.0)
macd_config = prep_data.MACDConfig(short_window=12, long_window=26)

# Backtest Configuration
# Both objects below are bundled into backtest_config, which is handed to
# strat_defs.backtest_strategy for every ticker/strategy pair.
# The 0.5 values are presumably per-model probability cut-offs — TODO confirm in strat_defs.
keras_config = strat_defs.KerasConfig(proba=0.5, sequence_length=30, epochs=20)
proba_config = strat_defs.ProbaConfig(knn = 0.5, logit = 0.5, mlp = 0.5, rf = 0.5, svc = 0.5, xgboost = 0.5)

backtest_config = strat_defs.BacktestConfig(
    logit_warm_start = False, # FIXME: True fails when prep_data is called with drop_tickers=False; cause unknown.
    proba = proba_config,
    keras = keras_config
)

**s_date**\
Data start date. The earliest available S&P 500 data is 1993-01-29; the earliest available Wikipedia page-view data is 2015-07-01.

**exclude_vars**\
enum {"Open","High","Low","Close","Adj Close","Volume","movement", "views"}

**strategy_list**\
enum {"Hold","KNN","Logit","MLP","RandomForest","SVC_proba","XGBoost","Keras","Perfection"}

Typical run time, fastest to slowest: Hold, Perfection, KNN, Logit, XGBoost, SVC_proba (200s), MLP (2978s), Keras (4048s)

For Best Prediction, I'm removing strategies that do not give a probability

\
*If testing Breakout, "High" and "Low" cannot be excluded*

In [None]:
# Only rows of the prepared data with Date >= s_date are kept for training/backtesting.
s_date = "2015-07-01"
# Column-name prefixes dropped before training; the ticker's own target column is always kept.
exclude_vars = ("Open","High","Low","Close","Adj Close","Volume")

# Passed to backtest_strategy as initial_train_period; the results cell also uses it
# as the first row of the plotted portfolio curves.
# initial_train_period = 1000
# initial_train_period = 1890 # 2015-07-01 start predicting in 2023
initial_train_period = 2140 # 2015-07-01 start predicting in 2024
# initial_train_period = 7535 # 1993-01-29 start predicting in 2024
bko_window = 20 # forwarded to backtest_strategy as bko_window
random_state = 42 # forwarded to backtest_strategy as random_state
n_jobs = None # forwarded to backtest_strategy as n_jobs

# Stocks to test
# 'SPY' must stay in this list: later cells read strat_bds['SPY_Hold'] and the
# 'Daily_Return_SPY' column as the benchmark.
# IPO after 2015-07-01: CRWD, CTVA, DAY
to_test = ['SPY','A','AAPL','ABBV','ABT','ACGL','ACN','ADBE','ADI','ADM','ADP','ADSK','AEE','AEP']#,
           # 'AES','AFL','AIG','AIZ','AJG','AKAM','ALB','ALGN','ALL','ALLE','AMAT','AMCR','AMD','AME',
           # 'AMGN','AMP','AMT','AMZN','ANET','ANSS','AON','AOS','APA','APD','APH','APO','APTV','ARE',
           # 'ATO','AVB','AVGO','AVY','AWK','AXON','AXP','AZO','BA','BAC','BALL','BAX','BBY','BDX',
           # 'BEN','BG','BIIB','BK','BKNG','BKR','BLDR','BLK','BMY','BR','BRO','BSX','BWA','BX','BXP',
           # 'C','CAG','CAH','CAT','CB','CBOE','CBRE','CCI','CCL','CDNS','CDW','CE','CF',
           # 'CFG','CHD','CHRW','CHTR','CI','CINF','CL','CLX','CMCSA','CME','CMG','CMI','CMS','CNC',
           # 'CNP','COF','COO','COP','COR','COST','CPAY','CPB','CPRT','CPT','CRL','CRM','CSCO',
           # 'CSGP','CSX','CTAS','CTRA','CTSH','CVS','CVX','CZR','D','DAL','DD','DE']
                                                     
# Strategies to test
# "Hold" must stay in this list: the results cell plots strat_bds['SPY_Hold'] as the baseline.
# Every other entry is combined in the model-of-models cell, which reads its 'proba_1' column.
strategy_list = ["Hold","KNN","Logit"]

### Run models

In [None]:
%%time
strat_bds, strat_mods = {}, {}
for ticker in to_test:
    print(f'\n\033[1m{ticker}\033[0m')
    indicator_config = prep_data.IndicatorConfig(ticker=ticker)
    prepd_data = prep_data.prep_data(config=indicator_config, drop_tickers=False)
    
    df_for_chart = prepd_data.loc[prepd_data['Date']>=s_date].reset_index(drop=True)
    df_for_chart = df_for_chart.drop(columns=[
        col for col in df_for_chart.columns 
        if any(col.startswith(prefix) for prefix in exclude_vars) and col != indicator_config.target+"_"+indicator_config.ticker
    ])
    df_for_chart = df_for_chart.dropna(axis='columns') # drop columns with an na
    
    print(f'Training on data set with {len(df_for_chart)} rows and {df_for_chart.shape[1]-1} features\n')
    
    # Calculate portfolio value over time
    for strat in strategy_list:
        start_time = time.time()
        print(f'{strat}', end=" ")
        backtested_data,model,score = strat_defs.backtest_strategy(data=df_for_chart,
                                                                   strategy=strat,
                                                                   target='Adj Close',
                                                                   ticker=ticker,
                                                                   config=backtest_config,
                                                                   initial_train_period=initial_train_period,
                                                                   bko_window=bko_window,
                                                                   random_state=random_state,
                                                                   n_jobs=n_jobs)
        end_time = time.time()
        print(f'score = {score}, time = {end_time-start_time}')
        
        strat_bds[f'{ticker}_{strat}'] = backtested_data
        strat_mods[f'{ticker}_{strat}'] = model

In [None]:
# Model of models: for every non-empty combination of strategies, pick each day the
# (ticker, strategy) with the highest 'proba_1' and take that ticker's daily return,
# falling back to the benchmark return when the best probability is below the threshold.
# "Hold" is left out of the combinations.
proba_strats = [x for x in strategy_list if x not in ["Hold"]]

combos = gen_powerset(proba_strats)
combos.remove([])

proba_threshold = 0.7     # below this best probability the benchmark return is used
benchmark_ticker = 'SPY'  # ticker whose daily return is the fallback
benchmark_return_col = f'Daily_Return_{benchmark_ticker}'

if combos and benchmark_ticker not in to_test:
    # Fail early with a clear message instead of a KeyError deep inside the row-wise apply.
    raise ValueError(f"'{benchmark_ticker}' must be in to_test: its daily return is the fallback return")

mod_mod_dic = {}
for mods in combos:
    mod_mod = strat_bds[to_test[0]+"_"+mods[0]][['Date']]
    # Explicit column -> ticker lookup, so the ticker never has to be parsed back
    # out of a column name (parsing on '_' breaks for tickers containing '_').
    proba_col_to_ticker = {}
    for ticker in to_test:
        # The first strategy of the combo also contributes the return and target columns.
        df_prev = strat_bds[ticker+"_"+mods[0]][['Date','Daily_Return','Target','proba_1','Signal']]
        df_prev = df_prev.rename(columns={'Daily_Return': f'Daily_Return_{ticker}'
                                          ,'Target': f'{ticker}_Target'
                                          ,'proba_1': f'{ticker}_proba_1{mods[0]}'
                                          ,'Signal': f'{ticker}_Signal_{mods[0]}'})
        mod_mod = mod_mod.merge(df_prev,on='Date')
        proba_col_to_ticker[f'{ticker}_proba_1{mods[0]}'] = ticker

        # Remaining strategies only add their probability and signal columns.
        for i in mods[1:]:
            df = strat_bds[ticker+"_"+i][['Date','proba_1','Signal']].rename(columns={'proba_1': f'{ticker}_proba_1{i}'
                                                                                      ,'Signal': f'{ticker}_Signal_{i}'})
            mod_mod = mod_mod.merge(df,on='Date')
            proba_col_to_ticker[f'{ticker}_proba_1{i}'] = ticker

    proba_cols = [col for col in mod_mod.columns if 'proba_1' in col]
    proba_frame = mod_mod[proba_cols]

    mod_mod['proba_1max'] = proba_frame.max(axis=1)

    # idxmax on an all-NaN row is deprecated in pandas 2.1 and slated to raise, so it
    # is evaluated only on rows with at least one probability; other rows stay NaN.
    has_proba = proba_frame.notna().any(axis=1)
    best_col = pd.Series(np.nan, index=mod_mod.index, dtype=object)
    if has_proba.any():
        best_col.loc[has_proba] = proba_frame.loc[has_proba].idxmax(axis=1, skipna=True)
    mod_mod['proba_1max_col'] = best_col

    # Object dtype keeps NaN rows as NaN when the string prefix is added.
    best_ticker = mod_mod['proba_1max_col'].map(proba_col_to_ticker).astype(object)
    mod_mod['proba_1max_ticker'] = "Daily_Return_" + best_ticker

    # Return of the chosen ticker; rows without any probability use the benchmark return.
    mod_mod['proba_1max_ticker_Daily_return'] = mod_mod.apply(
        lambda row: row[row['proba_1max_ticker']] if pd.notnull(row['proba_1max_col']) else row[benchmark_return_col], axis=1
    )

    mod_mod['Strategy_Return'] = mod_mod['proba_1max_ticker_Daily_return']

    mod_mod.loc[mod_mod['proba_1max'] < proba_threshold, 'Strategy_Return'] = mod_mod[benchmark_return_col]
    mod_mod.loc[0, 'Strategy_Return'] = np.nan

    mod_mod_dic["_".join(mods)] = mod_mod

### Results

In [None]:
initial_capital = 10000 # scalar

# Plot Daily Portfolio Value
fig = go.Figure()

# One entry per curve: (end_val key, trace name, source frame).
# SPY buy-and-hold comes first, followed by every strategy combination.
curves = [('SPY', 'Portfolio Value SPY_Hold', strat_bds['SPY_Hold'])]
for mods in combos:
    combo_key = '_'.join(mods)
    curves.append((combo_key, f"Portfolio Value ({combo_key})", mod_mod_dic[combo_key]))

end_val = {}
for curve_key, trace_name, frame in curves:
    # Start at the first row after the initial training window, with a zero return on that row.
    df = frame[initial_train_period:].copy()
    df.loc[initial_train_period, 'Strategy_Return'] = 0
    growth = (1 + df['Strategy_Return']).cumprod()
    df['Portfolio_Value'] = growth * initial_capital

    trace = go.Scatter(x=df['Date'], y=df['Portfolio_Value'], mode='lines', name=trace_name)
    fig.add_trace(trace)
    end_val[curve_key] = df['Portfolio_Value'].values[-1]

fig.update_layout(title="Portfolio")
fig.show()

In [None]:
# Final portfolio value per curve and its difference from the SPY value, largest first.
pl_df = (
    pd.Series(end_val, name='end_val')
    .rename_axis('ticker')
    .reset_index()
)
spy_end_val = end_val['SPY']
pl_df['vs_SPY'] = pl_df['end_val'] - spy_end_val
pl_df.sort_values(by='vs_SPY',ascending=False)

In [None]:
# Print each combination's final value relative to SPY, with the sign ahead of the dollar symbol.
for combo in combos:
    label = '_'.join(combo)
    pl = end_val[label]-end_val['SPY']

    is_loss = pl < 0
    sign = '-' if is_loss else ''
    magnitude = pl*-1 if is_loss else pl
    print(f'{label} P&L: {sign}${magnitude:,.2f}')