In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import my_afml_fncs as my_afml
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

LOAD DATA

In [2]:
filename_end="2019_2024"
dollar_bars_df=pd.read_csv(f'tick_data/bars/dollar_bars_barchart_{filename_end}.csv')
dollar_bars_df['bar_number'] = range(len(dollar_bars_df))

Convert to datetime and set it to index

In [3]:
dollar_bars_df['Date']=pd.to_datetime(dollar_bars_df['Date'])
dollar_bars_df.set_index('Date',inplace=True)
dollar_bars_df.sort_index(inplace=True)

Create an array of daily vols

In [4]:
dollar_vol=my_afml.getDailyVol(dollar_bars_df.Close)

Build a cusum filter as a possible feature. Used earlier in 3.1

In [5]:
# dollar_es_cusum_events=my_afml.getTEvents(dollar_bars_df.Close,dollar_vol.mean())
# print('Dollar bar CUSUM events:',dollar_es_cusum_events.shape)

Define moving average params

In [6]:
# Start of 3.4
# Define moving average windows
short_window = 20  # e.g., 50 periods
long_window = 50  # e.g., 200 periods

Create events array where the moving average switches from in to out for dollar bars


In [7]:
# # create events where the moving averages cross over
# dollar_events = my_afml.movingAverageCrossover(dollar_bars_df.Close, short_window, long_window)
# # try this with a crossover sign change
# dollar_events['signal_switch'] = dollar_events.signal.diff() / 2
# # drop rows with any NaN values
# dollar_events = dollar_events.dropna()
# # filter rows where signal_switch is not zero
# dollar_events = dollar_events[dollar_events['signal_switch'] != 0]

In [8]:
# add the crossover data and std to volume_bars for the secondary model
dollar_bars_df[['short_ma', 'long_ma', 'mov_avg_signal']] = my_afml.movingAverageCrossover(dollar_bars_df.Close, short_window, long_window)
dollar_bars_df['1d_std']=my_afml.getDailyVol(dollar_bars_df.Close)
dollar_bars_df['correl']=dollar_bars_df['Close'].pct_change().autocorr(lag=1)


Mean  reverting strategy with Bollinger Bands

In [9]:
# Parameters
bb_window = 100
bb_num_std = 2

# Calculate Bollinger Bands
dollar_bars_df['SMA'] = dollar_bars_df['Close'].rolling(window=bb_window).mean()
dollar_bars_df['Rolling_STD'] = dollar_bars_df['Close'].rolling(window=bb_window).std()
dollar_bars_df['bollinger_upper'] = dollar_bars_df['SMA'] + (dollar_bars_df['Rolling_STD'] * bb_num_std)
dollar_bars_df['bollinger_lower'] = dollar_bars_df['SMA'] - (dollar_bars_df['Rolling_STD'] * bb_num_std)

# Step 1: Generate Initial Signals
dollar_bars_df['mean_reverting_signal'] = 0
dollar_bars_df.loc[dollar_bars_df['Close'] > dollar_bars_df['bollinger_upper'], 'mean_reverting_signal'] = -1
dollar_bars_df.loc[dollar_bars_df['Close'] < dollar_bars_df['bollinger_lower'], 'mean_reverting_signal'] = 1

In [10]:

# Step 2: Filter Out Consecutive Duplicate Signals
dollar_bars_df['prev_signal'] = dollar_bars_df['mean_reverting_signal'].shift(1)
signal_condition = (dollar_bars_df['mean_reverting_signal'] != 0) & (dollar_bars_df['prev_signal'] == 0)
dollar_bars_df['bb_trade_signal'] = np.where(signal_condition, dollar_bars_df['mean_reverting_signal'], 0)
dollar_bars_df.dropna(inplace=True)
dollar_bars_df.drop(columns=['prev_signal'], inplace=True)

In [11]:
bb_events=dollar_bars_df[['bar_number','Close','bb_trade_signal']]
bb_events=bb_events.dropna()
bb_events=bb_events[bb_events['bb_trade_signal']!=0]
bb_t1=my_afml.getVb(dollar_bars_df.Close,bb_events.index)

In [12]:
# now set ptsl to be lopsided [0,2] which we can do because we now have a 'side' from the bb. Trigger has column called 'side'
ptsl=[0,2] #zero mean no barrier, so we have no profit target but a 2* stop loss
# target is set to be standard deviation
trgt= dollar_vol * dollar_bars_df.Close
# trgt is variable but we can set a fixed minimum return
minRet=0.00003
numThread=128
# get events will find time of first touch after each event
bb_trigger_secondary_model=my_afml.getEvents(dollar_bars_df.Close, bb_events.index, ptsl, trgt, minRet, numThread, bb_t1, bb_events.bb_trade_signal)

Running processJobs


2025-03-11 16:54:45.960306 100.0% applyPtSlOnT1 done after 0.2 minutes. Remaining 0.0 minutes.


In [13]:
bb_bins_secondary_model=my_afml.getBins(bb_trigger_secondary_model,dollar_bars_df.Close)

In [14]:
# now fit same data using these labels
bb_X=dollar_bars_df.loc[bb_bins_secondary_model.index,:]
bb_y=bb_bins_secondary_model['bin']

In [17]:
# Prepare the data
features = bb_X[['correl','1d_std','mov_avg_signal']]
features.reset_index(drop=True, inplace=True)
# features = features.drop(columns=['Symbol'])
labels = bb_y  # Target variable

In [18]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Train RandomForestClassifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, dollar_bars_df.loc[y_test.index]))

Accuracy: 0.5019710906701709
              precision    recall  f1-score   support

         0.0       0.51      0.50      0.51      6541
         1.0       0.50      0.50      0.50      6396

    accuracy                           0.50     12937
   macro avg       0.50      0.50      0.50     12937
weighted avg       0.50      0.50      0.50     12937



In [24]:

dollar_bars_df.loc[y_test.index]['bb_trade_signal']

Date
2022-08-26 09:29:00    1
2020-03-18 13:10:00    1
2019-04-17 14:14:00   -1
2021-05-04 11:06:00   -1
2023-09-14 17:50:00   -1
                      ..
2020-04-17 14:18:00    1
2022-02-16 10:22:00    1
2021-07-06 14:35:00    1
2023-10-26 17:11:00    1
2021-11-24 20:20:00   -1
Name: bb_trade_signal, Length: 12937, dtype: int64

In [23]:
y_test

Date
2022-08-26 09:29:00    0.0
2020-03-18 13:10:00    1.0
2019-04-17 14:14:00    1.0
2021-05-04 11:06:00    1.0
2023-09-14 17:50:00    1.0
                      ... 
2020-04-17 14:18:00    1.0
2022-02-16 10:22:00    0.0
2021-07-06 14:35:00    1.0
2023-10-26 17:11:00    1.0
2021-11-24 20:20:00    0.0
Name: bin, Length: 12937, dtype: float64