In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

In [2]:
df = pd.read_csv('./data/stock1-cleaned.csv')

# Cleaning Columns

In [3]:
df.columns

Index(['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume',
       'Simple Moving Average - SMA - SMA(20)',
       'Simple Moving Average - SMA - SMA(50)',
       'Simple Moving Average - SMA - SMA(200)',
       'Exponential Moving Average - EMA - EMA(9,Modern)',
       'Exponential Moving Average - EMA - EMA(61,Modern)',
       'Relative Strength Index - RSI - RSI(14)', 'MACD - MACD(12,26,9)',
       'MACD - Histogram', 'MACD - Signal Line(MACD(12,26,9),9,Modern)'],
      dtype='object')

In [4]:
df.columns.str.strip()

Index(['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume',
       'Simple Moving Average - SMA - SMA(20)',
       'Simple Moving Average - SMA - SMA(50)',
       'Simple Moving Average - SMA - SMA(200)',
       'Exponential Moving Average - EMA - EMA(9,Modern)',
       'Exponential Moving Average - EMA - EMA(61,Modern)',
       'Relative Strength Index - RSI - RSI(14)', 'MACD - MACD(12,26,9)',
       'MACD - Histogram', 'MACD - Signal Line(MACD(12,26,9),9,Modern)'],
      dtype='object')

In [11]:
# Cleanup the columns.
df.columns = df.columns.str.strip()

In [6]:
df.iloc[0]

Date                                                 06/26/2025
Time                                                    4:00 PM
Open                                                    5601.04
High                                                    5601.04
Low                                                     5597.09
Close                                                   5597.09
Volume                                                      431
Simple Moving Average - SMA - SMA(20)                       0.0
Simple Moving Average - SMA - SMA(50)                       0.0
Simple Moving Average - SMA - SMA(200)                      0.0
Exponential Moving Average - EMA - EMA(9,Modern)            0.0
Exponential Moving Average - EMA - EMA(61,Modern)           0.0
Relative Strength Index - RSI - RSI(14)                     0.0
MACD - MACD(12,26,9)                                        0.0
MACD - Histogram                                            0.0
MACD - Signal Line(MACD(12,26,9),9,Moder

In [7]:
df.iloc[3]

Date                                                 06/27/2025
Time                                                    9:31 AM
Open                                                    5617.75
High                                                    5617.75
Low                                                     5613.64
Close                                                   5613.64
Volume                                                     3406
Simple Moving Average - SMA - SMA(20)                       0.0
Simple Moving Average - SMA - SMA(50)                       0.0
Simple Moving Average - SMA - SMA(200)                      0.0
Exponential Moving Average - EMA - EMA(9,Modern)            0.0
Exponential Moving Average - EMA - EMA(61,Modern)           0.0
Relative Strength Index - RSI - RSI(14)                     0.0
MACD - MACD(12,26,9)                                        0.0
MACD - Histogram                                            0.0
MACD - Signal Line(MACD(12,26,9),9,Moder

In [8]:
df.iloc[-1]

Date                                                  07/02/2025
Time                                                     4:01 PM
Open                                                     5701.76
High                                                     5701.76
Low                                                      5701.76
Close                                                    5701.76
Volume                                                     32025
Simple Moving Average - SMA - SMA(20)                 5693.27975
Simple Moving Average - SMA - SMA(50)                 5683.24615
Simple Moving Average - SMA - SMA(200)               5707.510588
Exponential Moving Average - EMA - EMA(9,Modern)     5697.200763
Exponential Moving Average - EMA - EMA(61,Modern)    5685.803471
Relative Strength Index - RSI - RSI(14)                58.675792
MACD - MACD(12,26,9)                                    5.035659
MACD - Histogram                                        0.406712
MACD - Signal Line(MACD(1

In [9]:
# See https://docs.python.org/3/library/datetime.html
# The hour is not zero padded, so %-I
df['DateTime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%m/%d/%Y %I:%M %p')

In [10]:
df.head(1)

Unnamed: 0,Date,Time,Open,High,Low,Close,Volume,Simple Moving Average - SMA - SMA(20),Simple Moving Average - SMA - SMA(50),Simple Moving Average - SMA - SMA(200),"Exponential Moving Average - EMA - EMA(9,Modern)","Exponential Moving Average - EMA - EMA(61,Modern)",Relative Strength Index - RSI - RSI(14),"MACD - MACD(12,26,9)",MACD - Histogram,"MACD - Signal Line(MACD(12,26,9),9,Modern)",DateTime
0,06/26/2025,4:00 PM,5601.04,5601.04,5597.09,5597.09,431,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-06-26 16:00:00


# Create forward-looking price targets

In [24]:
# Original, moved down, moved up.
df['Close'], df['Close'].shift(1), df['Close'].shift(-1)

(0      5597.090
 1      5596.000
 2      5596.000
 3      5613.640
 4      5616.305
          ...   
 528    5696.310
 529    5697.550
 530    5699.825
 531    5699.990
 532    5701.760
 Name: Close, Length: 533, dtype: float64,
 0           NaN
 1      5597.090
 2      5596.000
 3      5596.000
 4      5613.640
          ...   
 528    5698.150
 529    5696.310
 530    5697.550
 531    5699.825
 532    5699.990
 Name: Close, Length: 533, dtype: float64,
 0      5596.000
 1      5596.000
 2      5613.640
 3      5616.305
 4      5623.450
          ...   
 528    5697.550
 529    5699.825
 530    5699.990
 531    5701.760
 532         NaN
 Name: Close, Length: 533, dtype: float64)

In [21]:
def create_price_targets(df, profit_per_trade=10, windows=[5, 10, 15]):
    """
    ARGS
    ----
    profit_per_trade: desired profit per trade. Default to 10$.
    windows: a list of time differences in minutes (or almost minutes).
    """
    for window in windows:
        # Take the diff between current and future values.
        # NOTE: we are doing this to explore features.
        # We should do the shift in the other direction for the backtest.
        df[f'price_change_{window}min'] = df['Close'].shift(-window) - df['Close']

        # Look for periods where move in price was in our favour
        # for either a long or a short position.
        df[f'big_move_{window}min'] = (
                abs(
                    df[f'price_change_{window}min']
                ) > profit_per_trade
        ).astype(int)

        # If the change is +10, the set 1 to mark a bearish stance.
        # If change is -10, then set a -1 for bullish.
        # Else, we sideways and we set a 0.
        df[f'direction_{window}min'] = np.where(
            df[f'price_change_{window}min'] > 10,
            1,
            np.where(df[f'price_change_{window}min'] < -10, -1, 0),
        )
    
    # Drop rows where we can't calculate any targets
    df = df.dropna(subset=[f'direction_{max(windows)}min'])
    return df

In [22]:
df = create_price_targets(df)

In [23]:
df.head(2)

Unnamed: 0,Date,Time,Open,High,Low,Close,Volume,Simple Moving Average - SMA - SMA(20),Simple Moving Average - SMA - SMA(50),Simple Moving Average - SMA - SMA(200),...,DateTime,price_change_5min,big_move_5min,direction_5min,price_change_10min,big_move_10min,direction_10min,price_change_15min,big_move_15min,direction_15min
0,06/26/2025,4:00 PM,5601.04,5601.04,5597.09,5597.09,431,0.0,0.0,0.0,...,2025-06-26 16:00:00,26.36,1,1,30.97,1,1,59.07,1,1
1,06/26/2025,4:01 PM,5596.0,5596.0,5596.0,5596.0,37413,0.0,0.0,0.0,...,2025-06-26 16:01:00,26.435,1,1,42.46,1,1,64.3,1,1


In [26]:
df.tail(6)

Unnamed: 0,Date,Time,Open,High,Low,Close,Volume,Simple Moving Average - SMA - SMA(20),Simple Moving Average - SMA - SMA(50),Simple Moving Average - SMA - SMA(200),...,DateTime,price_change_5min,big_move_5min,direction_5min,price_change_10min,big_move_10min,direction_10min,price_change_15min,big_move_15min,direction_15min
527,07/02/2025,3:56 PM,5696.71,5698.15,5696.71,5698.15,300,5687.392125,5677.888928,5709.216978,...,2025-07-02 15:56:00,3.61,0,0,,0,0,,0,0
528,07/02/2025,3:57 PM,5695.53,5696.31,5693.05,5696.31,400,5688.208125,5678.777378,5708.752328,...,2025-07-02 15:57:00,,0,0,,0,0,,0,0
529,07/02/2025,3:58 PM,5691.975,5702.12,5691.975,5697.55,400,5689.450125,5679.770178,5708.258063,...,2025-07-02 15:58:00,,0,0,,0,0,,0,0
530,07/02/2025,3:59 PM,5700.57,5700.57,5699.28,5699.825,300,5690.542875,5680.958578,5708.104338,...,2025-07-02 15:59:00,,0,0,,0,0,,0,0
531,07/02/2025,4:00 PM,5700.365,5700.41,5695.44,5699.99,1100,5691.991875,5682.010978,5707.771788,...,2025-07-02 16:00:00,,0,0,,0,0,,0,0
532,07/02/2025,4:01 PM,5701.76,5701.76,5701.76,5701.76,32025,5693.27975,5683.24615,5707.510588,...,2025-07-02 16:01:00,,0,0,,0,0,,0,0


In [51]:
# Quick correlation check.
indicators = [
    'Simple Moving Average - SMA - SMA(20)',
    'Simple Moving Average - SMA - SMA(50)',
    'Simple Moving Average - SMA - SMA(200)',
    'Exponential Moving Average - EMA - EMA(9,Modern)',
    'Exponential Moving Average - EMA - EMA(61,Modern)',
    'Relative Strength Index - RSI - RSI(14)', 'MACD - MACD(12,26,9)',
    'MACD - Histogram', 'MACD - Signal Line(MACD(12,26,9),9,Modern)'
]

for indicator in indicators:
    if indicator in df.columns:
        corr_5min = df[indicator].corr(df['direction_5min'])
        corr_10min = df[indicator].corr(df['direction_10min'])
        print(f"{indicator[:80]:<50} 5min: {corr_5min:>3.3f} 10min: {corr_10min:>3.3f}")

Simple Moving Average - SMA - SMA(20)              5min: -0.200 10min: -0.182
Simple Moving Average - SMA - SMA(50)              5min: -0.161 10min: -0.237
Simple Moving Average - SMA - SMA(200)             5min: -0.048 10min: -0.019
Exponential Moving Average - EMA - EMA(9,Modern)   5min: -0.160 10min: -0.158
Exponential Moving Average - EMA - EMA(61,Modern)  5min: -0.132 10min: -0.160
Relative Strength Index - RSI - RSI(14)            5min: -0.184 10min: -0.166
MACD - MACD(12,26,9)                               5min: -0.061 10min: -0.005
MACD - Histogram                                   5min: -0.102 10min: -0.134
MACD - Signal Line(MACD(12,26,9),9,Modern)         5min: -0.043 10min: 0.003
