In [57]:
import pandas as pd
import math
import numpy as np
import talib

import matplotlib.pyplot as plt




from sklearn.linear_model import Ridge


pd.set_option('display.max_columns', None)
# Specify the path to the CSV file
csv_file_path = "./stockData/ES_1day.csv"

# Read the CSV file into a DataFrame
ES_1D = pd.read_csv(csv_file_path)
ES_1D = ES_1D.drop(["Volume" , "Volume2"], axis=1)
ES_1D_filtered = ES_1D[(ES_1D['DateTime'] > '2009-06-01') & (ES_1D['DateTime'] < '2021-01-01')]
ES_1D_filtered = ES_1D_filtered.reset_index(drop=True)
ES_1D = ES_1D_filtered

# Print the DataFrame
print("ES_1D")
print(ES_1D.tail())

ES_1D
        DateTime     Open     High     Low    Close
2920  2020-12-24  3685.25  3696.00  3678.5  3695.00
2921  2020-12-28  3682.00  3732.25  3676.0  3727.50
2922  2020-12-29  3731.00  3747.75  3714.5  3720.00
2923  2020-12-30  3724.50  3738.25  3716.5  3724.25
2924  2020-12-31  3725.00  3753.00  3715.0  3748.75


In [58]:
ES = ES_1D.copy()
def compare_ema(ES, short_period, long_period, column='Close'):
    """
    Compare short-period EMA with long-period EMA using TA-Lib, setting 1 for True and 0 for False.

    Parameters:
    - data: DataFrame containing the stock data.
    - short_period: The period for the short EMA.
    - long_period: The period for the long EMA.
    - column: The column name to calculate EMA on.

    Returns:
    - A DataFrame with added columns for short-period EMA, long-period EMA, and a comparison result as 1 or 0.
    """
    # Calculate EMAs
    data = ES.copy()
    
    short_ema = talib.EMA(data[column].values, timeperiod=short_period)
    long_ema = talib.EMA(data[column].values, timeperiod=long_period)

    # Add EMAs to DataFrame
    data[f'EMA_{short_period}'] = short_ema
    data[f'EMA_{long_period}'] = long_ema

    # Compare short EMA to long EMA and convert True/False to 1/0
    data['Short_above_Long'] = (data[f'EMA_{short_period}'] > data[f'EMA_{long_period}']).astype(int)                   
    ES['EMA_compare_'+str(short_period) + '_' + str(long_period)] = data['Short_above_Long']
    return ES

def sum_ema_comparisons(ES ,desired_name):
    """
    Sum all columns with 'EMA_compare' in their column name to get a total count of EMA crossovers.

    Parameters:
    - ES: DataFrame containing the stock data and EMA comparison columns.

    Returns:
    - The original DataFrame with an added column 'Total_EMA_Comparisons' representing the sum of all EMA comparison columns.
    """
    data = ES.copy()
    
    # Filter columns that contain 'EMA_compare'
    ema_compare_columns = data.filter(like='EMA_compare').columns
    
    # Sum these columns row-wise to get the total count of EMA crossovers
    data[desired_name] = data[ema_compare_columns].sum(axis=1)
    data.drop(columns=ema_compare_columns, inplace=True)
    return data


In [61]:
def get_previous_week_high_low(df):
    # Ensure DateTime is in datetime format
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    
    # Initialize columns for the previous week's highest High and lowest Low
    df['Prev_Week_High'] = np.nan
    df['Prev_Week_Low'] = np.nan

    for i, row in df.iterrows():
        # Get the current date
        current_date = row['DateTime']
        
        # Find the last Monday before the current row's date
        last_monday = current_date - pd.Timedelta(days=current_date.weekday()) + pd.Timedelta(weeks=-1)
        
        # Find the last Friday before the current row's date
        last_friday = last_monday + pd.Timedelta(days=4)
        
        # Filter for the previous week's data (Monday to Friday)
        prev_week_data = df[(df['DateTime'] >= last_monday) & (df['DateTime'] <= last_friday)]
        
        # Calculate the highest High and lowest Low of the previous week
        if not prev_week_data.empty:
            df.at[i, 'Prev_Week_High'] = prev_week_data['High'].max()
            df.at[i, 'Prev_Week_Low'] = prev_week_data['Low'].min()

    return df


def get_current_week_high_low(df):
    # Ensure DateTime is in datetime format
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    
    # Initialize columns for the current week's highest High and lowest Low so far
    df['Curr_Week_High_So_Far'] = np.nan
    df['Curr_Week_Low_So_Far'] = np.nan

    for i, row in df.iterrows():
        current_date = row['DateTime']
        
        # Determine the start of the current week (Monday)
        if current_date.weekday() == 0:  # If it's Monday
            week_start_date = current_date
        else:
            week_start_date = current_date - pd.Timedelta(days=current_date.weekday())
        
        # Filter for the current week's data up to and including the current row's date
        current_week_data = df[(df['DateTime'] >= week_start_date) & (df['DateTime'] <= current_date)]
        
        # Calculate the highest High and lowest Low for the current week so far
        df.at[i, 'Curr_Week_High_So_Far'] = current_week_data['High'].max()
        df.at[i, 'Curr_Week_Low_So_Far'] = current_week_data['Low'].min()

    return df


def add_variable_period_adx(df, period):

    # Calculate ADX using the specified period
    adx_column_name = f'ADX_{period}'  # Dynamic column name based on the period
    df[adx_column_name] = talib.ADX(df['High'], df['Low'], df['Close'], timeperiod=period)
    
    return df

def add_rsi_and_conditions(df):
    # Calculate RSI for periods 2 and 3
    df['RSI_2'] = talib.RSI(df['Close'], timeperiod=2)
    df['RSI_3'] = talib.RSI(df['Close'], timeperiod=3)
    
    # Define conditions for RSI_up and RSI_down
    df['RSI_up'] = ((df['RSI_2'] > 70) | (df['RSI_3'] > 70)).astype(int)
    df['RSI_down'] = ((df['RSI_2'] < 20) | (df['RSI_3'] < 20)).astype(int)
    
    # Drop the original RSI period 2 and RSI period 3 columns
    df.drop(['RSI_2', 'RSI_3'], axis=1, inplace=True)
    
    return df

def add_ema50_conditions(df):
    # Calculate EMA for 50-day period and nearby periods
    for period in [48, 49, 50, 51, 52]:
        df[f'EMA_{period}'] = talib.EMA(df['Close'], timeperiod=period)
    
    # Determine if Close is above or below EMA_50
    df['above_EMA50'] = (df['Close'] > df['EMA_50']).astype(int)
    
    # Determine if any of EMA_48, EMA_49, EMA_50, EMA_51, or EMA_52 is between High and Low
    df['touching_EMA50'] = ((df['EMA_48'] >= df['Low']) & (df['EMA_48'] <= df['High']) |
                          (df['EMA_49'] >= df['Low']) & (df['EMA_49'] <= df['High']) |
                          (df['EMA_50'] >= df['Low']) & (df['EMA_50'] <= df['High']) |
                          (df['EMA_51'] >= df['Low']) & (df['EMA_51'] <= df['High']) |
                          (df['EMA_52'] >= df['Low']) & (df['EMA_52'] <= df['High'])).astype(int)
    
    # Optional: Drop individual EMA columns if not needed
    df.drop([f'EMA_{period}' for period in [48, 49, 50, 51, 52]], axis=1, inplace=True)
    
    return df


def add_ema200_conditions(df):
    # Calculate EMA for the 200-day period and nearby periods
    for period in [198, 199, 200, 201, 202]:
        df[f'EMA_{period}'] = talib.EMA(df['Close'], timeperiod=period)
    
    # Determine if Close is above or below EMA_200
    df['above_EMA200'] = (df['Close'] > df['EMA_200']).astype(int)
    
    # Determine if any of EMA_198, EMA_199, EMA_200, EMA_201, or EMA_202 is between High and Low
    df['touching_EMA200'] = ((df['EMA_198'] >= df['Low']) & (df['EMA_198'] <= df['High']) |
                          (df['EMA_199'] >= df['Low']) & (df['EMA_199'] <= df['High']) |
                          (df['EMA_200'] >= df['Low']) & (df['EMA_200'] <= df['High']) |
                          (df['EMA_201'] >= df['Low']) & (df['EMA_201'] <= df['High']) |
                          (df['EMA_202'] >= df['Low']) & (df['EMA_202'] <= df['High'])).astype(int)
    
    # Optional: Drop individual EMA columns if not needed
    df.drop([f'EMA_{period}' for period in [198, 199, 200, 201, 202]], axis=1, inplace=True)
    
    return df


def add_ema8_conditions(df):
    # Calculate EMA for 8-day period and for the 7, 8, 9-day periods as well
    df['EMA_8'] = talib.EMA(df['Close'], timeperiod=8)
    df['EMA_7'] = talib.EMA(df['Close'], timeperiod=7)
    df['EMA_9'] = talib.EMA(df['Close'], timeperiod=9)
    
    # Determine if Close is above EMA_8
    df['above_EMA8'] = (df['Close'] > df['EMA_8']).astype(int)
    
    # Determine if the high and low range is touching any of EMA_7, EMA_8, or EMA_9
    df['touching_EMA8'] = ((df['EMA_7'] >= df['Low']) & (df['EMA_7'] <= df['High']) |
                          (df['EMA_8'] >= df['Low']) & (df['EMA_8'] <= df['High']) |
                          (df['EMA_9'] >= df['Low']) & (df['EMA_9'] <= df['High'])).astype(int)
    
    df.drop(['EMA_7','EMA_8', 'EMA_9'], axis=1, inplace=True)
    
    return df


def add_bollinger_band_features(df):
    # Calculate Bollinger Bands
    upperband, middleband, lowerband = talib.BBANDS(df['Close'], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    
    # Determine if the high and low range is touching the Bollinger Bands
    df['touching_middle'] = ((df['Low'] <= middleband) & (df['High'] >= middleband)).astype(int)
    df['touching_upper'] = ((df['Low'] <= upperband) & (df['High'] >= upperband)).astype(int)
    df['touching_lower'] = ((df['Low'] <= lowerband) & (df['High'] >= lowerband)).astype(int)
    
    # Calculate the normalized value (close - lower line) / (upper line - lower line)
    # and cap the values at 1.5 and -0.5
    normalized_value = (df['Close'] - lowerband) / (upperband - lowerband)
    normalized_value_capped = normalized_value.clip(-0.5, 1.5)
    
    df['normalized_value'] = normalized_value_capped
    
    return df


def add_engulfing_columns(df):
    # Calculate the Engulfing pattern
    engulfing = talib.CDLENGULFING(df['Open'], df['High'], df['Low'], df['Close'])
    
    # Create Bullish Engulfing column (1 for bullish engulfing, 0 otherwise)
    df['Bullish_Engulfing'] = (engulfing > 0).astype(int)
    
    # Create Bearish Engulfing column (-1 for bearish engulfing, 0 otherwise)
    df['Bearish_Engulfing'] = (engulfing < 0).astype(int)
    
    return df

def add_hammer_invertedhammer_columns_binary(df):
    # Calculate the Hammer pattern and convert to binary (1 for presence, 0 for absence)
    df['Hammer'] = (talib.CDLHAMMER(df['Open'], df['High'], df['Low'], df['Close']) != 0).astype(int)
    
    # Calculate the Inverted Hammer pattern and convert to binary (1 for presence, 0 for absence)
    df['Inverted_Hammer'] = (talib.CDLINVERTEDHAMMER(df['Open'], df['High'], df['Low'], df['Close']) != 0).astype(int)
    
    return df



def add_grouped_star_columns(df):
    # Calculate Morning Doji Star and Morning Star patterns
    morning_doji_star = talib.CDLMORNINGDOJISTAR(df['Open'], df['High'], df['Low'], df['Close'])
    morning_star = talib.CDLMORNINGSTAR(df['Open'], df['High'], df['Low'], df['Close'])
    
    # Group together and mark as 1 if either pattern is identified
    df['Bullish_star'] = ((morning_doji_star != 0) | (morning_star != 0)).astype(int)
    
    # Calculate Evening Doji Star and Evening Star patterns
    evening_doji_star = talib.CDLEVENINGDOJISTAR(df['Open'], df['High'], df['Low'], df['Close'])
    evening_star = talib.CDLEVENINGSTAR(df['Open'], df['High'], df['Low'], df['Close'])
    
    # Group together and mark as 1 if either pattern is identified
    df['Bearish_star'] = ((evening_doji_star != 0) | (evening_star != 0)).astype(int)
    
    return df

def combine_and_clean_candlestick_patterns(df):
    # Combine bullish patterns into a single column
    bullish_columns = ['Bullish_Engulfing', 'Hammer', 'Bullish_star']
    df['bullish_candle_pattern'] = df[bullish_columns].any(axis=1).astype(int)
    
    # Combine bearish patterns into a single column
    bearish_columns = ['Bearish_Engulfing', 'Inverted_Hammer', 'Bearish_star']
    df['bearish_candle_pattern'] = df[bearish_columns].any(axis=1).astype(int)
    
    # Drop the original columns used for combination
    df.drop(bullish_columns + bearish_columns, axis=1, inplace=True)
    
    return df


def add_ema_strong_trend_conditions(df):
    # Calculate the required EMAs
    df['EMA_3'] = talib.EMA(df['Close'], timeperiod=3)
    df['EMA_9'] = talib.EMA(df['Close'], timeperiod=9)
    df['EMA_18'] = talib.EMA(df['Close'], timeperiod=18)
    df['EMA_2'] = talib.EMA(df['Close'], timeperiod=2)
    df['EMA_5'] = talib.EMA(df['Close'], timeperiod=5)
    df['EMA_10'] = talib.EMA(df['Close'], timeperiod=10)
    
    # Add columns based on conditions
    df['ema3_gt_ema9_gt_ema18'] = ((df['EMA_3'] > df['EMA_9']) & (df['EMA_9'] > df['EMA_18'])).astype(int)
    df['ema3_lt_ema9_lt_ema18'] = ((df['EMA_3'] < df['EMA_9']) & (df['EMA_9'] < df['EMA_18'])).astype(int)
    df['ema2_gt_ema5_gt_ema10'] = ((df['EMA_2'] > df['EMA_5']) & (df['EMA_5'] > df['EMA_10'])).astype(int)
    df['ema2_lt_ema5_lt_ema10'] = ((df['EMA_2'] < df['EMA_5']) & (df['EMA_5'] < df['EMA_10'])).astype(int)
    
    # Drop the EMA columns
    df.drop(['EMA_3', 'EMA_9', 'EMA_18', 'EMA_2', 'EMA_5', 'EMA_10'], axis=1, inplace=True)
    
    return df

In [62]:
def grab_high_liq(df, target_date):
    """
    Check the criteria for a single row specified by a date in the 'DateTime' column, with an added condition
    that the past high must be higher than the highs of the bars immediately before and after it, iterating backward
    from 2 days ago to 300 days ago.
    
    Parameters:
    - df: DataFrame with a 'High' column and a 'DateTime' column.
    - target_date: The date string specifying the row to check, matching the format in 'DateTime'.
    
    Returns:
    - True if all criteria are met, False otherwise.
    """
    # Convert target_date to datetime if it's not already
    target_date = pd.to_datetime(target_date)
    print(target_date)
    # Ensure target_date exists in the 'DateTime' column
    if target_date not in pd.to_datetime(df['DateTime']).values:
        raise ValueError("The specified date does not exist in the DataFrame.")

    # Find the row index for the specified date
    row_index = df.index[df['DateTime'] == target_date.strftime('%Y-%m-%d')].tolist()[0]
    
    if row_index == 0 or row_index == 1:  # Need at least 2 days prior to compare
        return False , False

    current_high = df.at[row_index, 'High']
    previous_high = df.at[row_index - 1, 'High']
    current_close = df.at[row_index, 'Close']
    if current_high <= previous_high:
        return False , False

    # Loop backward from 2 days ago to up to 300 days ago, within DataFrame bounds
    for i in range(min(len(df) - 1, row_index - 2), max(0, row_index - 600), -1):
        past_high = df.at[i, 'High']
        
        if past_high > current_high:
            return False , False

        if i > 0 and i < len(df) - 1:  # Check bounds
            high_before = df.at[i - 1, 'High']
            high_after = df.at[i + 1, 'High']

            if previous_high < past_high < current_high and past_high > high_before and past_high > high_after:
                return (current_close > past_high) , (current_close <= past_high)  # All criteria including the peak condition are met

    return False , False

def grab_low_liq(df0, target_date):
    df_buffer = df0.copy()
    df = df0.copy()
    
    df['High'] = 10000 - df_buffer['Low']
    df['Low'] = 10000 - df_buffer['High']
    df['Close'] = 10000 - df_buffer['Close']
    df['Open'] = 10000 - df_buffer['Open']
    #print(target_date , "low")
    a,b = grab_high_liq(df, target_date)
    #print(a,b,target_date )
    return a,b 


In [63]:

def plot_with_all_lines_and_return_ratios(data, datetime_column='DateTime', column='Close', period_length=50, exclude_recent_days=5 , draw_mode= False):
    from sklearn.linear_model import LinearRegression, Ridge

    
    df = data.copy()
    if(len(df) <= period_length + exclude_recent_days ) :
        return None, None, None, None
    # Full data for plotting: Including the excluded days
    plot_data = df.iloc[-(period_length + exclude_recent_days):]

    # Data for regression: Excluding the last 'exclude_recent_days'
    regression_data = plot_data.iloc[:-exclude_recent_days]
    
    # Excluded days data for special plotting and analysis
    excluded_data = plot_data.iloc[-exclude_recent_days:]

    # Prepare the data for Ridge Regression
    X_regression = np.arange(len(regression_data)).reshape(-1, 1)  # Days as numerical features for regression
    y_regression = regression_data[column].values  # Values to fit for regression

    # Fit the Ridge Regression model on the regression_data
    ridge_model = Ridge(alpha=1.0)
    ridge_model.fit(X_regression, y_regression)

    # Predict values using the fitted model for the full data
    X_full = np.linspace(0, len(plot_data)-1, len(plot_data)).reshape(-1, 1)
    y_pred_full = ridge_model.predict(X_full)

    # Calculate residuals for the regression period
    residuals = y_regression - ridge_model.predict(X_regression)

    # Collecting peak and bottom residual dates and values
    peak_indices, peak_values, bottom_indices, bottom_values = [], [], [], []
    if draw_mode:
        plt.figure(figsize=(12, 7))
        plt.plot(plot_data.index, plot_data[column], label='Actual ' + column, marker='o', linestyle='-', color='lightgray')
        plt.plot(plot_data.index, y_pred_full, label='Middle Ridge Regression Line', color='blue', linestyle='--')

        # Plot the excluded days as green dots
        plt.scatter(excluded_data.index, excluded_data[column], color='green', label='Excluded Days', zorder=5)

    quarter_length = len(regression_data) // 4
    for i in range(4):
        start = i * quarter_length
        end = start + quarter_length
        quarter_residuals = residuals[start:end]

        # For peak residuals
        max_residual_index = np.argmax(quarter_residuals)
        peak_indices.append(start + max_residual_index)
        peak_values.append(regression_data[column].iloc[start + max_residual_index])

        # For bottom residuals
        min_residual_index = np.argmin(quarter_residuals)
        bottom_indices.append(start + min_residual_index)
        bottom_values.append(regression_data[column].iloc[start + min_residual_index])

    # Fit and plot lines through the peak and bottom residual points
    if len(peak_indices) > 1 and len(bottom_indices) > 1:
        # Top line (Peak residuals)
        peak_model = LinearRegression()
        peak_model.fit(np.array(peak_indices).reshape(-1, 1), np.array(peak_values))
        y_peak_pred = peak_model.predict(X_full)
        if draw_mode:
            plt.plot(plot_data.index, y_peak_pred, color='red', linestyle='-', label='Top Line Through Peaks')

        # Bottom line (Minimum residuals)
        bottom_model = LinearRegression()
        
        bottom_model.fit(np.array(bottom_indices).reshape(-1, 1), np.array(bottom_values))
        y_bottom_pred = bottom_model.predict(X_full)
        if draw_mode:
            plt.plot(plot_data.index, y_bottom_pred, color='purple', linestyle='-', label='Bottom Line Through Bottoms')

    # Calculate ratios for excluded days relative to top and bottom lines
    excluded_close_prices = excluded_data[column].values
    ratios = (excluded_close_prices - y_bottom_pred[-exclude_recent_days:]) / (y_peak_pred[-exclude_recent_days:] - y_bottom_pred[-exclude_recent_days:])
    ratios_constrained = np.clip(ratios, -0.3, 1.3)  # Apply constraints
    
    if draw_mode:
        plt.title(f'Top and Bottom Lines in Recent {period_length} Days Excluding Last {exclude_recent_days}')
        plt.xlabel('Date')
        plt.ylabel(column + ' Price')
        plt.legend()
        plt.xticks(rotation=45)
        plt.show()
    
    #print(ratios_constrained)
    minimum_point = min(ratios_constrained) 
    max_point = max(ratios_constrained)
    current_point = ratios_constrained[-1]
    #print(minimum_point, max_point , current_point)
    
    # Assuming ridge_model is already fitted as shown in your snippet
    slope = ridge_model.coef_[0]  # Access the first (and in this case, only) coefficient

    #print(f"The slope of the middle regression line is: {slope}")

    total_days = period_length+exclude_recent_days
    closing_price_days_ago = data[column].iloc[-total_days]  # Get the closing price 40 rows before the last
    #print(f"Closing price from n days (rows) before the last: {closing_price_days_ago}")
    normalized_slope = slope*(total_days)/closing_price_days_ago
    #print(f"normalized_slope: {normalized_slope}")
    
    return minimum_point, max_point , current_point , normalized_slope
# Note: Replace 'data' with your actual DataFrame variable when calling the function.
# ratios_constrained = plot_with_all_lines_and_return_ratios(data, 'DateTime', 'Close', 50, 5)
# print(ratios_constrained)

def extract_latest_100days(df0, target_date):
    # Ensure 'DateTime' column is in datetime format
    df = df0.copy()
    
    df['DateTime'] = pd.to_datetime(df['DateTime'])

    # Ensure the DataFrame is sorted by 'DateTime'
    df = df.sort_values('DateTime')

    # Find the index of the row for the target_date
    target_index = df.index[df['DateTime'] == pd.to_datetime(target_date)].tolist()

    # Check if the target_date is found in the DataFrame
    if not target_index:
        raise ValueError(f"The specified date {target_date} does not exist in the DataFrame.")
    target_index = target_index[0]

    # Calculate the starting index
    start_index = max(0, target_index - 100)  # Ensure it does not go below 0

    # Filter out the rows
    result_df = df.iloc[start_index:target_index + 1]

    return result_df

ES_100days = extract_latest_100days(ES_1D, '2019-4-10')
print(ES_100days)
    
plot_with_all_lines_and_return_ratios(ES_100days, datetime_column='DateTime', column='Close', period_length=60, exclude_recent_days=7)



       DateTime     Open     High      Low    Close
2388 2018-11-14  2731.25  2748.25  2686.25  2698.50
2389 2018-11-15  2702.00  2736.75  2671.25  2734.50
2390 2018-11-16  2731.75  2748.75  2708.75  2743.00
2391 2018-11-19  2733.00  2748.00  2681.50  2696.25
2392 2018-11-20  2696.25  2696.75  2631.75  2640.00
...         ...      ...      ...      ...      ...
2484 2019-04-04  2880.75  2885.75  2871.00  2882.75
2485 2019-04-05  2882.50  2898.00  2880.50  2896.00
2486 2019-04-08  2896.25  2899.75  2884.25  2898.25
2487 2019-04-09  2898.50  2900.00  2877.25  2882.50
2488 2019-04-10  2882.00  2896.00  2878.00  2894.50

[101 rows x 5 columns]


(0.32814820057679667,
 0.6458370608311864,
 0.4298946708744972,
 0.1277351647416847)

In [64]:
def feature_engineering_analysis(ES):
    # EMA comparisons for short periods
    for L in range(4, 9):
        ES = compare_ema(ES, 2, L)
    for L in range(5, 9):
        ES = compare_ema(ES, 3, L)
    # Sum up the EMA comparisons for short periods
    ES = sum_ema_comparisons(ES, 'Total_Short_EMA')
    
    # EMA comparisons for longer periods
    for L in range(15, 19):
        ES = compare_ema(ES, 8, L)
    for L in range(15, 19):
        ES = compare_ema(ES, 9, L)
    # Sum up the EMA comparisons for longer periods
    ES = sum_ema_comparisons(ES, 'Total_Long_EMA')
    
    # Trend direction indicators
    # Identify if the current high is higher than the previous high
    ES['Higher_High'] = (ES['High'] > ES['High'].shift(1)).astype(int)
    
    # Identify if the current low is lower than the previous low
    ES['Lower_Low'] = (ES['Low'] < ES['Low'].shift(1)).astype(int)
    
    # Price crossing indicators
    # Identify if the current close is above the previous high
    ES['Close_Above_High'] = (ES['Close'] > ES['High'].shift(1)).astype(int)
    
    # Identify if the current close is below the previous low
    ES['Close_Below_Low'] = (ES['Close'] < ES['Low'].shift(1)).astype(int)
    
    # OHLC average and position within range
    # Calculate the average of Open, High, Low, and Close for each row
    ES['OHLC_Average'] = ES[['Open', 'High', 'Low', 'Close']].mean(axis=1)
    
    # Calculate the highest high and the lowest low from the previous 20 and 40 rows
    ES['Highest_High_20'] = ES['High'].rolling(window=20, min_periods=1).max()
    ES['Lowest_Low_20'] = ES['Low'].rolling(window=20, min_periods=1).min()
    ES['Highest_High_40'] = ES['High'].rolling(window=40, min_periods=1).max()
    ES['Lowest_Low_40'] = ES['Low'].rolling(window=40, min_periods=1).min()
    
    # Calculate the OHLC Average position within the 20 and 40 rows' high-low range
    ES['OHLC_Position_20'] = (ES['OHLC_Average'] - ES['Lowest_Low_20']) / (ES['Highest_High_20'] - ES['Lowest_Low_20'])
    ES['OHLC_Position_40'] = (ES['OHLC_Average'] - ES['Lowest_Low_40']) / (ES['Highest_High_40'] - ES['Lowest_Low_40'])

    # Replace potential NaN or infinity values from division by zero with 0
    ES.replace([np.inf, -np.inf, np.nan], 0, inplace=True)

    # Clean up: Drop intermediate columns used for calculations
    ES.drop(columns=['Highest_High_20', 'Highest_High_40', 'Lowest_Low_20', 'Lowest_Low_40'], inplace=True)
    
    
    ES = get_previous_week_high_low(ES)
    ES = get_current_week_high_low(ES)
    
    ES['Prev_Week_High_Taken'] = ES['Curr_Week_High_So_Far'] > ES['Prev_Week_High']
    ES['Prev_Week_Low_Taken'] = ES['Curr_Week_Low_So_Far'] < ES['Prev_Week_Low']
    
    
    ES['Close_Above_Prev_Week_High'] = ES['Close'] > ES['Prev_Week_High']
    ES['Close_Below_Prev_Week_Low'] = ES['Close'] < ES['Prev_Week_Low']
    
    ES.drop(columns=['Prev_Week_High', 'Prev_Week_Low', 'Curr_Week_High_So_Far', 'Curr_Week_Low_So_Far'], inplace=True)
    ES = add_variable_period_adx(ES , 4)
    ES = add_variable_period_adx(ES , 7)
    ES = add_variable_period_adx(ES , 14)
    
    ES = add_rsi_and_conditions(ES)
    
    ES = add_ema50_conditions(ES)
    
    ES = add_ema200_conditions(ES)
    
    ES = add_ema8_conditions(ES)
    
    ES = add_bollinger_band_features(ES)
    
    ES = add_engulfing_columns(ES)
    
    ES = add_hammer_invertedhammer_columns_binary(ES)
    
    ES = add_grouped_star_columns(ES)
    ES = combine_and_clean_candlestick_patterns(ES)
    
    ES = add_ema_strong_trend_conditions(ES)
    
    
    
    ES['DateTime'] = pd.to_datetime(ES['DateTime'])
    #ES.sort_values('DateTime', inplace=True)

    # Initialize the new columns to False
    ES['lq_close_gt_ph'] = False
    ES['lq_close_lt_ph'] = False
    ES['lq_close_gt_ph_low'] = False
    ES['lq_close_lt_ph_low'] = False
    
    
    
    ES['minimum_point'] = None
    ES['max_point'] = None
    ES['current_point'] = None
    ES['normalized_slope'] = None
    for date in ES['DateTime'].dt.strftime('%Y-%m-%d').unique():
        lq_close_gt_ph, lq_close_lt_ph = grab_high_liq(ES, date)
        lq_close_gt_ph_low , lq_close_lt_ph_low   = grab_low_liq(ES, date)
        # Update the DataFrame only if one of the conditions is True
        if lq_close_gt_ph or lq_close_lt_ph or lq_close_gt_ph_low or lq_close_lt_ph_low:
            ES.loc[ES['DateTime'] == pd.to_datetime(date), 'lq_close_gt_ph'] = lq_close_gt_ph
            ES.loc[ES['DateTime'] == pd.to_datetime(date), 'lq_close_lt_ph'] = lq_close_lt_ph
            ES.loc[ES['DateTime'] == pd.to_datetime(date), 'lq_close_gt_ph_low'] = lq_close_gt_ph_low
            ES.loc[ES['DateTime'] == pd.to_datetime(date), 'lq_close_lt_ph_low'] = lq_close_lt_ph_low
    
    for date in ES['DateTime'].dt.strftime('%Y-%m-%d').unique():      
        ES_100days = extract_latest_100days(ES, date)
        minimum_point, max_point , current_point , normalized_slope = plot_with_all_lines_and_return_ratios(ES_100days, datetime_column='DateTime', column='Close', period_length=60, exclude_recent_days=7)    
        ES.loc[ES['DateTime'] == pd.to_datetime(date), 'minimum_point'] = minimum_point
        ES.loc[ES['DateTime'] == pd.to_datetime(date), 'max_point'] = max_point
        ES.loc[ES['DateTime'] == pd.to_datetime(date), 'current_point'] = current_point
        ES.loc[ES['DateTime'] == pd.to_datetime(date), 'normalized_slope'] = normalized_slope
    
    ES = ES.drop(["OHLC_Average"], axis=1)
    ES['Target_goingUp'] = ES['Higher_High'] * (1- ES['Lower_Low']) 
    ES['Target_goingUp'] = ES['Target_goingUp'].shift(-1)
    
        
    return ES


In [65]:
ES = ES_1D.copy()
# Assuming your DataFrame ES_1D is loaded and contains the stock data
ES = feature_engineering_analysis(ES)

# Display the last few rows to verify the comparison
ES.tail(20)


2009-06-02 00:00:00
2009-06-02 00:00:00
2009-06-03 00:00:00
2009-06-03 00:00:00
2009-06-04 00:00:00
2009-06-04 00:00:00
2009-06-05 00:00:00
2009-06-05 00:00:00
2009-06-08 00:00:00
2009-06-08 00:00:00
2009-06-09 00:00:00
2009-06-09 00:00:00
2009-06-10 00:00:00
2009-06-10 00:00:00
2009-06-11 00:00:00
2009-06-11 00:00:00
2009-06-12 00:00:00
2009-06-12 00:00:00
2009-06-15 00:00:00
2009-06-15 00:00:00
2009-06-16 00:00:00
2009-06-16 00:00:00
2009-06-17 00:00:00
2009-06-17 00:00:00
2009-06-18 00:00:00
2009-06-18 00:00:00
2009-06-19 00:00:00
2009-06-19 00:00:00
2009-06-22 00:00:00
2009-06-22 00:00:00
2009-06-23 00:00:00
2009-06-23 00:00:00
2009-06-24 00:00:00
2009-06-24 00:00:00
2009-06-25 00:00:00
2009-06-25 00:00:00
2009-06-26 00:00:00
2009-06-26 00:00:00
2009-06-29 00:00:00
2009-06-29 00:00:00
2009-06-30 00:00:00
2009-06-30 00:00:00
2009-07-01 00:00:00
2009-07-01 00:00:00
2009-07-02 00:00:00
2009-07-02 00:00:00
2009-07-03 00:00:00
2009-07-03 00:00:00
2009-07-06 00:00:00
2009-07-06 00:00:00


2010-03-26 00:00:00
2010-03-26 00:00:00
2010-03-29 00:00:00
2010-03-29 00:00:00
2010-03-30 00:00:00
2010-03-30 00:00:00
2010-03-31 00:00:00
2010-03-31 00:00:00
2010-04-01 00:00:00
2010-04-01 00:00:00
2010-04-02 00:00:00
2010-04-02 00:00:00
2010-04-05 00:00:00
2010-04-05 00:00:00
2010-04-06 00:00:00
2010-04-06 00:00:00
2010-04-07 00:00:00
2010-04-07 00:00:00
2010-04-08 00:00:00
2010-04-08 00:00:00
2010-04-09 00:00:00
2010-04-09 00:00:00
2010-04-12 00:00:00
2010-04-12 00:00:00
2010-04-13 00:00:00
2010-04-13 00:00:00
2010-04-14 00:00:00
2010-04-14 00:00:00
2010-04-15 00:00:00
2010-04-15 00:00:00
2010-04-16 00:00:00
2010-04-16 00:00:00
2010-04-19 00:00:00
2010-04-19 00:00:00
2010-04-20 00:00:00
2010-04-20 00:00:00
2010-04-21 00:00:00
2010-04-21 00:00:00
2010-04-22 00:00:00
2010-04-22 00:00:00
2010-04-23 00:00:00
2010-04-23 00:00:00
2010-04-26 00:00:00
2010-04-26 00:00:00
2010-04-27 00:00:00
2010-04-27 00:00:00
2010-04-28 00:00:00
2010-04-28 00:00:00
2010-04-29 00:00:00
2010-04-29 00:00:00


2011-02-24 00:00:00
2011-02-25 00:00:00
2011-02-25 00:00:00
2011-02-28 00:00:00
2011-02-28 00:00:00
2011-03-01 00:00:00
2011-03-01 00:00:00
2011-03-02 00:00:00
2011-03-02 00:00:00
2011-03-03 00:00:00
2011-03-03 00:00:00
2011-03-04 00:00:00
2011-03-04 00:00:00
2011-03-07 00:00:00
2011-03-07 00:00:00
2011-03-08 00:00:00
2011-03-08 00:00:00
2011-03-09 00:00:00
2011-03-09 00:00:00
2011-03-10 00:00:00
2011-03-10 00:00:00
2011-03-11 00:00:00
2011-03-11 00:00:00
2011-03-14 00:00:00
2011-03-14 00:00:00
2011-03-15 00:00:00
2011-03-15 00:00:00
2011-03-16 00:00:00
2011-03-16 00:00:00
2011-03-17 00:00:00
2011-03-17 00:00:00
2011-03-18 00:00:00
2011-03-18 00:00:00
2011-03-21 00:00:00
2011-03-21 00:00:00
2011-03-22 00:00:00
2011-03-22 00:00:00
2011-03-23 00:00:00
2011-03-23 00:00:00
2011-03-24 00:00:00
2011-03-24 00:00:00
2011-03-25 00:00:00
2011-03-25 00:00:00
2011-03-28 00:00:00
2011-03-28 00:00:00
2011-03-29 00:00:00
2011-03-29 00:00:00
2011-03-30 00:00:00
2011-03-30 00:00:00
2011-03-31 00:00:00


2011-12-19 00:00:00
2011-12-20 00:00:00
2011-12-20 00:00:00
2011-12-21 00:00:00
2011-12-21 00:00:00
2011-12-22 00:00:00
2011-12-22 00:00:00
2011-12-23 00:00:00
2011-12-23 00:00:00
2011-12-27 00:00:00
2011-12-27 00:00:00
2011-12-28 00:00:00
2011-12-28 00:00:00
2011-12-29 00:00:00
2011-12-29 00:00:00
2011-12-30 00:00:00
2011-12-30 00:00:00
2012-01-03 00:00:00
2012-01-03 00:00:00
2012-01-04 00:00:00
2012-01-04 00:00:00
2012-01-05 00:00:00
2012-01-05 00:00:00
2012-01-06 00:00:00
2012-01-06 00:00:00
2012-01-09 00:00:00
2012-01-09 00:00:00
2012-01-10 00:00:00
2012-01-10 00:00:00
2012-01-11 00:00:00
2012-01-11 00:00:00
2012-01-12 00:00:00
2012-01-12 00:00:00
2012-01-13 00:00:00
2012-01-13 00:00:00
2012-01-17 00:00:00
2012-01-17 00:00:00
2012-01-18 00:00:00
2012-01-18 00:00:00
2012-01-19 00:00:00
2012-01-19 00:00:00
2012-01-20 00:00:00
2012-01-20 00:00:00
2012-01-23 00:00:00
2012-01-23 00:00:00
2012-01-24 00:00:00
2012-01-24 00:00:00
2012-01-25 00:00:00
2012-01-25 00:00:00
2012-01-26 00:00:00


2012-11-19 00:00:00
2012-11-20 00:00:00
2012-11-20 00:00:00
2012-11-21 00:00:00
2012-11-21 00:00:00
2012-11-23 00:00:00
2012-11-23 00:00:00
2012-11-26 00:00:00
2012-11-26 00:00:00
2012-11-27 00:00:00
2012-11-27 00:00:00
2012-11-28 00:00:00
2012-11-28 00:00:00
2012-11-29 00:00:00
2012-11-29 00:00:00
2012-11-30 00:00:00
2012-11-30 00:00:00
2012-12-03 00:00:00
2012-12-03 00:00:00
2012-12-04 00:00:00
2012-12-04 00:00:00
2012-12-05 00:00:00
2012-12-05 00:00:00
2012-12-06 00:00:00
2012-12-06 00:00:00
2012-12-07 00:00:00
2012-12-07 00:00:00
2012-12-10 00:00:00
2012-12-10 00:00:00
2012-12-11 00:00:00
2012-12-11 00:00:00
2012-12-12 00:00:00
2012-12-12 00:00:00
2012-12-13 00:00:00
2012-12-13 00:00:00
2012-12-14 00:00:00
2012-12-14 00:00:00
2012-12-17 00:00:00
2012-12-17 00:00:00
2012-12-18 00:00:00
2012-12-18 00:00:00
2012-12-19 00:00:00
2012-12-19 00:00:00
2012-12-20 00:00:00
2012-12-20 00:00:00
2012-12-21 00:00:00
2012-12-21 00:00:00
2012-12-24 00:00:00
2012-12-24 00:00:00
2012-12-26 00:00:00


2013-10-21 00:00:00
2013-10-22 00:00:00
2013-10-22 00:00:00
2013-10-23 00:00:00
2013-10-23 00:00:00
2013-10-24 00:00:00
2013-10-24 00:00:00
2013-10-25 00:00:00
2013-10-25 00:00:00
2013-10-28 00:00:00
2013-10-28 00:00:00
2013-10-29 00:00:00
2013-10-29 00:00:00
2013-10-30 00:00:00
2013-10-30 00:00:00
2013-10-31 00:00:00
2013-10-31 00:00:00
2013-11-01 00:00:00
2013-11-01 00:00:00
2013-11-04 00:00:00
2013-11-04 00:00:00
2013-11-05 00:00:00
2013-11-05 00:00:00
2013-11-06 00:00:00
2013-11-06 00:00:00
2013-11-07 00:00:00
2013-11-07 00:00:00
2013-11-08 00:00:00
2013-11-08 00:00:00
2013-11-11 00:00:00
2013-11-11 00:00:00
2013-11-12 00:00:00
2013-11-12 00:00:00
2013-11-13 00:00:00
2013-11-13 00:00:00
2013-11-14 00:00:00
2013-11-14 00:00:00
2013-11-15 00:00:00
2013-11-15 00:00:00
2013-11-18 00:00:00
2013-11-18 00:00:00
2013-11-19 00:00:00
2013-11-19 00:00:00
2013-11-20 00:00:00
2013-11-20 00:00:00
2013-11-21 00:00:00
2013-11-21 00:00:00
2013-11-22 00:00:00
2013-11-22 00:00:00
2013-11-25 00:00:00


2014-09-02 00:00:00
2014-09-03 00:00:00
2014-09-03 00:00:00
2014-09-04 00:00:00
2014-09-04 00:00:00
2014-09-05 00:00:00
2014-09-05 00:00:00
2014-09-08 00:00:00
2014-09-08 00:00:00
2014-09-09 00:00:00
2014-09-09 00:00:00
2014-09-10 00:00:00
2014-09-10 00:00:00
2014-09-11 00:00:00
2014-09-11 00:00:00
2014-09-12 00:00:00
2014-09-12 00:00:00
2014-09-15 00:00:00
2014-09-15 00:00:00
2014-09-16 00:00:00
2014-09-16 00:00:00
2014-09-17 00:00:00
2014-09-17 00:00:00
2014-09-18 00:00:00
2014-09-18 00:00:00
2014-09-19 00:00:00
2014-09-19 00:00:00
2014-09-22 00:00:00
2014-09-22 00:00:00
2014-09-23 00:00:00
2014-09-23 00:00:00
2014-09-24 00:00:00
2014-09-24 00:00:00
2014-09-25 00:00:00
2014-09-25 00:00:00
2014-09-26 00:00:00
2014-09-26 00:00:00
2014-09-29 00:00:00
2014-09-29 00:00:00
2014-09-30 00:00:00
2014-09-30 00:00:00
2014-10-01 00:00:00
2014-10-01 00:00:00
2014-10-02 00:00:00
2014-10-02 00:00:00
2014-10-03 00:00:00
2014-10-03 00:00:00
2014-10-06 00:00:00
2014-10-06 00:00:00
2014-10-07 00:00:00


2015-08-04 00:00:00
2015-08-05 00:00:00
2015-08-05 00:00:00
2015-08-06 00:00:00
2015-08-06 00:00:00
2015-08-07 00:00:00
2015-08-07 00:00:00
2015-08-10 00:00:00
2015-08-10 00:00:00
2015-08-11 00:00:00
2015-08-11 00:00:00
2015-08-12 00:00:00
2015-08-12 00:00:00
2015-08-13 00:00:00
2015-08-13 00:00:00
2015-08-14 00:00:00
2015-08-14 00:00:00
2015-08-17 00:00:00
2015-08-17 00:00:00
2015-08-18 00:00:00
2015-08-18 00:00:00
2015-08-19 00:00:00
2015-08-19 00:00:00
2015-08-20 00:00:00
2015-08-20 00:00:00
2015-08-21 00:00:00
2015-08-21 00:00:00
2015-08-24 00:00:00
2015-08-24 00:00:00
2015-08-25 00:00:00
2015-08-25 00:00:00
2015-08-26 00:00:00
2015-08-26 00:00:00
2015-08-27 00:00:00
2015-08-27 00:00:00
2015-08-28 00:00:00
2015-08-28 00:00:00
2015-08-31 00:00:00
2015-08-31 00:00:00
2015-09-01 00:00:00
2015-09-01 00:00:00
2015-09-02 00:00:00
2015-09-02 00:00:00
2015-09-03 00:00:00
2015-09-03 00:00:00
2015-09-04 00:00:00
2015-09-04 00:00:00
2015-09-08 00:00:00
2015-09-08 00:00:00
2015-09-09 00:00:00


2016-07-01 00:00:00
2016-07-05 00:00:00
2016-07-05 00:00:00
2016-07-06 00:00:00
2016-07-06 00:00:00
2016-07-07 00:00:00
2016-07-07 00:00:00
2016-07-08 00:00:00
2016-07-08 00:00:00
2016-07-11 00:00:00
2016-07-11 00:00:00
2016-07-12 00:00:00
2016-07-12 00:00:00
2016-07-13 00:00:00
2016-07-13 00:00:00
2016-07-14 00:00:00
2016-07-14 00:00:00
2016-07-15 00:00:00
2016-07-15 00:00:00
2016-07-18 00:00:00
2016-07-18 00:00:00
2016-07-19 00:00:00
2016-07-19 00:00:00
2016-07-20 00:00:00
2016-07-20 00:00:00
2016-07-21 00:00:00
2016-07-21 00:00:00
2016-07-22 00:00:00
2016-07-22 00:00:00
2016-07-25 00:00:00
2016-07-25 00:00:00
2016-07-26 00:00:00
2016-07-26 00:00:00
2016-07-27 00:00:00
2016-07-27 00:00:00
2016-07-28 00:00:00
2016-07-28 00:00:00
2016-07-29 00:00:00
2016-07-29 00:00:00
2016-08-01 00:00:00
2016-08-01 00:00:00
2016-08-02 00:00:00
2016-08-02 00:00:00
2016-08-03 00:00:00
2016-08-03 00:00:00
2016-08-04 00:00:00
2016-08-04 00:00:00
2016-08-05 00:00:00
2016-08-05 00:00:00
2016-08-08 00:00:00


2017-04-27 00:00:00
2017-04-28 00:00:00
2017-04-28 00:00:00
2017-05-01 00:00:00
2017-05-01 00:00:00
2017-05-02 00:00:00
2017-05-02 00:00:00
2017-05-03 00:00:00
2017-05-03 00:00:00
2017-05-04 00:00:00
2017-05-04 00:00:00
2017-05-05 00:00:00
2017-05-05 00:00:00
2017-05-08 00:00:00
2017-05-08 00:00:00
2017-05-09 00:00:00
2017-05-09 00:00:00
2017-05-10 00:00:00
2017-05-10 00:00:00
2017-05-11 00:00:00
2017-05-11 00:00:00
2017-05-12 00:00:00
2017-05-12 00:00:00
2017-05-15 00:00:00
2017-05-15 00:00:00
2017-05-16 00:00:00
2017-05-16 00:00:00
2017-05-17 00:00:00
2017-05-17 00:00:00
2017-05-18 00:00:00
2017-05-18 00:00:00
2017-05-19 00:00:00
2017-05-19 00:00:00
2017-05-22 00:00:00
2017-05-22 00:00:00
2017-05-23 00:00:00
2017-05-23 00:00:00
2017-05-24 00:00:00
2017-05-24 00:00:00
2017-05-25 00:00:00
2017-05-25 00:00:00
2017-05-26 00:00:00
2017-05-26 00:00:00
2017-05-30 00:00:00
2017-05-30 00:00:00
2017-05-31 00:00:00
2017-05-31 00:00:00
2017-06-01 00:00:00
2017-06-01 00:00:00
2017-06-02 00:00:00


2018-03-15 00:00:00
2018-03-16 00:00:00
2018-03-16 00:00:00
2018-03-19 00:00:00
2018-03-19 00:00:00
2018-03-20 00:00:00
2018-03-20 00:00:00
2018-03-21 00:00:00
2018-03-21 00:00:00
2018-03-22 00:00:00
2018-03-22 00:00:00
2018-03-23 00:00:00
2018-03-23 00:00:00
2018-03-26 00:00:00
2018-03-26 00:00:00
2018-03-27 00:00:00
2018-03-27 00:00:00
2018-03-28 00:00:00
2018-03-28 00:00:00
2018-03-29 00:00:00
2018-03-29 00:00:00
2018-04-02 00:00:00
2018-04-02 00:00:00
2018-04-03 00:00:00
2018-04-03 00:00:00
2018-04-04 00:00:00
2018-04-04 00:00:00
2018-04-05 00:00:00
2018-04-05 00:00:00
2018-04-06 00:00:00
2018-04-06 00:00:00
2018-04-09 00:00:00
2018-04-09 00:00:00
2018-04-10 00:00:00
2018-04-10 00:00:00
2018-04-11 00:00:00
2018-04-11 00:00:00
2018-04-12 00:00:00
2018-04-12 00:00:00
2018-04-13 00:00:00
2018-04-13 00:00:00
2018-04-16 00:00:00
2018-04-16 00:00:00
2018-04-17 00:00:00
2018-04-17 00:00:00
2018-04-18 00:00:00
2018-04-18 00:00:00
2018-04-19 00:00:00
2018-04-19 00:00:00
2018-04-20 00:00:00


2019-01-31 00:00:00
2019-01-31 00:00:00
2019-02-01 00:00:00
2019-02-01 00:00:00
2019-02-04 00:00:00
2019-02-04 00:00:00
2019-02-05 00:00:00
2019-02-05 00:00:00
2019-02-06 00:00:00
2019-02-06 00:00:00
2019-02-07 00:00:00
2019-02-07 00:00:00
2019-02-08 00:00:00
2019-02-08 00:00:00
2019-02-11 00:00:00
2019-02-11 00:00:00
2019-02-12 00:00:00
2019-02-12 00:00:00
2019-02-13 00:00:00
2019-02-13 00:00:00
2019-02-14 00:00:00
2019-02-14 00:00:00
2019-02-15 00:00:00
2019-02-15 00:00:00
2019-02-19 00:00:00
2019-02-19 00:00:00
2019-02-20 00:00:00
2019-02-20 00:00:00
2019-02-21 00:00:00
2019-02-21 00:00:00
2019-02-22 00:00:00
2019-02-22 00:00:00
2019-02-25 00:00:00
2019-02-25 00:00:00
2019-02-26 00:00:00
2019-02-26 00:00:00
2019-02-27 00:00:00
2019-02-27 00:00:00
2019-02-28 00:00:00
2019-02-28 00:00:00
2019-03-01 00:00:00
2019-03-01 00:00:00
2019-03-04 00:00:00
2019-03-04 00:00:00
2019-03-05 00:00:00
2019-03-05 00:00:00
2019-03-06 00:00:00
2019-03-06 00:00:00
2019-03-07 00:00:00
2019-03-07 00:00:00


2019-12-10 00:00:00
2019-12-11 00:00:00
2019-12-11 00:00:00
2019-12-12 00:00:00
2019-12-12 00:00:00
2019-12-13 00:00:00
2019-12-13 00:00:00
2019-12-16 00:00:00
2019-12-16 00:00:00
2019-12-17 00:00:00
2019-12-17 00:00:00
2019-12-18 00:00:00
2019-12-18 00:00:00
2019-12-19 00:00:00
2019-12-19 00:00:00
2019-12-20 00:00:00
2019-12-20 00:00:00
2019-12-23 00:00:00
2019-12-23 00:00:00
2019-12-24 00:00:00
2019-12-24 00:00:00
2019-12-26 00:00:00
2019-12-26 00:00:00
2019-12-27 00:00:00
2019-12-27 00:00:00
2019-12-30 00:00:00
2019-12-30 00:00:00
2019-12-31 00:00:00
2019-12-31 00:00:00
2020-01-02 00:00:00
2020-01-02 00:00:00
2020-01-03 00:00:00
2020-01-03 00:00:00
2020-01-06 00:00:00
2020-01-06 00:00:00
2020-01-07 00:00:00
2020-01-07 00:00:00
2020-01-08 00:00:00
2020-01-08 00:00:00
2020-01-09 00:00:00
2020-01-09 00:00:00
2020-01-10 00:00:00
2020-01-10 00:00:00
2020-01-13 00:00:00
2020-01-13 00:00:00
2020-01-14 00:00:00
2020-01-14 00:00:00
2020-01-15 00:00:00
2020-01-15 00:00:00
2020-01-16 00:00:00


2020-10-12 00:00:00
2020-10-12 00:00:00
2020-10-13 00:00:00
2020-10-13 00:00:00
2020-10-14 00:00:00
2020-10-14 00:00:00
2020-10-15 00:00:00
2020-10-15 00:00:00
2020-10-16 00:00:00
2020-10-16 00:00:00
2020-10-19 00:00:00
2020-10-19 00:00:00
2020-10-20 00:00:00
2020-10-20 00:00:00
2020-10-21 00:00:00
2020-10-21 00:00:00
2020-10-22 00:00:00
2020-10-22 00:00:00
2020-10-23 00:00:00
2020-10-23 00:00:00
2020-10-26 00:00:00
2020-10-26 00:00:00
2020-10-27 00:00:00
2020-10-27 00:00:00
2020-10-28 00:00:00
2020-10-28 00:00:00
2020-10-29 00:00:00
2020-10-29 00:00:00
2020-10-30 00:00:00
2020-10-30 00:00:00
2020-11-02 00:00:00
2020-11-02 00:00:00
2020-11-03 00:00:00
2020-11-03 00:00:00
2020-11-04 00:00:00
2020-11-04 00:00:00
2020-11-05 00:00:00
2020-11-05 00:00:00
2020-11-06 00:00:00
2020-11-06 00:00:00
2020-11-09 00:00:00
2020-11-09 00:00:00
2020-11-10 00:00:00
2020-11-10 00:00:00
2020-11-11 00:00:00
2020-11-11 00:00:00
2020-11-12 00:00:00
2020-11-12 00:00:00
2020-11-13 00:00:00
2020-11-13 00:00:00


Unnamed: 0,DateTime,Open,High,Low,Close,Total_Short_EMA,Total_Long_EMA,Higher_High,Lower_Low,Close_Above_High,Close_Below_Low,OHLC_Position_20,OHLC_Position_40,Prev_Week_High_Taken,Prev_Week_Low_Taken,Close_Above_Prev_Week_High,Close_Below_Prev_Week_Low,ADX_4,ADX_7,ADX_14,RSI_up,RSI_down,above_EMA50,touching_EMA50,above_EMA200,touching_EMA200,above_EMA8,touching_EMA8,touching_middle,touching_upper,touching_lower,normalized_value,bullish_candle_pattern,bearish_candle_pattern,ema3_gt_ema9_gt_ema18,ema3_lt_ema9_lt_ema18,ema2_gt_ema5_gt_ema10,ema2_lt_ema5_lt_ema10,lq_close_gt_ph,lq_close_lt_ph,lq_close_gt_ph_low,lq_close_lt_ph_low,minimum_point,max_point,current_point,normalized_slope,Target_goingUp
2905,2020-12-03,3671.0,3682.0,3655.25,3664.5,9,8,1,0,0,0,0.945567,0.969776,True,False,True,False,44.502807,43.34168,29.28595,1,0,1,0,1,0,1,0,0,0,0,0.87448,0,0,1,0,1,0,False,True,False,False,1.164637,1.3,1.3,0.054486,1.0
2906,2020-12-04,3668.25,3700.0,3665.5,3698.0,9,8,1,0,1,0,0.929856,0.964079,True,False,True,False,50.176581,44.95627,29.997061,1,0,1,0,1,0,1,0,0,0,0,0.980626,0,0,1,0,1,0,False,False,False,False,1.294809,1.3,1.3,0.06296,1.0
2907,2020-12-07,3694.75,3705.0,3672.25,3690.75,9,8,1,0,0,0,0.927897,0.970182,True,False,False,False,55.273336,46.633738,30.729597,1,0,1,0,1,0,1,0,0,0,0,0.911428,0,0,1,0,1,0,False,False,False,False,1.3,1.3,1.3,0.071399,0.0
2908,2020-12-08,3682.75,3708.0,3664.25,3702.0,9,8,1,1,0,0,0.906948,0.96118,True,False,True,False,51.438298,46.32616,31.130565,1,0,1,0,1,0,1,1,0,0,0,0.914756,1,0,1,0,1,0,False,False,False,False,1.041125,1.3,1.3,0.085404,0.0
2909,2020-12-09,3707.75,3714.75,3659.5,3672.5,8,8,1,1,0,0,0.870508,0.946656,True,False,False,False,51.085953,46.627876,31.615211,0,0,1,0,1,0,1,1,0,0,0,0.747814,0,1,1,0,1,0,False,False,False,False,1.3,1.3,1.3,0.093828,0.0
2910,2020-12-10,3661.25,3673.25,3636.0,3660.75,2,8,0,1,0,0,0.717782,0.883742,True,False,False,False,42.245357,42.125432,31.230205,0,1,1,0,1,0,0,1,0,0,0,0.669085,0,0,1,0,0,0,False,False,False,False,0.916617,1.3,0.916617,0.103187,0.0
2911,2020-12-11,3662.75,3667.0,3620.75,3653.5,0,8,0,1,0,0,0.675985,0.869832,True,False,False,False,41.407786,36.33798,30.368469,0,1,1,0,1,0,0,1,1,0,0,0.612264,0,0,1,0,0,0,False,False,False,False,0.814144,1.3,0.814144,0.105353,1.0
2912,2020-12-14,3670.0,3691.5,3637.5,3640.0,0,8,1,0,0,0,0.681159,0.887698,False,False,False,False,34.202646,34.023269,30.077672,0,1,1,0,1,0,0,1,0,0,0,0.519412,0,0,0,0,0,0,False,False,False,False,0.558344,1.21263,0.558344,0.113541,0.0
2913,2020-12-15,3647.0,3688.5,3636.25,3687.0,9,8,0,1,0,0,0.709783,0.897779,False,False,False,False,28.106533,31.823338,29.764128,1,0,1,0,1,0,1,1,1,0,0,0.771022,0,0,1,0,1,0,False,False,False,False,0.558344,1.179174,0.890961,0.118159,1.0
2914,2020-12-16,3689.75,3704.5,3680.0,3693.75,9,8,1,0,1,0,0.868116,0.953548,False,False,False,False,30.199992,31.776167,29.80394,1,0,1,0,1,0,1,0,0,0,0,0.779631,0,0,1,0,1,0,True,False,False,False,0.53532,1.167759,0.890183,0.119897,1.0


In [67]:
ES.to_csv('ES_1D_20240310_2.csv', index=True)

In [48]:
ES['Target_goingUp'] = ES['Higher_High'] * (1- ES['Lower_Low']) 
ES['Target_goingUp'] = ES['Target_goingUp'].shift(-1)

In [49]:
ES.tail(8)

Unnamed: 0,DateTime,Open,High,Low,Close,Total_Short_EMA,Total_Long_EMA,Higher_High,Lower_Low,Close_Above_High,Close_Below_Low,OHLC_Average,OHLC_Position_20,OHLC_Position_40,Prev_Week_High_Taken,Prev_Week_Low_Taken,Close_Above_Prev_Week_High,Close_Below_Prev_Week_Low,ADX_4,ADX_7,ADX_14,RSI_up,RSI_down,above_EMA50,touching_EMA50,above_EMA200,touching_EMA200,above_EMA8,touching_EMA8,touching_middle,touching_upper,touching_lower,normalized_value,bullish_candle_pattern,bearish_candle_pattern,ema3_gt_ema9_gt_ema18,ema3_lt_ema9_lt_ema18,ema2_gt_ema5_gt_ema10,ema2_lt_ema5_lt_ema10,lq_close_gt_ph,lq_close_lt_ph,lq_close_gt_ph_low,lq_close_lt_ph_low,minimum_point,max_point,current_point,normalized_slope,Target_goingUp
391,2019-12-19,3199.5,3213.75,3194.5,3211.75,9,8,1,0,1,0,3204.875,0.938475,0.958721,True,False,True,False,60.092199,36.3275,24.856363,1,0,1,0,1,0,1,0,0,0,0,0.97603,0,0,1,0,1,0,True,False,False,False,0.132685,1.3,1.3,0.081847,1.0
392,2019-12-20,3213.5,3229.5,3208.5,3225.75,9,8,1,0,1,0,3219.3125,0.936328,0.95561,True,False,True,False,66.433739,40.047935,26.08713,1,0,1,0,1,0,1,0,0,1,0,0.99623,0,0,1,0,1,0,False,False,False,False,1.23454,1.3,1.3,0.084507,1.0
393,2019-12-23,3227.0,3234.25,3224.25,3227.25,9,8,1,0,0,0,3228.1875,0.963202,0.971671,True,False,False,False,71.663045,43.596055,27.358917,1,0,1,0,1,0,1,0,0,0,0,0.94188,0,0,1,0,1,0,False,False,False,False,1.213101,1.3,1.3,0.088069,0.0
394,2019-12-24,3228.25,3231.25,3222.5,3225.75,9,8,0,1,0,0,3226.9375,0.955615,0.965829,True,False,False,False,72.770464,45.979714,28.414314,1,0,1,0,1,0,1,0,0,0,0,0.881331,0,1,1,0,1,0,False,False,False,False,1.3,1.3,1.3,0.092119,1.0
395,2019-12-26,3227.5,3244.75,3227.0,3244.5,9,8,1,0,1,0,3235.9375,0.949715,0.960746,True,False,True,False,75.88409,49.162208,29.775127,1,0,1,0,1,0,1,0,0,0,0,0.929842,0,0,1,0,1,0,True,False,False,False,1.3,1.3,1.3,0.095327,1.0
396,2019-12-27,3242.0,3254.0,3235.5,3237.5,9,8,1,0,0,0,3242.25,0.936314,0.949733,True,False,True,False,79.174871,52.517277,31.277603,1,0,1,0,1,0,1,0,0,0,0,0.848481,0,0,1,0,1,0,False,False,False,False,1.007237,1.3,1.3,0.09957,0.0
397,2019-12-30,3238.25,3244.25,3217.25,3223.5,8,8,0,1,0,1,3230.8125,0.874322,0.895079,False,True,False,False,62.50423,49.436948,31.389391,0,0,1,0,1,0,1,1,0,0,0,0.747971,0,0,1,0,1,0,False,False,False,True,0.919477,1.3,0.919477,0.101275,0.0
398,2019-12-31,3223.25,3236.25,3213.0,3231.0,9,8,0,1,0,0,3225.875,0.847561,0.852749,False,True,False,False,46.896276,45.644811,31.217909,0,0,1,0,1,0,1,1,0,0,0,0.761346,0,0,1,0,1,0,False,False,False,False,0.662573,1.3,0.820018,0.104498,


In [None]:
'''
Inputs:
- List of candlesticks (each candlestick has Open, Close, High, Low)
- Sensitivity (a user-defined threshold for detecting momentum shifts)
- Mitigation Type ("Close" or "Wick" to determine how OBs are confirmed or invalidated)

Function IsOrderBlock(candlesticks, sensitivity, mitigationType):
    Initialize rateOfChangeList = []

    // Calculate Rate of Change (ROC) for each candlestick compared to 4 candles ago
    For each candlestick in candlesticks (starting from the 5th candle):
        roc = (candlestick.Open - candlestick[4].Open) / candlestick[4].Open * 100
        rateOfChangeList.append(roc)

    // Identify potential OB based on ROC crossing the sensitivity threshold
    For i from 0 to length of rateOfChangeList:
        // Bearish OB Detection
        If rateOfChangeList[i] crosses under -sensitivity:
            // Check if there's a bullish candle in the next few candles
            for j from i+1 to i+12 (or less if not enough candles):
                If candlesticks[j].Close > candlesticks[j].Open:
                    // Found a bullish candle after a bearish momentum, potential bearish OB
                    If mitigationType == "Close":
                        If candlesticks[j+1].Close or candlesticks[j+2].Close > candlesticks[j].High:
                            return False // OB invalidated by price action
                    Else (If mitigationType == "Wick"):
                        // Similar logic for wick-based mitigation
                    return True // Confirmed bearish OB

        // Bullish OB Detection
        Else If rateOfChangeList[i] crosses over sensitivity:
            // Check if there's a bearish candle in the next few candles
            for j from i+1 to i+12 (or less if not enough candles):
                If candlesticks[j].Close < candlesticks[j].Open:
                    // Found a bearish candle after a bullish momentum, potential bullish OB
                    If mitigationType == "Close":
                        If candlesticks[j+1].Close or candlesticks[j+2].Close < candlesticks[j].Low:
                            return False // OB invalidated by price action
                    Else (If mitigationType == "Wick"):
                        // Similar logic for wick-based mitigation
                    return True // Confirmed bullish OB

    return False // No OB detected based on the given criteria

'''

In [None]:
'''
import pandas as pd

def calculate_roc(open_prices):
    """Calculate the Rate of Change (ROC) based on open prices."""
    roc = ((open_prices - open_prices.shift(4)) / open_prices.shift(4)) * 100
    return roc

def identify_order_blocks(df, sensitivity):
    """Identify order blocks in a DataFrame containing candlestick data."""
    # Calculate Rate of Change (ROC) for the Open prices
    df['ROC'] = calculate_roc(df['Open'])
    
    # Identifying potential Order Blocks
    ob_blocks = []
    
    for i in range(4, len(df)):
        # Bearish Order Block Detection
        if df.iloc[i]['ROC'] < -sensitivity:
            for j in range(i+1, min(i+12, len(df))):
                if df.iloc[j]['Close'] > df.iloc[j]['Open']:  # Bullish candle after a bearish momentum
                    ob_blocks.append({'DateTime': df.iloc[j]['DateTime'], 'Type': 'Bearish'})
                    break
                    
        # Bullish Order Block Detection
        elif df.iloc[i]['ROC'] > sensitivity:
            for j in range(i+1, min(i+12, len(df))):
                if df.iloc[j]['Close'] < df.iloc[j]['Open']:  # Bearish candle after a bullish momentum
                    ob_blocks.append({'DateTime': df.iloc[j]['DateTime'], 'Type': 'Bullish'})
                    break
    
    return pd.DataFrame(ob_blocks)

# Example usage
# Assuming 'df' is your DataFrame containing the ES chart data
# df = pd.read_csv('your_data.csv')  # Load your data into a DataFrame

# sensitivity = 2.5  # Define your sensitivity for ROC, adjust based on your analysis
# order_blocks = identify_order_blocks(df, sensitivity)

# print(order_blocks)

'''
