In [1]:
import numpy as np 
import pandas as pd

In [None]:



def correlate_two_indicators(df, indicator1, indicator2):
    """
    Test correlation between two indicators.

    Parameters:
        1) df (pd.DataFrame): DataFrame containing stock data with technical and calendar indicators.
        2) indicator1 (str): Name of the first indicator column in df.
        3) indicator2 (str): Name of the second indicator column in df.

    Returns:
        1) correlation (float): Spearman correlation coefficient between input indicators.
           NaN when it is undefined (e.g. no complete pairs, or a constant column).

    Raises:
        KeyError: If either indicator column is not present in df.
    """
    first = df[indicator1]
    second = df[indicator2]

    # Rank only the rows where both indicators are present; ranking before
    # dropping missing pairs would skew the ranks of the remaining rows.
    complete = first.notna() & second.notna()

    # Spearman's rho is the Pearson correlation of the (average) ranks.
    # np.correlate is a cross-correlation and accepts no `method` argument,
    # so it cannot be used here; the rank-based form needs only pandas.
    correlation = first[complete].rank().corr(second[complete].rank())

    return correlation


def test_indicators_to_overnight_delta(df): 
    """
    Test correlation between technical/calendar indicators and overnight delta.

    Parameters:
        1) df (pd.DataFrame): DataFrame containing stock data with technical and calendar
    
    Returns:
        1) results (pd.DataFrame): DataFrame summarizing indicator performance
    """
    df_mod = df.copy()
    df_mod = df_mod.dropna(subset=['abnormal'])
    
    results = []
    
    # Binary indicators
    indicator_cols = [
        'volume_spike', 'rsi_oversold', 'rsi_overbought',
        '52_week_high_threshold', '52_week_low_threshold', 
        'intraday_return_strong_positive', 'intraday_return_strong_negative',
        'is_monday', 'is_tuesday', 'is_wednesday', 'is_thursday', 'is_friday',
        'is_jan', 'is_dec', 'first_5d_month', 'final_5d_month'
    ]
    
    base_rate = df_mod['abnormal'].mean()
    
    # iterate across indicator columns 
    for indicator in indicator_cols:
        # skip if indicator not in input DataFrame
        if indicator not in df_mod.columns:
            continue
        
        # filter DataFrame for rows (days) where indicator is True
        indicator_found = df_mod[df_mod[indicator] == True]

        if len(indicator_found) > 0:
            # calculate conditional probability P(abnormal | indicator)
            abnormal_rate_given_indicator = indicator_found['abnormal'].mean()

            # calculate lift
            lift = abnormal_rate_given_indicator / base_rate if base_rate > 0 else 0
            
            results.append({
                'indicator': indicator,
                'occurrences': len(indicator_found),
                'base_abnormal_rate': base_rate * 100,
                'abnormal_rate_with_indicator': abnormal_rate_given_indicator * 100,
                'lift': lift,
                'signal_strength': 'Strong' if lift > 1.5 else 'Moderate' if lift > 1.2 else 'Weak'
            })
    
    # Continuous indicators comparison
    continuous_indicators = ['close_position', 'volume_ratio', 'rsi', 'atr_pct']
    
    
    for indicator in continuous_indicators:
        if indicator in df_mod.columns:
            abnormal_mean = df_mod[df_mod['abnormal'] == True][indicator].mean()
            normal_mean = df_mod[df_mod['abnormal'] == False][indicator].mean()
            difference = abnormal_mean - normal_mean
            
            results.append({
                'indicator': indicator + ' (continuous)',
                'abnormal_mean': abnormal_mean,
                'normal_mean': normal_mean,
                'difference': difference
            })
    
    return pd.DataFrame(results)


# Modification notes 

1. When a rolling function (min, max, mean, etc.) has fewer observations available than its window length, use the min/max/mean of the values seen to date instead (i.e. fall back to an expanding window).

In [None]:
# Scratch arithmetic: 3.8% of 500. NOTE(review): what this figure represents is not documented here — confirm or remove.
500*0.038