In [4]:
# Import necessary libraries
import os
import pandas as pd
import numpy as np

In [9]:
def test_mom_features(file_path:str):
    """
    Loads the dataset and empirically tests its momentum columsn for look-ahead bias.
    Args: file_path: the path to the CSV file
    """
    print(f"Analysing file: {file_path}")
    try:
        df = pd.read_csv(file_path)
        # Prepare the data
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.sort_values('Date').reset_index(drop=True)
        #Identify momentum columns
        mom_cols = [c for c in ['mom', 'mom1', 'mom2', 'mom3'] if c in df.columns]
        #Empirically test each momentum feature
        report = {}
        for mom_col in mom_cols:
            report[mom_col] = {}
            print(f"\n Testing '{mom_col}':")
            s = pd.to_numeric(df[mom_col], errors='coerce')

            for k in [1,2,3,5,10]: # Test against k-day future returns
                future_returns = df['Price'].shift(-k) / df['Price'] -1.0
                valid_mask = s.notna() & future_returns.notna()
                if valid_mask.sum() < 20:
                    print(f"  Not enough valid data to test {mom_col} against {k}-day future returns.")
                    continue
                mom_series = s[valid_mask]
                future_series = future_returns[valid_mask]
                correlation = np.corrcoef(mom_series, future_series)[0,1]
                report[mom_col][f'corr_with_{k}d_future_return'] = correlation

                print(f"  Correlation with {k}-day future returns: {correlation:.4f}")
        return report
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except Exception as e:
        print(f"An error occurred while processing {file_path}: {e}")



In [10]:
# Testing: run the look-ahead check on both index datasets.
# Single place to change the dataset location (adjust to the local checkout).
DATA_DIR = "/home/ubuntu/michael/MSc-Machine-Learning-Project/Datasets"
report_nyse = test_mom_features(os.path.join(DATA_DIR, "combined_dataframe_NYSE.csv"))
print()  # blank separator between the two reports (a bare `print` is a no-op expression)
report_ixic = test_mom_features(os.path.join(DATA_DIR, "combined_dataframe_IXIC.csv"))


Analysing file: /home/ubuntu/michael/MSc-Machine-Learning-Project/Datasets/combined_dataframe_NYSE.csv

 Testing 'mom':
  Correlation with 1-day future returns: -0.9991
  Correlation with 2-day future returns: -0.6735
  Correlation with 3-day future returns: -0.5941
  Correlation with 5-day future returns: -0.4331
  Correlation with 10-day future returns: -0.3227

 Testing 'mom1':
  Correlation with 1-day future returns: 0.0891
  Correlation with 2-day future returns: -0.6736
  Correlation with 3-day future returns: -0.4901
  Correlation with 5-day future returns: -0.4094
  Correlation with 10-day future returns: -0.2682

 Testing 'mom2':
  Correlation with 1-day future returns: -0.0893
  Correlation with 2-day future returns: -0.0042
  Correlation with 3-day future returns: -0.5941
  Correlation with 5-day future returns: -0.4665
  Correlation with 10-day future returns: -0.3243

 Testing 'mom3':
  Correlation with 1-day future returns: 0.0324
  Correlation with 2-day future returns: 