In [4]:
import sys
import os
import pandas as pd
import pandas as pd

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../src')))
from utils.statistical_analysis import correlation_analysis_resample
from utils.statistical_analysis import filter_by_session_nyc
from utils.holiday_utils import extract_weekday
notebook_path = os.path.abspath(os.path.join(os.getcwd(), '../notebooks'))
output_file_path = rf'{notebook_path}\output\correlation_analysis'
output_session_filepath = rf'{notebook_path}\data\processed\session'
input_file_path = rf'{notebook_path}\data\processed'

In [16]:
"""
    Filters the DataFrame by the given trading session or sub-session using NYC time as a base.
   
    Trading session/sub-session times (NYC time):
    - asian: 19:00 (prev day) - 04:00
    - asian_morning: 19:00 (prev day) - 01:00
    - london: 03:00 - 12:00
    - london_morning: 03:00 - 07:00
    - london_afternoon: 07:00 - 12:00
    - ny: 08:00 - 17:00
    - ny_morning: 08:00 - 12:00
    - ny_evening: 12:00 - 17:00
   
    Parameters:
        df (DataFrame): The input time series DataFrame (with NYC timezone-aware timestamps).
        session (str): The trading session or sub-session ('asian', 'london_morning', etc.).
        output_file (str): Path to save the filtered DataFrame as a CSV file. If None, no file is saved.

    Returns:
        DataFrame: Filtered DataFrame for the session.
"""
%debug
year = "2022"
ccy = "eurusd"
session = "ny_morning"
base_price = "prev"

file_path = rf'{input_file_path}/{year}_{ccy}_tick_{base_price}_price_as_base.csv'
output_filepath = rf'{output_session_filepath}/{year}_{ccy}_{session}_tick_{base_price}_price_as_base.csv'

# Load the dataset
df = pd.read_csv(file_path)

filtered_df = filter_by_session_nyc(df, session, output_filepath)

ERROR:root:No traceback has been produced, nothing to debug.


Filtering session: ny_morning
Filtered rows for ny_morning:
                           tick_number   price  actual_openprice  ticks_moved  \
datetime                                                                        
2022-01-03 08:29:00-05:00            9  1.1359           1.13602            1   
2022-01-03 08:43:00-05:00           10  1.1349           1.13480            1   
2022-01-03 09:15:00-05:00           11  1.1339           1.13384            1   
2022-01-03 09:21:00-05:00           12  1.1329           1.13289            1   
2022-01-03 09:50:00-05:00           13  1.1319           1.13171            1   
...                                ...     ...               ...          ...   
2022-12-30 09:55:00-05:00         8284  1.0689           1.06918            1   
2022-12-30 10:06:00-05:00         8285  1.0679           1.06744            1   
2022-12-30 10:47:00-05:00         8286  1.0669           1.06660            1   
2022-12-30 11:06:00-05:00         8287  1.0679   

In [None]:
%debug
year = "2023"
ccy = "eurgbp"
sessions = ["ny", "ny_morning", "ny_evening", "london", "london_morning", "london_afternoon", "asian", "asian_morning"]
base_price = "prev"

for session in sessions:
    file_path = rf'{input_file_path}/{year}_{ccy}_tick_{base_price}_price_as_base.csv'
    output_filepath = rf'{output_session_filepath}/{year}_{ccy}_{session}_tick_{base_price}_price_as_base.csv'

    # Load the dataset
    df = pd.read_csv(file_path)

    filtered_df = filter_by_session_nyc(df, session, output_filepath)

In [2]:
year = "2023"
ccy = "eurgbp"
use_first_price_as_base=False # True: use first price as base, False: use previous price as base
weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
base_price = "prev"

for day_of_week in weekdays:
    if (use_first_price_as_base):
        input_filename = f'{input_file_path}/{year}_{ccy.lower()}_tick_first_price_as_base.csv'
        output_filename = f'{output_session_filepath}/{year}_{ccy.lower()}_tick_first_price_as_base_{day_of_week}.csv'
    else:
        input_filename = f'{input_file_path}/{year}_{ccy.lower()}_tick_prev_price_as_base.csv'
        output_filename = f'{output_session_filepath}/{year}_{ccy.lower()}_tick_prev_price_as_base_{day_of_week}.csv'
    extract_weekday(input_filename, output_filename, day_of_week)

Rows with 'Monday' in the 'day_of_week' column have been extracted.
Filtered data saved to: c:\Users\mmori\Documents\fx_strategy_project\notebooks\data\processed\session/2023_eurgbp_tick_prev_price_as_base_Monday.csv
Rows with 'Tuesday' in the 'day_of_week' column have been extracted.
Filtered data saved to: c:\Users\mmori\Documents\fx_strategy_project\notebooks\data\processed\session/2023_eurgbp_tick_prev_price_as_base_Tuesday.csv
Rows with 'Wednesday' in the 'day_of_week' column have been extracted.
Filtered data saved to: c:\Users\mmori\Documents\fx_strategy_project\notebooks\data\processed\session/2023_eurgbp_tick_prev_price_as_base_Wednesday.csv
Rows with 'Thursday' in the 'day_of_week' column have been extracted.
Filtered data saved to: c:\Users\mmori\Documents\fx_strategy_project\notebooks\data\processed\session/2023_eurgbp_tick_prev_price_as_base_Thursday.csv
Rows with 'Friday' in the 'day_of_week' column have been extracted.
Filtered data saved to: c:\Users\mmori\Documents\fx_

In [3]:
year = "2023"
target_ccy = "usdjpy"
feature_ccy = "spxusd"

target_filepath = rf'{notebook_path}\data\ASCII\M1\{target_ccy.lower()}\{year}\DAT_ASCII_{target_ccy.upper()}_M1_{year}.csv'
feature_filepath = rf'{notebook_path}\data\ASCII\M1\{feature_ccy.lower()}\{year}\DAT_ASCII_{feature_ccy.upper()}_M1_{year}.csv'
correlation_analysis_resample(target_filepath, feature_filepath, target_ccy, feature_ccy, year, output_file_path)

          datetime      open      high       low     close  volume
0  20230102 180000  3872.998  3877.176  3863.860  3865.983       0
1  20230102 180100  3866.128  3867.372  3865.378  3865.980       0
2  20230102 180200  3865.878  3866.878  3865.360  3865.881       0
3  20230102 180300  3865.742  3865.742  3862.860  3863.613       0
4  20230102 180400  3863.363  3863.363  3860.742  3860.878       0
          datetime     open     high      low    close  volume
0  20230101 170000  130.925  130.925  130.910  130.921       0
1  20230101 170100  130.921  130.962  130.921  130.960       0
2  20230101 170200  130.960  130.960  130.959  130.959       0
3  20230101 170400  130.943  130.943  130.808  130.820       0
4  20230101 170500  130.825  130.825  130.814  130.814       0
             datetime  open_spxusd  high_spxusd  low_spxusd  close_spxusd  \
0 2023-01-02 18:00:00     3872.998     3877.176    3863.860      3865.983   
1 2023-01-02 18:01:00     3866.128     3867.372    3865.378      3

np.float64(-0.007286576437847909)

In [12]:
year = "2023"
target_ccy = "usdjpy"
feature_ccy = "spxusd"
resample_interval = "1H"

target_filepath = rf'C:\Users\mmori\Documents\fx_strategy_project\notebooks\data\raw\ASCII\M1\{target_ccy.lower()}\{year}\DAT_ASCII_{target_ccy.upper()}_M1_{year}.csv'
feature_filepath = rf'C:\Users\mmori\Documents\fx_strategy_project\notebooks\data\raw\ASCII\M1\{feature_ccy.lower()}\{year}\DAT_ASCII_{feature_ccy.upper()}_M1_{year}.csv'
correlation_analysis_resample(target_filepath, feature_filepath, target_ccy, feature_ccy, year, output_file_path, resample_interval)

          datetime      open      high       low     close  volume
0  20230102 180000  3872.998  3877.176  3863.860  3865.983       0
1  20230102 180100  3866.128  3867.372  3865.378  3865.980       0
2  20230102 180200  3865.878  3866.878  3865.360  3865.881       0
3  20230102 180300  3865.742  3865.742  3862.860  3863.613       0
4  20230102 180400  3863.363  3863.363  3860.742  3860.878       0
          datetime     open     high      low    close  volume
0  20230101 170000  130.925  130.925  130.910  130.921       0
1  20230101 170100  130.921  130.962  130.921  130.960       0
2  20230101 170200  130.960  130.960  130.959  130.959       0
3  20230101 170400  130.943  130.943  130.808  130.820       0
4  20230101 170500  130.825  130.825  130.814  130.814       0
             datetime  open_spxusd  high_spxusd  low_spxusd  close_spxusd  \
0 2023-01-02 18:00:00     3872.998     3877.176    3846.110      3848.610   
1 2023-01-02 19:00:00     3848.381     3848.872    3833.360      3

  feature_data = feature_data.resample(resample_interval).agg({
  target_data = target_data.resample(resample_interval).agg({


np.float64(0.033394218261811784)

In [11]:
year = "2023"
target_ccy = "usdjpy"
feature_ccy = "spxusd"
resample_interval = "1D"

target_filepath = rf'C:\Users\mmori\Documents\fx_strategy_project\notebooks\data\raw\ASCII\M1\{target_ccy.lower()}\{year}\DAT_ASCII_{target_ccy.upper()}_M1_{year}.csv'
feature_filepath = rf'C:\Users\mmori\Documents\fx_strategy_project\notebooks\data\raw\ASCII\M1\{feature_ccy.lower()}\{year}\DAT_ASCII_{feature_ccy.upper()}_M1_{year}.csv'
correlation_analysis_resample(target_filepath, feature_filepath, target_ccy, feature_ccy, year, output_file_path, resample_interval)

          datetime      open      high       low     close  volume
0  20230102 180000  3872.998  3877.176  3863.860  3865.983       0
1  20230102 180100  3866.128  3867.372  3865.378  3865.980       0
2  20230102 180200  3865.878  3866.878  3865.360  3865.881       0
3  20230102 180300  3865.742  3865.742  3862.860  3863.613       0
4  20230102 180400  3863.363  3863.363  3860.742  3860.878       0
          datetime     open     high      low    close  volume
0  20230101 170000  130.925  130.925  130.910  130.921       0
1  20230101 170100  130.921  130.962  130.921  130.960       0
2  20230101 170200  130.960  130.960  130.959  130.959       0
3  20230101 170400  130.943  130.943  130.808  130.820       0
4  20230101 170500  130.825  130.825  130.814  130.814       0
    datetime  open_spxusd  high_spxusd  low_spxusd  close_spxusd  \
0 2023-01-02     3872.998     3877.176    3820.110      3841.381   
1 2023-01-03     3841.116     3883.863    3792.915      3831.046   
2 2023-01-04    

np.float64(0.07069216151295275)