In [None]:

import os
import sys
import pandas as pd
import numpy as np
import pytz
from typing import List, Optional
from IPython.core.interactiveshell import InteractiveShell
# Have IPython display the result of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Make the parent of the current working directory importable so the
# project-level modules imported further down can be found.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

from unbiased_data import process_currency_pairs, prepare_unbiased_dataset_row_by_row

from data_management.indicator_manager import IndicatorManager
from data_management.preprocessor import DataPreprocessor

# Shared helper instances for the processing loop below: the indicator manager
# is passed to prepare_unbiased_dataset_row_by_row, and the preprocessor
# provides normalize_simple for the normalized output.
indicator_manager = IndicatorManager()
processor = DataPreprocessor()

import logging


# Configure root logging once: every record at INFO or above goes both to
# 'dataset_prep.log' (relative to the working directory) and to the console
# stream. The file handler is created first, then the stream handler.
_log_handlers = [
    logging.FileHandler('dataset_prep.log'),
    logging.StreamHandler(),
]
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=_log_handlers,
)

# Named logger used by the dataset-preparation code in this file.
logger = logging.getLogger('dataset_prep')



# Instrument symbols split into six batches of three. Each symbol is used as
# the stem of a '<symbol>.parquet' input file by the processing loop.
currencies_1 = ['GBP_CHF', 'GBP_JPY', 'EUR_CHF']
currencies_2 = ['EUR_CAD', 'EUR_USD', 'GBP_USD']
currencies_3 = ['USD_CAD', 'AUD_USD', 'CHF_JPY']
currencies_4 = ['NZD_JPY', 'XAU_USD', 'XAG_USD']
currencies_5 = ['USD_CHF', 'USD_JPY', 'AUD_JPY']
currencies_6 = ['EUR_JPY', 'EUR_GBP', 'NZD_USD']

# For each symbol in the first batch: load its parquet file from the working
# directory, compute indicators on the '1h' timeframe, and write two outputs
# back to the working directory: the NaN-free indicator frame and its
# normalized counterpart.
for ccy in currencies_1:
    logger.info(f'Starting processing for {ccy} at {pd.Timestamp.now()}')

    # Both destinations depend only on the symbol, so resolve them up front.
    output_path_not_norm = f'./{ccy}_5min_1H_indic_not_norm_unbiased.parquet'
    output_path = f'./{ccy}_5min_1H_norm_unbiased.parquet'

    df = pd.read_parquet(f'./{ccy}.parquet')

    # Build the indicator columns, then drop every row that still holds a NaN.
    df_with_indicators = prepare_unbiased_dataset_row_by_row(
        df=df,
        indicator_manager=indicator_manager,
        indicator_timeframe='1h',
        verbose=False,
    ).dropna()
    df_with_indicators.to_parquet(output_path_not_norm)

    # Normalization runs on the already-cleaned frame and is saved separately.
    df_norm = processor.normalize_simple(df=df_with_indicators)
    df_norm.to_parquet(output_path)

    logger.info(f'Finished processing for {ccy} at {pd.Timestamp.now()}')
 