In [14]:
import os
import sys
import pandas as pd
import numpy as np
import pytz
from typing import List, Optional
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Expose the parent of the current working directory on sys.path so that the
# project's top-level modules can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

from unbiased_data import process_currency_pairs, prepare_unbiased_dataset_row_by_row
from data_management.indicator_manager import IndicatorManager

# # Example usage:
# currencies_short = ['USD_JPY']
# df = process_currency_pairs(
#     currencies=currencies_short,
#     indicator_timeframe='1h'
# )




In [15]:
# Load the 1-minute EUR/USD parquet file and keep only its first 10,000 rows
# as a small working sample.
source_1min = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/1min/EUR_USD.parquet'
df = pd.read_parquet(source_1min).head(10000)

df

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2001-01-02 23:01:00+00:00,0.9507,0.9507,0.9507,0.9507,
2001-01-02 23:02:00+00:00,0.9506,0.9506,0.9505,0.9505,
2001-01-02 23:03:00+00:00,0.9505,0.9507,0.9505,0.9506,
2001-01-02 23:04:00+00:00,0.9506,0.9506,0.9506,0.9506,
2001-01-02 23:05:00+00:00,0.9506,0.9506,0.9506,0.9506,
...,...,...,...,...,...
2001-01-12 04:34:00+00:00,0.9534,0.9534,0.9534,0.9534,
2001-01-12 04:35:00+00:00,0.9534,0.9534,0.9534,0.9534,
2001-01-12 04:36:00+00:00,0.9534,0.9534,0.9534,0.9534,
2001-01-12 04:37:00+00:00,0.9534,0.9534,0.9534,0.9534,


In [None]:
# Project helpers needed by the dataset-building loop further down in this cell.
from data_management.indicator_manager import IndicatorManager
from data_management.preprocessor import DataPreprocessor
# Shared instances: indicator_manager is handed to
# prepare_unbiased_dataset_row_by_row, and processor provides normalize_simple.
indicator_manager = IndicatorManager()
processor = DataPreprocessor()

import logging


# Configure logging: INFO and above is written both to dataset_prep.log and to
# the default stream handler (stderr).
# Note: basicConfig does nothing when the root logger already has handlers, so
# re-running this cell does not stack duplicate handlers.
logging.basicConfig(
    handlers=[logging.FileHandler('dataset_prep.log'), logging.StreamHandler()],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('dataset_prep')



# Instrument batches, three symbols each. Only currencies_1 is iterated by the
# processing loop in this cell; the other batches are defined for separate runs.
currencies_1 = ['GBP_CHF', 'GBP_JPY', 'EUR_CHF']
currencies_2 = ['EUR_CAD', 'EUR_USD', 'GBP_USD']
currencies_3 = ['USD_CAD', 'AUD_USD', 'CHF_JPY']
currencies_4 = ['NZD_JPY', 'XAU_USD', 'XAG_USD']
currencies_5 = ['USD_CHF', 'USD_JPY', 'AUD_JPY']
currencies_6 = ['EUR_JPY', 'EUR_GBP', 'NZD_USD']

# Build and persist two datasets per instrument in the batch:
#   1. the indicator-enriched frame with NaN rows removed (not normalized)
#   2. the same frame after processor.normalize_simple
# The names assigned here deliberately stay at notebook level (no helper
# function): the next cell reads df_with_indicators from the last iteration.
for ccy in currencies_1:

    logger.info(f'Starting processing for {ccy} at {pd.Timestamp.now()}')
    try:
        df = pd.read_parquet(f'./{ccy}.parquet')
        # Attach indicators via the project helper, then drop every row that
        # still contains a NaN.
        df_with_indicators = prepare_unbiased_dataset_row_by_row(
            df=df,
            indicator_manager=indicator_manager,
            indicator_timeframe='1h',
            verbose=False
        ).dropna()

        # Save the un-normalized frame first, so it is already on disk if the
        # normalization step fails.
        output_path_not_norm = f'./{ccy}_5min_1H_indic_not_norm_unbiased.parquet'
        df_with_indicators.to_parquet(output_path_not_norm)

        df_norm = processor.normalize_simple(df=df_with_indicators)

        output_path = f'./{ccy}_5min_1H_norm_unbiased.parquet'
        df_norm.to_parquet(output_path)
    except Exception:
        # Record which instrument failed, with the traceback, through the
        # configured log handlers, then propagate unchanged so the batch
        # still stops exactly as before.
        logger.exception(f'Processing failed for {ccy}')
        raise

    logger.info(f'Finished processing for {ccy} at {pd.Timestamp.now()}')
 

In [18]:
# Remove every row that contains at least one missing value, then display the result.
df_with_indicators = df_with_indicators.dropna(axis=0, how='any')
df_with_indicators

Unnamed: 0,open,high,low,close,sma_20,sma_50,rsi,macd,macd_signal,macd_hist,...,bb_bandwidth,bb_percent,atr,plus_di,minus_di,adx,senkou_span_a,senkou_span_b,tenkan_sen,kijun_sen
2001-01-08 04:05:00+00:00,0.9566,0.9568,0.9566,0.9568,0.955070,0.949990,60.925632,0.001719,0.001881,-0.000162,...,0.911455,69.873560,0.002093,21.075484,17.537953,20.769060,0.945050,0.94155,0.95725,0.95275
2001-01-08 04:10:00+00:00,0.9566,0.9568,0.9566,0.9568,0.955070,0.949990,60.925632,0.001719,0.001881,-0.000162,...,0.911455,69.873560,0.002093,21.075484,17.537953,20.769060,0.945050,0.94155,0.95725,0.95275
2001-01-08 04:15:00+00:00,0.9566,0.9568,0.9566,0.9568,0.955070,0.949990,60.925632,0.001719,0.001881,-0.000162,...,0.911455,69.873560,0.002093,21.075484,17.537953,20.769060,0.945050,0.94155,0.95725,0.95275
2001-01-08 04:20:00+00:00,0.9566,0.9568,0.9566,0.9567,0.955065,0.949988,60.604954,0.001711,0.001879,-0.000169,...,0.909839,68.815689,0.002093,21.075484,17.537953,20.769060,0.945050,0.94155,0.95725,0.95275
2001-01-08 04:25:00+00:00,0.9566,0.9568,0.9566,0.9568,0.955070,0.949990,60.925632,0.001719,0.001881,-0.000162,...,0.911455,69.873560,0.002093,21.075484,17.537953,20.769060,0.945050,0.94155,0.95725,0.95275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2001-01-12 04:15:00+00:00,0.9536,0.9539,0.9532,0.9533,0.951250,0.944610,67.060030,0.002408,0.002543,-0.000135,...,0.832174,75.896723,0.001781,32.745897,12.815310,37.169733,0.940475,0.94190,0.95225,0.94700
2001-01-12 04:20:00+00:00,0.9536,0.9539,0.9532,0.9534,0.951255,0.944612,67.570289,0.002416,0.002544,-0.000129,...,0.834395,77.024546,0.001781,32.745897,12.815310,37.169733,0.940475,0.94190,0.95225,0.94700
2001-01-12 04:25:00+00:00,0.9536,0.9539,0.9532,0.9536,0.951265,0.944616,68.614464,0.002432,0.002548,-0.000116,...,0.839120,79.252384,0.001781,32.745897,12.815310,37.169733,0.940475,0.94190,0.95225,0.94700
2001-01-12 04:30:00+00:00,0.9536,0.9539,0.9532,0.9534,0.951255,0.944612,67.570289,0.002416,0.002544,-0.000129,...,0.834395,77.024546,0.001781,32.745897,12.815310,37.169733,0.940475,0.94190,0.95225,0.94700


In [5]:
# Load a previously generated EUR/GBP dataset (not normalized) and display it.
trial = pd.read_parquet(
    '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/best_dataframes_not_norm/EUR_GBP_5T_indics_1H_not_norm.parquet'
)
trial

Unnamed: 0_level_0,open,high,low,close,sma_20,sma_50,rsi,macd,macd_signal,macd_hist,...,bb_bandwidth,bb_percent,atr,plus_di,minus_di,adx,senkou_span_a,senkou_span_b,tenkan_sen,kijun_sen
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001-01-08 05:00:00+00:00,0.63570,0.63590,0.63570,0.63580,0.636020,0.633526,54.118817,0.000659,0.000906,-0.000247,...,0.540132,46.506902,0.001218,15.392015,16.179872,26.612034,0.631350,0.62825,0.636450,0.63495
2001-01-08 05:05:00+00:00,0.63580,0.63580,0.63580,0.63580,0.636020,0.633526,54.118817,0.000659,0.000906,-0.000247,...,0.540132,46.506902,0.001218,15.392015,16.179872,26.612034,0.631350,0.62825,0.636450,0.63495
2001-01-08 05:10:00+00:00,0.63570,0.63570,0.63570,0.63570,0.636020,0.633526,54.118817,0.000659,0.000906,-0.000247,...,0.540132,46.506902,0.001218,15.392015,16.179872,26.612034,0.631350,0.62825,0.636450,0.63495
2001-01-08 05:15:00+00:00,0.63570,0.63580,0.63570,0.63570,0.636020,0.633526,54.118817,0.000659,0.000906,-0.000247,...,0.540132,46.506902,0.001218,15.392015,16.179872,26.612034,0.631350,0.62825,0.636450,0.63495
2001-01-08 05:20:00+00:00,0.63580,0.63610,0.63570,0.63610,0.636020,0.633526,54.118817,0.000659,0.000906,-0.000247,...,0.540132,46.506902,0.001218,15.392015,16.179872,26.612034,0.631350,0.62825,0.636450,0.63495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-28 09:55:00+00:00,0.83253,0.83264,0.83244,0.83250,0.833420,0.834114,37.703142,-0.000297,-0.000283,-0.000014,...,0.226750,1.290675,0.000702,11.674206,17.935044,22.939347,0.834437,0.83372,0.833165,0.83399
2024-11-28 10:00:00+00:00,0.83250,0.83263,0.83244,0.83252,0.833336,0.834050,39.720565,-0.000321,-0.000291,-0.000031,...,0.217939,11.649892,0.000676,11.267014,17.309477,22.811170,0.834668,0.83372,0.833165,0.83379
2024-11-28 10:05:00+00:00,0.83254,0.83270,0.83246,0.83266,0.833336,0.834050,39.720565,-0.000321,-0.000291,-0.000031,...,0.217939,11.649892,0.000676,11.267014,17.309477,22.811170,0.834668,0.83372,0.833165,0.83379
2024-11-28 10:10:00+00:00,0.83266,0.83277,0.83251,0.83260,0.833336,0.834050,39.720565,-0.000321,-0.000291,-0.000031,...,0.217939,11.649892,0.000676,11.267014,17.309477,22.811170,0.834668,0.83372,0.833165,0.83379
