In [None]:
import os
import sys

import pandas as pd

from datetime import datetime, timedelta
from pathlib import Path
import torch as th
import numpy as np
import random
# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from stable_baselines3 import PPO, A2C, SAC, TD3
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from trading.environments.forex_env2_flat_simple import ForexTradingEnv
# from trading.environments.forex_env2_flat_simple import ForexTradingEnv2 as ForexTradingEnv
# from trading.environments.forex_env_flat_multi_pair import MultipairForexTradingEnv

from stable_baselines3.common.callbacks import EvalCallback, BaseCallback
from stable_baselines3.common.monitor import Monitor
from data_management.dataset_manager import DatasetManager
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy


# Limit the number of CPU threads PyTorch uses.
th.set_num_threads(3)
N_ENVS = 3  # Number of parallel environments
# Desired evaluation interval; divided by N_ENVS below because the eval
# callback compares its frequency against its call counter (`n_calls`).
EVAL_FREUQENCY = 500_000
EVAL_FREQ_ADJUSTED = int(EVAL_FREUQENCY / N_ENVS)

hourly_dir = "/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/unbiased/not_norm/train2/"
source_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm/train2/'
# Training files: every visible .parquet in hourly_dir except validation files.
# os.listdir() returns entries in arbitrary order, so the list is sorted to keep
# the dataset order (and therefore seeded runs) reproducible across machines.
source_dfs = sorted(
    os.path.join(hourly_dir, f)
    for f in os.listdir(hourly_dir)
    if f.endswith('.parquet') and not f.startswith('.') and 'validate' not in f
)

eval_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/unbiased/not_norm/train2/EUR_GBP_validate.parquet'
sequence = 5  # passed to the environments as `sequence_length`
saving_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/unbiased/not_norm/train2/results/'
os.makedirs(saving_path, exist_ok=True)

def set_all_seeds(seed):
    """Seed Python's ``random``, NumPy and PyTorch with the same value.

    Also sets ``torch.backends.cudnn.deterministic`` to True.

    Args:
        seed: Integer seed shared by all three generators.
    """
    th.manual_seed(seed)
    th.backends.cudnn.deterministic = True
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)


set_all_seeds(42)

class ForexTensorboardCallback(BaseCallback):
    """Custom callback for logging Forex trading metrics to tensorboard.

    On every step it reads the per-environment ``infos`` dicts and records a
    fixed set of account metrics. Values are recorded with ``record_mean`` so
    that all parallel environments (and all steps between two logger dumps)
    contribute to the logged value; a plain ``record`` under the same key
    would keep only the value of the last environment in the list.
    """

    # (key in the env's info dict, key written to the logger)
    _LOGGED_METRICS = (
        ('balance', "metrics/balance"),
        ('transaction_costs', "metrics/transaction_costs"),
        ('position_size_pct', "metrics/position_size_pct"),
    )

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_returns = []  # Track episode returns for averaging

    def _on_step(self) -> bool:
        """Record the tracked metrics of every parallel environment.

        Returns:
            Always True, so training is never interrupted by this callback.
        """
        # infos is a list of dictionaries, one from each parallel environment
        for info in self.locals['infos']:
            if not info:  # Skip missing/empty info dicts
                continue

            for info_key, log_key in self._LOGGED_METRICS:
                value = info.get(info_key)
                # A metric the env did not report for this step is skipped
                # rather than aborting training with a KeyError.
                if value is not None:
                    self.logger.record_mean(log_key, value)

        return True

    def _on_rollout_end(self) -> None:
        """Called at the end of a rollout; nothing extra is logged here."""
        pass

class DetailedEvalCallback(EvalCallback):
    """EvalCallback variant that also logs trading metrics from the eval env.

    Every ``eval_freq`` calls it plays ``n_eval_episodes`` episodes on the
    first (index 0) environment of ``eval_env`` and logs, under ``eval/``:
    the mean episode reward and length, plus the mean of the end-of-episode
    ``balance``, ``total_pnl`` and ``transaction_costs`` info values.

    Compared with a naive evaluation loop this implementation:
      * copies the VecNormalize statistics from the training env first, so
        observations are scaled the same way the policy saw them in training;
      * feeds the policy state and ``episode_start`` flags back into
        ``predict`` so recurrent (LSTM) policies keep their memory across
        the steps of an episode;
      * honours the ``deterministic`` constructor argument;
      * dumps the logger once per evaluation at ``num_timesteps`` instead of
        once per environment step at a constant step value.
    """

    # Keys read from the eval env's info dict and logged as ``eval/<key>``.
    _EVAL_INFO_KEYS = ('balance', 'total_pnl', 'transaction_costs')

    def _on_step(self) -> bool:
        """Run an evaluation when due and track/save the best model.

        Returns:
            Always True, so training continues.
        """
        if self.eval_freq <= 0 or self.n_calls % self.eval_freq != 0:
            return True

        # Local import: keeps the class independent of the file's import list.
        from stable_baselines3.common.vec_env import sync_envs_normalization

        # Use the training env's running normalization statistics for evaluation.
        if self.model.get_vec_normalize_env() is not None:
            sync_envs_normalization(self.training_env, self.eval_env)

        episode_rewards = []
        episode_lengths = []
        final_infos = []

        for _ in range(self.n_eval_episodes):
            # VecEnv reset returns just the obs
            obs = self.eval_env.reset()
            policy_state = None  # recurrent state; stays None for non-recurrent policies
            episode_starts = np.ones((self.eval_env.num_envs,), dtype=bool)
            episode_reward = 0.0
            episode_length = 0
            last_info = None
            done = False

            while not done:
                action, policy_state = self.model.predict(
                    obs,
                    state=policy_state,
                    episode_start=episode_starts,
                    deterministic=self.deterministic,
                )
                # VecEnv step returns (obs, rewards, dones, infos)
                obs, rewards, dones, infos = self.eval_env.step(action)
                episode_starts = dones
                episode_reward += float(rewards[0])
                episode_length += 1
                if infos[0] is not None:
                    last_info = infos[0]
                done = bool(dones[0])

            episode_rewards.append(episode_reward)
            episode_lengths.append(episode_length)
            if last_info is not None:
                final_infos.append(last_info)

        if not episode_rewards:
            # n_eval_episodes == 0: nothing to average or compare.
            return True

        mean_reward = float(np.mean(episode_rewards))
        mean_length = float(np.mean(episode_lengths))
        self.last_mean_reward = mean_reward

        self.logger.record("eval/mean_reward", mean_reward)
        self.logger.record("eval/mean_episode_length", mean_length)
        if final_infos:
            for key in self._EVAL_INFO_KEYS:
                # Missing keys count as 0, as in the per-step logging this replaces.
                values = [info.get(key, 0) for info in final_infos]
                self.logger.record(f"eval/{key}", float(np.mean(values)))
        # Write the evaluation on the same x-axis (total timesteps) as training logs.
        self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
        self.logger.dump(self.num_timesteps)

        if self.verbose >= 1:
            print(f"Evaluating the current model: {mean_reward:.2f}")

        if mean_reward > self.best_mean_reward:
            if self.verbose >= 1:
                print(f"New best mean reward: {mean_reward:.2f} "
                      f"(previous: {self.best_mean_reward:.2f})")
            self.best_mean_reward = mean_reward
            if self.best_model_save_path is not None:
                # Path is handed to model.save() unchanged, as before.
                self.model.save(self.best_model_save_path)

        return True

    def _get_eval_info(self):
        """Helper method to get the last info dict from eval environment.

        Tries ``eval_env.get_info()`` first, then ``eval_env.envs[0].get_info()``.

        Returns:
            The info object returned by ``get_info()``, or None when neither
            accessor exists or the call raises.
        """
        try:
            # Try to get info directly from environment
            if hasattr(self.eval_env, 'get_info'):
                return self.eval_env.get_info()
            # If that's not available, try to get it from the unwrapped env
            elif hasattr(self.eval_env, 'envs'):
                return self.eval_env.envs[0].get_info()
            return None
        except Exception as e:
            print(f"Warning: Could not get eval info: {e}")
            return None


def make_train_env(rank):
    """Return a zero-argument factory that builds one monitored training env.

    Args:
        rank: Index of the environment being created. Accepted for the
            caller's convenience; the factory itself does not use it.

    Returns:
        A callable that creates a ``ForexTradingEnv`` in training mode
        (``eval_mode=False``) over ``source_dfs`` and wraps it in ``Monitor``.
    """
    def _build_monitored_env():
        return Monitor(
            ForexTradingEnv(
                df_paths=source_dfs,
                eval_mode=False,
                sequence_length=sequence,
            )
        )

    return _build_monitored_env


# One env factory per parallel environment, handed to SubprocVecEnv.
env_factories = [make_train_env(rank) for rank in range(N_ENVS)]
# Training env normalizes both observations and rewards.
train_env = VecNormalize(SubprocVecEnv(env_factories), norm_obs=True, norm_reward=True)



def make_eval_env():
    """Build the evaluation environment stack for the EUR_GBP validation file.

    The stack is ``ForexTradingEnv`` (``eval_mode=True``, reading ``eval_path``)
    -> ``Monitor`` -> single-env ``DummyVecEnv`` -> ``VecNormalize`` with
    observation normalization on and reward normalization off.

    Returns:
        The ``VecNormalize`` wrapper with its ``training`` flag set to False.
    """
    monitored_env = Monitor(
        ForexTradingEnv(
            df_paths=source_dfs,
            eval_path=eval_path,
            eval_mode=True,
            pair='EUR_GBP',
            sequence_length=sequence,
        )
    )
    # The factory simply hands back the already-built env instance.
    vec_env = DummyVecEnv([lambda: monitored_env])
    normalized_env = VecNormalize(vec_env, norm_obs=True, norm_reward=False)
    normalized_env.training = False
    return normalized_env


# --- Evaluation -----------------------------------------------------------
eval_env = make_eval_env()

best_model_dir = f'{saving_path}eval_best_model_new_reward/'
eval_callback = DetailedEvalCallback(
    eval_env,
    n_eval_episodes=5,
    eval_freq=EVAL_FREQ_ADJUSTED,
    best_model_save_path=best_model_dir,
    log_path=saving_path,
    deterministic=True,
    render=False,
)

# --- Policy configurations ------------------------------------------------
# Larger variant: three-layer heads and a three-layer LSTM shared with the critic.
# Defined for experimentation; the model below uses the memory-efficient variant.
policy_kwargs_complex = {
    "net_arch": {
        "pi": [512, 256, 128],
        "vf": [512, 256, 128],
    },
    "lstm_hidden_size": 512,
    "n_lstm_layers": 3,
    "enable_critic_lstm": True,
    "lstm_kwargs": {"dropout": 0.25},
}

# Smaller variant actually used for training.
# NOTE(review): torch.nn.LSTM documents `dropout` as acting between stacked
# layers; with n_lstm_layers=1 confirm this setting has the intended effect.
policy_kwargs_memory_efficient = {
    "net_arch": {
        "pi": [256, 128],
        "vf": [256, 128],
    },
    "lstm_hidden_size": 256,
    "n_lstm_layers": 1,
    "lstm_kwargs": {"dropout": 0.1},
}

# --- Model and training ---------------------------------------------------
tensorboard_dir = f'{saving_path}sequence_{sequence}__PPO_1h_no_costs_50k_balance_reduced_LSTM2/'
model = RecurrentPPO(
    'MlpLstmPolicy',
    train_env,
    policy_kwargs=policy_kwargs_memory_efficient,
    tensorboard_log=tensorboard_dir,
    seed=42,
    verbose=0,
)

callbacks = [ForexTensorboardCallback(), eval_callback]

model.learn(
    total_timesteps=10_000_000,  # Adjust as needed
    callback=callbacks,
)

# Persist the final model together with the training env's normalization state.
model.save(f'{saving_path}{sequence}_best_model_core.zip')
train_env.save(f'{saving_path}{sequence}_vec_normalize_core.pkl')


In [1]:
import os
import sys
import pandas as pd
import numpy as np
import pytz
from typing import List, Optional
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [None]:
import pandas as pd
import numpy as np
from typing import List, Optional
import logging

def convert_5min_to_hourly(source_path: str, output_path: str) -> pd.DataFrame:
    """
    Convert 5-minute data with daily indicators to hourly data while preserving indicator values.

    OHLC columns are aggregated per hour as first/max/min/last. Every other
    column is treated as an indicator and reduced to its last value in the
    hour. Hours containing any NaN after aggregation are dropped.

    The source frame must be indexed by timestamps, since it is resampled.

    Args:
        source_path: Path to source 5-minute parquet file
        output_path: Path to save the hourly data

    Returns:
        DataFrame with hourly data (columns: open, high, low, close, then the
        indicator columns in their source order)
    """
    # Read 5-minute data
    df_5min = pd.read_parquet(source_path)

    ohlc_rules = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last'}
    # For daily indicators, the last value of each hour is representative
    # since they only change once per day anyway.
    indicator_rules = {col: 'last' for col in df_5min.columns if col not in ohlc_rules}

    # Lowercase 'h' is used because the uppercase 'H' alias is deprecated in
    # recent pandas releases. One agg call replaces a per-column resample loop.
    df_hourly = df_5min.resample('1h').agg({**ohlc_rules, **indicator_rules})

    # Remove any NaN rows
    df_hourly = df_hourly.dropna()

    # Save to parquet
    df_hourly.to_parquet(output_path)

    return df_hourly

def process_currency_pairs(currency_pairs: List[str], 
                         source_dir: str,
                         output_dir: str) -> None:
    """
    Convert the 5-minute file of each currency pair into an hourly file.

    A failure for one pair is printed and does not stop the remaining pairs.

    Args:
        currency_pairs: List of currency pairs to process
        source_dir: Directory containing 5min data files
        output_dir: Directory to save hourly data files
    """
    for pair in currency_pairs:
        try:
            five_min_file = f"{source_dir}/{pair}_5min_1D_not_norm_10dec.parquet"
            hourly_file = f"{output_dir}/{pair}_1h_1D_not_norm_unbiased.parquet"

            print(f"Processing {pair}...")
            hourly = convert_5min_to_hourly(five_min_file, hourly_file)
            print(f"Completed {pair}. Shape: {hourly.shape}")
        except Exception as exc:
            # Best effort: report the problem and move on to the next pair.
            print(f"Error processing {pair}: {str(exc)}")

# Example usage: convert the 5-minute metal files to hourly files.
if __name__ == "__main__":
    source_dir = "/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm"
    output_dir = "/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/unbiased/not_norm"
    currency_pairs = ['XAU_USD', 'XAG_USD']

    process_currency_pairs(
        currency_pairs=currency_pairs,
        source_dir=source_dir,
        output_dir=output_dir,
    )

In [None]:
# Spot-check one converted hourly file.
# The path gets its own name so it no longer overwrites `output_dir`, which
# the conversion cell uses for the output *directory*.
eur_usd_hourly_path = "/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/unbiased/not_norm/EUR_USD_1h_1D_not_norm_unbiased.parquet"
df = pd.read_parquet(eur_usd_hourly_path)
df

In [None]:

import os
import sys
import pandas as pd
import numpy as np
import pytz
from typing import List, Optional
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from unbiased_data import process_currency_pairs, prepare_unbiased_dataset_row_by_row

from data_management.indicator_manager import IndicatorManager
from data_management.preprocessor import DataPreprocessor

import logging

# Helpers from the project's data_management package.
# `processor` is created here but not used by the loop in this cell.
indicator_manager = IndicatorManager()
processor = DataPreprocessor()

# Log to a file and to the notebook output at the same time.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[
        logging.FileHandler('dataset_prep.log'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger('dataset_prep')

# Batches of currency pairs; only one batch is processed per run of this cell.
currencies_2 = ['EUR_CAD', 'EUR_USD', 'GBP_USD']
currencies_3 = ['AUD_USD', 'CHF_JPY']
currencies_5 = ['USD_CHF', 'USD_JPY', 'AUD_JPY']

eur_only = ['EUR_USD']

for ccy in currencies_2:
    logger.info(f'Starting processing for {ccy} at {pd.Timestamp.now()}')

    # Raw 1-minute source data for this pair.
    df = pd.read_parquet(f'/Volumes/ssd_fat2/ai6_trading_bot/datasets/1min/{ccy}.parquet')

    # Attach daily-timeframe indicators, then drop rows that still contain NaN.
    df_with_indicators = prepare_unbiased_dataset_row_by_row(
        df=df,
        indicator_manager=indicator_manager,
        indicator_timeframe='D',
        verbose=True,
    ).dropna()

    output_path_not_norm = f'/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/unbiased/not_norm/{ccy}_1h_1D_not_norm_unbiased.parquet'
    df_with_indicators.to_parquet(output_path_not_norm)

    logger.info(f'Finished processing for {ccy} at {pd.Timestamp.now()}')
 

In [None]:

import os
import sys
import pandas as pd
import numpy as np
import pytz
from typing import List, Optional
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from visualization.chart_manager import ChartManager
chart_manager = ChartManager()

df = pd.read_parquet('EUR_USD_5min_1D_all_indic_not_norm_unbiased.parquet')
# Window intended for chart_manager.chart(df, start_time, end_time);
# the plotly figure below plots the full frame.
start_time = pd.Timestamp('2022-09-27 08:00').tz_localize('UTC')
end_time = pd.Timestamp('2024-11-27 09:00').tz_localize('UTC')

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Two stacked panels sharing the x-axis: price with Ichimoku lines on top, MACD below.
fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                    vertical_spacing=0.1,
                    subplot_titles=('Ichimoku Cloud', 'MACD'))

# (column, legend name, line colour, subplot row) in drawing order.
line_traces = [
    ('close', 'Close', 'blue', 1),
    ('senkou_span_a', 'Senkou Span A', 'green', 1),
    ('senkou_span_b', 'Senkou Span B', 'red', 1),
    ('tenkan_sen', 'Tenkan Sen', 'orange', 1),
    ('kijun_sen', 'Kijun Sen', 'purple', 1),
    ('macd_signal', 'MACD Signal', 'orange', 2),
    ('macd', 'MACD', 'blue', 2),
]

# Traces are added inside a loop on purpose: this notebook sets
# ast_node_interactivity = "all", so a top-level `fig.add_trace(...)` call
# would echo (render) the whole figure once per trace.
for column, trace_name, colour, row in line_traces:
    fig.add_trace(
        go.Scatter(x=df.index, y=df[column], name=trace_name, line=dict(color=colour)),
        row=row, col=1,
    )
for _ in (None,):
    fig.add_trace(
        go.Bar(x=df.index, y=df['macd_hist'], name='MACD Histogram', marker_color='gray'),
        row=2, col=1,
    )

# Assigning the return value keeps this call from echoing the figure as well.
fig = fig.update_layout(height=800, width=1200, showlegend=True)

# Show the figure (once)
fig.show()


In [None]:
# List the columns available in the GBP_JPY 5-minute indicator file.
gbp_jpy_5min_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm/GBP_JPY_5min_1D_not_norm_unbiased_dll_indics_09dec.parquet'
df = pd.read_parquet(gbp_jpy_5min_path)
df.columns

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Define start and end dates for the chart
start_date = '2023-01-01'
end_date = '2023-12-31'

# Read and filter data
df = pd.read_parquet('/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/df_with_all_indics_unbiased/not_norm/AUD_USD_5min_1D_not_norm_unbiased_dll_indics_09dec.parquet')
df_filtered = df.loc[start_date:end_date]

# One entry per stacked panel: (title, relative height, series).
# Each series is (axes method, column, legend label, style kwargs).
thin = 0.3  # line width used where several series share a panel
panels = [
    ('Ichimoku Cloud', 2, [
        ('plot', 'close', 'Close', dict(color='blue')),
        ('plot', 'senkou_span_a', 'Senkou Span A', dict(color='green')),
        ('plot', 'senkou_span_b', 'Senkou Span B', dict(color='red')),
        ('plot', 'tenkan_sen', 'Tenkan Sen', dict(color='orange')),
        ('plot', 'kijun_sen', 'Kijun Sen', dict(color='purple')),
    ]),
    ('MACD', 1, [
        ('plot', 'macd', 'MACD', dict(color='blue')),
        ('plot', 'macd_signal', 'Signal', dict(color='orange')),
        ('bar', 'macd_hist', 'Histogram', dict(color='gray', alpha=0.3)),
    ]),
    ('roc_10', 1, [('plot', 'roc_10', 'roc_10', dict(color='blue'))]),
    ('stoch_rsi', 1, [('plot', 'stoch_rsi', 'stoch_rsi', dict(color='blue'))]),
    ('stoch', 1, [
        ('plot', 'stoch_k', 'stoch_k', dict(color='blue', linewidth=thin)),
        ('plot', 'stoch_d', 'stoch_d', dict(color='green', linewidth=thin)),
    ]),
    ('bollinger', 1, [
        ('plot', 'bb_upper', 'bb_upper', dict(color='blue', linewidth=thin)),
        ('plot', 'bb_middle', 'bb_middle', dict(color='green', linewidth=thin)),
        ('plot', 'bb_lower', 'bb_lower', dict(color='red', linewidth=thin)),
        ('plot', 'close', 'close', dict(color='black', linewidth=0.5)),
    ]),
    ('bb_bandwidth', 1, [('plot', 'bb_bandwidth', 'bb_bandwidth', dict(color='black'))]),
    ('bb_percent', 1, [('plot', 'bb_percent', 'bb_percent', dict(color='black'))]),
    ('atr', 1, [('plot', 'atr', 'atr', dict(color='black'))]),
    ('dmi', 1, [
        ('plot', 'plus_di', 'plus_di', dict(color='green', linewidth=thin)),
        ('plot', 'minus_di', 'minus_di', dict(color='red', linewidth=thin)),
    ]),
    ('adx', 1, [('plot', 'adx', 'adx', dict(color='black'))]),
    ('rsi', 1, [('plot', 'rsi', 'rsi', dict(color='black'))]),
]

# Create the figure: one row per panel, 12 x 40 inches overall, with the
# price panel twice as tall as the indicator panels.
fig, axes = plt.subplots(
    len(panels), 1,
    figsize=(12, 40),
    height_ratios=[height for _, height, _ in panels],
)

for ax, (title, _, series) in zip(axes, panels):
    for method, column, label, style in series:
        draw = ax.bar if method == 'bar' else ax.plot
        draw(df_filtered.index, df_filtered[column], label=label, **style)
    ax.set_title(title)
    # Every panel gets a legend (placed just outside the right edge), so the
    # multi-series panels such as stoch, bollinger and dmi are readable too.
    ax.legend(loc='upper left', bbox_to_anchor=(1, 1))

# Rotate x-axis labels; pyplot applies this to the current (last) axes only.
plt.xticks(rotation=45)

# Adjust the layout to prevent overlapping
plt.tight_layout()

# Show the plot
plt.show()

In [None]:


from unbiased_data import process_currency_pairs, prepare_unbiased_dataset_row_by_row

from data_management.indicator_manager import IndicatorManager
from data_management.preprocessor import DataPreprocessor

import logging

# Helpers from the project's data_management package.
indicator_manager = IndicatorManager()
processor = DataPreprocessor()

# Log to a file and to the notebook output at the same time.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[
        logging.FileHandler('dataset_prep.log'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger('dataset_prep')

# Batches of currency pairs; the loop below runs on `eur_only`.
currencies_1 = ['GBP_CHF', 'GBP_JPY', 'EUR_CHF']
currencies_2 = ['EUR_CAD', 'EUR_USD', 'GBP_USD']
currencies_3 = ['USD_CAD', 'AUD_USD', 'CHF_JPY']
currencies_4 = ['NZD_JPY', 'XAU_USD', 'XAG_USD']
currencies_5 = ['USD_CHF', 'USD_JPY', 'AUD_JPY']
currencies_6 = ['EUR_JPY', 'EUR_GBP', 'NZD_USD']

eur_only = ['EUR_USD']

for ccy in eur_only:
    logger.info(f'Starting processing for {ccy} at {pd.Timestamp.now()}')

    # Raw 1-minute source data for this pair.
    df = pd.read_parquet(f'/Users/floriankockler/Library/CloudStorage/OneDrive-kockler/usb_stick_6dec/1min_source/{ccy}.parquet')

    # Attach daily-timeframe indicators, then drop rows that still contain NaN.
    df_with_indicators = prepare_unbiased_dataset_row_by_row(
        df=df,
        indicator_manager=indicator_manager,
        indicator_timeframe='D',
        verbose=True,
    ).dropna()

    # Un-normalized output first ...
    output_path_not_norm = f'./{ccy}_5min_1D_indic_not_norm_unbiased_full.parquet'
    df_with_indicators.to_parquet(output_path_not_norm)

    # ... then the normalized counterpart.
    df_norm = processor.normalize_simple(df=df_with_indicators)
    output_path = f'./{ccy}_5min_1D_norm_unbiased_full.parquet'
    df_norm.to_parquet(output_path)

    logger.info(f'Finished processing for {ccy} at {pd.Timestamp.now()}')
 

In [None]:
# Input training set and the path its cleaned copy is written to.
train_set = '/Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/EUR_USD_5min_1H_norm_unbiased.parquet'
cleaned_train_set = "/Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/EUR_USD_5min_1H_norm_unbiased1.parquet"

df = pd.read_parquet(train_set)

# Replace infinities in bb_percent: +inf becomes 1, -inf becomes 0.
infinities = [np.inf, -np.inf]
replacements = [1, 0]
df['bb_percent'] = df['bb_percent'].replace(infinities, replacements)

df.to_parquet(cleaned_train_set)

In [None]:
# Display the raw 1-minute frame for one pair.
# NOTE: `ccy` is not defined in this cell; it is whatever value the most
# recently executed processing loop left behind.
not_norm_path = f'/Volumes/ssd_fat2/ai6_trading_bot/datasets/1min/{ccy}.parquet'
not_norm = pd.read_parquet(not_norm_path)
not_norm


In [None]:
# Display the normalized frame for the same pair.
# NOTE: `ccy` is not defined in this cell; it is whatever value the most
# recently executed processing loop left behind.
norm_path = f'./{ccy}_5min_1H_norm_unbiased.parquet'
norm = pd.read_parquet(norm_path)
norm

In [None]:
# import pandas as pd
# df = pd.read_parquet(f'/Volumes/ssd_fat2/ai6_trading_bot/datasets/1min/EUR_USD.parquet')
# df_test = df.head(5000)
# df_test.to_parquet(f'/Volumes/ssd_fat2/ai6_trading_bot/datasets/1min/EUR_USD_test.parquet')