In [None]:
import os
import gym
from gym import spaces
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from stable_baselines3 import PPO, DQN
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import torch as th
import joblib
from scipy.signal import argrelextrema
from torch import nn
from sklearn.preprocessing import MinMaxScaler
from scipy import stats

# Custom Neural Network Architecture
class CustomFeatureExtractor(BaseFeaturesExtractor):
    """Two-layer MLP feature extractor for flat (1-D) observations.

    The network is ``Linear(n_inputs, hidden_dim) -> ReLU ->
    Linear(hidden_dim, features_dim) -> ReLU`` where ``n_inputs`` is the first
    (and only used) dimension of the observation space.

    Args:
        observation_space: Box space whose ``shape[0]`` gives the input width.
        features_dim: Width of the output feature vector. Defaults to 256,
            the value that was previously hard-coded.
        hidden_dim: Width of the hidden layer. Defaults to 128, the value that
            was previously hard-coded.
    """

    def __init__(self, observation_space: spaces.Box, features_dim: int = 256, hidden_dim: int = 128):
        super().__init__(observation_space, features_dim=features_dim)
        n_inputs = observation_space.shape[0]
        # Layer names/indices are unchanged ("net.0", "net.2"), so state dicts
        # saved with the default sizes still load.
        self.net = nn.Sequential(
            nn.Linear(n_inputs, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, features_dim),
            nn.ReLU(),
        )

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """Map a batch of observations to a batch of feature vectors."""
        return self.net(observations)

def find_local_extrema(prices, order=5):
    """Locate local minima and maxima of a price sequence.

    A point counts as a minimum (maximum) when it is less-or-equal
    (greater-or-equal) to its ``order`` neighbours on each side, so plateaus
    and ties are included.

    Returns:
        Tuple ``(minima_indices, maxima_indices)`` of integer index arrays.
    """
    series = np.array(prices)
    lows, highs = (
        argrelextrema(series, comparator, order=order)[0]
        for comparator in (np.less_equal, np.greater_equal)
    )
    return lows, highs

def _scaler_filename(month_train=None):
    """Return the on-disk scaler filename for an optional training-month tag."""
    return f'scaler_{month_train}.save' if month_train is not None else 'scaler.save'


def preprocess_data(filepath=None, scaler=None, fit_scaler=False, denoise_method='moving_average', window_size=5, data=None, month_train=None):
    """Load, clean, denoise and scale OHLCV data.

    Args:
        filepath: CSV path (or file-like object) with a ``timestamp`` column;
            only read when ``data`` is None.
        scaler: Object exposing ``transform``. When None, a scaler is either
            fitted and saved (``fit_scaler=True``) or loaded from disk.
        fit_scaler: Only consulted when ``scaler`` is None.
        denoise_method: ``'moving_average'``, ``'butterworth'`` or ``'wavelet'``.
        window_size: Rolling window used by the moving-average denoiser.
        data: DataFrame with a ``timestamp`` column and OHLCV columns, either
            lower-case or capitalised. Takes precedence over ``filepath``.
        month_train: Optional tag embedded in the scaler filename
            (``scaler_<tag>.save``); otherwise ``scaler.save`` is used.

    Returns:
        ``(df_full, obs, df_unscaled, scaler)`` where ``df_full`` holds scaled
        Open/High/Low/Volume plus unscaled Close and ``timestamp``, ``obs`` is
        a 2-D array of ``[Close, scaled Volume]`` rows, ``df_unscaled`` is the
        denoised frame before scaling, and ``scaler`` is the scaler used.

    Raises:
        ValueError: If neither ``filepath`` nor ``data`` is given, or the
            denoise method is unknown.
        KeyError: If the OHLCV columns are missing.
    """
    if data is None:
        if filepath is None:
            raise ValueError("Either 'filepath' or 'data' must be provided.")
        # `date_parser` is deprecated in pandas 2.x; parse explicitly instead.
        df = pd.read_csv(filepath)
        df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)
    else:
        df = data.copy()
    df = df.sort_values('timestamp').reset_index(drop=True)
    df = df.set_index('timestamp')

    # Check for column names and select accordingly
    ohlcv_lower = ['open', 'high', 'low', 'close', 'volume']
    ohlcv_title = ['Open', 'High', 'Low', 'Close', 'Volume']
    if all(col in df.columns for col in ohlcv_lower):
        df = df[ohlcv_lower].rename(columns=dict(zip(ohlcv_lower, ohlcv_title)))
    elif all(col in df.columns for col in ohlcv_title):
        df = df[ohlcv_title]
    else:
        raise KeyError("Dataframe does not have the required columns.")

    # Drop rows whose Close is zero/negative, then rows with missing values.
    # Chaining yields a fresh frame, so later column assignments never write
    # into a slice of another DataFrame.
    df = df[df['Close'] > 0].dropna()

    # Denoise price columns (Volume is left untouched).
    price_cols = ['Open', 'High', 'Low', 'Close']
    if denoise_method == 'moving_average':
        df[price_cols] = df[price_cols].rolling(window=window_size, min_periods=1).mean()
    elif denoise_method == 'butterworth':
        # Apply Butterworth filter
        from scipy.signal import butter, filtfilt

        def butter_lowpass_filter(values, cutoff, fs, order=5):
            nyq = 0.5 * fs
            normal_cutoff = cutoff / nyq
            b, a = butter(order, normal_cutoff, btype='low', analog=False)
            # filtfilt runs the filter forward and backward over the series,
            # so each output sample depends on later samples as well.
            return filtfilt(b, a, values)

        cutoff = 0.1  # Adjust as needed
        fs = 1        # Sampling frequency
        for col in price_cols:
            df[col] = butter_lowpass_filter(df[col], cutoff, fs)
    elif denoise_method == 'wavelet':
        # Apply Wavelet Denoising
        import pywt

        def wavelet_denoise(values):
            coeffs = pywt.wavedec(values, 'db1', level=2)
            # NOTE(review): the threshold is derived from max(i), not
            # max(abs(i)); confirm this is intended for detail coefficients
            # that are mostly negative.
            coeffs[1:] = [pywt.threshold(i, value=0.1 * max(i)) for i in coeffs[1:]]
            # waverec may return one extra sample for odd-length input.
            return pywt.waverec(coeffs, 'db1')[:len(values)]

        for col in price_cols:
            df[col] = wavelet_denoise(df[col].values)
    else:
        raise ValueError(f"Unsupported denoise method: {denoise_method}")

    # Keep a copy of the unscaled data for plotting and indicator calculation
    df_unscaled = df.copy().reset_index()

    # Normalize input features (except 'Close' price)
    feature_cols = ['Open', 'High', 'Low', 'Volume']
    if scaler is None:
        scaler_path = _scaler_filename(month_train)
        if fit_scaler:
            scaler = MinMaxScaler()
            scaler.fit(df[feature_cols])
            joblib.dump(scaler, scaler_path)
        else:
            scaler = joblib.load(scaler_path)

    df[feature_cols] = scaler.transform(df[feature_cols])

    # Full frame keeps every column and exposes 'timestamp' as a column again.
    df_full = df.copy().reset_index()

    # Observation matrix: unscaled Close and scaled Volume only.
    df_obs = df[['Close', 'Volume']].copy().reset_index(drop=True)

    return df_full, df_obs.values, df_unscaled, scaler

def detect_price_pattern(prices, window=20):
    """Classify a price series as "Uptrend", "Downtrend" or "Sideways".

    Simple returns are smoothed with a ``window``-point moving average and a
    straight line is fitted to the smoothed series. The slope is called
    significant when ``|slope / stderr| > 2``, where ``stderr`` is the
    standard error of the slope reported by ``scipy.stats.linregress``.

    The previous statistic, ``slope / (std(y) / sqrt(n))``, is bounded by
    ``sqrt(12 n / (n**2 - 1))`` for any data, which is below 2 once n >= 4,
    so a trend could never be reported on realistic input.

    Args:
        prices: 1-D sequence of prices.
        window: Length of the moving average applied to the returns.

    Returns:
        One of ``"Uptrend"``, ``"Downtrend"``, ``"Sideways"``. Series too
        short to yield three smoothed points, or producing non-finite
        returns, are reported as ``"Sideways"``.
    """
    prices = np.asarray(prices, dtype=float)
    # len(rolling_mean) == len(prices) - window; a slope standard error needs
    # at least three points.
    if prices.size < window + 3:
        return "Sideways"

    with np.errstate(divide='ignore', invalid='ignore'):
        returns = np.diff(prices) / prices[:-1]
    rolling_mean = np.convolve(returns, np.ones(window), 'valid') / window
    if not np.all(np.isfinite(rolling_mean)):
        return "Sideways"

    # Calculate the slope of the trend line and its standard error
    x = np.arange(len(rolling_mean))
    slope, _, _, _, std_err = stats.linregress(x, rolling_mean)
    if not np.isfinite(slope) or np.isnan(std_err):
        return "Sideways"

    if std_err > 0:
        t_stat = slope / std_err
    elif slope != 0:
        # Perfect linear fit: the trend is as significant as it can be.
        t_stat = np.copysign(np.inf, slope)
    else:
        t_stat = 0.0

    if t_stat > 2:  # Statistically significant positive trend
        return "Uptrend"
    if t_stat < -2:  # Statistically significant negative trend
        return "Downtrend"
    return "Sideways"

# Indicator functions
def ema(series, period):
    """Exponential moving average of ``series`` with span ``period`` (recursive form, ``adjust=False``)."""
    weighted = series.ewm(span=period, adjust=False)
    return weighted.mean()

def rsi(series, period=14):
    """Relative Strength Index based on simple rolling means of gains and losses.

    Gains and losses are averaged over a ``period``-row rolling window
    (partial windows allowed), and the result is ``100 - 100 / (1 + RS)``
    with ``RS = mean gain / mean loss``. Rows where both means are zero
    evaluate to NaN.
    """
    change = series.diff()
    ups = change.where(change > 0, 0.0)
    downs = -change.where(change < 0, 0.0)
    mean_up = ups.rolling(window=period, min_periods=1).mean()
    mean_down = downs.rolling(window=period, min_periods=1).mean()
    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))

def macd(series, fastperiod=12, slowperiod=26, signalperiod=9):
    """Return ``(macd_line, signal_line)`` for ``series``.

    The MACD line is the fast EMA minus the slow EMA; the signal line is the
    EMA of the MACD line over ``signalperiod``.
    """
    fast_avg, slow_avg = (ema(series, span) for span in (fastperiod, slowperiod))
    spread = fast_avg - slow_avg
    return spread, ema(spread, signalperiod)

def calculate_indicators(data):
    """Add RSI, fast/slow EMA, MACD and EMA-envelope columns to ``data``.

    The frame is modified in place and also returned. All indicators are
    derived from the ``Close`` column.
    """
    close = data['Close']

    data['RSI'] = rsi(close, period=14)
    data['EMA_Fast'] = ema(close, period=3)
    data['EMA_Slow'] = ema(close, period=9)

    macd_line, macd_signal = macd(close)
    data['MACD'] = macd_line
    data['MACD_Signal'] = macd_signal

    # Envelope: a band of +/-0.3% around the 21-period EMA.
    envelope_length = 21
    envelope_percent = 0.3 / 100
    envelope_mid = ema(close, period=envelope_length)
    data['Envelope_Upper'] = envelope_mid * (1 + envelope_percent)
    data['Envelope_Lower'] = envelope_mid * (1 - envelope_percent)
    return data

def _crossing(now_cond, prev_cond):
    """Return a mask that is True at row i when now_cond[i] and prev_cond[i-1] hold (row 0 is always False)."""
    trigger = np.zeros(len(now_cond), dtype=bool)
    trigger[1:] = now_cond[1:] & prev_cond[:-1]
    return trigger


def _decaying_score(trigger, peak=100, decay=10):
    """Score that jumps to ``peak`` on a trigger row and then drops by ``decay`` per row, floored at 0."""
    n = len(trigger)
    if n == 0:
        return np.zeros(0, dtype=np.int64)
    idx = np.arange(n)
    # Index of the most recent trigger at or before each row (-1 = none yet).
    last_trigger = np.maximum.accumulate(np.where(trigger, idx, -1))
    score = np.maximum(0, peak - decay * (idx - last_trigger))
    return np.where(last_trigger >= 0, score, 0).astype(np.int64)


def calculate_scores(data):
    """Compute decaying buy/sell signal scores from indicator columns.

    Expects the columns ``EMA_Fast``, ``EMA_Slow``, ``MACD``, ``MACD_Signal``,
    ``RSI``, ``Envelope_Upper`` and ``Envelope_Lower``. Each of the eight
    component scores is set to 100 on the row where its crossing condition
    fires and otherwise equals the previous row's score minus 10 (never below
    0). Comparisons involving NaN are False, so NaN rows never fire.

    The crossings and the decay are evaluated with array operations; the
    values match a row-by-row evaluation of the same rules.

    Returns:
        A new frame (index reset to 0..n-1) with the eight component columns
        plus ``score``, ``sell_score``, ``total_percent`` and
        ``total_sell_percent``. The input frame is not modified.
    """
    data = data.reset_index(drop=True)

    fast = data['EMA_Fast'].to_numpy(dtype=float)
    slow = data['EMA_Slow'].to_numpy(dtype=float)
    macd_line = data['MACD'].to_numpy(dtype=float)
    macd_signal = data['MACD_Signal'].to_numpy(dtype=float)
    rsi_values = data['RSI'].to_numpy(dtype=float)
    env_upper = data['Envelope_Upper'].to_numpy(dtype=float)
    env_lower = data['Envelope_Lower'].to_numpy(dtype=float)

    max_buy_score = 400  # Four buy components, 100 each
    max_sell_score = 400  # Four sell components, 100 each

    with np.errstate(invalid='ignore'):
        # Each entry pairs "condition on the current row" with "condition on
        # the previous row"; both must hold for the signal to fire.
        triggers = {
            # Buy: upward crossings
            'score_ema': _crossing(fast > slow, fast <= slow),
            'score_macd': _crossing((macd_line > macd_signal) & (macd_line < 0), macd_line <= macd_signal),
            'score_rsi30': _crossing(rsi_values > 30, rsi_values <= 30),
            'score_envelope': _crossing(fast > env_lower, fast <= env_lower),
            # Sell: downward crossings
            'sell_score_ema': _crossing(fast < slow, fast >= slow),
            'sell_score_macd': _crossing((macd_line < macd_signal) & (macd_line > 0), macd_line >= macd_signal),
            'sell_score_rsi70': _crossing(rsi_values < 70, rsi_values >= 70),
            'sell_score_envelope': _crossing(fast < env_upper, fast >= env_upper),
        }

    for column, trigger in triggers.items():
        data[column] = _decaying_score(trigger)

    data['score'] = data['score_ema'] + data['score_macd'] + data['score_rsi30'] + data['score_envelope']
    data['sell_score'] = data['sell_score_ema'] + data['sell_score_macd'] + data['sell_score_rsi70'] + data['sell_score_envelope']
    data['total_percent'] = (data['score'] / max_buy_score) * 100
    data['total_sell_percent'] = (data['sell_score'] / max_sell_score) * 100

    return data

# Trading Environment
class TradingEnv(gym.Env):
    """Single-asset trading environment with three discrete actions.

    Actions: ``0`` sells the whole position, ``2`` buys as many units as cash
    and ``max_stock`` allow, and any other value (``1``) leaves the position
    unchanged. Trades execute at the unscaled ``Close`` of the current row and
    pay ``transaction_cost`` as a fraction of traded value.

    Each observation concatenates the current row of ``data_obs``, optionally
    the selected indicator/score columns (NaN replaced by 0), the
    balance as a fraction of the initial balance, and the position as a
    fraction of ``max_stock``.

    Args:
        data_full: Scaled frame; stored as ``self.data_full``.
        data_obs: 2-D array of observation features, one row per step.
        data_unscaled: Frame with ``timestamp`` and unscaled ``Close``; the
            indicator and score columns are computed on a copy of it.
        scaler: Accepted for interface compatibility; not used here.
        transaction_cost: Proportional cost applied to buys and sells.
        max_stock: Maximum number of units that may be held.
        save_dir: Directory created on construction if it does not exist.
        technical_indicators: Whether indicator columns are part of the
            observation.
        technical_indicators_list: Columns to expose as indicators. When
            empty or None, ``DEFAULT_TECHNICAL_INDICATORS`` is used.
        window_size: Stored as ``self.window_size``; not used by this class.
    """

    # Columns used when the caller does not request a specific indicator set.
    DEFAULT_TECHNICAL_INDICATORS = (
        'RSI', 'MACD', 'MACD_Signal', 'score', 'sell_score', 'total_percent', 'total_sell_percent',
    )

    def __init__(self, data_full, data_obs, data_unscaled, scaler, transaction_cost=0.0001, max_stock=100, save_dir='frames',
                 technical_indicators=True, technical_indicators_list=None, window_size=14):
        super().__init__()
        self.data_full = data_full.reset_index(drop=True)  # Scaled data
        self.data = data_obs        # For observations (partially scaled)
        self.current_step = 0
        self.balance = 10000
        self.initial_balance = 10000
        self.stock_held = 0
        self.transaction_cost = transaction_cost
        self.max_stock = max_stock
        self.window_size = window_size
        self.save_dir = save_dir
        self.technical_indicators = technical_indicators
        self.cost_basis = 0.0
        self.trade_history = []
        self.position_returns = []

        self.action_space = spaces.Discrete(3)

        # Calculate custom indicators and scores on a re-indexed copy of the
        # unscaled data so the caller's frame is not modified.
        indicator_frame = calculate_indicators(data_unscaled.reset_index(drop=True))
        self.data_unscaled = calculate_scores(indicator_frame)

        # Honour the requested indicator columns; previously the argument was
        # overwritten unconditionally with the default list.
        if technical_indicators_list:
            self.technical_indicators_list = list(technical_indicators_list)
        else:
            self.technical_indicators_list = list(self.DEFAULT_TECHNICAL_INDICATORS)
        self.tech_ind_columns = self.technical_indicators_list

        if self.technical_indicators:
            # Use the indicators from unscaled data for observations
            self.tech_ind_data = self.data_unscaled[self.tech_ind_columns].fillna(0).values
            num_tech_indicators = self.tech_ind_data.shape[1]
            obs_shape = self.data.shape[1] + num_tech_indicators + 2  # Data columns + tech indicators + balance + stock_held
            # Mapping from indicator names to their column position
            self.tech_ind_col_indices = {col: idx for idx, col in enumerate(self.tech_ind_columns)}
        else:
            obs_shape = self.data.shape[1] + 2  # Data columns + balance + stock_held

        # Scalar bounds avoid building float64 arrays that are then cast to float32.
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_shape,), dtype=np.float32)

        os.makedirs(self.save_dir, exist_ok=True)

    def _get_observation(self):
        """Build the float32 observation for the current step (clamped to the last row)."""
        step = min(self.current_step, len(self.data) - 1)
        parts = [self.data[step]]
        if self.technical_indicators:
            parts.append(self.tech_ind_data[step])
        parts.append([self.balance / self.initial_balance, self.stock_held / self.max_stock])
        return np.concatenate(parts).astype(np.float32)

    def step(self, action):
        """Apply ``action`` at the current row and advance one step.

        Returns:
            ``(obs, reward, done, info)``. The reward is +/-1 depending on
            whether a buy/sell agrees with the 50% score threshold, plus the
            portfolio's return relative to the initial balance.
        """
        step_index = self.current_step
        frame = self.data_unscaled
        # Column-first scalar access avoids building a row Series on every step.
        current_price = frame['Close'].iat[step_index]  # Use unscaled price
        current_date = frame['timestamp'].iat[step_index]
        reward = 0
        done = False

        # Execute the action
        if action == 2:  # Buy
            max_buyable = int(self.balance / (current_price * (1 + self.transaction_cost)))
            buy_amount = min(max_buyable, self.max_stock - self.stock_held)
            if buy_amount > 0:
                total_cost = buy_amount * current_price * (1 + self.transaction_cost)
                held_before = self.stock_held
                self.balance -= total_cost
                self.stock_held += buy_amount
                # Volume-weighted average entry price of the open position.
                self.cost_basis = ((self.cost_basis * held_before) + (current_price * buy_amount)) / self.stock_held
                self.trade_history.append({
                    'step': step_index,
                    'type': 'buy',
                    'amount': buy_amount,
                    'price': current_price,
                    'date': current_date
                })
        elif action == 0:  # Sell
            if self.stock_held > 0:
                sell_amount = self.stock_held
                total_sale = sell_amount * current_price * (1 - self.transaction_cost)
                self.balance += total_sale
                entry_value = sell_amount * self.cost_basis
                self.position_returns.append((total_sale - entry_value) / entry_value)
                self.stock_held = 0
                self.cost_basis = 0
                self.trade_history.append({
                    'step': step_index,
                    'type': 'sell',
                    'amount': sell_amount,
                    'price': current_price,
                    'date': current_date
                })

        # Signal scores for the row the action was taken on
        total_percent = frame['total_percent'].iat[step_index]
        total_sell_percent = frame['total_sell_percent'].iat[step_index]

        buy_threshold = 50
        sell_threshold = 50

        if action == 2:  # Buy
            # Reward buying when the buy score is high, penalise otherwise
            reward += 1 if total_percent >= buy_threshold else -1
        elif action == 0:  # Sell
            # Reward selling when the sell score is high, penalise otherwise
            reward += 1 if total_sell_percent >= sell_threshold else -1

        # Update current step
        self.current_step += 1
        if self.current_step >= len(self.data):
            done = True

        # Portfolio value is marked at the price of the row just acted on.
        portfolio_value = self.balance + self.stock_held * current_price
        reward += (portfolio_value - self.initial_balance) / self.initial_balance

        obs = self._get_observation()
        info = {}

        return obs, reward, done, info

    def reset(self):
        """Restore the initial account state and return the first observation."""
        self.current_step = 0
        self.balance = self.initial_balance
        self.stock_held = 0
        self.cost_basis = 0
        self.trade_history = []
        self.position_returns = []
        return self._get_observation()

    def render(self, mode='human'):
        """No-op; ``mode`` is accepted so callers that pass a render mode do not fail."""
        pass  # Optional: Implement rendering if needed

def evaluate_model(model, env, num_episodes=1, dataset_name=''):
    """Run ``model`` on ``env`` and collect rewards, trade statistics and a per-step log.

    Every logged row describes the bar the action was taken on: the date and
    price come from ``env.data_unscaled`` at the step index read *before*
    ``env.step`` is called, while balance and position are the values after
    the action. Previously the index was read after stepping, which paired
    each action with the following bar and dropped the final action.

    Args:
        model: Object with ``predict(obs, deterministic=True)``.
        env: Environment exposing ``reset``, ``step``, ``current_step``,
            ``data_unscaled``, ``balance``, ``stock_held``,
            ``position_returns``, ``technical_indicators`` and
            ``tech_ind_columns``.
        num_episodes: Number of episodes to run; rows of all episodes are
            appended to the same results frame.
        dataset_name: Unused; kept for interface compatibility.

    Returns:
        ``(avg_reward, win_rate, cumulative_rewards_list, sharpe_ratio,
        position_returns, results_df)``. ``sharpe_ratio`` is mean/std of the
        closed-position returns, or 0 when there are none or their std is 0.
    """
    total_rewards = []
    cumulative_rewards_list = []
    position_returns = []

    # Collect data for plotting
    prices = []
    actions = []
    dates = []
    positions = []
    balances = []
    stock_held_list = []
    indicators = []

    for _ in range(num_episodes):
        obs = env.reset()
        done = False
        total_reward = 0
        cumulative_rewards = []

        while not done:
            # Remember which bar the action applies to before the env advances.
            step_index = env.current_step
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, done, _ = env.step(action)
            total_reward += reward
            cumulative_rewards.append(total_reward)

            row = env.data_unscaled.iloc[step_index]
            prices.append(row['Close'])  # Unscaled price
            dates.append(row['timestamp'])
            # predict() may return a 0-d/size-1 array; store a plain int.
            actions.append(int(np.asarray(action).item()))
            positions.append(env.stock_held)
            balances.append(env.balance)
            stock_held_list.append(env.stock_held)

            if env.technical_indicators:
                indicators.append(row[env.tech_ind_columns].values)

        total_rewards.append(total_reward)
        cumulative_rewards_list.append(cumulative_rewards)
        position_returns.extend(env.position_returns)

    avg_reward = np.mean(total_rewards)
    win_rate = len([r for r in position_returns if r > 0]) / len(position_returns) if position_returns else 0
    if position_returns:
        spread = np.std(position_returns)
        # A single trade or identical returns give std == 0; avoid inf/NaN.
        sharpe_ratio = np.mean(position_returns) / spread if spread > 0 else 0.0
    else:
        sharpe_ratio = 0

    # Prepare results DataFrame
    results_df = pd.DataFrame({
        'Date': dates,
        'Price': prices,
        'Action': actions,
        'Position': positions,
        'Balance': balances,
        'Stock Held': stock_held_list
    })

    # Add indicators if available
    if env.technical_indicators:
        indicators_df = pd.DataFrame(indicators, columns=env.tech_ind_columns)
        results_df = pd.concat([results_df.reset_index(drop=True), indicators_df.reset_index(drop=True)], axis=1)

    # Portfolio value, per-row returns and compounded cumulative return
    results_df['Portfolio Value'] = results_df['Balance'] + results_df['Stock Held'] * results_df['Price']
    results_df['Returns'] = results_df['Portfolio Value'].pct_change().fillna(0)
    results_df['Cumulative Return'] = (1 + results_df['Returns']).cumprod() - 1

    return avg_reward, win_rate, cumulative_rewards_list, sharpe_ratio, position_returns, results_df

def walk_forward_split(data, train_months=2, val_months=1, test_months=1, step_months=1, start_date=None, end_date=None):
    """Split ``data`` into consecutive train / validation / test windows.

    Exactly one split is produced, starting at the earliest available
    timestamp: ``train_months`` of training data, followed by ``val_months``
    of validation data and ``test_months`` of test data. The test window is
    truncated at the last available timestamp. Windows are inclusive on both
    ends and are returned even when empty.

    Args:
        data: Frame with a ``timestamp`` column (converted to UTC).
        train_months, val_months, test_months: Window lengths in months.
        step_months: Accepted for interface compatibility; not used because
            only a single window is generated.
        start_date, end_date: Optional inclusive bounds. Strings and naive or
            tz-aware timestamps are accepted; naive values are read as UTC.

    Returns:
        A list with one dict holding ``train_data``, ``val_data``,
        ``test_data`` and the matching ``*_start_date`` / ``*_end_date``
        timestamps, or an empty list when no rows fall in the date range.
    """
    data = data.copy()
    data['timestamp'] = pd.to_datetime(data['timestamp'], utc=True)
    data = data.sort_values('timestamp')

    # `utc=True` handles both naive and tz-aware bounds; tz_localize would
    # raise TypeError on an already tz-aware timestamp.
    if start_date is not None:
        data = data[data['timestamp'] >= pd.to_datetime(start_date, utc=True)]
    if end_date is not None:
        data = data[data['timestamp'] <= pd.to_datetime(end_date, utc=True)]

    if data.empty:
        print("No data available in the specified date range.")
        return []

    train_start = data['timestamp'].min()
    final_end_date = data['timestamp'].max()

    # Adjacent windows are separated by one second so they never share a row.
    one_second = pd.DateOffset(seconds=1)
    train_end = train_start + pd.DateOffset(months=train_months) - one_second
    val_start = train_end + one_second
    val_end = val_start + pd.DateOffset(months=val_months) - one_second
    test_start = val_end + one_second
    test_end = test_start + pd.DateOffset(months=test_months) - one_second

    # Adjust test_end if it exceeds the final available date
    if test_end > final_end_date:
        test_end = final_end_date

    def _between(start, end):
        stamps = data['timestamp']
        return data[(stamps >= start) & (stamps <= end)]

    return [{
        'train_data': _between(train_start, train_end),
        'val_data': _between(val_start, val_end),
        'test_data': _between(test_start, test_end),
        'train_start_date': train_start,
        'train_end_date': train_end,
        'val_start_date': val_start,
        'val_end_date': val_end,
        'test_start_date': test_start,
        'test_end_date': test_end
    }]

def infer_new_data(new_data, model_filename, scaler_filename, technical_indicators_list, algo_class):
    """Run a saved model over new data and return a per-step results frame.

    Args:
        new_data: Raw frame accepted by ``preprocess_data(data=...)``.
        model_filename: Path of the saved model to load via ``algo_class.load``.
        scaler_filename: Path of the scaler saved with ``joblib``.
        technical_indicators_list: Indicator columns passed to ``TradingEnv``.
        algo_class: Algorithm class exposing ``load`` (e.g. ``PPO`` or ``DQN``).

    Returns:
        DataFrame with ``Date``, ``Price``, ``Action``, ``Position``,
        ``Balance``, ``Stock Held``, the indicator columns and
        ``Portfolio Value``. Each row describes the bar the action was taken
        on, with balance and position as they are after the action.
    """
    # Load the scaler
    scaler = joblib.load(scaler_filename)

    # Preprocess new data with the already-fitted scaler (nothing is saved).
    new_data_full, new_data_obs, new_unscaled_data, _ = preprocess_data(
        data=new_data,
        scaler=scaler,
        fit_scaler=False,
        month_train=None
    )

    # Create environment
    env = TradingEnv(
        data_full=new_data_full,
        data_obs=new_data_obs,
        data_unscaled=new_unscaled_data,
        scaler=scaler,
        technical_indicators=True,
        technical_indicators_list=technical_indicators_list,
        window_size=14
    )

    # 'auto' selects the GPU when one is available and falls back to the CPU,
    # so inference no longer fails on machines without CUDA.
    model = algo_class.load(
        model_filename,
        env=env,
        device='auto',
        custom_objects={
            'features_extractor_class': CustomFeatureExtractor
        }
    )

    # Run inference
    obs = env.reset()
    done = False

    dates = []
    prices = []
    actions = []
    balances = []
    stock_held = []
    indicator_rows = []

    while not done:
        # Read the step index before stepping so the logged price/date belong
        # to the bar the action was taken on, and the last action is kept.
        step_index = env.current_step
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)

        row = env.data_unscaled.iloc[step_index]
        dates.append(row['timestamp'])
        prices.append(row['Close'])  # Unscaled price
        # predict() may return a 0-d/size-1 array; store a plain int.
        actions.append(int(np.asarray(action).item()))
        balances.append(env.balance)
        stock_held.append(env.stock_held)
        if env.technical_indicators:
            indicator_rows.append(row[env.tech_ind_columns].values)

    # Create a DataFrame with results ('Position' mirrors 'Stock Held').
    results_df = pd.DataFrame({
        'Date': dates,
        'Price': prices,
        'Action': actions,
        'Position': stock_held,
        'Balance': balances,
        'Stock Held': stock_held
    })

    if env.technical_indicators:
        indicators_df = pd.DataFrame(indicator_rows, columns=env.tech_ind_columns)
        results_df = pd.concat([results_df.reset_index(drop=True), indicators_df.reset_index(drop=True)], axis=1)

    # Calculate Portfolio Value
    results_df['Portfolio Value'] = results_df['Balance'] + results_df['Stock Held'] * results_df['Price']

    return results_df

if __name__ == '__main__':
    filepath = 'test_06_08.csv'

    # Load full data and fit scaler with the first training set
    full_data, obs_data, unscaled_data, _ = preprocess_data(filepath, fit_scaler=False)

    # Ensure 'timestamp' is in the dataframe
    if 'timestamp' not in full_data.columns:
        full_data['timestamp'] = unscaled_data['timestamp']

    # Adjust your splits accordingly
    # Adjust your splits accordingly
    splits = walk_forward_split(
        unscaled_data,
        train_months=1,
        val_months=1,
        test_months=1,
        step_months=0,
        start_date='2024-06-01',
        end_date='2024-08-30'
    )

    if not splits:
        print("No splits were generated. Please check the date range and data availability.")
    else:
        algorithms = {
            'PPO': PPO,
            'DQN': DQN
        }

        results_list = []

        technical_indicators_list = ['RSI', 'MACD', 'MACD_Signal', 'score', 'sell_score', 'total_percent', 'total_sell_percent']

        for split_index, split_data in enumerate(splits):
            train_data = split_data['train_data']
            val_data = split_data['val_data']
            test_data = split_data['test_data']
            train_start_date = split_data['train_start_date']
            train_end_date = split_data['train_end_date']
            val_start_date = split_data['val_start_date']
            val_end_date = split_data['val_end_date']
            test_start_date = split_data['test_start_date']
            test_end_date = split_data['test_end_date']

            # Get month_train from train_start_date
            month_train = train_start_date.strftime('%Y%m')

            # Detect price patterns
            train_pattern = detect_price_pattern(train_data['Close'].values)
            test_pattern = detect_price_pattern(test_data['Close'].values)

            print(f"\nWalk-Forward Split {split_index + 1}")
            print(f"Train Data Range: {train_start_date.date()} to {train_end_date.date()} - Pattern: {train_pattern}")
            print(f"Validation Data Range: {val_start_date.date()} to {val_end_date.date()}")
            print(f"Test Data Range: {test_start_date.date()} to {test_end_date.date()} - Pattern: {test_pattern}")

            for algo_name, algo_class in algorithms.items():
                print(f"\nTraining {algo_name} on Split {split_index + 1} with Technical Indicators...")

                model = None
                policy_kwargs = dict(
                    features_extractor_class=CustomFeatureExtractor,
                    net_arch=[256, 256],
                    activation_fn=th.nn.ReLU
                )

                # Preprocess training data and save scaler with month_train
                train_data_full, train_data_obs, train_unscaled, scaler = preprocess_data(
                    data=train_data,
                    scaler=None,
                    fit_scaler=True,
                    month_train=month_train
                )

                # Preprocess validation data
                val_data_full, val_data_obs, val_unscaled, _ = preprocess_data(
                    data=val_data,
                    scaler=scaler,
                    fit_scaler=False,
                    month_train=month_train
                )

                env_train = TradingEnv(
                    data_full=train_data_full,
                    data_obs=train_data_obs,
                    data_unscaled=train_unscaled,
                    scaler=scaler,
                    technical_indicators=True,
                    technical_indicators_list=technical_indicators_list,
                    window_size=14
                )

                env_val = TradingEnv(
                    data_full=val_data_full,
                    data_obs=val_data_obs,
                    data_unscaled=val_unscaled,
                    scaler=scaler,
                    technical_indicators=True,
                    technical_indicators_list=technical_indicators_list,
                    window_size=14
                )

                model = algo_class(
                    'MlpPolicy',
                    env_train,
                    policy_kwargs=policy_kwargs,
                    verbose=1,
                    device='cuda'  # Change to 'cpu' if you don't have a GPU
                )

                eval_callback = EvalCallback(
                    env_val,
                    best_model_save_path=f'./logs/{algo_name}_split{split_index + 1}/',
                    log_path=f'./logs/{algo_name}_split{split_index + 1}/',
                    eval_freq=10000,
                    deterministic=True,
                    render=False
                )

                model.learn(total_timesteps=100000, callback=eval_callback)

                best_model_path = f'./logs/{algo_name}_split{split_index + 1}/best_model.zip'
                if os.path.exists(best_model_path):
                    model = algo_class.load(
                        best_model_path,
                        env=env_train,
                        device='cuda',  # Change to 'cpu' if you don't have a GPU
                        custom_objects={
                            'features_extractor_class': CustomFeatureExtractor
                        }
                    )
                    print(f"Loaded best model for {algo_name} from validation.")
                else:
                    print(f"No best model found for {algo_name}; using last trained model.")

                model_filename = f"{algo_name}_Split{split_index + 1}_{month_train}.zip"
                model.save(model_filename)
                print(f"Model saved as {model_filename}")

                # Score the selected model on the training and validation environments.
                # evaluate_model returns six values per dataset; the last one is a
                # DataFrame of results.
                avg_reward_train, win_rate_train, cumulative_rewards_train, sharpe_ratio_train, position_returns_train, train_results_df = evaluate_model(model, env_train, dataset_name='Train')
                avg_reward_val, win_rate_val, cumulative_rewards_val, sharpe_ratio_val, position_returns_val, val_results_df = evaluate_model(model, env_val, dataset_name='Validation')

                # Prepare test environment
                # The existing scaler is passed with fit_scaler=False (presumably so it is
                # applied without being refit -- confirm in preprocess_data). The fourth
                # return value is discarded.
                test_data_full, test_data_obs, test_unscaled, _ = preprocess_data(
                    data=test_data,
                    scaler=scaler,
                    fit_scaler=False,
                    month_train=month_train
                )

                env_test = TradingEnv(
                    data_full=test_data_full,
                    data_obs=test_data_obs,
                    data_unscaled=test_unscaled,
                    scaler=scaler,
                    technical_indicators=True,
                    technical_indicators_list=technical_indicators_list,
                    window_size=14
                )

                # Reload the model from the file that was just written, bound to the
                # test environment, so the test evaluation uses the saved artifact.
                loaded_model = algo_class.load(
                    model_filename,
                    env=env_test,
                    device='cuda',  # Change to 'cpu' if you don't have a GPU
                    custom_objects={
                        'features_extractor_class': CustomFeatureExtractor
                    }
                )
                print(f"Model loaded from {model_filename}")

                # Evaluate the reloaded copy (not the in-memory `model`) on the test data.
                avg_reward_test, win_rate_test, cumulative_rewards_test, sharpe_ratio_test, position_returns_test, test_results_df = evaluate_model(loaded_model, env_test, dataset_name='Test')

                # Save results dataframes for interpretation
                # NOTE(review): unlike model_filename, these names do not include
                # month_train -- confirm that overwriting across runs is intended.
                train_results_df.to_csv(f'{algo_name}_Split{split_index + 1}_Train_Results.csv', index=False)
                val_results_df.to_csv(f'{algo_name}_Split{split_index + 1}_Validation_Results.csv', index=False)
                test_results_df.to_csv(f'{algo_name}_Split{split_index + 1}_Test_Results.csv', index=False)

                # Append one summary row per dataset (Train, Validation, Test) for this
                # algorithm and split. 'Avg Position Return' is the mean of the position
                # returns reported by evaluate_model for that dataset.
                results_list.extend([
                    {
                        'Algorithm': algo_name,
                        'Split': split_index + 1,
                        'Dataset': 'Train',
                        'Start Date': train_start_date.date(),
                        'End Date': train_end_date.date(),
                        'Price Pattern': train_pattern,
                        'Avg Reward': avg_reward_train,
                        'Win Rate': win_rate_train,
                        'Sharpe Ratio': sharpe_ratio_train,
                        'Avg Position Return': np.mean(position_returns_train)
                    },
                    {
                        # NOTE(review): this row has no 'Price Pattern' entry, unlike the
                        # Train and Test rows -- confirm that this is intentional.
                        'Algorithm': algo_name,
                        'Split': split_index + 1,
                        'Dataset': 'Validation',
                        'Start Date': val_start_date.date(),
                        'End Date': val_end_date.date(),
                        'Avg Reward': avg_reward_val,
                        'Win Rate': win_rate_val,
                        'Sharpe Ratio': sharpe_ratio_val,
                        'Avg Position Return': np.mean(position_returns_val)
                    },
                    {
                        'Algorithm': algo_name,
                        'Split': split_index + 1,
                        'Dataset': 'Test',
                        'Start Date': test_start_date.date(),
                        'End Date': test_end_date.date(),
                        'Price Pattern': test_pattern,
                        'Avg Reward': avg_reward_test,
                        'Win Rate': win_rate_test,
                        'Sharpe Ratio': sharpe_ratio_test,
                        'Avg Position Return': np.mean(position_returns_test)
                    }
                ])

        # Collect every summary row gathered in results_list into one table,
        # print it, and write it to a CSV file (without the index column).
        results_df = pd.DataFrame(results_list)
        print("\nDetailed Results:")
        print(results_df)

        results_df.to_csv('trading_rl_results_with_custom_criteria.csv', index=False)

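        # Example: running inference on new data with a saved model.
        # Assumes the new data is stored in 'data/new_data.csv'. `model_filename` and
        # `algo_name` hold whatever values the training code above assigned last, and
        # the example as written rebinds `results_df`.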
        # new_data = pd.read_csv('data/new_data.csv')
        # results_df = infer_new_data(
        #     new_data=new_data,
        #     model_filename=model_filename,
        #     scaler_filename=f'scaler_{month_train}.save',
        #     technical_indicators_list=technical_indicators_list,
        #     algo_class=algorithms[algo_name]
        # )
        # print(results_df.head())

  df = pd.read_csv(


TypeError: 'alphabet' must be None, 'ordinal' or array-like with shape (n_bins,) (got 4)