<a href="https://colab.research.google.com/github/peymankhp/AI/blob/main/Pr_Colab_V1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
"""Professional Gold Price Prediction System - Google Colab Version"""
# System imports
import pandas as pd  # Data manipulation and analysis
import numpy as np   # Numerical computations
from sklearn.preprocessing import MinMaxScaler  # Data normalization
from sklearn.model_selection import TimeSeriesSplit  # Time-series validation
import logging  # For logging
import os  # For file operations
from datetime import datetime  # For timestamps
import tensorflow as tf  # Explicitly import tensorflow
import matplotlib.pyplot as plt  # For plotting
import seaborn as sns  # For better plot styling
import math
from google.colab import drive  # For Google Drive integration
import shutil

# Mount Google Drive
try:
    drive.mount('/content/drive', force_remount=True)
    print("Google Drive mounted successfully")
except Exception as e:
    # Best effort: report the failure and keep going; the file checks at the
    # end of this section show whether the data is actually reachable.
    print(f"Error mounting Google Drive: {str(e)}")

# Check for GPU availability
if tf.config.list_physical_devices('GPU'):
    print("GPU is available and will be used")
    # Enable mixed precision training for better performance on GPU
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
else:
    print("No GPU found, using CPU")

# Set plot style
plt.style.use('seaborn-v0_8')  # Use a built-in style
sns.set_palette("husl")

# Set up base directory for Colab
BASE_DIR = '/content/drive/MyDrive/Colab Notebooks'
MODEL_DIR = os.path.join(BASE_DIR, 'models')
os.makedirs(MODEL_DIR, exist_ok=True)

# Set up logging with more detailed format
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(BASE_DIR, 'gold_price_prediction.log')),
        logging.StreamHandler()
    ],
    # basicConfig() silently does nothing when the root logger already has
    # handlers (for example when the hosting environment installed its own,
    # or this cell is re-run). force=True removes and closes those handlers
    # first so the level, format and log file above always take effect.
    force=True
)

# Configure TensorFlow logging
tf.get_logger().setLevel(logging.ERROR)  # Reduce TensorFlow logging verbosity

# Data file paths for Google Drive
DATA_FILES = {
    'train': os.path.join(BASE_DIR, 'Gold_Price_TA_training.xlsx'),
    'validation': os.path.join(BASE_DIR, 'Gold_Price_TA_validation.xlsx'),
    'test': os.path.join(BASE_DIR, 'Gold_Price_TA_test.xlsx')
}

# Verify data files exist (report only; loading code raises on a missing file)
for name, path in DATA_FILES.items():
    if not os.path.exists(path):
        print(f"Warning: {name} file not found at {path}")
    else:
        print(f"Found {name} file at {path}")

# ==============================================
# 1. DATA LOADING AND PREPROCESSING
# ==============================================

def resample_to_weekly(df):
    """
    Resample daily data to weekly frequency
    Args:
        df: Daily DataFrame with DateTimeIndex
    Returns:
        Weekly DataFrame holding the last observation of each week for every
        column. Weeks that contain no observations at all are omitted.
    """
    # Resample using last observation of the week for all columns
    weekly_df = df.resample('W').last()

    # A calendar week with no daily rows yields a row of NaN in every column.
    # Drop those rows so gaps in the input do not inject NaN into scaling
    # and training further down the pipeline.
    weekly_df = weekly_df.dropna(how='all')

    logging.info(f"Resampled data from {len(df)} daily to {len(weekly_df)} weekly observations")
    return weekly_df

def _coerce_to_numeric(series):
    """Convert a text column to numbers, retrying unparsed cells after cleanup."""
    numeric = pd.to_numeric(series, errors='coerce')

    # Cells that held a value but could not be parsed directly.
    unparsed = (numeric.isna() & series.notna()).to_numpy()
    if not unparsed.any():
        return numeric

    # Retry only the failed cells with decimal commas turned into dots and
    # spaces removed (the cleanup the original fallback branch intended).
    cleaned = (
        series[unparsed].astype(str)
        .str.replace(',', '.', regex=False)
        .str.replace(' ', '', regex=False)
    )
    values = numeric.to_numpy(dtype=float, copy=True)
    values[unparsed] = pd.to_numeric(cleaned, errors='coerce').to_numpy(dtype=float)
    return pd.Series(values, index=series.index, name=series.name)


def load_and_preprocess_data(file_path, columns_to_keep=None):
    """
    Load an Excel file, clean it and return daily and weekly DataFrames.

    The file is read with openpyxl, ``Date`` is parsed and becomes the sorted
    index, text columns are converted to numbers, and gaps are filled by
    time interpolation, bounded forward/backward fill, then column means.

    Args:
        file_path: Path to the Excel file; it must contain a ``Date`` column.
        columns_to_keep: Optional list of column names (excluding ``Date``)
            to retain. When None, every column is kept.
    Returns:
        Tuple ``(daily_df, weekly_df)`` where ``weekly_df`` is produced by
        ``resample_to_weekly(daily_df)``.
    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the loaded sheet is empty.
        Any other error is logged and re-raised unchanged.
    """
    try:
        logging.info(f"Loading data from {file_path}")
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"The file {file_path} was not found")

        # Read Excel file (Date is read as text and parsed explicitly below)
        df = pd.read_excel(
            file_path,
            engine='openpyxl',
            dtype={'Date': str}
        )

        if df.empty:
            raise ValueError("The loaded file is empty")

        # Convert date column
        df['Date'] = pd.to_datetime(df['Date'])

        # Keep only specified columns if provided
        if columns_to_keep is not None:
            df = df[['Date'] + list(columns_to_keep)]

        # Set Date as index and sort
        df = df.sort_values('Date').set_index('Date')

        # Convert text columns to numeric. pd.to_numeric(errors='coerce')
        # never raises, so the cleanup has to be applied to the cells it
        # could not parse rather than in an except branch.
        for col in df.select_dtypes(include=[object]).columns:
            df[col] = _coerce_to_numeric(df[col]).to_numpy()

            # Log number of NaN values after conversion
            nan_count = df[col].isna().sum()
            if nan_count > 0:
                logging.warning(f"Column {col}: {nan_count} values converted to NaN")

        # Handle missing values
        df = df.interpolate(method='time', limit_direction='both', limit=5)
        # DataFrame.ffill/bfill replace the deprecated fillna(method=...)
        df = df.ffill(limit=5)
        df = df.bfill(limit=5)
        df = df.fillna(df.mean())

        # A column with no valid value at all has a NaN mean, so it is still
        # NaN here. Fill it with 0.0 (keeping the column set unchanged) so
        # NaN cannot reach the scaler and the model.
        still_missing = df.columns[df.isna().any()].tolist()
        if still_missing:
            logging.warning(
                f"Columns with no usable values filled with 0.0: {still_missing}"
            )
            df = df.fillna(0.0)

        # Create weekly version
        weekly_df = resample_to_weekly(df)

        logging.info(f"Data shapes - Daily: {df.shape}, Weekly: {weekly_df.shape}")
        return df, weekly_df

    except Exception as e:
        logging.error(f"Error processing data: {str(e)}")
        raise

def calculate_prediction_accuracy(y_true, y_pred):
    """
    Calculate various accuracy metrics for predictions
    Args:
        y_true: True values (any array-like; flattened to 1-D)
        y_pred: Predicted values (any array-like; flattened to 1-D)
    Returns:
        Dictionary with keys 'mape', 'median_pe', 'rmse', 'mae', 'r2' and
        'directional_accuracy'. A metric that is undefined for the given
        input is returned as NaN: percentage errors when every true value is
        0, R-squared when the true values are constant, directional accuracy
        with fewer than two points, everything for empty input.
    Raises:
        ValueError: If the two inputs do not have the same number of values.
    """
    # Flatten both inputs so a (n,) target and a (n, 1) prediction are
    # compared element-wise instead of broadcasting to an (n, n) matrix.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {y_true.size} and {y_pred.size}"
        )

    nan = float('nan')
    if y_true.size == 0:
        return {
            'mape': nan, 'median_pe': nan, 'rmse': nan,
            'mae': nan, 'r2': nan, 'directional_accuracy': nan
        }

    errors = y_true - y_pred
    abs_errors = np.abs(errors)

    # Percentage errors are only defined where the true value is non-zero
    nonzero = y_true != 0
    if nonzero.any():
        percentage_errors = abs_errors[nonzero] / np.abs(y_true[nonzero]) * 100
        mape = np.mean(percentage_errors)
        median_pe = np.median(percentage_errors)
    else:
        mape = median_pe = nan

    # Directional accuracy: share of steps where both series rise (or both
    # do not rise) from one point to the next
    if y_true.size >= 2:
        direction_true = np.diff(y_true) > 0
        direction_pred = np.diff(y_pred) > 0
        directional_accuracy = np.mean(direction_true == direction_pred) * 100
    else:
        directional_accuracy = nan

    # R-squared is undefined when the true values have no variance
    ss_res = np.sum(errors ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else nan

    metrics = {
        'mape': mape,  # Mean Absolute Percentage Error
        'median_pe': median_pe,  # Median Percentage Error
        'rmse': np.sqrt(np.mean(errors ** 2)),  # Root Mean Square Error
        'mae': np.mean(abs_errors),  # Mean Absolute Error
        'r2': r2,  # R-squared
        'directional_accuracy': directional_accuracy  # Directional Accuracy
    }

    return metrics

def get_common_columns(dataframes, missing_threshold=0.3):
    """
    Return the columns shared by every DataFrame whose share of missing
    values stays within a limit in all of them.

    Args:
        dataframes: Sequence of pandas DataFrames to compare
        missing_threshold: Highest tolerated fraction of missing values per
            column, checked against the worst DataFrame (default 0.3)
    Returns:
        Alphabetically sorted list of qualifying column names; a column
        named 'Date' is never included.
    Raises:
        Any exception raised while inspecting the frames is logged and
        re-raised.
    """
    try:
        # Columns present in the first frame and in every other frame
        other_column_sets = (df.columns for df in dataframes[1:])
        common_cols = sorted(set(dataframes[0].columns).intersection(*other_column_sets))
        logging.info(f"Found {len(common_cols)} columns common to all datasets")

        valid_columns = []
        for col in common_cols:
            # The date column is handled by the loading code, not here
            if col == 'Date':
                continue

            # Worst-case missing fraction for this column over all frames
            ratios = [df[col].isnull().mean() for df in dataframes]
            max_missing_ratio = max(ratios)

            if not (max_missing_ratio <= missing_threshold):
                logging.warning(f"Column {col} excluded due to high missing ratio: {max_missing_ratio:.2%}")
                continue

            valid_columns.append(col)

        logging.info(f"Retained {len(valid_columns)} columns after missing value filtering")
        return valid_columns

    except Exception as e:
        logging.error(f"Error in get_common_columns: {str(e)}")
        raise

# First load raw data to identify common columns.
# Only the 'train' and 'test' entries of DATA_FILES are read here; the
# 'validation' entry is not loaded by this section.
raw_train_df = pd.read_excel(DATA_FILES['train'])
raw_test_df = pd.read_excel(DATA_FILES['test'])

# Get common columns that meet missing value threshold
# (get_common_columns returns them sorted and without 'Date')
common_columns = get_common_columns([raw_train_df, raw_test_df])
logging.info(f"Identified {len(common_columns)} common columns across all datasets")

# Load and preprocess the train and test datasets restricted to the common
# columns; each call re-reads its file and returns (daily, weekly) frames
train_daily_df, train_weekly_df = load_and_preprocess_data(DATA_FILES['train'], common_columns)
test_daily_df, test_weekly_df = load_and_preprocess_data(DATA_FILES['test'], common_columns)

# ==============================================
# 2. FEATURE ENGINEERING AND SCALING
# ==============================================

def scale_dataset(df, scaler, fit_scaler=False):
    """
    Run a DataFrame's values through a scaler.

    Args:
        df: DataFrame whose ``.values`` are scaled
        scaler: Object providing ``fit_transform`` and ``transform``
        fit_scaler: When true the scaler is fitted on this data before
            transforming; otherwise the already-fitted scaler is applied
    Returns:
        Whatever the scaler returns for the transformed values
    Raises:
        Any exception from the scaler is logged and re-raised.
    """
    try:
        raw_values = df.values

        # Apply-only path: the scaler is expected to be fitted already
        if not fit_scaler:
            data_scaled = scaler.transform(raw_values)
            logging.info(f"Transformed data with shape: {data_scaled.shape}")
            return data_scaled

        # Fit-and-apply path
        data_scaled = scaler.fit_transform(raw_values)
        logging.info(f"Fitted and transformed data with shape: {data_scaled.shape}")
        return data_scaled

    except Exception as e:
        logging.error(f"Error scaling dataset: {str(e)}")
        raise

# Define target variable (gold closing price)
target_column = 'Close'  # Value we want to predict

if target_column not in common_columns:
    raise KeyError(f"Target column '{target_column}' is not among the common columns")

# Create feature list (all columns except target)
feature_columns = [col for col in common_columns if col != target_column]

# create_sequences() and evaluate_model() both read the target from column 0
# of the scaled arrays, while common_columns is sorted alphabetically, so the
# target is only first by coincidence. Put it first explicitly.
model_columns = [target_column] + feature_columns

# Initialize normalization scalers
daily_scaler = MinMaxScaler(feature_range=(0, 1))
weekly_scaler = MinMaxScaler(feature_range=(0, 1))

# Scale daily data (scaler fitted on training data only)
train_daily_scaled = scale_dataset(train_daily_df[model_columns], daily_scaler, fit_scaler=True)
test_daily_scaled = scale_dataset(test_daily_df[model_columns], daily_scaler)

# Scale weekly data (scaler fitted on training data only)
train_weekly_scaled = scale_dataset(train_weekly_df[model_columns], weekly_scaler, fit_scaler=True)
test_weekly_scaled = scale_dataset(test_weekly_df[model_columns], weekly_scaler)

# ==============================================
# 3. LSTM TIME-SERIES MODEL ARCHITECTURE
# ==============================================

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, BatchNormalization, Bidirectional, Concatenate, Add, LayerNormalization
from tensorflow.keras.layers import MultiHeadAttention, GlobalAveragePooling1D
from tensorflow.keras.losses import Huber, BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
import tensorflow.keras.backend as K

def lr_schedule(epoch):
    """
    Custom learning rate schedule with warm-up and cosine decay

    Args:
        epoch: Zero-based epoch index
    Returns:
        Learning rate for that epoch: a linear ramp up to 0.001 over the
        first 5 epochs, then a cosine decay over the following 95 epochs,
        never below 0.00001. Epochs past the end of the decay stay at the
        minimum rate.
    """
    initial_lr = 0.001
    warmup_epochs = 5
    decay_epochs = 95  # remaining epochs after warmup
    min_lr = 0.00001

    if epoch < warmup_epochs:
        # Linear warmup
        return initial_lr * (epoch + 1) / warmup_epochs

    # Cosine decay. Progress is capped at 1.0 because the cosine is periodic:
    # without the cap the rate would climb back up once training runs
    # longer than warmup_epochs + decay_epochs.
    progress = min(1.0, (epoch - warmup_epochs) / decay_epochs)
    cosine_decay = 0.5 * (1 + math.cos(math.pi * progress))
    return max(min_lr, initial_lr * cosine_decay)

def directional_accuracy(y_true, y_pred):
    """
    Keras metric: fraction of consecutive steps whose direction matches.

    The step is the difference between each entry and the one before it
    along the first axis; the sign of the true step is compared with the
    sign of the predicted step and the matches are averaged.
    """
    # Step from each entry to the next along the first axis
    true_step = y_true[1:] - y_true[:-1]
    pred_step = y_pred[1:] - y_pred[:-1]

    # 1.0 where both steps have the same sign, 0.0 otherwise
    same_direction = K.equal(K.sign(true_step), K.sign(pred_step))
    return K.mean(K.cast(same_direction, 'float32'))

def combined_loss(alpha=0.7):
    """
    Build a loss mixing a value term and a direction term.

    Args:
        alpha: Weight of the Huber value term; the direction term is
            weighted by ``1 - alpha``
    Returns:
        A function ``loss(y_true, y_pred)`` returning
        ``alpha * huber + (1 - alpha) * binary_crossentropy`` where the
        cross-entropy compares the signs of consecutive steps (along the
        first axis) of the true and predicted values, mapped from
        {-1, 0, 1} to {0, 0.5, 1}.
    """
    value_loss = Huber(delta=1.0)
    direction_loss = BinaryCrossentropy()

    def loss(y_true, y_pred):
        # Value component
        v_loss = value_loss(y_true, y_pred)

        # Directional component: sign of the step between neighbouring entries.
        # NOTE(review): the predicted direction goes through K.sign, which is
        # piecewise constant, so this term probably contributes no gradient;
        # confirm whether a differentiable surrogate was intended.
        true_sign = K.sign(y_true[1:] - y_true[:-1])
        pred_sign = K.sign(y_pred[1:] - y_pred[:-1])
        true_label = K.cast((true_sign + 1) / 2, 'float32')
        pred_label = K.cast((pred_sign + 1) / 2, 'float32')
        d_loss = direction_loss(true_label, pred_label)

        return alpha * v_loss + (1 - alpha) * d_loss

    return loss

def create_lstm_model(input_shape):
    """
    Build and compile the bidirectional-LSTM regression model.

    Architecture, in order: BiLSTM(256) -> multi-head self-attention with a
    residual connection -> BiLSTM(128) -> BiLSTM(64), then two parallel
    dense stacks (labelled value and direction paths) that are concatenated
    and reduced by further dense layers to a single output unit.

    Args:
        input_shape: Shape of one input sample, passed to ``Input(shape=...)``
            (the caller in this file passes ``(window_size, n_features)``)
    Returns:
        Compiled Keras ``Model`` using Adam (gradient norm clipped to 1.0),
        ``combined_loss(alpha=0.7)`` and the metrics 'mae', 'mse' and
        ``directional_accuracy``.
    Raises:
        Any exception raised while building or compiling is logged and
        re-raised.
    """
    try:
        logging.info(f"Creating enhanced directional LSTM model with input shape: {input_shape}")

        # Enable mixed precision for better GPU performance
        # (sets the global Keras dtype policy, affecting layers created afterwards)
        if tf.config.list_physical_devices('GPU'):
            tf.keras.mixed_precision.set_global_policy('mixed_float16')

        # Input layer
        inputs = Input(shape=input_shape, name="Input")

        # First Bidirectional LSTM layer; returns the full sequence so the
        # attention block below can attend over every time step
        x = Bidirectional(LSTM(
            units=256,
            return_sequences=True,
            kernel_regularizer='l2',
            recurrent_regularizer='l2',
            name="BiLSTM1",
            # NOTE(review): `implementation` selects how the LSTM cell's
            # computation is arranged; it is not obviously a cuDNN switch —
            # confirm against the installed Keras version
            implementation=2
        ))(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Multi-head self-attention (query and value are both x), added back
        # onto x as a residual and then layer-normalized
        attention_output = MultiHeadAttention(
            num_heads=16,  # Increased heads for better pattern recognition
            key_dim=32
        )(x, x)
        x = Add()([x, attention_output])
        x = LayerNormalization()(x)

        # Second Bidirectional LSTM layer (still returns the full sequence)
        x = Bidirectional(LSTM(
            units=128,
            return_sequences=True,
            kernel_regularizer='l2',
            recurrent_regularizer='l2',
            name="BiLSTM2"
        ))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Third Bidirectional LSTM layer; return_sequences=False collapses
        # the time axis to a single vector per sample
        x = Bidirectional(LSTM(
            units=64,
            return_sequences=False,
            kernel_regularizer='l2',
            recurrent_regularizer='l2',
            name="BiLSTM3"
        ))(x)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)

        # Split into two paths: one for value, one for direction.
        # Both paths have the same layer structure and the same input x;
        # they are merged again below, so the model has a single output.
        # Value prediction path
        value_dense = Dense(128, activation='selu')(x)
        value_dense = BatchNormalization()(value_dense)
        value_dense = Dropout(0.2)(value_dense)

        value_dense = Dense(64, activation='selu')(value_dense)
        value_dense = BatchNormalization()(value_dense)
        value_dense = Dropout(0.2)(value_dense)

        # Direction prediction path
        dir_dense = Dense(128, activation='selu')(x)
        dir_dense = BatchNormalization()(dir_dense)
        dir_dense = Dropout(0.2)(dir_dense)

        dir_dense = Dense(64, activation='selu')(dir_dense)
        dir_dense = BatchNormalization()(dir_dense)
        dir_dense = Dropout(0.2)(dir_dense)

        # Combine paths
        combined = Concatenate()([value_dense, dir_dense])

        # Final processing
        x = Dense(64, activation='selu')(combined)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)

        x = Dense(32, activation='selu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.1)(x)

        # If using GPU, ensure final layer uses float32
        # (under the mixed_float16 policy set above the output would
        # otherwise be computed in float16)
        if tf.config.list_physical_devices('GPU'):
            outputs = Dense(1, dtype='float32', name="Output")(x)
        else:
            outputs = Dense(1, name="Output")(x)

        # Create model
        model = Model(inputs=inputs, outputs=outputs, name="DirectionalGoldPriceLSTM")

        # Compile with custom loss and metrics
        initial_learning_rate = 0.001
        optimizer = Adam(
            learning_rate=initial_learning_rate,
            clipnorm=1.0,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-07
        )

        # Use mixed precision optimizer if GPU is available
        if tf.config.list_physical_devices('GPU'):
            optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)

        model.compile(
            optimizer=optimizer,
            loss=combined_loss(alpha=0.7),
            metrics=['mae', 'mse', directional_accuracy]
        )

        logging.info("Enhanced directional LSTM model created successfully")
        # Route the layer summary through the logger instead of stdout
        model.summary(print_fn=logging.info)
        return model

    except Exception as e:
        logging.error(f"Error creating enhanced LSTM model: {str(e)}")
        raise

# ==============================================
# 4. RISK ANALYSIS COMPONENTS
# ==============================================

def calculate_value_at_risk(returns, confidence=0.95):
    """
    Historical-simulation Value-at-Risk.

    Args:
        returns: Array-like of historical returns
        confidence: Confidence level as a fraction (default 0.95)
    Returns:
        The return at the ``100 * (1 - confidence)`` percentile of
        ``returns``; a negative value represents a potential loss.
    """
    # Percentile of the loss tail, e.g. roughly the 5th for 95% confidence
    tail_percentile = 100 * (1 - confidence)
    return np.percentile(returns, tail_percentile)

def monte_carlo_simulation(start_price, days, num_simulations, mu, sigma,
                           trading_days_per_year=252):
    """
    Generate stochastic price paths by compounding random daily returns.

    Each daily return is drawn independently from a normal distribution and
    compounded as ``price * (1 + return)``, a discrete-time approximation of
    Geometric Brownian Motion.

    Args:
        start_price: Current market price
        days: Projection horizon (number of simulated days)
        num_simulations: Number of scenarios
        mu: Annualized return expectation
        sigma: Annualized volatility
        trading_days_per_year: Number of trading days used to convert the
            annualized inputs to daily figures (default 252)
    Returns:
        Matrix of simulated price paths (days x simulations)
    """
    # Convert annualized drift and volatility to daily figures. The divisor
    # is the number of trading days in a year, not the projection horizon:
    # dividing by `days` is only right when the horizon is exactly one year.
    daily_return = mu / trading_days_per_year  # Daily expected return
    daily_vol = sigma / np.sqrt(trading_days_per_year)  # Daily volatility

    # Generate random daily returns
    shocks = np.random.normal(
        daily_return,
        daily_vol,
        (days, num_simulations)  # 2D array of returns
    )

    # Convert to price paths using cumulative product down the day axis
    price_paths = start_price * np.cumprod(1 + shocks, axis=0)

    return price_paths

# ==============================================
# 5. MODEL TRAINING FRAMEWORK
# ==============================================

def train_time_series_model(model, X_train, y_train, validation_data, epochs=100,
                            batch_size=32, shuffle=False):
    """
    Train a compiled model with early stopping, checkpointing and TensorBoard.

    Args:
        model: Compiled Keras model
        X_train: Training input sequences
        y_train: Training targets
        validation_data: ``(X_val, y_val)`` tuple passed to ``model.fit``, or
            None to train without validation
        epochs: Maximum number of epochs (default 100)
        batch_size: Samples per gradient update (default 32)
        shuffle: Whether to shuffle training samples each epoch. Defaults to
            False because ``combined_loss`` and ``directional_accuracy``
            compare neighbouring samples inside a batch, which is only
            meaningful when samples stay in chronological order.
    Returns:
        The Keras ``History`` object returned by ``model.fit``
    Raises:
        Any exception raised during training is logged and re-raised.
    """
    from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    model_path = os.path.join(MODEL_DIR, f'gold_price_model_{timestamp}.h5')

    # Add TensorBoard logging for Colab
    log_dir = os.path.join(BASE_DIR, 'logs', timestamp)
    os.makedirs(log_dir, exist_ok=True)

    # Without validation data no 'val_*' metric is produced, so fall back to
    # the training metric instead of monitoring a value that never appears.
    if validation_data is not None:
        monitored_metric = 'val_directional_accuracy'
    else:
        monitored_metric = 'directional_accuracy'

    callbacks = [
        EarlyStopping(
            monitor=monitored_metric,
            mode='max',
            patience=25,
            restore_best_weights=True,
            verbose=1
        ),
        ModelCheckpoint(
            filepath=model_path,
            monitor=monitored_metric,
            mode='max',
            save_best_only=True,
            verbose=1
        ),
        # The learning rate is driven solely by lr_schedule. The previous
        # ReduceLROnPlateau callback was removed: this scheduler sets the
        # rate at the start of every epoch, so any plateau reduction was
        # overwritten one epoch later and never took effect.
        LearningRateScheduler(lr_schedule),
        TensorBoard(
            log_dir=log_dir,
            histogram_freq=1,
            write_graph=True,
            write_images=True,
            update_freq='epoch',
            profile_batch=0
        )
    ]

    try:
        logging.info("Starting enhanced directional training...")

        history = model.fit(
            X_train, y_train,
            validation_data=validation_data,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1,
            shuffle=shuffle
        )

        logging.info(f"Model training completed. Best model saved to {model_path}")
        return history

    except Exception as e:
        logging.error(f"Error during model training: {str(e)}")
        raise

def create_sequences(data, window_size, target_index=0):
    """
    Create sequences for time series prediction
    Args:
        data: Scaled 2-D array of shape (samples, features)
        window_size: Number of time steps to look back (must be >= 1)
        target_index: Column of ``data`` holding the value to predict
            (default 0, the first column)
    Returns:
        X: Input sequences of shape (n, window_size, features)
        y: Target values of shape (n,), the target column of the row that
           immediately follows each window
        where n = max(len(data) - window_size, 0). When ``data`` is too
        short for a single window, correctly shaped empty arrays are
        returned.
    Raises:
        ValueError: If ``data`` is not 2-D or ``window_size`` is below 1.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(f"data must be 2-D (samples, features), got {data.ndim} dimension(s)")
    if window_size < 1:
        raise ValueError(f"window_size must be at least 1, got {window_size}")

    n_sequences = data.shape[0] - window_size
    if n_sequences <= 0:
        # Keep the rank and feature dimension so callers can still inspect
        # .shape instead of receiving flat, shape-(0,) arrays.
        empty_X = np.empty((0, window_size, data.shape[1]), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + window_size] for start in range(n_sequences)])
    # The target of window i is the row right after it: data[i + window_size]
    y = data[window_size:, target_index].copy()
    return X, y

def plot_training_history(history, save_path=None):
    """
    Plot training history showing loss and metrics over epochs
    Args:
        history: Keras history object from model training; its ``history``
            dict must contain 'loss', 'mae' and 'mse'. The matching 'val_*'
            series are drawn only when present.
        save_path: Where to write the PNG. Defaults to
            ``training_history.png`` inside BASE_DIR; pass distinct paths
            to keep the plots of several training runs.
    Raises:
        Any exception raised while plotting or saving is logged and
        re-raised; the figure is closed in every case.
    """
    # (history key, axis label, panel title) for each of the three panels
    panels = (
        ('loss', 'Loss', 'Model Loss Over Epochs'),
        ('mae', 'MAE', 'Mean Absolute Error Over Epochs'),
        ('mse', 'MSE', 'Mean Squared Error Over Epochs'),
    )

    if save_path is not None:
        plot_path = save_path
    else:
        plot_path = os.path.join(BASE_DIR, 'training_history.png')

    fig = None
    try:
        # Create figure with subplots
        fig, axes = plt.subplots(3, 1, figsize=(12, 15))

        for ax, (key, label, title) in zip(axes, panels):
            ax.plot(history.history[key], label=f'Training {label}')
            # Validation series exist only when validation data was used
            val_key = f'val_{key}'
            if val_key in history.history:
                ax.plot(history.history[val_key], label=f'Validation {label}')
            ax.set_title(title)
            ax.set_xlabel('Epoch')
            ax.set_ylabel(label)
            ax.legend()
            ax.grid(True)

        fig.tight_layout()

        # Save the plot
        fig.savefig(plot_path)

        logging.info(f"Training history plot saved to {plot_path}")

    except Exception as e:
        logging.error(f"Error plotting training history: {str(e)}")
        raise

    finally:
        # Close the figure even when plotting or saving failed, so a failed
        # call does not leave it open
        if fig is not None:
            plt.close(fig)

def plot_predictions(y_true, y_pred, title, save_path, dates=None):
    """
    Plot actual vs predicted values
    Args:
        y_true: True values (array-like; flattened to 1-D)
        y_pred: Predicted values (array-like; flattened to 1-D)
        title: Plot title
        save_path: Path to save the plot
        dates: Optional array of dates for x-axis
    Raises:
        Any exception raised while plotting or saving is logged and
        re-raised; the figure is closed in every case.
    """
    fig = None
    try:
        # Flatten so column-vector predictions, e.g. shape (n, 1), are drawn
        # and scored as a single series
        y_true = np.ravel(y_true)
        y_pred = np.ravel(y_pred)

        fig = plt.figure(figsize=(15, 8))

        # Plot actual and predicted values
        if dates is not None:
            plt.plot(dates, y_true, label='Actual', alpha=0.8)
            plt.plot(dates, y_pred, label='Predicted', alpha=0.8)
            plt.xticks(rotation=45)
        else:
            plt.plot(y_true, label='Actual', alpha=0.8)
            plt.plot(y_pred, label='Predicted', alpha=0.8)

        plt.title(title)
        plt.xlabel('Time')
        plt.ylabel('Gold Price (USD)')
        plt.legend()
        plt.grid(True)

        # Add prediction accuracy metrics to the plot
        metrics = calculate_prediction_accuracy(y_true, y_pred)
        metrics_text = f"MAPE: {metrics['mape']:.2f}%\n"
        metrics_text += f"RMSE: ${metrics['rmse']:.2f}\n"
        metrics_text += f"Directional Accuracy: {metrics['directional_accuracy']:.2f}%"

        # Text box anchored to the top-left corner in axes coordinates
        plt.text(0.02, 0.98, metrics_text,
                transform=plt.gca().transAxes,
                verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        plt.tight_layout()

        # Save the plot
        plt.savefig(save_path)

        logging.info(f"Prediction plot saved to {save_path}")

    except Exception as e:
        logging.error(f"Error plotting predictions: {str(e)}")
        raise

    finally:
        # Close the figure even when plotting or saving failed, so a failed
        # call does not leave it open
        if fig is not None:
            plt.close(fig)

def _inverse_transform_target(scaler, values, n_features):
    """Map scaled target values (column 0) back to original units."""
    # The scaler expects n_features columns; the others are zero-filled and
    # discarded. Assumes the scaler treats each column independently.
    padded = np.zeros((len(values), n_features))
    padded[:, 0] = values
    return scaler.inverse_transform(padded)[:, 0]


def evaluate_model(model, X_test, y_test, scaler, freq='daily'):
    """
    Evaluate model performance on test set
    Args:
        model: Trained Keras model
        X_test: Test features of shape (samples, window, features)
        y_test: Scaled test targets
        scaler: Scaler fitted on data whose first column is the target and
            whose column count equals ``X_test.shape[2]``
        freq: Frequency of predictions ('daily' or 'weekly'); used in log
            messages and in the name of the saved plot
    Returns:
        Tuple ``(metrics, y_pred_actual)``. ``metrics`` holds
        'scaled_mse', 'scaled_rmse' and 'scaled_mae' (errors on the scaled
        data) plus 'mse' and every key of ``calculate_prediction_accuracy``
        computed in original price units. ``y_pred_actual`` is the
        predictions in original price units.
    Raises:
        Any exception raised during evaluation is logged and re-raised.
    """
    try:
        logging.info(f"Evaluating {freq} model performance...")
        # Get model predictions as a flat vector aligned with y_test
        y_test = np.asarray(y_test).ravel()
        y_pred = np.asarray(model.predict(X_test)).ravel()

        # Errors on the scaled data. These keys carry a 'scaled_' prefix
        # because the price-unit metrics added below reuse the names 'rmse'
        # and 'mae'; previously the dict ended up with a scaled 'mse' next to
        # price-unit 'rmse' and 'mae'.
        scaled_errors = y_test - y_pred
        scaled_mse = np.mean(scaled_errors ** 2)
        metrics = {
            'scaled_mse': scaled_mse,
            'scaled_rmse': np.sqrt(scaled_mse),
            'scaled_mae': np.mean(np.abs(scaled_errors))
        }

        # Inverse transform predictions and actual values
        n_features = X_test.shape[2]  # Number of features
        y_test_actual = _inverse_transform_target(scaler, y_test, n_features)
        y_pred_actual = _inverse_transform_target(scaler, y_pred, n_features)

        # Calculate accuracy metrics in original price units
        accuracy_metrics = calculate_prediction_accuracy(y_test_actual, y_pred_actual)
        metrics.update(accuracy_metrics)
        metrics['mse'] = accuracy_metrics['rmse'] ** 2  # consistent with 'rmse'

        # Plot predictions
        plot_path = os.path.join(BASE_DIR, f'predictions_{freq}.png')
        plot_predictions(
            y_test_actual,
            y_pred_actual,
            f'Gold Price Predictions vs Actual Values ({freq.capitalize()} Test Set)',
            plot_path
        )

        logging.info(f"{freq.capitalize()} model evaluation metrics:")
        for metric_name, metric_value in metrics.items():
            if metric_name in ['mape', 'directional_accuracy']:
                logging.info(f"{metric_name}: {metric_value:.2f}%")
            else:
                logging.info(f"{metric_name}: {metric_value:.4f}")

        return metrics, y_pred_actual

    except Exception as e:
        logging.error(f"Error during model evaluation: {str(e)}")
        raise

# ==============================================
# 6. MAIN EXECUTION FLOW
# ==============================================

if __name__ == "__main__":
    # End-to-end run: risk analysis on the training prices, then training and
    # evaluation of one model on daily data and one on weekly data.
    try:
        logging.info("Starting Gold Price Prediction System on Google Colab")

        # Print environment information
        logging.info(f"TensorFlow version: {tf.__version__}")
        logging.info(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

        # Log data shapes for debugging
        logging.info(f"Training data shape: {train_daily_df.shape}, {train_weekly_df.shape}")
        logging.info(f"Test data shape: {test_daily_df.shape}, {test_weekly_df.shape}")

        # Example risk analysis workflow
        price_returns = train_daily_df['Close'].pct_change().dropna()  # Daily returns
        var_95 = calculate_value_at_risk(price_returns)  # 95% confidence level
        logging.info(f"Daily 95% Value-at-Risk: {var_95*100:.2f}%")

        # Monte Carlo simulation example
        logging.info("Starting Monte Carlo simulation...")
        mc_simulations = monte_carlo_simulation(
            start_price=train_daily_df['Close'].iloc[-1],  # Latest price
            days=252,  # Trading days in year
            num_simulations=1000,  # Number of scenarios
            mu=price_returns.mean() * 252,  # Annualized return
            sigma=price_returns.std() * np.sqrt(252)  # Annualized volatility
        )
        logging.info("Monte Carlo simulation completed")

        # Create sequences for LSTM training
        window_size = 60
        logging.info(f"Creating sequences with window size: {window_size}")
        X_train_daily, y_train_daily = create_sequences(train_daily_scaled, window_size)
        X_train_weekly, y_train_weekly = create_sequences(train_weekly_scaled, window_size)
        X_test_daily, y_test_daily = create_sequences(test_daily_scaled, window_size)
        X_test_weekly, y_test_weekly = create_sequences(test_weekly_scaled, window_size)

        # Log sequence shapes
        logging.info(f"Training sequences shape: X_daily={X_train_daily.shape}, y_daily={y_train_daily.shape}, X_weekly={X_train_weekly.shape}, y_weekly={y_train_weekly.shape}")
        logging.info(f"Test sequences shape: X_daily={X_test_daily.shape}, y_daily={y_test_daily.shape}, X_weekly={X_test_weekly.shape}, y_weekly={y_test_weekly.shape}")

        # A dataset with no more than window_size rows yields no sequences
        # (the weekly sets need more than 60 weeks of data). Stop here with a
        # clear message rather than failing later inside Keras.
        sequence_sets = {
            'daily training': X_train_daily,
            'weekly training': X_train_weekly,
            'daily test': X_test_daily,
            'weekly test': X_test_weekly
        }
        for set_name, sequences in sequence_sets.items():
            if len(sequences) == 0:
                raise ValueError(
                    f"The {set_name} data has too few rows for a window size of {window_size}"
                )

        # Create and train the LSTM model
        input_shape_daily = (window_size, train_daily_scaled.shape[1])
        input_shape_weekly = (window_size, train_weekly_scaled.shape[1])
        lstm_model_daily = create_lstm_model(input_shape_daily)
        lstm_model_weekly = create_lstm_model(input_shape_weekly)

        # Train the LSTM model
        # NOTE(review): the test sequences are passed as validation data, so
        # early stopping and checkpoint selection see the same data that is
        # later used for the final evaluation. DATA_FILES defines a separate
        # 'validation' file that this code never loads; confirm whether it
        # was meant to be used here.
        history_daily = train_time_series_model(
            lstm_model_daily,
            X_train_daily,
            y_train_daily,
            validation_data=(X_test_daily, y_test_daily),
            epochs=100
        )
        history_weekly = train_time_series_model(
            lstm_model_weekly,
            X_train_weekly,
            y_train_weekly,
            validation_data=(X_test_weekly, y_test_weekly),
            epochs=100
        )

        # Plot training history. plot_training_history() writes to
        # 'training_history.png' in BASE_DIR by default, so the second call
        # would overwrite the first; move each plot to a per-frequency name
        # straight after it is written.
        default_history_plot = os.path.join(BASE_DIR, 'training_history.png')
        for freq_name, freq_history in (('daily', history_daily), ('weekly', history_weekly)):
            plot_training_history(freq_history)
            kept_history_plot = os.path.join(BASE_DIR, f'training_history_{freq_name}.png')
            os.replace(default_history_plot, kept_history_plot)
            logging.info(f"Training history plot kept as {kept_history_plot}")

        # Evaluate model performance and get predictions
        metrics_daily, y_pred_daily = evaluate_model(
            lstm_model_daily,
            X_test_daily,
            y_test_daily,
            daily_scaler,
            freq='daily'
        )
        metrics_weekly, y_pred_weekly = evaluate_model(
            lstm_model_weekly,
            X_test_weekly,
            y_test_weekly,
            weekly_scaler,
            freq='weekly'
        )

        # Compare daily vs weekly performance
        logging.info("\nModel Performance Comparison:")
        logging.info("============================")
        metrics_comparison = {
            'Daily MAPE': f"{metrics_daily['mape']:.2f}%",
            'Weekly MAPE': f"{metrics_weekly['mape']:.2f}%",
            'Daily Directional Accuracy': f"{metrics_daily['directional_accuracy']:.2f}%",
            'Weekly Directional Accuracy': f"{metrics_weekly['directional_accuracy']:.2f}%",
            'Daily RMSE': f"${metrics_daily['rmse']:.2f}",
            'Weekly RMSE': f"${metrics_weekly['rmse']:.2f}"
        }
        for metric, value in metrics_comparison.items():
            logging.info(f"{metric}: {value}")

        logging.info("Gold Price Prediction System completed successfully")

    except Exception as e:
        logging.error(f"An error occurred during execution: {str(e)}")
        raise

def move_file_to_project_dir(filename, project_dir):
    """
    Copy the first located instance of a file into the project directory.

    Despite its name, this function copies the file (with ``shutil.copy2``,
    which preserves metadata); the source file is left in place.

    Args:
        filename: Name of the file to look for; also used as the name of
            the copy inside ``project_dir``
        project_dir: Destination directory; created if it does not exist
    Returns:
        True when the file is in ``project_dir`` afterwards (copied now or
        already there), False when it could not be found or copied.
    """
    # find_files is defined outside this block; presumably it returns a list
    # of paths matching the name — verify against its definition.
    found_paths = find_files(filename)
    if not found_paths:
        print(f"❌ Could not find {filename}")
        return False

    source_path = found_paths[0]  # Take the first found instance
    dest_path = os.path.join(project_dir, filename)
    try:
        # Create project directory if it doesn't exist
        os.makedirs(project_dir, exist_ok=True)

        # When the located file already is the destination, copy2 would
        # raise SameFileError and a file that is in place would be reported
        # as a failure; treat that case as success instead.
        if os.path.exists(dest_path) and os.path.samefile(source_path, dest_path):
            print(f"✅ {filename} is already in {project_dir}")
            return True

        # Copy the file (using copy2 to preserve metadata)
        shutil.copy2(source_path, dest_path)
        print(f"✅ Successfully moved {filename} to {project_dir}")
        return True
    except Exception as e:
        print(f"❌ Error moving file: {str(e)}")
        return False

# Move both files if needed
# NOTE(review): `project_dir` is not defined anywhere in the visible code, and
# neither is `find_files`, which move_file_to_project_dir calls — confirm they
# are defined elsewhere, otherwise this loop raises NameError. This loop also
# runs after the data files have already been loaded above — confirm the
# intended order.
for filename in ['Gold_Price_TA_training.xlsx', 'Gold_Price_TA_test.xlsx']:
    move_file_to_project_dir(filename, project_dir)

Mounted at /content/drive
Google Drive mounted successfully
No GPU found, using CPU
Found train file at /content/drive/MyDrive/Colab Notebooks/Gold_Price_TA_training.xlsx
Found validation file at /content/drive/MyDrive/Colab Notebooks/Gold_Price_TA_validation.xlsx
Found test file at /content/drive/MyDrive/Colab Notebooks/Gold_Price_TA_test.xlsx


  df = df.fillna(method='ffill', limit=5)
  df = df.fillna(method='bfill', limit=5)
  df = df.fillna(method='ffill', limit=5)
  df = df.fillna(method='bfill', limit=5)
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


Epoch 1/100
[1m69/94[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m40s[0m 2s/step - directional_accuracy: 0.0000e+00 - loss: nan - mae: nan - mse: nan