In [None]:
# Function to preprocess the input date for prediction
def preprocess_date_input(input_date, df, training_columns):
    """
    Build the single-row feature frame used to predict the trend for a date.

    Calendar features are derived from ``input_date``. Every price-derived
    feature is taken from the tail of ``df`` (its last rows), because no
    prices are available for the date being predicted.

    Args:
        input_date: Anything ``pd.to_datetime`` accepts, e.g. ``"2025-04-12"``.
        df: Price history containing 'high', 'low', 'close' and 'open'
            columns; the last row is used as the most recent observation.
        training_columns: Column labels the model was trained on, in the
            order the model expects them.

    Returns:
        A one-row DataFrame indexed by the parsed date whose columns are
        exactly ``training_columns``. Training columns that this function
        does not compute are filled with 0.

    Raises:
        IndexError: If ``df`` has no rows.
        KeyError: If ``df`` lacks one of the price columns.
    """
    input_date = pd.to_datetime(input_date)
    price_columns = ('high', 'low', 'close', 'open')
    history_length = len(df)

    features = {
        # weekday() has to be called; the bare attribute is a bound method,
        # which would end up in the frame as a non-numeric object.
        'day_of_week': input_date.weekday(),
        'day_of_month': input_date.day,
        'month': input_date.month,
        'year': input_date.year,
    }

    # Latest available prices stand in for the (unknown) prices of input_date.
    for name in price_columns:
        features[name] = df[name].iloc[-1]

    # Percentage change between the last two closes (NaN with a single row).
    features['pct_change'] = df['close'].pct_change().iloc[-1]

    for name in price_columns:
        series = df[name]
        latest = series.iloc[-1]
        for span in (30, 60):
            if history_length >= span:
                tail = series.iloc[-span:]
                tail_mean = tail.mean()
                tail_std = tail.std()
            else:
                # Not enough history for a full window: the mean falls back to
                # the latest value, and the std is taken over whatever rows
                # exist (a price level is not a valid dispersion value).
                tail_mean = latest
                tail_std = series.std()
                if pd.isna(tail_std):
                    # Fewer than two usable observations: no spread to report.
                    tail_std = 0.0

            # NOTE(review): the 'lag' features are the mean of the last `span`
            # rows (the same value as the rolling mean), not the value `span`
            # rows back -- confirm this matches how training built them.
            features[f'lag_{span}_{name}'] = tail_mean
            features[f'rolling_{span}_mean_{name}'] = tail_mean
            features[f'rolling_{span}_std_{name}'] = tail_std

    date_features = pd.DataFrame(features, index=[input_date])

    # Keep only the training columns, in training order; columns the model
    # expects but that were not computed above are created and filled with 0.
    return date_features.reindex(columns=training_columns, fill_value=0)

# Date to forecast (edit this string to predict a different day)
input_date = "2025-04-12"

# Build the one-row feature frame, with columns laid out like the training matrix X
date_features = preprocess_date_input(input_date, df, X.columns)

# Apply the scaler that was fitted during training
scaled_date_features = scaler.transform(date_features)

# Second column of predict_proba is read as the probability of class 1 (Bullish)
y_pred_proba = stacking_model.predict_proba(scaled_date_features)[:, 1]

# Decision cut-off for class 1: at 0.1, any Bullish probability of 10% or more
# is labelled Bullish, so the rule leans heavily towards Bullish.
threshold = 0.1

# 1 (Bullish) when the probability reaches the cut-off, otherwise 0 (Bearish)
y_pred_adjusted = (y_pred_proba >= threshold).astype(int)

# Translate the 0/1 class into its label (index 0 -> Bearish, index 1 -> Bullish)
trend_label = ('Bearish', 'Bullish')[int(y_pred_adjusted[0])]

# Report the outcome
print(f"Predicted Trend for {input_date}: The trend is {trend_label}!")


In [None]:
# Reload the previously saved stacking model from disk with joblib.load()
from joblib import load

# Load the trained model (assuming you have saved it previously).
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only load model files that come from a trusted source.
stacking_model = load('/content/stacking_model.pkl')

# Access the feature names that the model was trained on.
# X is the DataFrame used for training the model, so if X is available:
print(X.columns)

# Alternatively, read the names from the fitted scaler: scikit-learn estimators
# fitted on a DataFrame with string column names record them in
# `feature_names_in_`. Fall back to X.columns when the scaler did not record
# any (e.g. it was fitted on a plain array).
training_feature_names = getattr(scaler, 'feature_names_in_', None)
if training_feature_names is None:
    training_feature_names = X.columns
print("Columns used for training the stacking model:", training_feature_names)