In [1]:
import vectorbt as vbt
import pandas_ta as ta
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import xgboost as xgb
import numpy as np

from sklearn.model_selection import train_test_split# Import additional libraries for dimension reduction
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.preprocessing import StandardScaler
import seaborn as sns

In [None]:


# First, let's create more features to make dimension reduction more meaningful
def create_extended_features(df):
    """
    Add extra technical-indicator, price-ratio and lag columns to a price frame.

    The new columns are written directly onto ``df`` (the frame is modified in
    place) and the same object is returned so the call can be used in an
    assignment.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low`` and ``Close`` columns and an
            already-computed ``RSI`` column (read to build ``RSI_lag1``).

    Returns:
        The same DataFrame, now also holding the momentum, moving-average,
        Bollinger-band, price-ratio and lag columns created below.
    """
    # Momentum indicators
    df['RSI_21'] = ta.rsi(df.Close, length=21)
    df['Williams_R'] = ta.willr(df.High, df.Low, df.Close, length=14)
    df['ROC'] = ta.roc(df.Close, length=10)

    # Moving averages with different periods
    df['SMA_10'] = ta.sma(df.Close, length=10)
    df['SMA_20'] = ta.sma(df.Close, length=20)
    df['EMA_12'] = ta.ema(df.Close, length=12)
    df['EMA_26'] = ta.ema(df.Close, length=26)

    # Volatility indicators
    # ta.bbands returns its columns in the order lower, mid, upper, bandwidth,
    # percent. Taking positions 0/1/2 as upper/middle/lower therefore swaps the
    # outer bands, so each band is looked up by its column-name prefix instead
    # (with the correct position as a fallback). The indicator is also computed
    # once rather than three times.
    bbands = ta.bbands(df.Close, length=20)

    def _band(prefix, position):
        """Return the band whose column name starts with ``prefix``."""
        named = [col for col in bbands.columns if str(col).startswith(prefix)]
        return bbands[named[0]] if named else bbands.iloc[:, position]

    df['BBANDS_upper'] = _band('BBU_', 2)
    df['BBANDS_middle'] = _band('BBM_', 1)
    df['BBANDS_lower'] = _band('BBL_', 0)
    df['BB_width'] = df['BBANDS_upper'] - df['BBANDS_lower']
    # 0 = close sits on the lower band, 1 = close sits on the upper band
    df['BB_position'] = (df['Close'] - df['BBANDS_lower']) / (df['BBANDS_upper'] - df['BBANDS_lower'])

    # Volume indicators (if volume data was available)
    # df['Volume_SMA'] = ta.sma(df.Volume, length=20)
    # df['Volume_ratio'] = df.Volume / df['Volume_SMA']

    # Price-based features
    df['High_Low_ratio'] = df['High'] / df['Low']
    df['Close_Open_ratio'] = df['Close'] / df['Open']
    df['Price_change'] = df['Close'].pct_change()
    df['Price_change_3d'] = df['Close'].pct_change(periods=3)

    # Lag features (previous day values)
    df['Close_lag1'] = df['Close'].shift(1)
    df['Close_lag2'] = df['Close'].shift(2)
    df['RSI_lag1'] = df['RSI'].shift(1)

    return df

# Add the extended indicator columns to the working frame
df = create_extended_features(df)

# Model inputs, one name per line and grouped by origin
extended_features = [
    # Columns not produced by create_extended_features (expected on df already)
    'RSI',
    'SMA',
    'EMA',
    'ATR',
    'MACD',
    'MACD_signal',
    # Momentum
    'RSI_21',
    'Williams_R',
    'ROC',
    # Moving averages
    'SMA_10',
    'SMA_20',
    'EMA_12',
    'EMA_26',
    # Bollinger bands
    'BBANDS_upper',
    'BBANDS_middle',
    'BBANDS_lower',
    'BB_width',
    'BB_position',
    # Price ratios and returns
    'High_Low_ratio',
    'Close_Open_ratio',
    'Price_change',
    'Price_change_3d',
    # Lagged values
    'Close_lag1',
    'Close_lag2',
    'RSI_lag1',
]

print(f"Total features created: {len(extended_features)}")

In [None]:
# Build a NaN-free working frame for the dimension-reduction experiments.
# dropna() hands back a new frame, so df itself is left untouched.
df_clean = df.dropna()

# Separate the target column from the feature matrix
y_extended = df_clean['Signal']
X_extended = df_clean[extended_features]

print(f"Dataset shape after cleaning: {X_extended.shape}")
print(f"Features: {X_extended.columns.tolist()}")

# Sanity check: both counts should be zero after the dropna above
print(f"NaN values in features: {X_extended.isnull().sum().sum()}")
print(f"NaN values in target: {y_extended.isnull().sum()}")

In [None]:
# DIMENSION REDUCTION TECHNIQUE 1: CORRELATION-BASED FEATURE REMOVAL
def remove_highly_correlated_features(X, threshold=0.95):
    """
    Remove features that are highly correlated with other features.
    High correlation indicates redundancy in features.

    Column pairs are scanned in column order. When the absolute correlation of
    a pair exceeds ``threshold``, the member with the lower mean absolute
    correlation to all features is dropped (on a tie the earlier column is
    dropped). A feature that has already been dropped is ignored in later
    comparisons, so it cannot cause a further, unjustified removal.

    Args:
        X: Feature matrix
        threshold: Correlation threshold above which features are considered redundant

    Returns:
        X_reduced: Feature matrix with highly correlated features removed
        removed_features: List of removed feature names, in column order
    """
    # Absolute correlations: strong negative correlation is just as redundant
    corr_matrix = X.corr().abs()
    columns = list(corr_matrix.columns)
    corr_values = corr_matrix.to_numpy()

    # Mean correlation of each feature to all features, computed once up front
    mean_corr = corr_matrix.mean(axis=1).to_numpy()

    n_columns = len(columns)
    dropped = [False] * n_columns
    for i in range(n_columns):
        if dropped[i]:
            continue
        for j in range(i + 1, n_columns):
            # NaN correlations (e.g. constant columns) compare False and are skipped
            if dropped[j] or not corr_values[i, j] > threshold:
                continue
            if mean_corr[i] > mean_corr[j]:
                dropped[j] = True
            else:
                dropped[i] = True
                break  # feature i is gone; nothing left to compare it with

    # Column order keeps the result reproducible from run to run
    to_remove = [col for col, is_dropped in zip(columns, dropped) if is_dropped]
    X_reduced = X.drop(columns=to_remove)

    print(f"Removed {len(to_remove)} highly correlated features: {to_remove}")
    print(f"Remaining features: {X_reduced.shape[1]}")

    return X_reduced, to_remove

# Apply correlation-based feature removal
# threshold=0.9 overrides the function's 0.95 default, so more pairs count as redundant
X_corr_reduced, removed_corr_features = remove_highly_correlated_features(X_extended, threshold=0.9)

# Visualize correlation matrix before and after
plt.figure(figsize=(15, 6))

# Left panel: signed correlations of the full feature set (no cell annotations)
plt.subplot(1, 2, 1)
sns.heatmap(X_extended.corr(), annot=False, cmap='coolwarm', center=0)
plt.title('Original Feature Correlations')

# Right panel: the surviving features, annotated with two-decimal correlations
plt.subplot(1, 2, 2)
sns.heatmap(X_corr_reduced.corr(), annot=True, cmap='coolwarm', center=0, fmt='.2f')
plt.title('After Correlation-based Reduction')

plt.tight_layout()
plt.show()

In [None]:
# DIMENSION REDUCTION TECHNIQUE 2: PRINCIPAL COMPONENT ANALYSIS (PCA)
def apply_pca_reduction(X, n_components=None, variance_threshold=0.95):
    """
    Apply PCA for dimension reduction while preserving specified variance.

    PCA finds the principal components (linear combinations of original features)
    that capture the most variance in the data. This helps reduce noise and
    computational complexity while retaining most of the information.

    Note that the scaler and the PCA are both fitted on every row of ``X``; if
    ``X`` also contains rows later used for evaluation, their statistics
    influence the transform.

    Args:
        X: Feature matrix (DataFrame; its index is reused for the result)
        n_components: Number of components to keep (if None, use variance_threshold)
        variance_threshold: Minimum cumulative variance to preserve

    Returns:
        X_pca_df: DataFrame of the transformed features with columns PC1..PCn
        pca: Fitted PCA object
        scaler: Fitted StandardScaler applied before the PCA
        n_components: Number of components actually used
    """
    # Standardize features (important for PCA)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    if n_components is None:
        # Determine number of components needed for variance threshold
        pca_temp = PCA()
        pca_temp.fit(X_scaled)
        cumsum_variance = np.cumsum(pca_temp.explained_variance_ratio_)
        reached = cumsum_variance >= variance_threshold
        if reached.any():
            n_components = int(np.argmax(reached)) + 1
        else:
            # argmax of an all-False mask is 0, which would silently keep one
            # component. If the threshold is never reached (e.g. 1.0 against a
            # cumulative sum of 0.9999...), keep every component instead.
            n_components = len(cumsum_variance)

    # Apply PCA with determined number of components
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X_scaled)

    # Print information about the reduction
    total_variance = np.sum(pca.explained_variance_ratio_)
    print(f"PCA Reduction Results:")
    print(f"Original dimensions: {X.shape[1]}")
    print(f"Reduced dimensions: {n_components}")
    print(f"Variance preserved: {total_variance:.3f} ({total_variance*100:.1f}%)")
    print(f"Compression ratio: {X.shape[1]/n_components:.2f}:1")

    # Create DataFrame with PCA components
    pca_columns = [f'PC{i+1}' for i in range(n_components)]
    X_pca_df = pd.DataFrame(X_pca, index=X.index, columns=pca_columns)

    return X_pca_df, pca, scaler, n_components

# Apply PCA to correlation-reduced features
X_pca, pca_model, pca_scaler, n_pca_components = apply_pca_reduction(
    X_corr_reduced,
    variance_threshold=0.95
)

# Visualize PCA results
# pca_model was fitted with the reduced component count, so the panels below
# only show the retained components.
plt.figure(figsize=(15, 5))

# Plot 1: Explained variance ratio
plt.subplot(1, 3, 1)
plt.bar(range(1, len(pca_model.explained_variance_ratio_) + 1),
        pca_model.explained_variance_ratio_)
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.title('Variance Explained by Each PC')

# Plot 2: Cumulative explained variance
plt.subplot(1, 3, 2)
cumsum_variance = np.cumsum(pca_model.explained_variance_ratio_)
plt.plot(range(1, len(cumsum_variance) + 1), cumsum_variance, 'bo-')
# Reference line mirrors the variance_threshold passed to apply_pca_reduction above
plt.axhline(y=0.95, color='r', linestyle='--', label='95% threshold')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Cumulative Variance Explained')
plt.legend()

# Plot 3: Feature contributions to first two PCs
plt.subplot(1, 3, 3)
if pca_model.components_.shape[0] >= 2:
    # Rows = original features, columns = loadings on PC1 and PC2
    feature_contributions = pca_model.components_[:2].T
    plt.scatter(feature_contributions[:, 0], feature_contributions[:, 1])
    plt.xlabel('PC1 Contribution')
    plt.ylabel('PC2 Contribution')
    plt.title('Feature Contributions to PC1 vs PC2')

    # Add feature labels
    for i, feature in enumerate(X_corr_reduced.columns):
        plt.annotate(feature, (feature_contributions[i, 0], feature_contributions[i, 1]),
                    fontsize=8, alpha=0.7)
else:
    # With a single retained component there is no PC2 column to plot;
    # indexing it would raise IndexError, so show a placeholder instead.
    plt.text(0.5, 0.5, 'Fewer than two components retained',
             ha='center', va='center')
    plt.title('Feature Contributions to PC1 vs PC2')
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# DIMENSION REDUCTION TECHNIQUE 3: UNIVARIATE FEATURE SELECTION
def apply_univariate_selection(X, y, k=10):
    """
    Select k best features based on univariate statistical tests.

    This method evaluates each feature individually against the target variable
    using statistical tests (F-test for classification). Features with the
    highest scores are selected.

    Args:
        X: Feature matrix (DataFrame)
        y: Target variable
        k: Number of features to select; values larger than the number of
           available features are reduced to that number

    Returns:
        X_selected_df: DataFrame holding only the selected feature columns
        selector: Fitted SelectKBest object
        selected_features: Names of selected features
        feature_scores: DataFrame with one row per input feature
            (``Feature``, ``Score``, ``Selected``), sorted by score descending
    """
    # Upstream pruning can leave fewer than k columns; asking SelectKBest for
    # more than exist is an error or a warning depending on the sklearn version.
    n_available = X.shape[1]
    if k != 'all' and k > n_available:
        print(f"Requested k={k} exceeds the {n_available} available features; selecting all of them")
        k = n_available

    # Apply SelectKBest with f_classif scoring function
    selector = SelectKBest(score_func=f_classif, k=k)
    X_selected = selector.fit_transform(X, y)

    # Get selected feature names
    support_mask = selector.get_support()
    selected_features = X.columns[support_mask].tolist()

    # Get feature scores
    feature_scores = pd.DataFrame({
        'Feature': X.columns,
        'Score': selector.scores_,
        'Selected': support_mask
    }).sort_values('Score', ascending=False)

    print(f"Univariate Feature Selection Results:")
    # Report how many features were actually kept, not just what was requested
    print(f"Selected {len(selected_features)} features out of {n_available}")
    print(f"Selected features: {selected_features}")
    print("\nTop 10 feature scores:")
    print(feature_scores.head(10))

    # Create DataFrame with selected features
    X_selected_df = pd.DataFrame(X_selected, index=X.index, columns=selected_features)

    return X_selected_df, selector, selected_features, feature_scores

# Apply univariate feature selection
# Keeps the 8 highest-scoring columns of the correlation-reduced matrix
X_univariate, univariate_selector, selected_features, feature_scores = apply_univariate_selection(
    X_corr_reduced, y_extended, k=8
)

# Visualize feature selection results
plt.figure(figsize=(12, 6))

# Left panel: score of every candidate feature. feature_scores is already
# sorted by score (descending); inverting the y axis puts the best one on top.
plt.subplot(1, 2, 1)
plt.barh(range(len(feature_scores)), feature_scores['Score'])
plt.yticks(range(len(feature_scores)), feature_scores['Feature'])
plt.xlabel('F-Score')
plt.title('Feature Importance Scores')
plt.gca().invert_yaxis()

# Right panel: only the rows flagged as selected, labelled by feature name
plt.subplot(1, 2, 2)
selected_scores = feature_scores[feature_scores['Selected']]['Score']
plt.bar(range(len(selected_scores)), selected_scores)
plt.xticks(range(len(selected_scores)), 
           feature_scores[feature_scores['Selected']]['Feature'], 
           rotation=45, ha='right')
plt.ylabel('F-Score')
plt.title('Selected Features and Their Scores')

plt.tight_layout()
plt.show()

In [None]:
# DIMENSION REDUCTION TECHNIQUE 4: RECURSIVE FEATURE ELIMINATION (RFE)
def apply_rfe_selection(X, y, n_features=8, estimator=None):
    """
    Apply Recursive Feature Elimination using XGBoost as the estimator.

    RFE works by recursively eliminating features and building the model
    on the remaining attributes. It uses the model's feature importance
    to eliminate the least important features.

    Args:
        X: Feature matrix (DataFrame)
        y: Target variable with labels -1, 0 and 1 (remapped to 0, 1, 2 before fitting)
        n_features: Number of features to select
        estimator: Base estimator (if None, uses XGBoost)

    Returns:
        X_rfe_df: DataFrame holding only the selected feature columns
        rfe: Fitted RFE object
        selected_features: Names of selected features
        feature_rankings: DataFrame with one row per input feature
            (``Feature``, ``Ranking``, ``Selected``), sorted by ranking

    Raises:
        ValueError: If ``y`` contains a label other than -1, 0 or 1.
    """
    if estimator is None:
        # Use XGBoost as the base estimator for RFE
        estimator = xgb.XGBClassifier(
            objective='multi:softmax',
            num_class=3,
            eval_metric='mlogloss',
            learning_rate=0.1,
            max_depth=6,
            random_state=42,
            verbosity=0
        )

    # Map target values for XGBoost classifier
    y_mapped = y.map({-1: 0, 0: 1, 1: 2})

    # Labels outside the mapping become NaN; fail here with a clear message
    # instead of letting the estimator choke on NaN targets later.
    unmapped = y_mapped.isnull()
    if unmapped.any():
        unexpected = y[unmapped].unique().tolist()
        raise ValueError(
            f"Target contains labels outside {{-1, 0, 1}}: {unexpected}"
        )

    # Apply RFE
    rfe = RFE(estimator=estimator, n_features_to_select=n_features, step=1)
    X_rfe = rfe.fit_transform(X, y_mapped)

    # Get selected feature names
    selected_features = X.columns[rfe.support_].tolist()

    # Get feature rankings
    feature_rankings = pd.DataFrame({
        'Feature': X.columns,
        'Ranking': rfe.ranking_,
        'Selected': rfe.support_
    }).sort_values('Ranking')

    print(f"RFE Results:")
    # Report how many features were actually kept, not just what was requested
    print(f"Selected {len(selected_features)} features out of {X.shape[1]}")
    print(f"Selected features: {selected_features}")
    print("\nFeature rankings:")
    print(feature_rankings)

    # Create DataFrame with selected features
    X_rfe_df = pd.DataFrame(X_rfe, index=X.index, columns=selected_features)

    return X_rfe_df, rfe, selected_features, feature_rankings

# Apply RFE selection
# Keeps 6 columns of the correlation-reduced matrix
X_rfe, rfe_selector, rfe_selected_features, feature_rankings = apply_rfe_selection(
    X_corr_reduced, y_extended, n_features=6
)

# Visualize RFE results
plt.figure(figsize=(10, 6))
# One colour per row of feature_rankings: green if RFE kept it, red otherwise
colors = ['green' if selected else 'red' for selected in feature_rankings['Selected']]
plt.barh(range(len(feature_rankings)), feature_rankings['Ranking'], color=colors)
plt.yticks(range(len(feature_rankings)), feature_rankings['Feature'])
plt.xlabel('Ranking (1 = best)')
plt.title('RFE Feature Rankings (Green = Selected, Red = Eliminated)')
# feature_rankings is sorted by ranking, so inverting puts rank 1 at the top
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()

In [None]:
# COMPARISON OF DIMENSION REDUCTION TECHNIQUES
def compare_dimension_reduction_methods(X_sets, y, method_names):
    """
    Compare different dimension reduction methods by training XGBoost models
    and evaluating their performance.

    Each feature matrix is sorted by index, split 80/20 in that order, and used
    to train a 3-class booster with early stopping. Accuracy and the reported
    log loss both refer to the best early-stopping iteration.

    Args:
        X_sets: List of feature matrices from different reduction methods
        y: Target variable with labels -1, 0 and 1, sharing its index with each matrix
        method_names: Names of the reduction methods

    Returns:
        results: DataFrame with one row per method and the columns ``Method``,
            ``Features``, ``Accuracy``, ``Test_Error``, ``Best_Iteration`` and
            ``Feature_Names``
    """
    results = []

    for X, method_name in zip(X_sets, method_names):
        print(f"\nEvaluating {method_name}...")
        print(f"Features shape: {X.shape}")

        # Split data (time-based split)
        X_sorted = X.sort_index()
        y_sorted = y.sort_index()

        split_point = int(len(X_sorted) * 0.8)
        X_train = X_sorted.iloc[:split_point]
        X_test = X_sorted.iloc[split_point:]
        y_train = y_sorted.iloc[:split_point]
        y_test = y_sorted.iloc[split_point:]

        # Map target values
        y_train_mapped = y_train.map({-1: 0, 0: 1, 1: 2})
        y_test_mapped = y_test.map({-1: 0, 0: 1, 1: 2})

        # Train XGBoost model
        dtrain = xgb.DMatrix(X_train, label=y_train_mapped)
        dtest = xgb.DMatrix(X_test, label=y_test_mapped)

        params = {
            'objective': 'multi:softmax',
            'num_class': 3,
            'eval_metric': 'mlogloss',
            'eta': 0.1,
            'max_depth': 6,
            'seed': 42,
            'verbosity': 0
        }

        # Train model with early stopping
        # NOTE(review): early stopping watches the same hold-out set that is
        # scored below, so the reported numbers are optimistic. A separate
        # validation slice would be needed for an unbiased estimate.
        evals = [(dtrain, 'train'), (dtest, 'test')]
        bst = xgb.train(
            params, dtrain,
            num_boost_round=100,
            evals=evals,
            early_stopping_rounds=10,
            verbose_eval=False
        )

        # Get best iteration and test error
        best_iteration = bst.best_iteration
        test_error = bst.best_score

        # Make predictions
        # The native Booster.predict uses every boosted round by default, even
        # after early stopping. Restrict it to the best iteration so accuracy
        # is measured on the same model that produced best_score.
        y_pred = bst.predict(dtest, iteration_range=(0, best_iteration + 1))

        # Calculate accuracy
        accuracy = np.mean(y_pred == y_test_mapped)

        results.append({
            'Method': method_name,
            'Features': X.shape[1],
            'Accuracy': accuracy,
            'Test_Error': test_error,
            'Best_Iteration': best_iteration,
            'Feature_Names': list(X.columns) if hasattr(X, 'columns') else None
        })

        print(f"Accuracy: {accuracy:.4f}")
        print(f"Test Error: {test_error:.4f}")

    return pd.DataFrame(results)

# Pair every method label with its feature matrix, then unzip the pairs into
# the two parallel lists that the comparison function expects
method_names, comparison_datasets = (
    list(column) for column in zip(*[
        ('Original Features', X_extended),          # Original features
        ('Correlation Reduction', X_corr_reduced),  # Correlation-based reduction
        ('PCA', X_pca),                             # PCA reduction
        ('Univariate Selection', X_univariate),     # Univariate selection
        ('RFE Selection', X_rfe),                   # RFE selection
    ])
)

# Train and score one model per feature set
comparison_results = compare_dimension_reduction_methods(
    comparison_datasets, y_extended, method_names
)

# Summary table without the row index
print("\n" + "="*50)
print("DIMENSION REDUCTION COMPARISON RESULTS")
print("="*50)
print(comparison_results.to_string(index=False))

In [None]:
# VISUALIZE COMPARISON RESULTS
# 2x3 grid of panels, each reading one or two columns of comparison_results
plt.figure(figsize=(15, 10))

# Plot 1: Accuracy comparison
plt.subplot(2, 3, 1)
plt.bar(comparison_results['Method'], comparison_results['Accuracy'])
plt.title('Model Accuracy by Reduction Method')
plt.ylabel('Accuracy')
plt.xticks(rotation=45, ha='right')

# Plot 2: Number of features
plt.subplot(2, 3, 2)
plt.bar(comparison_results['Method'], comparison_results['Features'])
plt.title('Number of Features by Method')
plt.ylabel('Number of Features')
plt.xticks(rotation=45, ha='right')

# Plot 3: Test error
plt.subplot(2, 3, 3)
plt.bar(comparison_results['Method'], comparison_results['Test_Error'])
plt.title('Test Error by Method')
plt.ylabel('Test Error (Log Loss)')
plt.xticks(rotation=45, ha='right')

# Plot 4: Accuracy vs Number of Features
plt.subplot(2, 3, 4)
plt.scatter(comparison_results['Features'], comparison_results['Accuracy'])
# Label each point with its method name (rows are addressed by position)
for i, method in enumerate(comparison_results['Method']):
    plt.annotate(method, 
                (comparison_results['Features'].iloc[i], 
                 comparison_results['Accuracy'].iloc[i]),
                fontsize=8, alpha=0.7)
plt.xlabel('Number of Features')
plt.ylabel('Accuracy')
plt.title('Accuracy vs Feature Count')

# Plot 5: Training efficiency (Best iteration)
plt.subplot(2, 3, 5)
plt.bar(comparison_results['Method'], comparison_results['Best_Iteration'])
plt.title('Training Iterations to Best Score')
plt.ylabel('Best Iteration')
plt.xticks(rotation=45, ha='right')

# Plot 6: Efficiency ratio (Accuracy per Feature)
plt.subplot(2, 3, 6)
# Element-wise ratio; methods with fewer features score higher at equal accuracy
efficiency_ratio = comparison_results['Accuracy'] / comparison_results['Features']
plt.bar(comparison_results['Method'], efficiency_ratio)
plt.title('Efficiency Ratio (Accuracy/Features)')
plt.ylabel('Efficiency Ratio')
plt.xticks(rotation=45, ha='right')

plt.tight_layout()
plt.show()

# Select the best performing method
# idxmax() returns an index *label*, so the row is fetched with label-based
# .loc; positional .iloc only agrees while the frame keeps its default
# 0..n-1 index.
best_method_idx = comparison_results['Accuracy'].idxmax()
best_method = comparison_results.loc[best_method_idx]

print("\n" + "="*50)
print("BEST PERFORMING METHOD")
print("="*50)
print(f"Method: {best_method['Method']}")
print(f"Features: {best_method['Features']}")
print(f"Accuracy: {best_method['Accuracy']:.4f}")
print(f"Test Error: {best_method['Test_Error']:.4f}")

In [None]:
# IMPLEMENT FINAL MODEL WITH BEST DIMENSION REDUCTION METHOD
def implement_final_model_with_reduction():
    """
    Implement the final trading model using the best dimension reduction method.
    This function shows how to integrate dimension reduction into your trading pipeline.

    Reads the notebook-level ``X_rfe``, ``y_extended`` and ``df_clean``. The
    RFE feature set is hard-coded here; it is not derived from the comparison
    results.

    Returns:
        final_model: Trained XGBoost Booster
        full_signals: Series indexed like ``df_clean`` holding the predicted
            signal (-1, 0 or 1) for the hold-out rows and 0 everywhere else
        feature_names: List of the feature columns the model was trained on
    """
    # Based on comparison results, select the best method
    # For this example, let's assume RFE performed best
    print("Implementing final model with RFE-selected features...")

    # Use RFE-selected features, ordered by index so the 80/20 split below is
    # chronological (sort_index returns new objects; the globals are untouched)
    X_final = X_rfe.sort_index()
    y_final = y_extended.sort_index()

    # Time-based split for final evaluation
    split_point = int(len(X_final) * 0.8)
    X_train_final = X_final.iloc[:split_point]
    X_test_final = X_final.iloc[split_point:]
    y_train_final = y_final.iloc[:split_point]
    y_test_final = y_final.iloc[split_point:]

    # Map target values
    y_train_mapped = y_train_final.map({-1: 0, 0: 1, 1: 2})
    y_test_mapped = y_test_final.map({-1: 0, 0: 1, 1: 2})

    # Create DMatrix
    dtrain_final = xgb.DMatrix(X_train_final, label=y_train_mapped)
    dtest_final = xgb.DMatrix(X_test_final, label=y_test_mapped)

    # Optimized XGBoost parameters for the reduced feature set
    params_final = {
        'objective': 'multi:softmax',
        'num_class': 3,
        'eval_metric': 'mlogloss',
        'eta': 0.1,
        'max_depth': 4,  # Reduced depth due to fewer features
        'min_child_weight': 3,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'seed': 42,
        'verbosity': 0
    }

    # Train final model
    # NOTE(review): early stopping watches the hold-out set whose predictions
    # are later backtested, so the backtest is not fully out-of-sample.
    evals = [(dtrain_final, 'train'), (dtest_final, 'test')]
    final_model = xgb.train(
        params_final, dtrain_final,
        num_boost_round=200,
        evals=evals,
        early_stopping_rounds=20,
        verbose_eval=10
    )

    # Make predictions
    # The native Booster.predict uses every boosted round by default, even
    # after early stopping; restrict it to the best iteration.
    y_pred_final = final_model.predict(
        dtest_final,
        iteration_range=(0, final_model.best_iteration + 1)
    )

    # Map predictions back to trading signals
    y_pred_signals = pd.Series(y_pred_final, index=y_test_final.index).map({0: -1, 1: 0, 2: 1})

    # Create full signal series for backtesting
    # Rows outside the hold-out period keep the neutral signal 0
    full_signals = pd.Series(0, index=df_clean.index)
    full_signals.update(y_pred_signals)

    return final_model, full_signals, X_final.columns.tolist()

# Train the final model and collect its signals and feature names
final_model, trading_signals, final_features = implement_final_model_with_reduction()

# Header line followed by the feature list on its own line
print(f"\nFinal model trained with {len(final_features)} features:", final_features, sep="\n")

In [None]:
# BACKTEST THE FINAL MODEL WITH DIMENSION REDUCTION
def backtest_reduced_model(df, signals):
    """
    Run a vectorbt signal backtest for the dimension-reduced model.

    A signal of 1 is treated as an entry and a signal of -1 as an exit; any
    other value triggers nothing.

    Args:
        df: Price data DataFrame (only its ``Close`` column is used)
        signals: Trading signals from the reduced model

    Returns:
        portfolio: VectorBT portfolio object
    """
    # Fixed simulation settings for this notebook
    simulation_settings = dict(
        freq='D',
        init_cash=10000,
        fees=0.001,
        sl_stop=0.05,  # 5% stop loss
        upon_opposite_entry='ignore',
    )

    return vbt.Portfolio.from_signals(
        close=df['Close'],
        entries=(signals == 1),
        exits=(signals == -1),
        **simulation_settings,
    )

# Run the backtest on the cleaned price data with the model's signals
portfolio_reduced = backtest_reduced_model(df_clean, trading_signals)

# Performance summary, assembled as one block of lines and printed once
print("\n".join([
    "\n" + "="*50,
    "BACKTESTING RESULTS - DIMENSION REDUCED MODEL",
    "="*50,
    f"Total Return: {portfolio_reduced.total_return():.2%}",
    f"Annualized Return: {portfolio_reduced.annualized_return():.2%}",
    f"Max Drawdown: {portfolio_reduced.max_drawdown():.2%}",
    f"Sharpe Ratio: {portfolio_reduced.sharpe_ratio():.3f}",
    f"Number of Trades: {portfolio_reduced.trades.count()}",
    f"Win Rate: {portfolio_reduced.trades.win_rate():.2%}",
]))

# Plot portfolio performance
# vectorbt's plot() returns a plotly figure, so the title has to be set on that
# figure's layout. plt.title would only label a new, empty matplotlib figure.
portfolio_reduced.plot().update_layout(
    title_text="Portfolio Performance - Dimension Reduced Model"
).show()

# Feature importance from the final model
# importance_type='weight' is, per the XGBoost docs, the number of times a
# feature is used to split; features never used in a split are presumably
# absent from the returned dict.
feature_importance = final_model.get_score(importance_type='weight')
# Two-column table (feature name, importance), most important first
importance_df = pd.DataFrame(
    list(feature_importance.items()), 
    columns=['Feature', 'Importance']
).sort_values('Importance', ascending=False)

plt.figure(figsize=(10, 6))
plt.barh(range(len(importance_df)), importance_df['Importance'])
plt.yticks(range(len(importance_df)), importance_df['Feature'])
plt.xlabel('Feature Importance (Weight)')
plt.title('Final Model Feature Importance')
# Inverted axis puts the first (most important) row at the top
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()

print("\nFinal Model Feature Importance:")
print(importance_df.to_string(index=False))