In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# Use safe backend
import matplotlib
matplotlib.use('Agg')

# Restore the two artefacts persisted earlier: the fitted estimator first, then
# the list of feature names, in that order.
# NOTE: joblib.load unpickles its input, so only point these paths at files
# you trust.
model, features = (
    joblib.load(artifact_path)
    for artifact_path in (
        '../models/exchange_rate_rf_model.pkl',
        '../models/feature_list.pkl',
    )
)

print("Model and features loaded successfully!")

Model and features loaded successfully!


In [2]:
# Pair every importance score exposed by the fitted model with its feature
# name so the values can be ranked and labelled.
importances = model.feature_importances_
feat_importances = pd.Series(importances, index=features)

# Horizontal bar chart of the ten largest scores. Sorting ascending after
# nlargest() places the most important feature at the top of the chart.
# An explicit fig/ax pair is used instead of pyplot's implicit "current
# figure" so this cell cannot accidentally draw onto a figure left over from
# another cell.
fig, ax = plt.subplots(figsize=(10, 6))
feat_importances.nlargest(10).sort_values().plot(kind='barh', color='teal', ax=ax)
ax.set_title('Which Features Matter Most to the AI?')
ax.set_xlabel('Relative Importance')
fig.tight_layout()  # keep long feature names on the y-axis inside the canvas
fig.savefig('feature_importance.png')

# The Agg backend selected at the top of the notebook never displays (and so
# never auto-closes) figures; pyplot keeps them alive until closed explicitly,
# so release this one to avoid accumulating figures on every re-run.
plt.close(fig)
print("Feature importance chart saved as 'feature_importance.png'")

Feature importance chart saved as 'feature_importance.png'


In [3]:
# Rebuild the hold-out set from the engineered features. The target for each
# row is the value of 'rate' on the following row (shift(-1)), so the last row
# has no target and is removed by dropna().
df = pd.read_csv('../data/processed/features_engineered.csv')
df['target'] = df['rate'].shift(-1)
# NOTE(review): dropna() discards rows with a NaN in ANY column, not only in
# the feature/target columns. This must match what the training notebook did
# for the split below to line up with the original one — confirm.
data = df.dropna()

# Fail early with a clear message instead of an opaque error inside predict().
if data.empty:
    raise ValueError(
        "No complete rows left after dropna(); cannot build a test set."
    )

# Positional split: the final 20% of rows form the test set.
# NOTE(review): the 0.8 ratio is assumed to mirror the training split — confirm.
split_index = int(len(data) * 0.8)
X_test = data[features].iloc[split_index:]
y_test = data['target'].iloc[split_index:]

# Residual = actual - predicted; positive values mean the model under-predicted.
preds = model.predict(X_test)
residuals = y_test - preds

# Histogram (with KDE overlay) of the residuals, drawn on an explicit fig/ax
# pair rather than pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(residuals, kde=True, color='red', ax=ax)
ax.set_title('Distribution of Prediction Errors (Residuals)')
ax.set_xlabel('Error Magnitude (Actual - Predicted)')
fig.savefig('error_distribution.png')

# The Agg backend selected at the top of the notebook never displays (and so
# never auto-closes) figures; pyplot keeps them alive until closed explicitly,
# so release this one to avoid accumulating figures on every re-run.
plt.close(fig)
print("Error distribution saved as 'error_distribution.png'")

Error distribution saved as 'error_distribution.png'
