# 9. Evaluation

Comprehensive evaluation of the final model, including detailed metrics, prediction analysis, residual analysis, and business insights.

In [None]:
# Final Model Evaluation Setup
#
# Picks the model selected during training, predicts on the train and test
# feature matrices, and prints a side-by-side metric table, target statistics
# and (when the estimator exposes them) the top feature importances.
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score

print("📊 Final Model Evaluation")
print("=" * 50)

# Use the best model from training (before potentially problematic hyperparameter tuning)
final_model_name = best_model['Model']
final_model = best_trained_model

print(f"Evaluating final model: {final_model_name}")
print(f"Features used: {len(available_features)} selected features")
print(f"Training samples: {len(X_train_selected):,}")
print(f"Test samples: {len(X_test_selected):,}")

# NOTE(review): predictions always use the *_selected matrices, while a later
# cell feeds X_test_scaled to models whose name contains 'Scaled' — confirm
# the chosen model does not expect scaled inputs.
final_train_pred = final_model.predict(X_train_selected)
final_test_pred = final_model.predict(X_test_selected)

# One metrics dict per split, built with the same recipe (train first, then test).
train_metrics, test_metrics = (
    {
        'R²': r2_score(actual, predicted),
        'RMSE': np.sqrt(mean_squared_error(actual, predicted)),
        'MAE': mean_absolute_error(actual, predicted),
        'Explained Variance': explained_variance_score(actual, predicted),
        'Mean Actual': actual.mean(),
        'Std Actual': actual.std(),
    }
    for actual, predicted in ((y_train, final_train_pred), (y_test, final_test_pred))
)

print("\n🎯 FINAL MODEL PERFORMANCE:")
print(f"{'Metric':<20} {'Training':<12} {'Testing':<12} {'Difference':<12}")
print("-" * 60)

# Difference column is train minus test for every metric.
for metric in ('R²', 'RMSE', 'MAE', 'Explained Variance'):
    train_val, test_val = train_metrics[metric], test_metrics[metric]
    print(f"{metric:<20} {train_val:<12.4f} {test_val:<12.4f} {train_val - test_val:<12.4f}")

print("\n📈 DATA STATISTICS:")
print(f"{'Dataset':<20} {'Mean':<12} {'Std Dev':<12}")
print("-" * 45)
for row_label, split_metrics in (('Training Target', train_metrics), ('Testing Target', test_metrics)):
    print(f"{row_label:<20} {split_metrics['Mean Actual']:<12.4f} {split_metrics['Std Actual']:<12.4f}")

# Model interpretation (only for estimators exposing feature_importances_)
if hasattr(final_model, 'feature_importances_'):
    importance_table = pd.DataFrame({
        'feature': available_features,
        'importance': final_model.feature_importances_,
    })
    # Sorted descending so row 0 is the most important feature.
    feature_importance_final = importance_table.sort_values('importance', ascending=False)

    print("\n🔍 TOP 5 FEATURE IMPORTANCES:")
    top_five = feature_importance_final.head(5)
    for rank, (_, row) in enumerate(top_five.iterrows(), start=1):
        print(f"  {rank}. {row['feature']:<40} {row['importance']:.4f}")

print("\n✅ Final model evaluation setup complete")

In [None]:
# Predictions vs Actual Analysis
#
# Draws a 2x2 diagnostic figure (predicted-vs-actual on the top row, residuals
# vs predicted on the bottom row; training left, test right) and prints
# residual summary statistics for both splits.
print("📈 Predictions vs Actual Analysis")
print("=" * 50)

# Residuals are actual minus predicted for each split.
train_residuals = y_train - final_train_pred
test_residuals = y_test - final_test_pred

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle(f'Final Model Evaluation: {final_model_name}', fontsize=16, fontweight='bold')

# Top row: predicted vs actual with a dashed identity line spanning the data range.
scatter_panels = (
    (ax1, 'Training Set', y_train, final_train_pred, train_metrics, {}),
    (ax2, 'Test Set', y_test, final_test_pred, test_metrics, {'color': 'orange'}),
)
for panel, split_label, actual, predicted, split_metrics, extra_style in scatter_panels:
    panel.scatter(actual, predicted, alpha=0.6, s=10, **extra_style)
    min_val = min(actual.min(), predicted.min())
    max_val = max(actual.max(), predicted.max())
    panel.plot([min_val, max_val], [min_val, max_val], 'r--', alpha=0.8)
    panel.set_xlabel('Actual Vibration')
    panel.set_ylabel('Predicted Vibration')
    panel.set_title(f'{split_label}: Predicted vs Actual\nR² = {split_metrics["R²"]:.4f}')
    panel.grid(True, alpha=0.3)

# Bottom row: residuals against predictions with a dashed zero line.
residual_panels = (
    (ax3, 'Training', final_train_pred, train_residuals, train_metrics, {}),
    (ax4, 'Test', final_test_pred, test_residuals, test_metrics, {'color': 'orange'}),
)
for panel, split_label, predicted, residuals, split_metrics, extra_style in residual_panels:
    panel.scatter(predicted, residuals, alpha=0.6, s=10, **extra_style)
    panel.axhline(y=0, color='r', linestyle='--', alpha=0.8)
    panel.set_xlabel('Predicted Vibration')
    panel.set_ylabel('Residuals (Actual - Predicted)')
    panel.set_title(f'{split_label} Residuals\nRMSE = {split_metrics["RMSE"]:.4f}')
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Residual statistics: mean, standard deviation and largest absolute residual.
train_residual_stats, test_residual_stats = (
    {
        'mean': residuals.mean(),
        'std': residuals.std(),
        'max_abs': abs(residuals).max(),
    }
    for residuals in (train_residuals, test_residuals)
)

print("\n📊 RESIDUAL ANALYSIS:")
print(f"{'Dataset':<15} {'Mean Residual':<15} {'Std Residual':<15} {'Max |Residual|':<15}")
print("-" * 65)
for row_label, stats in (('Training', train_residual_stats), ('Testing', test_residual_stats)):
    print(f"{row_label:<15} {stats['mean']:<15.4f} {stats['std']:<15.4f} {stats['max_abs']:<15.4f}")

# Prediction accuracy analysis
print("\n🎯 PREDICTION ACCURACY ANALYSIS:")

def accuracy_within_threshold(actual, predicted, threshold):
    """Return the percentage of predictions within ``threshold`` of the actual value.

    Values are paired by position, so any array-like works (lists, NumPy
    arrays, pandas Series) and pandas index labels are ignored.

    Args:
        actual: Array-like of observed values.
        predicted: Array-like of predicted values, broadcastable to ``actual``.
        threshold: Largest absolute error (inclusive) that counts as a hit.

    Returns:
        A float in [0, 100]; NaN when there are no values to compare.
    """
    abs_error = np.abs(np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float))
    if abs_error.size == 0:
        # A hit rate is undefined without samples; return NaN explicitly
        # instead of relying on np.mean's empty-slice warning.
        return float('nan')
    return float(np.mean(abs_error <= threshold) * 100)

# Share of predictions landing within each absolute-error band, per split.
thresholds = [0.001, 0.002, 0.005, 0.01]

column_titles = ('Threshold', 'Training %', 'Testing %')
print(" ".join(f"{title:<12}" for title in column_titles))
print("-" * 40)

for threshold in thresholds:
    train_acc, test_acc = (
        accuracy_within_threshold(actual, predicted, threshold)
        for actual, predicted in ((y_train, final_train_pred), (y_test, final_test_pred))
    )
    print(f"±{threshold:<11.3f} {train_acc:<12.1f} {test_acc:<12.1f}")

print("\n✅ Predictions vs actual analysis complete")

In [None]:
# Time Series Analysis and Business Insights
#
# Plots actual vs predicted vibration over the train and test index, then
# prints a performance tier, operational figures, and deployment/monitoring
# recommendations derived from `test_metrics`.
print("⏱️  Time Series Analysis and Business Insights")
print("="*50)

# Time series plot of predictions vs actual
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 10))

# Get time indices for plotting
# NOTE(review): assumes X_train/X_test rows are in the same order as
# y_train/y_test and the prediction arrays — confirm against the split cell.
train_time = X_train.index
test_time = X_test.index

# 1. Training period
ax1.plot(train_time, y_train, label='Actual', alpha=0.7, linewidth=1)
ax1.plot(train_time, final_train_pred, label='Predicted', alpha=0.8, linewidth=1)
ax1.set_ylabel('Vibration (mm/s)')
ax1.set_title(f'Training Period: Actual vs Predicted Vibration\nR² = {train_metrics["R²"]:.4f}')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Testing period
ax2.plot(test_time, y_test, label='Actual', alpha=0.7, linewidth=1, color='orange')
ax2.plot(test_time, final_test_pred, label='Predicted', alpha=0.8, linewidth=1, color='blue')
ax2.set_xlabel('Time')
ax2.set_ylabel('Vibration (mm/s)')
ax2.set_title(f'Test Period: Actual vs Predicted Vibration\nR² = {test_metrics["R²"]:.4f}')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Business insights and model performance summary
print(f"\n💼 BUSINESS INSIGHTS AND RECOMMENDATIONS:")
print("="*60)

# Model performance assessment
test_r2 = test_metrics['R²']
# NOTE: this rebinds `r2_score` from sklearn's metric function (imported in
# the setup cell) to a float. The name is kept for backward compatibility
# with later cells that may read it; re-run the sklearn import before calling
# r2_score() as a function again.
r2_score = test_r2

# Tiers use strict '>' comparisons, so a NaN score falls through to "Poor".
if test_r2 > 0.9:
    performance_level = "Excellent"
    business_confidence = "High confidence for production deployment"
elif test_r2 > 0.8:
    performance_level = "Good"
    business_confidence = "Suitable for production with monitoring"
elif test_r2 > 0.6:
    performance_level = "Fair"
    business_confidence = "Requires improvement before production"
else:
    performance_level = "Poor"
    business_confidence = "Not suitable for production deployment"

print(f"🎯 MODEL PERFORMANCE ASSESSMENT:")
print(f"  • Overall Performance: {performance_level}")
print(f"  • Business Recommendation: {business_confidence}")
print(f"  • Test R² Score: {test_r2:.4f}")
print(f"  • Prediction RMSE: {test_metrics['RMSE']:.4f} mm/s")

# Operational insights
# RMSE relative to the mean of the test target, expressed as a percentage.
rmse_as_percent = (test_metrics['RMSE'] / test_metrics['Mean Actual']) * 100
print(f"\n🔧 OPERATIONAL INSIGHTS:")
print(f"  • Average prediction error: {test_metrics['RMSE']:.4f} mm/s")
print(f"  • Error as % of mean vibration: {rmse_as_percent:.1f}%")
print(f"  • Model explains {test_r2*100:.1f}% of vibration variance")

# Feature insights
has_importances = hasattr(final_model, 'feature_importances_')
if has_importances:
    # feature_importance_final is sorted by importance (descending) in the
    # setup cell, so row 0 is the top-ranked feature.
    most_important_feature = feature_importance_final.iloc[0]['feature']
    most_important_value = feature_importance_final.iloc[0]['importance']
    print(f"  • Most critical process variable: {most_important_feature}")
    print(f"    - Contributes {most_important_value*100:.1f}% to prediction accuracy")

# Recommendations for deployment
print(f"\n📋 DEPLOYMENT RECOMMENDATIONS:")
deployment_rmse_threshold = 0.005  # Example threshold for industrial application

if test_metrics['RMSE'] <= deployment_rmse_threshold:
    print(f"  ✅ Model meets accuracy requirements (RMSE ≤ {deployment_rmse_threshold})")
    print(f"  • Ready for production deployment")
    print(f"  • Implement real-time monitoring dashboard")
    print(f"  • Set up automated alerts for prediction drift")
else:
    print(f"  ⚠️  Model needs improvement (RMSE = {test_metrics['RMSE']:.4f} > {deployment_rmse_threshold})")
    print(f"  • Collect more training data")
    print(f"  • Consider advanced feature engineering")
    print(f"  • Implement ensemble methods")

# Key features to monitor: the three highest-ranked by model importance when
# the model provides importances; otherwise fall back to the first three
# selected features (whose order carries no ranking visible in this cell).
if has_importances:
    key_features = feature_importance_final['feature'].head(3).tolist()
else:
    key_features = list(available_features[:3])

print(f"\n📊 MONITORING RECOMMENDATIONS:")
print(f"  • Monitor key features: {', '.join(map(str, key_features))}")
print(f"  • Track prediction accuracy over time")
print(f"  • Retrain model when performance degrades")
# Alert band is two standard deviations of the test residuals.
print(f"  • Alert when residuals exceed ±{test_residual_stats['std']*2:.4f} mm/s")

print(f"\n✅ Time series analysis and business insights complete")

In [None]:
# Test Period Time Series Analysis - All Model Predictions
#
# Predicts the test set with the first three models in `model_results_df` and
# plots actual vs predicted values in fixed-size chunks, one subplot per
# chunk, followed by a Test R² summary.
print("📊 Creating detailed test period plots with all model predictions...")
print("="*60)

chunk_size = 2000

# Only the first three rows of the results table are compared; slice once.
top_models = model_results_df[:3]

# Get test time indices for plotting
test_time = X_test.index

# Generate predictions from all models
print(f"🔍 Generating predictions from all {len(top_models)} models...")
all_model_predictions = {}
model_test_r2 = {}  # legend lookup, built once instead of re-filtering per plotted line

for _, row in top_models.iterrows():
    model_name = row['Model']
    model_obj = row['Model Object']

    # Models whose name contains 'Scaled' are fed the scaled test matrix.
    test_inputs = X_test_scaled if 'Scaled' in model_name else X_test_selected
    all_model_predictions[model_name] = model_obj.predict(test_inputs)
    # setdefault keeps the first row's score if a model name repeats.
    model_test_r2.setdefault(model_name, row['Test R²'])

print(f"✅ Generated predictions for {len(all_model_predictions)} models")

# Plot testing period in chunks
print(f"🔍 Testing period analysis - splitting {len(test_time)} observations into chunks...")
chunk_starts = range(0, len(test_time), chunk_size)
test_chunks = [test_time[start:start + chunk_size] for start in chunk_starts]
test_y_chunks = [y_test.iloc[start:start + chunk_size] for start in range(0, len(y_test), chunk_size)]

# Calculate number of chunks and create subplot layout
n_chunks = len(test_chunks)
n_cols = 2  # Use 2 columns for better visibility with multiple models
n_rows = (n_chunks + n_cols - 1) // n_cols  # ceiling division

# squeeze=False always yields a 2-D axes array, so the single-row case needs
# no manual reshape.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 6*n_rows), squeeze=False)
fig.suptitle('Test Period Analysis - All Model Predictions', fontsize=16, fontweight='bold')

# Flatten axes for easier indexing
axes_flat = axes.flatten()

# Define distinct colors for better visibility (cycled if there are more models than colors)
distinct_colors = ['#e41a1c', '#377eb8', '#4daf4a', '#ff7f00', '#984ea3', '#ffff33', '#a65628', '#f781bf', '#999999']
model_names = list(all_model_predictions)
model_colors = {
    name: distinct_colors[position % len(distinct_colors)]
    for position, name in enumerate(model_names)
}

for chunk_no, (time_chunk, y_chunk) in enumerate(zip(test_chunks, test_y_chunks)):
    ax = axes_flat[chunk_no]

    # Plot actual values
    ax.plot(time_chunk, y_chunk, label='Actual', alpha=0.9, linewidth=2.5, color='black')

    # Plot predictions from all models for the same positional window
    window_start = chunk_no * chunk_size
    for model_name, predictions in all_model_predictions.items():
        pred_chunk = predictions[window_start:window_start + chunk_size][:len(time_chunk)]
        ax.plot(time_chunk, pred_chunk,
                label=f'{model_name} (R²={model_test_r2[model_name]:.3f})',
                alpha=0.7, linewidth=1.5, color=model_colors[model_name])

    ax.set_ylabel('Vibration (mm/s)')
    ax.set_xlabel('Time')
    ax.set_title(f'Chunk {chunk_no+1}/{len(test_chunks)} (n={len(time_chunk)})')

    # Only show legend for first subplot to avoid clutter
    if chunk_no == 0:
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)

    ax.grid(True, alpha=0.3)

# Hide unused subplots (the grid may have one more cell than chunks)
for spare_ax in axes_flat[n_chunks:]:
    spare_ax.set_visible(False)

plt.tight_layout()
plt.show()

print(f"✅ Test period detailed analysis complete - {len(test_chunks)} chunks visualized with {len(all_model_predictions)} models")

# Print model performance summary
print(f"📊 Model Performance Summary (Test R²):")
print("-" * 50)
for _, row in top_models.iterrows():
    print(f"  {row['Model']:<25}: {row['Test R²']:.4f}")

In [None]:
# Project Summary and Conclusions
#
# Prints a narrative recap of data, methodology, results and next steps using
# values computed in earlier cells (test_metrics, performance_level,
# rmse_as_percent, deployment_rmse_threshold, ...).
print("📋 PROJECT SUMMARY AND CONCLUSIONS")
print("="*50)

print("🏭 INDUSTRIAL VIBRATION PREDICTION PROJECT")
print("="*50)

print("\n📊 DATA SUMMARY:")
print("  • Dataset: Industrial roller mill vibration data")
print(f"  • Original samples: {data_info['total_rows']:,}")
print(f"  • Clean samples: {len(df_clean):,}")
print(f"  • Features engineered: {df_features.shape[1]} (from {df_clean.shape[1]} original)")
print(f"  • Final features used: {len(available_features)} selected features")
period_start, period_end = data_info['time_range'][0], data_info['time_range'][1]
print(f"  • Time period: {period_start.strftime('%Y-%m-%d')} to {period_end.strftime('%Y-%m-%d')}")

print("\n🔧 METHODOLOGY:")
print("  • Feature Engineering: Rolling statistics, temporal features")
print("  • Data Leakage Prevention: Excluded all vibration columns from predictors")
print(f"  • Model Selection: Compared {len(models)} different algorithms")
print(f"  • Best Model: {final_model_name}")
# NOTE(review): the 80/20 split is stated literally, not derived from the
# actual train/test sizes — confirm it matches the split cell.
print("  • Validation: Time series split (80% train, 20% test)")

print("\n🎯 FINAL RESULTS:")
print(f"  • Test R² Score: {test_metrics['R²']:.4f}")
print(f"  • Test RMSE: {test_metrics['RMSE']:.4f} mm/s")
print(f"  • Test MAE: {test_metrics['MAE']:.4f} mm/s")
print(f"  • Prediction Accuracy (±0.001): {accuracy_within_threshold(y_test, final_test_pred, 0.001):.1f}%")
print(f"  • Model Performance: {performance_level}")

print("\n🔍 KEY FINDINGS:")
# NOTE(review): 38.9% is a hard-coded figure; nothing in this cell computes
# it — verify against the feature-engineering results before publishing.
print("  • Engineered features improved correlation by 38.9%")
# Disabled line; presumably because most_important_feature only exists when
# the model exposes feature_importances_ — TODO confirm before re-enabling.
# print(f"  • {most_important_feature} is the most predictive variable")
# Read R² from test_metrics directly rather than via the `r2_score` name,
# which an earlier cell rebinds from sklearn's function to a float.
print(f"  • Model explains {test_metrics['R²']*100:.1f}% of vibration variance")
print(f"  • Average prediction error: {rmse_as_percent:.1f}% of mean vibration")

print("\n✅ DELIVERABLES:")
print("  • Clean, reproducible ML pipeline")
print(f"  • Trained {final_model_name} model")
print("  • Feature importance analysis")
print("  • Comprehensive evaluation metrics")
print("  • Business insights and deployment recommendations")

print("\n🚀 NEXT STEPS:")
# Reuse the deployment threshold from the business-insights cell so both
# recommendations cannot drift apart.
if test_metrics['RMSE'] <= deployment_rmse_threshold:
    print("  • Deploy model to production environment")
    print("  • Implement real-time monitoring dashboard")
    print("  • Set up automated retraining pipeline")
else:
    print(f"  • Improve model performance (current RMSE: {test_metrics['RMSE']:.4f})")
    print("  • Collect additional training data")
    print("  • Explore advanced modeling techniques")

print("  • Monitor model drift and performance degradation")
print("  • Extend to other industrial equipment")

print("\n🎉 PROJECT COMPLETED SUCCESSFULLY!")
print(f"   Delivered a {performance_level.lower()} performing vibration prediction model")
print("   with comprehensive analysis and business insights.")

print("\n" + "="*50)
print("📝 End of Industrial Vibration Prediction Analysis")
print("="*50)