# SmartFlush Predictive Model - Example Analysis

This notebook demonstrates how to use the SmartFlush predictive model components for flush-volume optimization.

In [None]:
import sys
sys.path.append('..')  # make the parent directory importable so the `src` package resolves

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

from src.data_loading import DataLoader
from src.eda import EDAAnalyzer
from src.utils import load_config, calculate_vif, scale_features
from src.models import ModelTrainer
from src.metrics import evaluate_model, print_metrics_report

# Notebook-wide plotting defaults: matplotlib style sheet first, then the seaborn palette.
PLOT_STYLE = 'seaborn-v0_8-darkgrid'
COLOR_PALETTE = "husl"

plt.style.use(PLOT_STYLE)
sns.set_palette(COLOR_PALETTE)

## 1. Load Configuration and Data

In [None]:
# Input locations, relative to the directory this notebook runs from.
CONFIG_PATH = '../config.yaml'
SENSOR_DATA_PATH = '../data/sensor_data.xlsx'

# Read the project settings, then load the sensor spreadsheet through the loader.
config = load_config(CONFIG_PATH)
loader = DataLoader(SENSOR_DATA_PATH)
df = loader.load_excel()
print(f"Data shape: {df.shape}")

# Bare last expression so the notebook renders the first rows.
df.head()

## 2. Exploratory Data Analysis

In [None]:
# Column handed to the loader as the prediction target.
TARGET_COLUMN = 'flush_volume'

# Run the loader's cleaning step, then separate the target from the features.
# NOTE(review): the return value of clean_data() is discarded and `df` is not
# reassigned here, so the cleaned data presumably lives on the loader — confirm
# against DataLoader.
loader.clean_data()
X, y = loader.split_features_target(TARGET_COLUMN)

# Build the analyzer and print the two headline counts from its summary report.
eda = EDAAnalyzer(X, y, config)
report = eda.generate_summary_report()
print(
    f"Samples: {report['n_samples']}",
    f"Features: {report['n_features']}",
    sep="\n",
)

In [None]:
# Ask the analyzer for the correlations with the target and print them under a heading.
correlations = eda.target_correlation()
print("Feature correlations with target:", correlations, sep="\n")

In [None]:
# Plot correlation heatmap
# NOTE(review): the drawing happens inside EDAAnalyzer; which columns it covers
# and whether it returns a figure handle is not visible from this notebook.
eda.plot_correlation_heatmap()
# Flush any pending matplotlib figure(s) to the cell output.
plt.show()

## 3. Feature Engineering

In [None]:
# Variance inflation factor analysis over the feature matrix.
# NOTE(review): the threshold is passed straight through to calculate_vif; how it
# is applied (flagging vs. dropping features) is defined in src.utils — confirm.
VIF_THRESHOLD = 10.0

vif_df = calculate_vif(X, threshold=VIF_THRESHOLD)
print("\nVIF Analysis:", vif_df, sep="\n")

In [None]:
# Scale the full feature matrix with the 'standard' method. The helper returns two
# values: the scaled features and a second object kept here as `scaler`.
# NOTE(review): this runs on all of X before the train/test split further down, so
# if scale_features fits its statistics on the input, test rows influence the
# scaling (data leakage) — confirm and consider fitting on the training rows only.
SCALING_METHOD = 'standard'

X_scaled, scaler = scale_features(X, method=SCALING_METHOD)
print(f"Scaled features shape: {X_scaled.shape}")

## 4. Model Training

In [None]:
# Hold-out split of the scaled features and the target.
# `train_test_split` comes from the notebook's import cell at the top, so every
# dependency is declared in one place and a fresh Restart & Run All sees it up front.
TEST_FRACTION = 0.2  # share of rows reserved for evaluation
SPLIT_SEED = 42      # fixed seed so the same split is reproduced on every run

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Sanity checks: the two partitions together account for every row, and features
# and target stay the same length within each partition.
assert len(X_train) + len(X_test) == len(X_scaled)
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

In [None]:
# Quick demo: fit only the MLR model and score it on the held-out rows.
# The trainer and the metrics helper are both fed the underlying `.values` arrays.
trainer = ModelTrainer(config)
train_features, train_target = X_train.values, y_train.values
trainer.train_mlr(train_features, train_target)

# Predict on the test features using the 'mlr' key.
test_features, test_target = X_test.values, y_test.values
y_pred = trainer.predict('mlr', test_features)

# Compute the metrics and print the report labelled "MLR".
metrics = evaluate_model(test_target, y_pred, config)
print_metrics_report(metrics, "MLR")

## 5. Results Visualization

In [None]:
# Scatter of predicted against actual flush volume, with a y = x reference line.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.6)

# Dashed red diagonal spanning the range of the actual values; points on it
# are predictions that match the actual value exactly.
actual_lo, actual_hi = y_test.min(), y_test.max()
ax.plot([actual_lo, actual_hi], [actual_lo, actual_hi], 'r--', lw=2)

ax.set_xlabel('Actual Flush Volume (L)')
ax.set_ylabel('Predicted Flush Volume (L)')
ax.set_title('Predicted vs Actual Flush Volume')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Residuals are actual minus predicted, so a positive value means the model
# predicted less than the actual value.
residuals = y_test - y_pred

# Residuals against predictions, with a dashed zero-error reference line.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_pred, residuals, alpha=0.6)
ax.axhline(y=0, color='r', linestyle='--', lw=2)

ax.set_xlabel('Predicted Flush Volume (L)')
ax.set_ylabel('Residuals (L)')
ax.set_title('Residual Plot')
ax.grid(True, alpha=0.3)
plt.show()