# SmartFlush Artificial Neural Network (ANN) Development

This notebook focuses on developing and experimenting with ANN models for flush level prediction.

## Contents
1. Data Preparation
2. ANN Architecture Design
3. Model Training
4. Learning Curves
5. Hyperparameter Tuning
6. Model Evaluation
7. Probability Analysis

In [None]:
# Import required libraries
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import tensorflow as tf
from tensorflow import keras

# Import custom modules
from src.data_loading import load_and_combine_data, prepare_data
from src.models import (
    create_ann_model,
    train_ann_model,
    plot_learning_curves
)
from src.metrics import evaluate_model, plot_confusion_matrix

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Configure plotting
%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 8)

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")

## 1. Data Preparation

In [None]:
# Candidate spreadsheets; only the ones that exist on disk are loaded.
data_files = [
    '../data/Combined_Data.xlsx',
    '../data/mon_fichier.xlsx'
]
existing_files = [path for path in data_files if Path(path).exists()]

if not existing_files:
    # No input file found: fall back to a reproducible random dataset so the
    # rest of the notebook can still run end to end.
    print("Creating synthetic data for demonstration")
    np.random.seed(42)
    n_samples = 1000
    # The columns are drawn in this exact order; changing the order would
    # change the generated values for a fixed seed.
    synthetic_columns = {
        'photodiode_1': np.random.randn(n_samples),
        'photodiode_2': np.random.randn(n_samples),
        'waste_level': np.random.randint(1, 6, n_samples),
        'sensor_1': np.random.randn(n_samples),
        'sensor_2': np.random.randn(n_samples),
        'flush_level': np.random.randint(0, 11, n_samples)  # 0-10 for 11 classes
    }
    df = pd.DataFrame(synthetic_columns)
else:
    df = load_and_combine_data(existing_files, how='concat')

print(f"Data shape: {df.shape}")

# Use 'flush_level' as the target when present; otherwise fall back to the
# last column of the frame.
if 'flush_level' in df.columns:
    target_col = 'flush_level'
else:
    target_col = df.columns[-1]

# Splitting and preprocessing are delegated to the project helper; the
# options below are passed through unchanged.
data_dict = prepare_data(
    df,
    target_col=target_col,
    test_size=0.2,
    random_state=42,
    vif_threshold=10.0,
    apply_standardization=True
)

# Pull the four splits out of the returned mapping.
X_train, X_test = data_dict['X_train'], data_dict['X_test']
y_train, y_test = data_dict['y_train'], data_dict['y_test']

print(f"\nTraining set: {X_train.shape}")
print(f"Test set: {X_test.shape}")
print(f"Number of classes: {len(np.unique(y_train))}")

## 2. ANN Architecture Design

In [None]:
# Derive the network dimensions from the prepared training split:
# the number of distinct training labels and the number of feature columns.
# NOTE(review): using the count of distinct labels as `num_classes` presumably
# assumes labels are encoded 0..K-1 and all appear in y_train - confirm
# against create_ann_model / prepare_data.
num_classes = len(np.unique(y_train))
input_dim = X_train.shape[1]

# Architecture 1: three hidden layers of decreasing width.
hidden_layers_1 = [128, 64, 32]

architecture_1_config = {
    'input_dim': input_dim,
    'num_classes': num_classes,
    'hidden_layers': hidden_layers_1,
    'dropout_rate': 0.2,
    'learning_rate': 0.001,
}
model_1 = create_ann_model(**architecture_1_config)

# Show the layer-by-layer summary of the constructed model.
print("Model Architecture 1:")
model_1.summary()

## 3. Model Training

In [None]:
# Hold out 20% of the training split as a validation set; the test split is
# left untouched by this cell.
from sklearn.model_selection import train_test_split

nn_splits = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
X_train_nn, X_val_nn, y_train_nn, y_val_nn = nn_splits

print(f"Training set: {X_train_nn.shape}")
print(f"Validation set: {X_val_nn.shape}")

# Hyperparameters for the reference run, collected in one place and passed
# through to the project training helper unchanged.
training_config = {
    'hidden_layers': [128, 64, 32],
    'dropout_rate': 0.2,
    'learning_rate': 0.001,
    'batch_size': 32,
    'epochs': 100,
    'early_stopping_patience': 10,
}

# Train and keep both the fitted model and its training history.
ann_model, history = train_ann_model(
    X_train_nn, y_train_nn,
    X_val_nn, y_val_nn,
    **training_config
)

## 4. Learning Curves

In [None]:
# Draw the training/validation curves for the reference run.
plot_learning_curves(history)

# Report the last recorded value of each tracked quantity. These are the
# values at the final epoch that ran, not necessarily the best epoch.
final_epoch_report = (
    ("\nFinal Training Accuracy", 'accuracy'),
    ("Final Validation Accuracy", 'val_accuracy'),
    ("Final Training Loss", 'loss'),
    ("Final Validation Loss", 'val_loss'),
)
for report_label, history_key in final_epoch_report:
    print(f"{report_label}: {history.history[history_key][-1]:.4f}")

## 5. Hyperparameter Tuning Experiments

In [None]:
# Compare several hidden-layer layouts under identical training settings.
# Each entry lists the number of units per hidden layer.
architectures = [
    [128, 64, 32],
    [256, 128, 64],
    [64, 32],
    [128, 64]
]

results = []
# Plot label for every run that succeeded. The number is the architecture's
# position in `architectures`, so labels still match the "Training
# Architecture N" banners even when some runs fail and are skipped.
arch_labels = []

for i, hidden_layers in enumerate(architectures):
    print(f"\n{'='*60}")
    print(f"Training Architecture {i+1}: {hidden_layers}")
    print(f"{'='*60}")

    # Best-effort sweep: a failure in one configuration is reported and the
    # remaining configurations are still tried.
    try:
        model, hist = train_ann_model(
            X_train_nn, y_train_nn,
            X_val_nn, y_val_nn,
            hidden_layers=hidden_layers,
            dropout_rate=0.2,
            learning_rate=0.001,
            batch_size=32,
            epochs=50,
            early_stopping_patience=5
        )

        # Evaluate on test set.
        # NOTE(review): unpacking two values assumes the model was compiled
        # with exactly one metric (accuracy) - confirm in train_ann_model.
        # NOTE(review): test accuracy is shown for information only; picking
        # an architecture by it would bias the final test score, so prefer
        # val_accuracy for selection.
        test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

        results.append({
            'architecture': str(hidden_layers),
            # Value at the last epoch that ran (not necessarily the best).
            'val_accuracy': hist.history['val_accuracy'][-1],
            'test_accuracy': test_acc,
            'parameters': model.count_params()
        })
        arch_labels.append(f"Arch {i+1}")

    except Exception as e:
        print(f"Error training architecture {hidden_layers}: {e}")

# Compare results
results_df = pd.DataFrame(results)

if results_df.empty:
    # Every run failed: there are no accuracy columns to print or plot, and
    # indexing them would raise KeyError.
    print("\nNo architecture trained successfully; skipping comparison.")
else:
    print("\nArchitecture Comparison:")
    print(results_df.to_string())

    # Plot comparison: paired bars (validation vs. test) per architecture.
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(results_df))
    width = 0.35

    ax.bar(x - width/2, results_df['val_accuracy'], width, label='Validation Accuracy', alpha=0.8)
    ax.bar(x + width/2, results_df['test_accuracy'], width, label='Test Accuracy', alpha=0.8)

    ax.set_xlabel('Architecture')
    ax.set_ylabel('Accuracy')
    ax.set_title('ANN Architecture Comparison')
    ax.set_xticks(x)
    ax.set_xticklabels(arch_labels, rotation=45)
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.show()

## 6. Model Evaluation

In [None]:
# Score `ann_model` (the network trained in the Model Training section) on
# the held-out test set. This is not a model selected by the architecture
# sweep.
y_pred_proba = ann_model.predict(X_test)

# The predicted class is the index of the highest-probability output.
# NOTE(review): comparing these indices with y_test assumes labels are
# encoded 0..K-1 - confirm against prepare_data.
y_pred = y_pred_proba.argmax(axis=1)

# Compute the metric summary via the project helper and list every entry.
ann_metrics = evaluate_model(y_test, y_pred, 'ANN')
print("\nANN Performance Metrics:")
for metric_name, metric_value in ann_metrics.items():
    print(f"  {metric_name}: {metric_value}")

# Confusion matrix of true vs. predicted labels.
plot_confusion_matrix(y_test, y_pred, title='ANN - Confusion Matrix')

## 7. Probability Analysis

In [None]:
# Analyze how confident the model is in its test-set predictions, using the
# per-sample probability rows in `y_pred_proba` from the evaluation cell.
from scipy.stats import entropy

# Highest class probability per sample; computed once and reused for both the
# mean and the histogram.
max_probs = np.max(y_pred_proba, axis=1)
avg_max_prob = np.mean(max_probs)
print(f"Average maximum probability: {avg_max_prob:.4f}")

# Plot probability distribution
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Distribution of maximum probabilities, with the mean marked in red.
axes[0].hist(max_probs, bins=30, alpha=0.7, color='steelblue')
axes[0].axvline(avg_max_prob, color='red', linestyle='--', label=f'Mean: {avg_max_prob:.3f}')
axes[0].set_xlabel('Maximum Probability')
axes[0].set_ylabel('Count')
axes[0].set_title('Distribution of Maximum Prediction Probabilities')
axes[0].legend()
axes[0].grid(alpha=0.3)

# Entropy of each prediction row, computed for all rows in one vectorized
# call (axis=1) instead of a Python-level loop over samples.
entropies = entropy(y_pred_proba, axis=1)
axes[1].hist(entropies, bins=30, alpha=0.7, color='coral')
axes[1].set_xlabel('Prediction Entropy')
axes[1].set_ylabel('Count')
axes[1].set_title('Distribution of Prediction Entropy')
axes[1].grid(alpha=0.3)

plt.tight_layout()
plt.show()

print(f"\nAverage prediction entropy: {np.mean(entropies):.4f}")

## Conclusions

### Key Findings:
1. **Best Architecture**: [Add best performing architecture]
2. **Performance**: [Add accuracy and other metrics]
3. **Training Insights**: [Add observations about convergence, overfitting]
4. **Comparison**: [Add comparison with other models]

### Recommendations:
1. [Add recommendation 1]
2. [Add recommendation 2]
3. [Add recommendation 3]