In [1]:

# %% [markdown]
# # Automatica: Improved Implementation
# Combined analysis and preprocessing with interactive feature selection

# %%
import gradio as gr
import os
import pandas as pd
import numpy as np
import pygwalker as pyg
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
import warnings
warnings.filterwarnings('ignore')


# Deep-learning, hyperparameter-search, persistence, and templating dependencies
import torch
import torch.nn as nn
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Any, Tuple  # Added Tuple here
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import torch
import torch.nn as nn
import time
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_squared_error, r2_score
import math
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
from sklearn.base import BaseEstimator, RegressorMixin
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import time

from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, RegressorMixin
from datetime import datetime
import traceback
from joblib import dump, load
from tqdm.auto import tqdm

import jinja2
from jinja2 import Template

In [2]:
# %% [markdown]
# ## Base Classes

# %%
class TorchWrapper(BaseEstimator, RegressorMixin):
    """Expose a PyTorch module through the scikit-learn regressor interface.

    A fresh ``model_class(**model_params)`` is built on every ``fit`` call and
    trained with AdamW on shuffled mini-batches.

    Parameters
    ----------
    model_class : callable
        Factory (normally an ``nn.Module`` subclass) called as
        ``model_class(**model_params)``.
    model_params : dict, optional
        Keyword arguments for ``model_class``; ``None`` means no arguments.
    criterion : callable
        Zero-argument factory returning the loss module (default ``nn.MSELoss``).
    lr : float
        Learning rate handed to ``torch.optim.AdamW``.
    batch_size : int
        Mini-batch size used by the training ``DataLoader``.
    epochs : int
        Number of full passes over the training data.
    device : torch.device, optional
        Target device; defaults to CUDA when available, otherwise CPU.
    """

    # Prefix used by search spaces to address a single entry of ``model_params``.
    _NESTED_PREFIX = 'model_params__'

    def __init__(self, model_class, model_params=None, criterion=nn.MSELoss, 
                 lr=0.001, batch_size=32, epochs=100, device=None):
        self.model_class = model_class
        # Keep the caller's dict object (even an empty one): scikit-learn's
        # clone() verifies that constructor arguments are stored unchanged.
        self.model_params = model_params if model_params is not None else {}
        self.criterion = criterion
        self.lr = lr
        self.batch_size = batch_size
        self.epochs = epochs
        self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = None

    def get_params(self, deep=True):
        """Return the constructor parameters of this estimator.

        Required for scikit-learn compatibility. With ``deep=True`` the
        ``model_params`` entry is a deep copy, so callers cannot mutate the
        estimator's own dictionary through the returned mapping.
        """
        # Local import: the notebook's import cell does not bring in ``copy``.
        import copy

        params = {
            'model_class': self.model_class,
            'model_params': self.model_params,
            'criterion': self.criterion,
            'lr': self.lr,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'device': self.device
        }
        if deep:
            params['model_params'] = copy.deepcopy(self.model_params)
        return params

    def set_params(self, **params):
        """Set parameters on this estimator and return ``self``.

        Required for scikit-learn compatibility. Three key forms are accepted:

        * ``model_params=<dict>`` merges the given entries into the current
          model keyword arguments;
        * ``model_params__<name>=<value>`` sets a single model keyword argument
          (the form used by hyperparameter search spaces);
        * any other key is assigned as an attribute.

        ``model_params`` is rebound to a new dict rather than updated in place
        so a dictionary shared with the caller is never mutated.
        """
        for key, value in params.items():
            if key == 'model_params':
                self.model_params = {**self.model_params, **value}
            elif key.startswith(self._NESTED_PREFIX):
                nested_key = key[len(self._NESTED_PREFIX):]
                self.model_params = {**self.model_params, nested_key: value}
            else:
                setattr(self, key, value)
        return self

    @staticmethod
    def _has_batch_norm(model):
        """Return True when ``model`` contains any batch-normalisation layer."""
        return any(
            isinstance(module, nn.modules.batchnorm._BatchNorm)
            for module in model.modules()
        )

    def fit(self, X, y):
        """Build a new model and train it on ``(X, y)``.

        ``X`` and ``y`` may be anything ``np.asarray`` can convert to float32
        (arrays, lists, pandas objects). ``y`` is reshaped to a column vector.
        Returns ``self``.
        """
        # Initialize model
        self.model = self.model_class(**self.model_params).to(self.device)

        # Convert data to tensors; np.asarray also covers lists / pandas inputs
        features = torch.FloatTensor(np.asarray(X, dtype=np.float32)).to(self.device)
        targets = torch.FloatTensor(
            np.asarray(y, dtype=np.float32).reshape(-1, 1)  # Ensure y is 2D
        ).to(self.device)

        # Search libraries may hand over NumPy integers, which DataLoader rejects
        batch_size = int(self.batch_size)
        epochs = int(self.epochs)

        # Create data loader
        dataset = TensorDataset(features, targets)
        n_samples = len(dataset)
        # BatchNorm raises in training mode on a batch of one sample, so drop a
        # trailing single-sample batch when the model contains such layers.
        drop_last = (
            n_samples > batch_size
            and n_samples % batch_size == 1
            and self._has_batch_norm(self.model)
        )
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                            drop_last=drop_last)

        # Training setup
        criterion = self.criterion()
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)

        # Training loop
        self.model.train()
        for _ in range(epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                # Ensure output and batch_y have the same shape
                output = self.model(batch_X).reshape(-1, 1)
                loss = criterion(output, batch_y)
                loss.backward()
                optimizer.step()

        return self

    def predict(self, X):
        """Return 1-D NumPy predictions for ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called. The exception also derives from
            ``AttributeError``, which is what the unfitted call raised before.
        """
        if self.model is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This TorchWrapper instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )

        inputs = torch.FloatTensor(np.asarray(X, dtype=np.float32)).to(self.device)
        self.model.eval()
        with torch.no_grad():
            output = self.model(inputs)
            return output.reshape(-1).cpu().numpy()  # Ensure 1D output for scikit-learn

@dataclass
class ModelTrialResults:
    """Store results for a single model's trial run.

    Instances are created by ``AutomaticaModelSelector.train_model`` and read
    by the selector's plotting helpers.
    """
    # Name of the candidate (also the key under which the result is stored).
    model_name: str
    # Mean training loss of each completed epoch, in epoch order.
    train_losses: List[float]
    # Validation loss after each completed epoch, in epoch order.
    val_losses: List[float]
    # Final training-set metrics; train_model fills the keys 'MSE', 'MAE', 'R2'.
    train_metrics: Dict[str, float]
    # Final validation-set metrics; same keys as train_metrics.
    val_metrics: Dict[str, float]
    # Wall-clock duration of the trial in seconds.
    training_time: float
    # Peak CUDA memory in MiB as reported by torch; 0 when CUDA is unavailable.
    peak_memory_usage: float

class AutomaticaModelSelector:
    """Run a short training trial for every candidate architecture and compare them.

    ``run_trial`` trains ResNetMLP, TransformerTabular, DenseNetTabular and
    TabNetModel in turn, storing one ``ModelTrialResults`` per successful
    candidate in ``self.results``. The ``plot_*`` methods visualise those
    results.

    Note: ``train_model`` always optimises MSE against a standardised target,
    whatever ``task_type`` says; ``task_type`` only affects the output width
    chosen in ``run_trial`` and the metrics in ``_calculate_metrics``.
    """

    # Line colours used by plot_training_curves; unknown names fall back to gray.
    _MODEL_COLORS = {
        'ResNetMLP': 'blue',
        'TransformerTabular': 'red',
        'DenseNetTabular': 'green',
        'TabNetModel': 'purple'
    }

    def __init__(
        self,
        task_type: str,
        input_dim: int,
        trial_epochs: int = 100,
        batch_size: int = 128,
        timeout_minutes: int = 10
    ):
        """Store the trial configuration.

        Args:
            task_type: ``'regression'`` or any other value, treated as
                classification.
            input_dim: Feature dimension after preprocessing (stored only;
                ``run_trial`` reads the width from the data).
            trial_epochs: Maximum number of epochs per candidate.
            batch_size: Mini-batch size for the training loader.
            timeout_minutes: Per-candidate wall-clock budget, checked at the
                start of each epoch.
        """
        self.task_type = task_type
        self.input_dim = input_dim  # This should be the dimension after preprocessing
        self.trial_epochs = trial_epochs
        self.batch_size = batch_size
        self.timeout_minutes = timeout_minutes
        self.results = {}
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    @staticmethod
    def _has_batch_norm(model) -> bool:
        """Return True when ``model`` contains any batch-normalisation layer."""
        return any(
            isinstance(module, nn.modules.batchnorm._BatchNorm)
            for module in model.modules()
        )

    @staticmethod
    def _regression_metrics(y_true, y_pred) -> Dict[str, float]:
        """Compute MSE, MAE and R2 after flattening both inputs to 1-D.

        Flattening prevents an ``(N,)`` target and ``(N, 1)`` prediction from
        broadcasting into an ``(N, N)`` difference matrix in the MAE.
        """
        y_true = np.asarray(y_true).reshape(-1)
        y_pred = np.asarray(y_pred).reshape(-1)
        return {
            'MSE': mean_squared_error(y_true, y_pred),
            'MAE': np.mean(np.abs(y_true - y_pred)),
            'R2': r2_score(y_true, y_pred)
        }

    def train_model(self, model, model_name, X_train, y_train, X_val, y_val):
        """Train one candidate and record its ``ModelTrialResults``.

        The target is standardised with a ``StandardScaler`` fitted on the
        training target, the model is optimised with AdamW (lr=0.001) on
        ``nn.MSELoss``, and training stops early after three epochs without a
        validation improvement or when the timeout is exceeded. The reported
        MSE / MAE / R2 are computed on the standardised target scale.

        Args:
            model: The ``nn.Module`` to train; moved to ``self.device``.
            model_name: Key under which the result is stored in ``self.results``.
            X_train, y_train, X_val, y_val: Data convertible to float32 arrays.

        Returns:
            True on success. On any exception the error and its traceback are
            printed and False is returned; nothing is stored for that model.
        """
        try:
            # Convert data to proper format (np.asarray also handles pandas inputs)
            X_train = np.asarray(X_train, dtype=np.float32)
            y_train = np.asarray(y_train, dtype=np.float32)
            X_val = np.asarray(X_val, dtype=np.float32)
            y_val = np.asarray(y_val, dtype=np.float32)

            # Scale the target variable
            y_scaler = StandardScaler()
            y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
            y_val_scaled = y_scaler.transform(y_val.reshape(-1, 1)).ravel()

            # Convert to tensors
            X_train_tensor = torch.FloatTensor(X_train).to(self.device)
            y_train_tensor = torch.FloatTensor(y_train_scaled).to(self.device)
            X_val_tensor = torch.FloatTensor(X_val).to(self.device)
            y_val_tensor = torch.FloatTensor(y_val_scaled).to(self.device)

            model = model.to(self.device)

            # Data loader. BatchNorm raises in training mode on a batch of one
            # sample, so drop a trailing single-sample batch for such models.
            train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
            n_train = len(train_dataset)
            drop_last = (
                n_train > self.batch_size
                and n_train % self.batch_size == 1
                and self._has_batch_norm(model)
            )
            train_loader = DataLoader(
                train_dataset,
                batch_size=self.batch_size,
                shuffle=True,
                drop_last=drop_last
            )
            criterion = nn.MSELoss()
            optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

            # Reset the CUDA peak counter so the figure belongs to this model
            # only instead of accumulating over previously trained candidates.
            if torch.cuda.is_available():
                torch.cuda.reset_peak_memory_stats()

            # Training tracking
            train_losses = []
            val_losses = []
            start_time = time.time()
            best_val_loss = float('inf')
            patience = 3
            patience_counter = 0

            for epoch in range(self.trial_epochs):
                if (time.time() - start_time) > (self.timeout_minutes * 60):
                    print(f"Training timeout after {epoch} epochs")
                    break

                # Training
                model.train()
                epoch_losses = []
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    optimizer.step()
                    epoch_losses.append(loss.item())

                avg_train_loss = np.mean(epoch_losses)
                train_losses.append(avg_train_loss)

                # Validation
                model.eval()
                with torch.no_grad():
                    val_outputs = model(X_val_tensor)
                    val_loss = criterion(val_outputs, y_val_tensor).item()
                    val_losses.append(val_loss)

                print(f"{model_name} - Epoch {epoch+1}/{self.trial_epochs} - "
                    f"Train Loss: {avg_train_loss:.4f}, Val Loss: {val_loss:.4f}")

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"Early stopping triggered at epoch {epoch+1}")
                        break

            training_time = time.time() - start_time
            peak_memory = torch.cuda.max_memory_allocated() / 1024**2 if torch.cuda.is_available() else 0

            # Final metrics, computed on the standardised target scale
            model.eval()
            with torch.no_grad():
                train_preds_scaled = model(X_train_tensor).cpu().numpy()
                val_preds_scaled = model(X_val_tensor).cpu().numpy()

            train_metrics = self._regression_metrics(y_train_scaled, train_preds_scaled)
            val_metrics = self._regression_metrics(y_val_scaled, val_preds_scaled)

            # Store results
            self.results[model_name] = ModelTrialResults(
                model_name=model_name,
                train_losses=train_losses,
                val_losses=val_losses,
                train_metrics=train_metrics,
                val_metrics=val_metrics,
                training_time=training_time,
                peak_memory_usage=peak_memory
            )

            return True

        except Exception as e:
            # Best-effort by design: one failing candidate must not abort the
            # others, but the traceback is shown so the cause is diagnosable.
            print(f"Detailed error in {model_name} training: {str(e)}")
            traceback.print_exc()
            return False

        finally:
            # Clear GPU memory on both the success and the failure path
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    def _calculate_metrics(self, model, X, y) -> Dict[str, float]:
        """Calculate task-specific metrics for ``model`` on ``(X, y)``.

        Inference runs in eval mode with autograd disabled; the model's
        previous train/eval mode is restored afterwards.

        Returns:
            For regression: ``MSE``, ``MAE``, ``RMSE`` and ``R2``.
            Otherwise: ``Accuracy``. Multi-column outputs are reduced with
            argmax first (assumes labels are encoded as 0..C-1 — TODO confirm
            against the preprocessing step).
        """
        metrics = {}

        was_training = getattr(model, 'training', None)
        if was_training is not None:
            model.eval()
        try:
            with torch.no_grad():
                inputs = torch.FloatTensor(np.asarray(X, dtype=np.float32)).to(self.device)
                y_pred = model(inputs).cpu().numpy()
        finally:
            if was_training is not None:
                model.train(was_training)

        y_true = np.asarray(y)

        if self.task_type == 'regression':
            y_true = y_true.reshape(-1)
            y_pred = y_pred.reshape(-1)
            mse = mean_squared_error(y_true, y_pred)
            metrics['MSE'] = mse
            metrics['MAE'] = np.mean(np.abs(y_true - y_pred))
            metrics['RMSE'] = math.sqrt(mse)
            metrics['R2'] = r2_score(y_true, y_pred)
        else:
            # Raw per-class scores never equal integer labels; pick the class.
            if y_pred.ndim > 1 and y_pred.shape[-1] > 1:
                y_pred = y_pred.argmax(axis=-1)
            metrics['Accuracy'] = (y_pred.reshape(-1) == y_true.reshape(-1)).mean()

        return metrics

    def run_trial(self, X_train, y_train, X_val, y_val):
        """Run trials for all models.

        Each candidate is built immediately before it is trained and is not
        kept afterwards, so its memory can be reclaimed before the next one.

        Returns:
            True if at least one candidate trained successfully.
        """
        input_dim = X_train.shape[1]
        print(f"Input dimension after preprocessing: {input_dim}")

        output_dim = 1 if self.task_type == 'regression' else len(np.unique(y_train))

        # (name, factory) pairs in the order the candidates are tried
        candidates = [
            ('ResNetMLP', lambda: ResNetMLP(
                input_dim=input_dim,
                hidden_dims=(256, 512, 256),
                output_dim=output_dim
            )),
            ('TransformerTabular', lambda: TransformerTabular(
                input_dim=input_dim,
                output_dim=output_dim
            )),
            ('DenseNetTabular', lambda: DenseNetTabular(
                input_dim=input_dim,
                growth_rate=32,
                block_config=(6, 12, 24, 16),
                output_dim=output_dim
            )),
            ('TabNetModel', lambda: TabNetModel(
                input_dim=input_dim,
                output_dim=output_dim
            )),
        ]

        outcomes = []
        for model_name, build_model in candidates:
            outcomes.append(
                self.train_model(build_model(), model_name, X_train, y_train, X_val, y_val)
            )

        # Print validation metrics for all models
        for model_name, results in self.results.items():
            val_metrics = results.val_metrics
            print(f"\n{model_name} Validation Metrics:")
            print(f"MSE: {val_metrics['MSE']:.4f}")
            print(f"MAE: {val_metrics['MAE']:.4f}")
            print(f"R2: {val_metrics['R2']:.4f}")

        return any(outcomes)

    def plot_training_curves(self) -> go.Figure:
        """Plot comparative training curves.

        Returns a two-row figure: training loss on top, validation loss below,
        one solid/dashed trace pair per stored model.
        """
        fig = make_subplots(
            rows=2, cols=1,
            subplot_titles=('Training Loss', 'Validation Loss'),
            shared_xaxes=True,
            vertical_spacing=0.15
        )

        for model_name, results in self.results.items():
            color = self._MODEL_COLORS.get(model_name, 'gray')

            # Training loss
            fig.add_trace(
                go.Scatter(
                    x=list(range(1, len(results.train_losses) + 1)),
                    y=results.train_losses,
                    name=f'{model_name} (Train)',
                    line=dict(color=color),
                ),
                row=1, col=1
            )

            # Validation loss
            fig.add_trace(
                go.Scatter(
                    x=list(range(1, len(results.val_losses) + 1)),
                    y=results.val_losses,
                    name=f'{model_name} (Val)',
                    line=dict(color=color, dash='dash'),
                ),
                row=2, col=1
            )

        fig.update_layout(
            height=600,
            title_text="Model Comparison - Training Progress",
            template="plotly_white",
            showlegend=True,
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )

        fig.update_xaxes(title_text="Epoch", row=2, col=1)
        fig.update_yaxes(title_text="Loss", row=1, col=1)
        fig.update_yaxes(title_text="Loss", row=2, col=1)

        return fig

    def plot_metrics_comparison(self) -> go.Figure:
        """Plot comparative metrics including MSE, MAE, and R2.

        Returns a grouped bar chart of the validation metrics. When no model
        has stored results, an empty titled figure is returned instead of
        raising on an empty data frame.
        """
        if not self.results:
            fig = go.Figure()
            fig.update_layout(
                height=500,
                title_text='Model Comparison - Validation Metrics',
                template="plotly_white"
            )
            return fig

        # One row per (model, metric) pair
        metrics_data = [
            {
                'Model': model_name,
                'Metric': metric_name,
                'Value': results.val_metrics[metric_name]
            }
            for model_name, results in self.results.items()
            for metric_name in ('MSE', 'MAE', 'R2')
        ]

        # Convert to DataFrame
        df_metrics = pd.DataFrame(metrics_data)

        # Create the comparison plot
        fig = px.bar(
            df_metrics,
            x='Model',
            y='Value',
            color='Metric',
            title='Model Comparison - Validation Metrics',
            barmode='group',
            template="plotly_white",
            color_discrete_map={
                'MSE': 'rgb(147, 155, 255)',  # Light blue
                'MAE': 'rgb(255, 127, 127)',  # Light red
                'R2': 'rgb(102, 204, 153)'    # Light green
            }
        )

        # Update layout for better readability
        fig.update_layout(
            height=500,
            xaxis_title="Model Type",
            yaxis_title="Metric Value",
            legend_title="Metric",
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            ),
            yaxis=dict(
                gridcolor='rgba(0,0,0,0.1)',
                showgrid=True
            ),
            plot_bgcolor='white'
        )

        # Outline the bars and soften their fill
        fig.update_traces(
            dict(
                marker_line_width=0.5,
                marker_line_color='black',
                opacity=0.8
            )
        )

        return fig


    def plot_resource_usage(self) -> go.Figure:
        """Plot comparative resource usage.

        Returns a side-by-side bar chart of training time (seconds) and peak
        memory usage (MB) per stored model.
        """
        fig = make_subplots(
            rows=1, cols=2,
            subplot_titles=('Training Time (s)', 'Peak Memory Usage (MB)')
        )

        model_names = list(self.results.keys())
        trial_results = list(self.results.values())

        fig.add_trace(
            go.Bar(
                x=model_names,
                y=[results.training_time for results in trial_results],
                name='Training Time'
            ),
            row=1, col=1
        )

        fig.add_trace(
            go.Bar(
                x=model_names,
                y=[results.peak_memory_usage for results in trial_results],
                name='Memory Usage'
            ),
            row=1, col=2
        )

        fig.update_layout(
            height=400,
            title_text="Model Comparison - Resource Usage",
            template="plotly_white",
            showlegend=False
        )

        return fig

In [3]:
# %% [markdown]
# ## Model Architectures

# %%
class DenseLayer(nn.Module):
    """One densely connected layer: new features are appended to the input.

    The input passes through BatchNorm -> GELU -> Linear -> Dropout, producing
    ``growth_rate`` new columns that are concatenated after the original ones.
    """

    def __init__(self, num_input_features, growth_rate, dropout_rate):
        super().__init__()
        normalise = nn.BatchNorm1d(num_input_features)
        activate = nn.GELU()
        project = nn.Linear(num_input_features, growth_rate)
        regularise = nn.Dropout(dropout_rate)
        self.layer = nn.Sequential(normalise, activate, project, regularise)

    def forward(self, x):
        """Return ``x`` with the newly computed features concatenated on dim 1."""
        return torch.cat((x, self.layer(x)), dim=1)

class DenseNetTabular(nn.Module):
    """DenseNet-style network for tabular inputs.

    A stem layer is followed by dense blocks (each made of ``DenseLayer``
    modules) separated by width-halving transition layers, then a small
    prediction head.
    """

    def __init__(
        self,
        input_dim: int,
        growth_rate: int = 32,
        block_config: Tuple[int, ...] = (6, 12, 24, 16),
        output_dim: int = 1,
        dropout_rate: float = 0.2,
    ):
        super().__init__()

        # Running feature width as stages are stacked
        width = growth_rate * 2

        # Stem: project the raw features to the initial width
        stages = [
            nn.Sequential(
                nn.Linear(input_dim, width),
                nn.BatchNorm1d(width),
                nn.GELU()
            )
        ]

        final_block = len(block_config) - 1
        for block_idx, depth in enumerate(block_config):
            stages.append(
                self._make_dense_block(
                    num_layers=depth,
                    num_input_features=width,
                    growth_rate=growth_rate,
                    dropout_rate=dropout_rate
                )
            )
            width += depth * growth_rate

            # Every block except the last is followed by a halving transition
            if block_idx == final_block:
                continue
            halved = width // 2
            stages.append(
                self._make_transition(
                    num_input_features=width,
                    num_output_features=halved
                )
            )
            width = halved

        self.features = nn.Sequential(*stages)

        # Prediction head (classification or regression)
        head_width = width // 2
        self.classifier = nn.Sequential(
            nn.Linear(width, head_width),
            nn.BatchNorm1d(head_width),
            nn.GELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(head_width, output_dim)
        )

    def _make_dense_block(self, num_layers, num_input_features, growth_rate, dropout_rate):
        """Stack ``num_layers`` DenseLayers, each ``growth_rate`` wider than the last."""
        return nn.Sequential(*[
            DenseLayer(
                num_input_features + layer_idx * growth_rate,
                growth_rate,
                dropout_rate
            )
            for layer_idx in range(num_layers)
        ])

    def _make_transition(self, num_input_features, num_output_features):
        """Build a BatchNorm -> GELU -> Linear transition between blocks."""
        normalise = nn.BatchNorm1d(num_input_features)
        activate = nn.GELU()
        compress = nn.Linear(num_input_features, num_output_features)
        return nn.Sequential(normalise, activate, compress)

    def forward(self, x):
        """Return predictions with a trailing size-1 dimension squeezed away."""
        return self.classifier(self.features(x)).squeeze(-1)

class TabNetModel(nn.Module):
    """TabNet-inspired network with sequential, attention-gated decision steps.

    Inputs are embedded once. Each step passes the embedding through the
    shared layers, derives a per-sample gate from the attention head, and
    feeds the gated embedding to that step's own layer. All step outputs are
    concatenated and mapped to the final prediction.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int = 1,
        feature_dim: int = 64,
        output_dim_per_step: int = 8,
        num_steps: int = 3,
        num_shared: int = 2,
        attention_dim: int = 32,
        dropout_rate: float = 0.2
    ):
        super().__init__()
        self.num_steps = num_steps

        def linear_bn_gelu(in_features, out_features):
            # Linear -> BatchNorm -> GELU unit reused throughout the model
            return nn.Sequential(
                nn.Linear(in_features, out_features),
                nn.BatchNorm1d(out_features),
                nn.GELU()
            )

        # Feature transformer
        self.feature_transform = linear_bn_gelu(input_dim, feature_dim)

        # Layers applied at every step
        self.shared_layers = nn.ModuleList(
            [linear_bn_gelu(feature_dim, feature_dim) for _ in range(num_shared)]
        )

        # One dedicated layer per step
        self.step_layers = nn.ModuleList(
            [linear_bn_gelu(feature_dim, output_dim_per_step) for _ in range(num_steps)]
        )

        # Attention head producing one gate value in (0, 1) per sample
        self.attention = nn.Sequential(
            nn.Linear(feature_dim, attention_dim),
            nn.Tanh(),
            nn.Linear(attention_dim, 1),
            nn.Sigmoid()
        )

        # Output head over the concatenated step outputs
        merged_dim = output_dim_per_step * num_steps
        reduced_dim = merged_dim // 2
        self.output_transform = nn.Sequential(
            nn.Linear(merged_dim, reduced_dim),
            nn.BatchNorm1d(reduced_dim),
            nn.GELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(reduced_dim, output_dim)
        )

    def forward(self, x):
        """Return predictions with a trailing size-1 dimension squeezed away."""
        embedded = self.feature_transform(x)

        # Scale carried from step to step; shrinks by (1 - gate) each time
        remaining = torch.ones_like(embedded)
        collected = []

        for step_idx in range(self.num_steps):
            # Shared layers always start from the embedded input
            hidden = embedded
            for shared_layer in self.shared_layers:
                hidden = shared_layer(hidden)

            gate = self.attention(hidden)
            gated_input = embedded * gate * remaining
            remaining = remaining * (1 - gate)

            collected.append(self.step_layers[step_idx](gated_input))

        merged = torch.cat(collected, dim=1)
        return self.output_transform(merged).squeeze(-1)

class TransformerTabular(nn.Module):
    """Transformer encoder applied to a tabular row as a single token.

    The whole feature vector is embedded into one ``hidden_dim`` token, a
    learned offset is added, the token goes through the encoder stack, and a
    small head maps it to the output.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int = 256,
        num_heads: int = 8,
        num_layers: int = 4,
        output_dim: int = 1,
        dropout_rate: float = 0.1
    ):
        super().__init__()

        # Projects the full feature vector to a single token embedding
        self.input_embedding = nn.Linear(input_dim, hidden_dim)

        # Learned offset, broadcast over the batch (sequence length is 1)
        self.pos_embedding = nn.Parameter(torch.randn(1, 1, hidden_dim))

        # Encoder stack
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=hidden_dim,
                nhead=num_heads,
                dim_feedforward=hidden_dim * 4,
                dropout=dropout_rate,
                batch_first=True
            ),
            num_layers=num_layers
        )

        # Prediction head
        head_dim = hidden_dim // 2
        self.output_layer = nn.Sequential(
            nn.Linear(hidden_dim, head_dim),
            nn.LayerNorm(head_dim),
            nn.GELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(head_dim, output_dim)
        )

    def forward(self, x):
        """Return predictions with a trailing size-1 dimension squeezed away."""
        # [batch, input_dim] -> [batch, 1, input_dim] -> [batch, 1, hidden_dim]
        token = self.input_embedding(x.unsqueeze(1)) + self.pos_embedding

        # Encode, then drop the length-1 sequence axis: [batch, hidden_dim]
        encoded = self.transformer(token).squeeze(1)

        return self.output_layer(encoded).squeeze(-1)

class _ResidualLinear(nn.Module):
    """Width-preserving skip connection computing ``x + Linear(x)``."""

    def __init__(self, dim: int):
        super().__init__()
        self.linear = nn.Linear(dim, dim)

    def forward(self, x):
        return x + self.linear(x)


class ResNetMLP(nn.Module):
    """Deep residual MLP with skip connections for tabular data.

    Every entry of ``hidden_dims`` adds a Linear -> LayerNorm -> GELU ->
    Dropout stage. When a stage keeps the width unchanged (its input and
    output dimensions are equal) a residual ``x + Linear(x)`` unit follows it.

    Args:
        input_dim: Number of input features.
        hidden_dims: Width of each hidden stage, in order.
        output_dim: Width of the final linear layer.
        dropout_rate: Dropout probability used in every stage.
    """
    def __init__(
        self,
        input_dim: int,
        hidden_dims: tuple = (256, 512, 256),
        output_dim: int = 1,
        dropout_rate: float = 0.2
    ):
        super().__init__()
        layers = []
        prev_dim = input_dim

        # Build main layers with residual connections
        for dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, dim),
                nn.LayerNorm(dim),
                nn.GELU(),
                nn.Dropout(dropout_rate)
            ])

            # Add residual connection if dimensions match. This has to be a
            # real nn.Module: nn.Sequential rejects plain callables such as
            # lambdas with a TypeError at construction time.
            if prev_dim == dim:
                layers.append(_ResidualLinear(dim))

            prev_dim = dim

        # Output layer
        self.feature_extractor = nn.Sequential(*layers)
        self.output_layer = nn.Linear(prev_dim, output_dim)

    def forward(self, x):
        """Return predictions with a trailing size-1 dimension squeezed away."""
        x = self.feature_extractor(x)
        x = self.output_layer(x)
        return x.squeeze(-1)

In [4]:
# %% [markdown]
# ## Training System

# %%
class AutomaticaTrainer:
    """Handles model training and hyperparameter optimization"""
    def __init__(
        self,
        model_class: str,
        input_dim: int,
        task_type: str,
        output_dim: int = 1,
        n_iter: int = 25,
        cv: int = 3,
        random_state: int = 42
    ):
        self.model_class = self._get_model_class(model_class)
        self.input_dim = input_dim
        self.task_type = task_type
        self.output_dim = output_dim
        self.n_iter = n_iter
        self.cv = cv
        self.random_state = random_state
        self.best_params = None
        self.best_model = None
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.feature_scaler = StandardScaler()
        self.target_scaler = StandardScaler()

    def _get_model_class(self, model_name):
        """Get the model class from the name"""
        model_map = {
            'ResNetMLP': ResNetMLP,
            'TransformerTabular': TransformerTabular,
            'DenseNetTabular': DenseNetTabular,
            'TabNetModel': TabNetModel
        }
        return model_map[model_name]

    def _get_param_space(self):
        """Define hyperparameter search space based on model"""
        param_space = {
            'model_params__input_dim': [self.input_dim],
            'model_params__output_dim': [self.output_dim],
            'lr': Real(1e-4, 1e-2, prior='log-uniform'),
            'batch_size': Categorical([32, 64, 128, 256]),
            'epochs': Integer(50, 200)
        }
        
        # Model-specific parameters
        if self.model_class == DenseNetTabular:
            param_space.update({
                'model_params__growth_rate': Integer(16, 64),
                'model_params__dropout_rate': Real(0.1, 0.5)
            })
        elif self.model_class == TabNetModel:
            param_space.update({
                'model_params__feature_dim': Categorical([32, 64]),
                'model_params__dropout_rate': Real(0.1, 0.5)
            })
        elif self.model_class == TransformerTabular:
            param_space.update({
                'model_params__hidden_dim': Categorical([128, 256]),
                'model_params__num_heads': Categorical([4, 8]),
                'model_params__dropout_rate': Real(0.1, 0.5)
            })
            
        return param_space

    def optimize_hyperparameters(self, X_train, y_train):
        """Run Bayesian optimization for hyperparameter tuning"""
        print("Starting hyperparameter optimization...")
        
        # Scale the data
        X_train_scaled = self.feature_scaler.fit_transform(X_train)
        y_train_scaled = self.target_scaler.fit_transform(y_train.reshape(-1, 1))
        
        # Create base model wrapper
        base_model = TorchWrapper(
            model_class=self.model_class,
            model_params={'input_dim': self.input_dim, 'output_dim': self.output_dim}
        )
        
        # Configure search
        param_space = self._get_param_space()
        optimizer = BayesSearchCV(
            base_model,
            param_space,
            n_iter=self.n_iter,
            cv=self.cv,
            scoring='neg_mean_squared_error' if self.task_type == 'regression' else 'accuracy',
            n_jobs=1,
            random_state=self.random_state
        )
        
        # Run optimization
        optimizer.fit(X_train_scaled, y_train_scaled)
        
        self.best_params = optimizer.best_params_
        self.optimizer = optimizer
        
        return optimizer

    def train_final_model(self, X_train, y_train, X_val, y_val, patience=5, min_delta=1e-4):
        """Train final model with early stopping and improved scaling"""
        if self.best_params is None:
            raise ValueError("Run optimize_hyperparameters first")
            
        # Initialize model with best parameters
        model_params = {k.replace('model_params__', ''): v 
                    for k, v in self.best_params.items() 
                    if k.startswith('model_params__')}
        
        self.best_model = self.model_class(**model_params).to(self.device)
        
        # Scale the data
        X_train_scaled = self.feature_scaler.fit_transform(X_train)
        X_val_scaled = self.feature_scaler.transform(X_val)
        
        if self.task_type == 'regression':
            y_train_scaled = self.target_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
            y_val_scaled = self.target_scaler.transform(y_val.reshape(-1, 1)).ravel()
        else:
            y_train_scaled = y_train
            y_val_scaled = y_val
        
        # Convert to tensors
        X_train = torch.FloatTensor(X_train_scaled).to(self.device)
        y_train = torch.FloatTensor(y_train_scaled).to(self.device)
        X_val = torch.FloatTensor(X_val_scaled).to(self.device)
        y_val = torch.FloatTensor(y_val_scaled).to(self.device)
        
        # Create data loader
        train_dataset = TensorDataset(X_train, y_train)
        train_loader = DataLoader(
            train_dataset, 
            batch_size=self.best_params['batch_size'],
            shuffle=True
        )
        
        # Training setup with improved stability
        criterion = nn.MSELoss()
        optimizer = torch.optim.AdamW(
            self.best_model.parameters(),
            lr=self.best_params['lr'],
            weight_decay=0.01  # L2 regularization
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-6
        )
        
        # Training tracking
        history = {
            'train_loss': [],
            'val_loss': [],
            'train_metric': [],
            'val_metric': []
        }
        
        best_val_loss = float('inf')
        patience_counter = 0
        
        # Training loop with progress bar
        pbar = tqdm(range(self.best_params['epochs']), desc="Training Progress")
        
        for epoch in pbar:
            # Training
            self.best_model.train()
            train_losses = []
            
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                output = self.best_model(batch_X)
                loss = criterion(output, batch_y)
                loss.backward()
                
                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(self.best_model.parameters(), max_norm=1.0)
                
                optimizer.step()
                train_losses.append(loss.item())
            
            # Validation
            self.best_model.eval()
            with torch.no_grad():
                val_output = self.best_model(X_val)
                val_loss = criterion(val_output, y_val).item()
            
            # Store metrics
            avg_train_loss = np.mean(train_losses)
            history['train_loss'].append(avg_train_loss)
            history['val_loss'].append(val_loss)
            
            # Calculate scaled metrics
            train_pred = self.best_model(X_train).detach().cpu().numpy()
            val_pred = val_output.cpu().numpy()
            
            if self.task_type == 'regression':
                # Convert predictions back to original scale for metrics
                train_pred_orig = self.target_scaler.inverse_transform(train_pred.reshape(-1, 1)).ravel()
                val_pred_orig = self.target_scaler.inverse_transform(val_pred.reshape(-1, 1)).ravel()
                
                train_metric = r2_score(y_train.cpu().numpy(), train_pred)
                val_metric = r2_score(y_val.cpu().numpy(), val_pred)
            else:
                train_metric = accuracy_score(y_train.cpu().numpy(), train_pred > 0.5)
                val_metric = accuracy_score(y_val.cpu().numpy(), val_pred > 0.5)
            
            history['train_metric'].append(train_metric)
            history['val_metric'].append(val_metric)
            
            # Update progress bar
            pbar.set_postfix({
                'train_loss': f'{avg_train_loss:.4f}',
                'val_loss': f'{val_loss:.4f}',
                'val_metric': f'{val_metric:.4f}'
            })
            
            # Learning rate scheduling
            scheduler.step(val_loss)
            
            # Early stopping
            if val_loss < best_val_loss - min_delta:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1
                
            if patience_counter >= patience:
                print(f"\nEarly stopping triggered at epoch {epoch+1}")
                break
        
        return history

    def plot_optimization_results(self):
        """Plot the optimization results"""
        if not hasattr(self, 'optimizer') or not hasattr(self.optimizer, 'cv_results_'):
            return go.Figure()
            
        results = self.optimizer.cv_results_
        
        fig = make_subplots(specs=[[{"secondary_y": True}]])
        
        fig.add_trace(
            go.Scatter(
                x=list(range(len(results['mean_test_score']))),
                y=-np.array(results['mean_test_score']),
                name="Mean Test Score",
                mode='lines+markers'
            ),
            secondary_y=False
        )
        
        fig.add_trace(
            go.Scatter(
                x=list(range(len(results['std_test_score']))),
                y=results['std_test_score'],
                name="Score Std Dev",
                mode='lines+markers'
            ),
            secondary_y=True
        )
        
        fig.update_layout(
            title_text="Hyperparameter Optimization Progress",
            showlegend=True,
            template="plotly_white"
        )
        
        fig.update_xaxes(title_text="Iteration")
        fig.update_yaxes(title_text="Mean Test Score", secondary_y=False)
        fig.update_yaxes(title_text="Score Standard Deviation", secondary_y=True)
        
        self.optimization_plot = fig
        return fig

    def plot_training_history(self, history):
        """Plot the training history"""
        if not history:
            return go.Figure()
            
        fig = make_subplots(rows=2, cols=1,
                            subplot_titles=('Loss', 'Metrics'))
        
        fig.add_trace(
            go.Scatter(
                x=list(range(len(history['train_loss']))),
                y=history['train_loss'],
                name="Train Loss",
                mode='lines'
            ),
            row=1, col=1
        )
        
        fig.add_trace(
            go.Scatter(
                x=list(range(len(history['val_loss']))),
                y=history['val_loss'],
                name="Val Loss",
                mode='lines'
            ),
            row=1, col=1
        )
        
        fig.add_trace(
            go.Scatter(
                x=list(range(len(history['train_metric']))),
                y=history['train_metric'],
                name="Train Metric",
                mode='lines'
            ),
            row=2, col=1
        )
        
        fig.add_trace(
            go.Scatter(
                x=list(range(len(history['val_metric']))),
                y=history['val_metric'],
                name="Val Metric",
                mode='lines'
            ),
            row=2, col=1
        )
        
        fig.update_layout(
            height=600,
            title_text="Training History",
            showlegend=True,
            template="plotly_white"
        )
        
        self.training_plot = fig
        return fig
    
    def save_model(self, filepath):
        if self.best_model is None:
            raise ValueError("No trained model available")
            
        save_dict = {
            'model_state': self.best_model.state_dict(),
            'model_class': self.model_class.__name__,
            'params': self.best_params,
            'task_type': self.task_type,
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'feature_scaler': self.feature_scaler,
            'target_scaler': self.target_scaler,
            'optimization_plot': self.optimization_plot,
            'training_plot': self.training_plot
        }
        
        dump(save_dict, filepath)  # Using imported joblib.dump
        print(f"Model and scalers saved to {filepath}")    

In [5]:
# HTML template for the report (Jinja2).
# Expected context: the dataset-info keys (dataset_name, target_variable,
# task_type, n_features, n_samples) at top level, plus the dicts preprocessing,
# plots (pre-rendered HTML), model_selection, final_training and
# technical_details, and the datetime class.
# Missing or non-numeric numbers render as "N/A" via the fmt macro, empty
# result/parameter/metric mappings render as empty tables, and free-text
# values are HTML-escaped because they can originate from the uploaded file.
REPORT_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>Automatica ML Analysis Report</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        body { 
            padding: 20px;
            font-family: system-ui, -apple-system, "Segoe UI", Roboto, sans-serif;
        }
        .container { max-width: 1200px; }
        .section { margin-bottom: 40px; }
        .plot-container { margin: 20px 0; }
        .metric-card {
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 15px;
        }
        pre {
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
        }
        .params-table {
            margin: 20px 0;
        }
        .card-body {
            text-align: center;
            padding: 15px;
        }
        .display-6 {
            font-size: 1.5rem;
            font-weight: 500;
        }
        .features-container {
            display: flex;
            flex-wrap: wrap;
            gap: 8px;
            margin: 10px 0;
        }

        .feature-badge {
            background-color: #e9ecef;
            padding: 6px 12px;
            border-radius: 16px;
            font-size: 0.9rem;
            color: #495057;
        }
        .model-architecture {
            font-family: 'Consolas', 'Monaco', monospace;
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
            line-height: 1.5;
            overflow-x: auto;
            color: #212529;
        }
        .table {
            width: 100%;
            margin-bottom: 1rem;
            background-color: transparent;
            border-collapse: collapse;
        }

        .table thead th {
            vertical-align: bottom;
            border-bottom: 2px solid #dee2e6;
            background-color: #f8f9fa;
            padding: 12px;
            text-align: left;
        }

        .table tbody td {
            padding: 12px;
            border-top: 1px solid #dee2e6;
        }

        .table-hover tbody tr:hover {
            background-color: rgba(0, 0, 0, 0.02);
        }

        .table-responsive {
            display: block;
            width: 100%;
            overflow-x: auto;
            -webkit-overflow-scrolling: touch;
            margin-bottom: 1rem;
        }

        .mt-4 {
            margin-top: 1.5rem;
        }
    </style>
</head>
<body>
    {# Render a number with a printf-style spec; show "N/A" when the value is missing or not numeric. #}
    {% macro fmt(value, spec="%.4f") %}{% if value is number %}{{ spec | format(value) }}{% else %}N/A{% endif %}{% endmacro %}
    <div class="container">
        <h1 class="mb-4">Automatica ML Analysis Report</h1>
        <p class="text-muted">Generated on {{ datetime.now().strftime('%B %d, %Y %H:%M:%S') }}</p>

        <div class="section">
            <h2>Executive Summary</h2>
            <div class="metric-card">
                <p>This report provides a comprehensive analysis of the deep learning pipeline executed using Automatica.</p>
                <ul>
                    <li><strong>Dataset:</strong> {{ dataset_name | e }}</li>
                    <li><strong>Target Variable:</strong> {{ target_variable | e }}</li>
                    <li><strong>Task Type:</strong> {{ task_type | e }}</li>
                    <li><strong>Features:</strong> {{ n_features }}</li>
                    <li><strong>Total Samples:</strong> {{ n_samples }}</li>
                </ul>
            </div>
        </div>

        <div class="section">
            <h2>Data Preprocessing</h2>
            <h3>Data Split</h3>
            <div class="metric-card">
                <ul>
                    <li>Training Set: {{ preprocessing.train_size }} samples</li>
                    <li>Validation Set: {{ preprocessing.val_size }} samples</li>
                    <li>Test Set: {{ preprocessing.test_size }} samples</li>
                </ul>
            </div>

            <h3>Target Distribution</h3>
            <div class="plot-container">
                {{ plots.target_distribution | safe }}
            </div>

            <h3>Feature Distributions</h3>
            <div class="plot-container">
                {{ plots.feature_distributions | safe }}
            </div>

            <h3>Feature Relationships</h3>
            <div class="plot-container">
                {{ plots.feature_relationships | safe }}
            </div>
        </div>

        <div class="section">
            <h2>Model Selection</h2>
            
            <h3>Training Progress</h3>
            <div class="plot-container">
                {{ plots.training_curves | safe }}
            </div>

            <h3>Model Comparison</h3>
            <div class="plot-container">
                {{ plots.metrics_comparison | safe }}
            </div>

            <h3>Resource Usage</h3>
            <div class="plot-container">
                {{ plots.resource_usage | safe }}
            </div>

            <h3>Model Selection Summary</h3>
            <div class="metric-card">
                <h4>Training Metrics</h4>
                <div class="table-responsive">
                    <table class="table table-hover">
                        <thead>
                            <tr>
                                <th>Model</th>
                                <th>Training Loss</th>
                                <th>Validation Loss</th>
                                <th>Training Time</th>
                                <th>Memory Usage</th>
                            </tr>
                        </thead>
                        <tbody>
                            {% for model_name, results in (model_selection.results or {}).items() %}
                            <tr>
                                <td><strong>{{ model_name | e }}</strong></td>
                                <td>{{ fmt(results.train_losses[-1]) }}</td>
                                <td>{{ fmt(results.val_losses[-1]) }}</td>
                                <td>{{ fmt(results.training_time, "%.2f") }}s</td>
                                <td>{{ fmt(results.peak_memory_usage, "%.1f") }} MB</td>
                            </tr>
                            {% endfor %}
                        </tbody>
                    </table>
                </div>

                <h4 class="mt-4">Performance Metrics</h4>
                <div class="table-responsive">
                    <table class="table table-hover">
                        <thead>
                            <tr>
                                <th>Model</th>
                                <th>MSE</th>
                                <th>MAE</th>
                                <th>R²</th>
                            </tr>
                        </thead>
                        <tbody>
                            {% for model_name, results in (model_selection.results or {}).items() %}
                            <tr>
                                <td><strong>{{ model_name | e }}</strong></td>
                                <td>{{ fmt(results.val_metrics.MSE) }}</td>
                                <td>{{ fmt(results.val_metrics.MAE) }}</td>
                                <td>{{ fmt(results.val_metrics.R2) }}</td>
                            </tr>
                            {% endfor %}
                        </tbody>
                    </table>
                </div>
            </div>
        </div>

        <div class="section">
            <h2>Final Model Training</h2>
            
            <h3>Selected Model Details</h3>
            <div class="metric-card">
                <h4>Model: {{ final_training.selected_model | e }}</h4>
                <h4>Best Hyperparameters:</h4>
                <div class="params-table">
                    <table class="table table-striped">
                        <thead>
                            <tr>
                                <th>Parameter</th>
                                <th>Value</th>
                            </tr>
                        </thead>
                        <tbody>
                            {% for key, value in (final_training.best_params or {}).items() %}
                            <tr>
                                <td>{{ key | e }}</td>
                                <td>{{ value | e }}</td>
                            </tr>
                            {% endfor %}
                        </tbody>
                    </table>
                </div>
            </div>

            <h3>Training Progress</h3>
            <div class="plot-container">
                {{ final_training.optimization_plot | safe }}
            </div>
            <div class="plot-container">
                {{ final_training.training_plot | safe }}
            </div>

            <h3>Performance Metrics</h3>
            <div class="metric-card">
                <div class="row">
                    {% for key, value in (final_training.final_metrics or {}).items() %}
                    <div class="col-md-6 mb-3">
                        <div class="card">
                            <div class="card-body">
                                <h5 class="card-title">{{ key | e }}</h5>
                                <p class="card-text display-6">{{ fmt(value) }}</p>
                            </div>
                        </div>
                    </div>
                    {% endfor %}
                </div>
            </div>
        </div>

        <div class="section">
            <h2>Technical Details</h2>
            
            <h3>Feature Engineering</h3>
            <div class="metric-card">
                <h4>Selected Features:</h4>
                <div class="features-container">
                    {% for feature in technical_details.selected_features %}
                        <span class="feature-badge">{{ feature | e }}</span>
                    {% endfor %}
                </div>
            </div>

            <h3>Model Architecture</h3>
            <div class="metric-card">
                <div class="model-architecture">
                    {{ technical_details.model_architecture | e | replace("  ", "&nbsp;&nbsp;") | replace("\n", "<br>") | safe }}
                </div>
            </div>

            <h3>Training Configuration</h3>
            <div class="metric-card">
                <ul>
                    <li>Batch Size: {{ technical_details.training_config.batch_size }}</li>
                    <li>Learning Rate: {{ technical_details.training_config.learning_rate }}</li>
                    <li>Optimizer: {{ technical_details.training_config.optimizer }}</li>
                    <li>Loss Function: {{ technical_details.training_config.loss_function }}</li>
                    <li>Early Stopping Patience: {{ technical_details.training_config.early_stopping_patience }}</li>
                </ul>
            </div>
        </div>
    </div>
</body>
</html>
"""

class AutomaticaReporter:
    """Handles report generation for Automatica ML pipeline"""
    def __init__(self):
        self.report_data = {
            "dataset_info": {},
            "preprocessing": {},
            "model_selection": {},
            "final_training": {},
            "plots": {},
            "metrics": {},
            "technical_details": {}
        }
        
    def gather_data(self, ui_instance):
        """Gather all necessary data from UI instance and its components"""
        try:
            # Dataset Information
            self.gather_dataset_info(ui_instance)
            
            # Preprocessing Information
            self.gather_preprocessing_info(ui_instance)
            
            # Model Selection Information
            self.gather_model_selection_info(ui_instance)
            
            # Final Training Information
            self.gather_training_info(ui_instance)
            
            # Technical Details
            self.gather_technical_details(ui_instance)
            
            return True, "Data gathered successfully"
        except Exception as e:
            return False, f"Error gathering report data: {str(e)}"
    
    def gather_dataset_info(self, ui):
        """Gather dataset information"""
        try:
            if hasattr(ui, 'preprocessor') and ui.preprocessor is not None:
                # Get dataset name
                dataset_name = "Uploaded Dataset"
                if hasattr(ui, 'file_input') and ui.file_input is not None:
                    dataset_name = os.path.basename(ui.file_input.name)
                
        #         # Get other info
        #         self.report_data["dataset_info"].update({
        #             "dataset_name": dataset_name,
        #             "target_variable": ui.target_var if ui.target_var is not None else "Unknown",
        #             "task_type": ui.model_type if ui.model_type is not None else "Unknown",
        #             "n_features": ui.X_train.shape[1] if hasattr(ui, 'X_train') else 0,
        #             "n_samples": len(ui.X_train) if hasattr(ui, 'X_train') else 0,
        #             "feature_names": ui.feature_names if hasattr(ui, 'feature_names') else []
        #         })
        # except Exception as e:
        #     print(f"Error gathering dataset info: {str(e)}")
        #     # Set default values if there's an error
        #     self.report_data["dataset_info"].update({
        #         "dataset_name": "Unknown Dataset",
        #         "target_variable": "Unknown",
        #         "task_type": "Unknown",
        #         "n_features": 0,
        #         "n_samples": 0,
        #         "feature_names": []
        #     })
        
                    # Calculate total features and samples from original data
            n_features = len(ui.important_features) if hasattr(ui, 'important_features') else 0
            
            # Calculate total samples by summing all splits
            total_samples = (
                (len(ui.X_train) if hasattr(ui, 'X_train') else 0) +
                (len(ui.X_val) if hasattr(ui, 'X_val') else 0) +
                (len(ui.X_test) if hasattr(ui, 'X_test') else 0)
            )
            
            # Get other info
            self.report_data["dataset_info"].update({
                "dataset_name": dataset_name,
                "target_variable": ui.target_var if ui.target_var is not None else "Unknown",
                "task_type": ui.model_type if ui.model_type is not None else "Unknown",
                "n_features": n_features,
                "n_samples": total_samples,
                "feature_names": ui.feature_names if hasattr(ui, 'feature_names') else []
            })
        except Exception as e:
            print(f"Error gathering dataset info: {str(e)}")
            # Set default values if there's an error
            self.report_data["dataset_info"].update({
                "dataset_name": "Unknown Dataset",
                "target_variable": "Unknown",
                "task_type": "Unknown",
                "n_features": 0,
                "n_samples": 0,
                "feature_names": []
            })
    
    def gather_preprocessing_info(self, ui):
        """Gather preprocessing information"""
        if hasattr(ui, 'X_train') and ui.X_train is not None:
            self.report_data["preprocessing"].update({
                "train_size": ui.X_train.shape[0],
                "val_size": ui.X_val.shape[0] if hasattr(ui, 'X_val') else 0,
                "test_size": ui.X_test.shape[0] if hasattr(ui, 'X_test') else 0
            })
            
            # Store preprocessing plots with error handling for each plot
            try:
                if hasattr(ui, 'target_dist_plot') and ui.target_dist_plot is not None:
                    self.report_data["plots"]["target_distribution"] = ui.target_dist_plot
            except Exception as e:
                print(f"Error storing target distribution plot: {str(e)}")
                
            try:
                if hasattr(ui, 'feature_dist_plot') and ui.feature_dist_plot is not None:
                    self.report_data["plots"]["feature_distributions"] = ui.feature_dist_plot
            except Exception as e:
                print(f"Error storing feature distributions plot: {str(e)}")
                
            try:
                if hasattr(ui, 'feature_rels_plot') and ui.feature_rels_plot is not None:
                    self.report_data["plots"]["feature_relationships"] = ui.feature_rels_plot
            except Exception as e:
                print(f"Error storing feature relationships plot: {str(e)}")
    
    def gather_model_selection_info(self, ui):
        """Gather model selection information"""
        if hasattr(ui, 'model_selector') and ui.model_selector is not None:
            model_selector = ui.model_selector
            
            # Get plots from model selection
            self.report_data["plots"].update({
                "training_curves": model_selector.plot_training_curves(),
                "metrics_comparison": model_selector.plot_metrics_comparison(),
                "resource_usage": model_selector.plot_resource_usage()
            })
            
            # Store model selection results
            self.report_data["model_selection"]["results"] = {
                model_name: {
                    "train_losses": results.train_losses,
                    "val_losses": results.val_losses,
                    "train_metrics": results.train_metrics,
                    "val_metrics": results.val_metrics,
                    "training_time": results.training_time,
                    "peak_memory_usage": results.peak_memory_usage
                }
                for model_name, results in model_selector.results.items()
            }
    
    def gather_training_info(self, ui):
        """Gather final model training information"""
        if hasattr(ui, 'selected_model_name'):
            self.report_data["final_training"].update({
                "selected_model": ui.selected_model_name,
                "best_params": ui.best_params if hasattr(ui, 'best_params') else {},
                "optimization_plot": ui.optimization_plot if hasattr(ui, 'optimization_plot') else None,
                "training_plot": ui.training_plot if hasattr(ui, 'training_plot') else None,
                "final_metrics": ui.final_metrics if hasattr(ui, 'final_metrics') else {}
            })
    
    def gather_technical_details(self, ui):
        """Gather technical implementation details"""
        self.report_data["technical_details"].update({
            "selected_features": ui.important_features if hasattr(ui, 'important_features') else [],
            "model_architecture": str(ui.best_model) if hasattr(ui, 'best_model') else "Not available",
            "training_config": {
                "batch_size": ui.batch_size if hasattr(ui, 'batch_size') else "Unknown",
                "learning_rate": ui.learning_rate if hasattr(ui, 'learning_rate') else "Unknown",
                "optimizer": "AdamW",  # Currently hardcoded in the implementation
                "loss_function": "MSELoss",  # Currently hardcoded for regression
                "early_stopping_patience": 5  # Currently hardcoded
            }
        })
    
    def _convert_plot_to_html(self, plot):
        """Convert a plotly figure to a self-contained HTML div"""
        if plot is None:
            return ""
        try:
            if isinstance(plot, (dict, str)):  # If it's already converted somehow
                return str(plot)
            
            # Ensure we're working with a plotly figure
            if not isinstance(plot, go.Figure):
                print(f"Warning: plot is of type {type(plot)}, expected plotly.graph_objects.Figure")
                return ""
            
            # Convert to HTML with CDN-hosted plotly.js
            html_str = plot.to_html(
                full_html=False,
                include_plotlyjs='cdn',
                config={'displayModeBar': True}
            )
            
            return html_str
        except Exception as e:
            print(f"Error converting plot to HTML: {str(e)}")
            return ""
    
    def generate_report(self, output_path="automatica_report.html"):
        try:
            from datetime import datetime
            import pprint
            
            # Convert all plots to HTML
            plot_html = {}
            for plot_name, plot in self.report_data["plots"].items():
                plot_html[plot_name] = self._convert_plot_to_html(plot)
            
            # Convert plots to HTML before template rendering
            if self.report_data["final_training"].get("optimization_plot"):
                self.report_data["final_training"]["optimization_plot"] = self._convert_plot_to_html(
                    self.report_data["final_training"]["optimization_plot"]
                )
                
            if self.report_data["final_training"].get("training_plot"):
                self.report_data["final_training"]["training_plot"] = self._convert_plot_to_html(
                    self.report_data["final_training"]["training_plot"]
                )

            # Create template from string
            template = Template(REPORT_TEMPLATE)
            
            # Combine all data for the template
            template_data = {
                **self.report_data["dataset_info"],
                "preprocessing": self.report_data["preprocessing"],
                # "plots": self.report_data["plots"],
                "plots": plot_html,
                "model_selection": self.report_data["model_selection"],
                "final_training": self.report_data["final_training"],
                "technical_details": self.report_data["technical_details"],
                "datetime": datetime,
                "pprint": pprint.pformat
            }

            # Render template
            html_content = template.render(**template_data)
            
            # Write to file
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(html_content)
            
            return True, f"Report generated successfully at {output_path}"
        except Exception as e:
            return False, f"Error generating report: {str(e)}"

In [6]:
# %% [markdown]
# ## UI System

# %%
class AutomaticaUI:
    def __init__(self):
        self.preprocessor = None
        self.feature_names = None
        self.important_features = None
        self.model_selector = None
        self.selected_model_name = None
        self.X_train_state = None
        self.y_train_state = None
        self.X_val_state = None
        self.y_val_state = None
        self.model_selection_results = None
        self.reporter = AutomaticaReporter()
        self.target_var = None
        self.model_type = None
        self.file_input = None
        self.batch_size = None
        self.learning_rate = None
        self.best_params = None
        self.best_model = None
        self.final_metrics = None
        self.selected_model_name = None
        self.optimization_plot = None
        self.training_plot = None
    
    def load_and_validate_file(self, file):
        """Validate and load CSV file"""
        try:
            if file is None:
                self.file_input = None  # Clear the stored file info
                return None, "No file uploaded", gr.update(choices=[]), None
            
            file_extension = os.path.splitext(file.name)[1].lower()
            if file_extension != '.csv':
                self.file_input = None  # Clear the stored file info
                return None, "Only CSV files are supported", gr.update(choices=[]), None
            
            df = pd.read_csv(file.name)
            self.file_input = file  # Store the file info
            
            return (
                "File loaded successfully", 
                self.create_pygwalker_vis(df),
                gr.update(choices=self.get_column_names(df)),
                df
            )
        except Exception as e:
            self.file_input = None  # Clear the stored file info
            return None, f"Error loading file: {str(e)}", gr.update(choices=[]), None
    
    def create_pygwalker_vis(self, df):
        """Create PyGWalker visualization"""
        if df is None:
            return "No data to visualize"
        try:
            gw = pyg.walk(df)
            return gw.to_html()
        except Exception as e:
            return f"Error creating visualization: {str(e)}"
    
    def get_column_names(self, df):
        """Get column names for target selection"""
        if df is None:
            return []
        return list(df.columns)
    
    def update_feature_selector(self, df, target, model_type):
        """Update feature selector dropdown based on target and model type"""
        if df is not None and target:
            important_features = self.get_important_features(df, target, model_type)
            return gr.update(choices=important_features, value=important_features[:5])
        return gr.update(choices=[], value=[])

    def get_important_features(self, df, target_variable, model_type, n_features=10):
        """Identify important features using mutual information"""
        try:
            X = df.drop(columns=[target_variable])
            y = df[target_variable]
            
            # Handle categorical features
            categorical_cols = X.select_dtypes(include=['object', 'category']).columns
            X_encoded = X.copy()
            for col in categorical_cols:
                X_encoded[col] = pd.Categorical(X_encoded[col]).codes
            
            # Calculate mutual information scores
            mi_func = mutual_info_regression if model_type == "Regression" else mutual_info_classif
            mi_scores = mi_func(X_encoded, y)
            
            # Create feature importance DataFrame
            feature_importance = pd.DataFrame({
                'feature': X.columns,
                'importance': mi_scores
            })
            feature_importance = feature_importance.sort_values('importance', ascending=False)
            
            return feature_importance['feature'].tolist()[:n_features]
        except Exception as e:
            print(f"Error in feature importance calculation: {str(e)}")
            return X.columns.tolist()[:n_features]
        
    def analyze_and_preprocess(self, df, target_variable, model_type, selected_features):
        """Combined analysis and preprocessing function"""
        if df is None or target_variable is None:
            return None, None, None, "Please provide data and target variable"
        
        try:
            # 1. Preprocess data first to get transformed features
            X = df[selected_features]
            y = df[target_variable]
            
            # Create preprocessing pipeline
            numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns
            categorical_cols = X.select_dtypes(include=['object', 'category']).columns
            
            numerical_pipeline = Pipeline([
                ('imputer', SimpleImputer(strategy='mean')),
                ('scaler', StandardScaler() if model_type == 'Regression' 
                        else MinMaxScaler())
            ])
            
            categorical_pipeline = Pipeline([
                ('imputer', SimpleImputer(strategy='most_frequent')),
                ('onehot', OneHotEncoder(handle_unknown='ignore', 
                                    sparse_output=False))
            ])
            
            self.preprocessor = ColumnTransformer([
                ('num', numerical_pipeline, numerical_cols),
                ('cat', categorical_pipeline, categorical_cols)
            ])
            
            # Transform all data for visualization
            X_preprocessed = self.preprocessor.fit_transform(X)
            
            # Get feature names after preprocessing
            if len(categorical_cols) > 0:
                encoded_cat_cols = self.preprocessor.named_transformers_[
                    'cat']['onehot'].get_feature_names_out(categorical_cols)
                self.feature_names = list(numerical_cols) + list(encoded_cat_cols)
            else:
                self.feature_names = list(numerical_cols)
            
            # Create DataFrame with preprocessed data
            X_preprocessed_df = pd.DataFrame(
                X_preprocessed, 
                columns=self.feature_names,
                index=X.index
            )
            
            # Split the data
            X_temp, self.X_test, y_temp, self.y_test = train_test_split(
                X, y, test_size=0.2, random_state=42,
                stratify=y if model_type == 'Classification' else None
            )
            
            self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
                X_temp, y_temp, test_size=0.25, random_state=42,
                stratify=y_temp if model_type == 'Classification' else None
            )
            
            # Transform split data
            self.X_train = self.preprocessor.transform(self.X_train)
            self.X_val = self.preprocessor.transform(self.X_val)
            self.X_test = self.preprocessor.transform(self.X_test)
            
            status = (f"Data preprocessing completed successfully!\n"
                    f"Training set size: {self.X_train.shape}\n"
                    f"Validation set size: {self.X_val.shape}\n"
                    f"Test set size: {self.X_test.shape}")
            
            # Store selected features for later use
            self.important_features = selected_features
            
            # Create visualizations
            target_dist = self.create_target_distribution(df, target_variable, model_type)
            feature_dist = self.create_feature_distributions(X, X_preprocessed_df, selected_features, numerical_cols)
            feature_rels = self.create_feature_relationships(df, X, target_variable, selected_features, numerical_cols, model_type)
            
            # Create visualizations and store them as instance attributes
            self.target_dist_plot = self.create_target_distribution(df, target_variable, model_type)
            self.feature_dist_plot = self.create_feature_distributions(X, X_preprocessed_df, selected_features, numerical_cols)
            self.feature_rels_plot = self.create_feature_relationships(df, X, target_variable, selected_features, numerical_cols, model_type)
            
            return (
                target_dist,
                feature_dist, 
                feature_rels, 
                status,
                self.X_train,
                self.y_train,
                self.X_val,
                self.y_val
            )
                
        except Exception as e:
            print(f"Detailed error: {str(e)}")
            return None, None, None, f"Error in analysis and preprocessing: {str(e)}"
    
    def create_target_distribution(self, df, target_variable, model_type):
        """Create target distribution visualization"""
        if model_type == "Regression":
            return px.histogram(
                df, 
                x=target_variable,
                title=f"Distribution of {target_variable}",
                template="plotly_white"
            )
        else:
            value_counts = df[target_variable].value_counts().reset_index()
            value_counts.columns = ['Class', 'Count']
            return px.bar(
                value_counts,
                x='Class',
                y='Count',
                title=f"Class Distribution of {target_variable}",
                template="plotly_white"
            )
    
    def create_feature_distributions(self, X, X_preprocessed_df, selected_features, numerical_cols):
        """Create feature distribution visualizations"""
        feature_dist = make_subplots(
            rows=len(selected_features), 
            cols=2,
            subplot_titles=[
                f"{feat} - Before Preprocessing" if i % 2 == 0 
                else f"{feat} - After Preprocessing"
                for feat in selected_features
                for i in range(2)
            ],
            vertical_spacing=0.1
        )
        
        for i, feature in enumerate(selected_features, 1):
            # Original distribution
            if feature in numerical_cols:
                # Numerical features - use histogram
                feature_dist.add_trace(
                    go.Histogram(
                        x=X[feature],
                        name=f"Original {feature}",
                        showlegend=False
                    ),
                    row=i, col=1
                )
            else:
                # Categorical features - use bar chart
                value_counts = X[feature].value_counts()
                feature_dist.add_trace(
                    go.Bar(
                        x=value_counts.index,
                        y=value_counts.values,
                        name=f"Original {feature}",
                        showlegend=False
                    ),
                    row=i, col=1
                )
            
            # Preprocessed distribution
            preprocessed_cols = [col for col in X_preprocessed_df.columns if feature in col]
            if len(preprocessed_cols) == 1:
                # Single preprocessed column (numerical)
                feature_dist.add_trace(
                    go.Histogram(
                        x=X_preprocessed_df[preprocessed_cols[0]],
                        name=f"Preprocessed {feature}",
                        showlegend=False
                    ),
                    row=i, col=2
                )
            else:
                # Multiple preprocessed columns (one-hot encoded)
                feature_dist.add_trace(
                    go.Bar(
                        x=preprocessed_cols,
                        y=[X_preprocessed_df[col].sum() for col in preprocessed_cols],
                        name=f"Preprocessed {feature}",
                        showlegend=False
                    ),
                    row=i, col=2
                )
        
        feature_dist.update_layout(
            height=300 * len(selected_features),
            title_text="Feature Distributions Before and After Preprocessing",
            showlegend=False,
            template="plotly_white"
        )
        
        return feature_dist

    def create_feature_relationships(self, df, X, target_variable, selected_features, numerical_cols, model_type):
        """Create feature relationship visualizations"""
        feature_rels = make_subplots(
            rows=len(selected_features), 
            cols=1,
            subplot_titles=[f"{target_variable} vs {feat}" for feat in selected_features],
            vertical_spacing=0.1
        )
        
        for i, feature in enumerate(selected_features, 1):
            if feature in numerical_cols:
                # Scatter plot for numerical features
                feature_rels.add_trace(
                    go.Scatter(
                        x=X[feature],
                        y=df[target_variable],
                        mode='markers',
                        name=feature,
                        marker=dict(
                            size=6,
                            opacity=0.6,
                            colorscale='Viridis'
                        ),
                        showlegend=False
                    ),
                    row=i, col=1
                )
            else:
                # Box plot for categorical features
                feature_rels.add_trace(
                    go.Box(
                        x=X[feature],
                        y=df[target_variable],
                        name=feature,
                        showlegend=False
                    ),
                    row=i, col=1
                )
            
            # Update axes labels
            feature_rels.update_xaxes(title_text=feature, row=i, col=1)
            if i == len(selected_features):  # Only add y-axis title for the last subplot
                feature_rels.update_yaxes(title_text=target_variable, row=i, col=1)
        
        feature_rels.update_layout(
            height=300 * len(selected_features),
            title_text=f"Feature Relationships with {target_variable}",
            showlegend=False,
            template="plotly_white"
        )
        
        return feature_rels

    def handle_model_training(self, df_state, target_var, selected_model, X_train, y_train, X_val, y_val, task_type):
        """Handle model training with hyperparameter optimization"""
        try:
            if X_train is None or y_train is None:
                return None, None, "Error: Please run data preprocessing first"

            # Data preparation
            X_train = np.array(X_train) if not isinstance(X_train, np.ndarray) else X_train
            y_train = np.array(y_train) if not isinstance(y_train, np.ndarray) else y_train
            X_val = np.array(X_val) if not isinstance(X_val, np.ndarray) else X_val
            y_val = np.array(y_val) if not isinstance(y_val, np.ndarray) else y_val
            
            print(f"Training data shapes - X_train: {X_train.shape}, y_train: {y_train.shape}")
            
            # Store selected model name
            self.selected_model_name = selected_model
            
            # Initialize trainer
            trainer = AutomaticaTrainer(
                model_class=selected_model,
                input_dim=X_train.shape[1],
                task_type=task_type.lower(),
                output_dim=1 if task_type.lower() == 'regression' else len(np.unique(y_train)),
                n_iter=5
            )
            
            # Run hyperparameter optimization
            optimizer = trainer.optimize_hyperparameters(X_train, y_train.reshape(-1, 1))
            
            # Generate and store optimization plot
            self.optimization_plot = trainer.plot_optimization_results()
            
            # Store best parameters
            self.best_params = trainer.best_params
            
            # Train final model
            training_history = trainer.train_final_model(
                X_train, 
                y_train.reshape(-1, 1),
                X_val,
                y_val.reshape(-1, 1),
                patience=5,
                min_delta=1e-4
            )
            
            # Generate and store training plot
            self.training_plot = trainer.plot_training_history(training_history)
        
            # Store model and configuration
            self.best_model = trainer.best_model
            self.batch_size = trainer.best_params.get('batch_size', 32)
            self.learning_rate = trainer.best_params.get('lr', 0.001)
            
            # Save model
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            model_filename = f'best_model_{timestamp}.joblib'
            trainer.save_model(model_filename)
            
            # Generate summary
            summary = f"""
    Training completed successfully!

    Model: {selected_model}
    Best Parameters: {trainer.best_params}
    Model saved as: {model_filename}

    Final Metrics:
    Train Loss: {training_history['train_loss'][-1]:.4f}
    Validation Loss: {training_history['val_loss'][-1]:.4f}
    Train Metric: {training_history['train_metric'][-1]:.4f}
    Validation Metric: {training_history['val_metric'][-1]:.4f}
    """
            
            # Store metrics
            self.final_metrics = self._parse_metrics_from_summary(summary)
            
            # Return plots and summary
            return self.optimization_plot, self.training_plot, summary
            
        except Exception as e:
            print(f"Detailed error in model training: {str(e)}")
            print(traceback.format_exc())
            return None, None, f"Error in model training: {str(e)}"

    def _parse_metrics_from_summary(self, summary):
        """Helper method to parse metrics from training summary"""
        metrics = {}
        try:
            # Basic parsing of the summary string to extract metrics
            lines = summary.split('\n')
            for line in lines:
                if ':' in line:
                    key, value = line.split(':', 1)
                    try:
                        metrics[key.strip()] = float(value.strip())
                    except ValueError:
                        continue
        except Exception as e:
            print(f"Error parsing metrics: {str(e)}")
        return metrics
    
    def run_model_selection(self, df_state, target_var, model_type, X_train_state, y_train_state, X_val_state, y_val_state):
        """Run model selection process and return results"""
        try:
            # Get data from state variables
            if X_train_state is None or y_train_state is None:
                return None, None, None, "Please run data preprocessing first", []

            # Convert data if needed
            X_train = np.array(X_train_state) if not isinstance(X_train_state, np.ndarray) else X_train_state
            y_train = np.array(y_train_state) if not isinstance(y_train_state, np.ndarray) else y_train_state
            X_val = np.array(X_val_state) if not isinstance(X_val_state, np.ndarray) else X_val_state
            y_val = np.array(y_val_state) if not isinstance(y_val_state, np.ndarray) else y_val_state
                
            print(f"X_train shape: {X_train.shape}")  # Debug print
            
            # Initialize selector with actual input dimension from preprocessed data
            self.model_selector = AutomaticaModelSelector(
                task_type=model_type.lower(),
                input_dim=X_train.shape[1],
                trial_epochs=100,
                batch_size=128
            )
            
            # Run trials with preprocessed data
            success = self.model_selector.run_trial(
                X_train, 
                y_train, 
                X_val, 
                y_val
            )
            
            if not success:
                return None, None, None, "Model training failed", []
            
            # Get plots
            training_curves = self.model_selector.plot_training_curves()
            metrics_plot = self.model_selector.plot_metrics_comparison()
            resource_plot = self.model_selector.plot_resource_usage()
            
            # Get summary as string
            summary_data = []
            for model_name, results in self.model_selector.results.items():
                summary_data.append({
                    'Model': model_name,
                    'Final Train Loss': f"{results.train_losses[-1]:.4f}",
                    'Final Val Loss': f"{results.val_losses[-1]:.4f}",
                    'Training Time (s)': f"{results.training_time:.2f}",
                    'Memory (MB)': f"{results.peak_memory_usage:.1f}"
                })
                summary_data[-1].update({
                    k: f"{v:.4f}" for k, v in results.val_metrics.items()
                })
            
            summary_df = pd.DataFrame(summary_data)
            summary_text = "Model Comparison Summary:\n" + summary_df.to_string()
            
            # Store the results in the UI state
            self.model_selection_results = {
                'models': self.model_selector.results,
                'best_model': model_name # Store the best model name based on validation metrics
            }
            
            return (
                training_curves,
                metrics_plot,
                resource_plot,
                summary_text,
                list(self.model_selector.results.keys())
            )
            
        except Exception as e:
            print(f"Error in model selection: {str(e)}")
            return None, None, None, f"Error in model selection: {str(e)}", []
        
    def handle_report_generation(self):
        """Handle the report generation process and open report in browser"""
        try:
            # Gather all data
            success, message = self.reporter.gather_data(self)
            if not success:
                return f"Failed to gather report data: {message}"
            
            # Generate the report
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            report_path = f"automatica_report_{timestamp}.html"
            success, message = self.reporter.generate_report(report_path)
            
            if success:
                # Get the absolute path to the report
                abs_path = os.path.abspath(report_path)
                
                # Convert the file path to a URL format
                if os.name == 'nt':  # Windows
                    url = f'file:///{abs_path.replace(os.sep, "/")}'
                else:  # Unix-like systems
                    url = f'file://{abs_path}'
                
                # Open the report in the default web browser
                try:
                    import webbrowser
                    webbrowser.open(url)
                    return f"Report generated and opened in browser! Location: {report_path}"
                except Exception as browser_error:
                    return f"Report generated at {report_path}, but couldn't open browser: {str(browser_error)}"
            else:
                return f"Failed to generate report: {message}"
                
        except Exception as e:
            return f"Error in report generation: {str(e)}"

    def build_interface(self):
        with gr.Blocks() as demo:
            # Create state objects
            df_state = gr.State()
            X_train_state = gr.State(None)
            y_train_state = gr.State(None)
            X_val_state = gr.State(None)
            y_val_state = gr.State(None)
            
            gr.Markdown("# Automatica")
            
            # File Upload
            with gr.Row():
                file_input = gr.File(label="Upload CSV File")
                file_status = gr.Textbox(label="File Status", interactive=False)
            
            # Data Visualization
            gr.Markdown("### Data Visualization")
            visualization_output = gr.HTML()
            
            # Model Configuration
            gr.Markdown("### Model Configuration")
            with gr.Row():
                target_var = gr.Dropdown(
                    label="Target Variable",
                    choices=[],
                    interactive=True
                )
                model_type = gr.Dropdown(
                    label="Model Type",
                    choices=["Regression", "Classification"],
                    value="Regression",
                    interactive=True
                )
            
            # Feature Selection
            feature_selector = gr.Dropdown(
                label="Select Features to Visualize",
                choices=[],
                multiselect=True,
                interactive=True
            )
            
            # Tabs for different sections
            with gr.Tabs():
                # Data Processing Tab
                with gr.TabItem("Data Processing"):
                    process_button = gr.Button("Analyze and Preprocess Data")
                    
                    with gr.Tabs():
                        with gr.TabItem("Target Distribution"):
                            target_dist_plot = gr.Plot()
                        with gr.TabItem("Feature Distributions"):
                            feature_dist_plot = gr.Plot()
                        with gr.TabItem("Feature Relationships"):
                            feature_rels_plot = gr.Plot()
                    
                    process_status = gr.Textbox(
                        label="Processing Status",
                        interactive=False
                    )
                
                # Model Selection Tab
                with gr.TabItem("Model Selection"):
                    gr.Markdown("### Model Selection")
                    select_model_button = gr.Button("Start Model Selection")
                    
                    with gr.Row():
                        with gr.Column():
                            training_curves_plot = gr.Plot(label="Training Curves")
                        with gr.Column():
                            metrics_plot = gr.Plot(label="Model Metrics")
                    
                    with gr.Row():
                        with gr.Column():
                            resource_plot = gr.Plot(label="Resource Usage")
                        with gr.Column():
                            model_summary = gr.Textbox(
                                label="Model Selection Summary",
                                interactive=False,
                                lines=10
                            )
                
                # Model Training Tab
                with gr.TabItem("Model Training"):
                    gr.Markdown("### Model Training and Hyperparameter Tuning")
                    
                    with gr.Row():
                        model_dropdown = gr.Dropdown(
                            label="Select Model for Training",
                            choices=["ResNetMLP", "TransformerTabular", "DenseNetTabular", "TabNetModel"],
                            interactive=True
                        )
                        train_model_button = gr.Button("Start Training")
                    
                    with gr.Tabs():
                        with gr.TabItem("Optimization Progress"):
                            optimization_plot = gr.Plot(label="Hyperparameter Optimization")
                        with gr.TabItem("Training Progress"):
                            training_plot = gr.Plot(label="Training History")
                    
                    training_summary = gr.Textbox(
                        label="Training Summary",
                        interactive=False,
                        lines=10
                    )
                    
                    # Report Generation tab
                with gr.TabItem("Report Generation"):
                    gr.Markdown("### Generate Analysis Report")
                    with gr.Row():
                        generate_report_button = gr.Button("Generate Report")
                        report_status = gr.Textbox(
                            label="Report Status",
                            interactive=False,
                            lines=3
                        )
                    
                    gr.Markdown("""
                    This will generate a comprehensive HTML report including:
                    - Dataset analysis and preprocessing details
                    - Model selection results and comparisons
                    - Final model training results and performance metrics
                    - Technical implementation details
                    """)
            
            def handle_target_selection(target, model_type_value):
                self.target_var = target
                self.model_type = model_type_value
                return None
            
            # Event handlers
            file_input.change(
                fn=self.load_and_validate_file,
                inputs=[file_input],
                outputs=[file_status, visualization_output, target_var, df_state]
            )
            
            target_var.change(
                fn=self.update_feature_selector,
                inputs=[df_state, target_var, model_type],
                outputs=[feature_selector]
            ).success(
                fn=lambda t, m: setattr(self, 'target_var', t) or setattr(self, 'model_type', m) or None,
                inputs=[target_var, model_type],
                outputs=None
            )

            model_type.change(
                fn=lambda t, m: setattr(self, 'model_type', m) or None,
                inputs=[target_var, model_type],
                outputs=None
            )
            
            process_button.click(
                fn=self.analyze_and_preprocess,
                inputs=[df_state, target_var, model_type, feature_selector],
                outputs=[
                    target_dist_plot,
                    feature_dist_plot,
                    feature_rels_plot,
                    process_status,
                    X_train_state,
                    y_train_state,
                    X_val_state,
                    y_val_state
                ]
            )

            select_model_button.click(
                fn=self.run_model_selection,
                inputs=[
                    df_state,
                    target_var,
                    model_type,
                    X_train_state,
                    y_train_state,
                    X_val_state,
                    y_val_state
                ],
                outputs=[
                    training_curves_plot,
                    metrics_plot,
                    resource_plot,
                    model_summary
                ]
            )
            
            train_model_button.click(
                fn=self.handle_model_training,
                inputs=[
                    df_state,
                    target_var,
                    model_dropdown,
                    X_train_state,
                    y_train_state,
                    X_val_state,
                    y_val_state,
                    model_type
                ],
                outputs=[
                    optimization_plot,
                    training_plot,
                    training_summary
                ]
            )
            
            generate_report_button.click(
                fn=self.handle_report_generation,
                inputs=[],
                outputs=[report_status]
            )
        
        return demo

In [None]:
# %% [markdown]
# ## Launch the Application

# %%
# Entry point: build the UI and start the Gradio server when this cell/script
# is run directly. `app` and `demo` are left as module-level names.
if __name__ == "__main__":
    app = AutomaticaUI()
    demo = app.build_interface()
    # NOTE(review): share=True asks Gradio for a public share link, so anyone
    # with the URL could upload files and start training runs on this machine.
    # Confirm this is intended; use share=False for local-only sessions.
    demo.launch(share=True)