# Changed RBF length scale to 2.0

# In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Machine Learning imports
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.linear_model import ElasticNet, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, C, Matern, WhiteKernel
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, r2_score

# Neural Network imports
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Add, LeakyReLU
from tensorflow.keras.optimizers import Adam, AdamW, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.regularizers import l1_l2

import logging
import time
from datetime import datetime

# Configure logging once for the whole notebook: INFO level, each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the classes and script sections in this file.
logger = logging.getLogger(__name__)

class EnhancedModelTrainer:
    def __init__(self):
        self.model_performance = {}
        self.trained_models = {}
        self.feature_importance = {}
        
    def create_neural_architectures(self, input_dim, property_name):
        """Build one fresh, uncompiled Keras model per candidate architecture.

        Args:
            input_dim: Number of input features every network accepts.
            property_name: Not used by this method; accepted so callers can
                pass it alongside input_dim.

        Returns:
            dict mapping architecture name -> Keras model, inserted in the
            order Deep_Dense, Residual_Net, Wide_Net, Regularized_Net,
            Ensemble_Net.
        """
        in_shape = (input_dim,)

        def residual_block(tensor):
            # Dense(relu) -> BatchNorm -> Dense(linear), added back onto the
            # block input, followed by LeakyReLU and dropout.
            skip = tensor
            tensor = Dense(128, activation='relu')(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = Dense(128, activation='linear')(tensor)
            tensor = Add()([tensor, skip])
            tensor = LeakyReLU()(tensor)
            return Dropout(0.2)(tensor)

        def build_residual_net():
            # Functional-API model: stem -> two residual blocks -> output head.
            net_in = Input(shape=in_shape)
            hidden = Dense(128, activation='relu')(net_in)
            hidden = BatchNormalization()(hidden)
            hidden = residual_block(hidden)
            hidden = residual_block(hidden)
            hidden = Dense(64, activation='relu')(hidden)
            hidden = Dropout(0.1)(hidden)
            net_out = Dense(1, activation='linear')(hidden)
            return Model(net_in, net_out)

        def weight_penalty():
            # Each regularized layer gets its own combined L1/L2 penalty object.
            return l1_l2(l1=0.01, l2=0.01)

        # Dict values are evaluated top to bottom, so the models are built in
        # the order listed.
        return {
            # Progressively narrowing stack of ReLU layers.
            'Deep_Dense': Sequential([
                Dense(256, activation='relu', input_shape=in_shape),
                BatchNormalization(),
                Dropout(0.3),
                Dense(128, activation='relu'),
                BatchNormalization(),
                Dropout(0.2),
                Dense(64, activation='relu'),
                Dropout(0.1),
                Dense(32, activation='relu'),
                Dense(1, activation='linear'),
            ]),
            # Skip-connection network built with the functional API.
            'Residual_Net': build_residual_net(),
            # Fewer but wider layers with heavier dropout.
            'Wide_Net': Sequential([
                Dense(512, activation='relu', input_shape=in_shape),
                BatchNormalization(),
                Dropout(0.4),
                Dense(256, activation='relu'),
                BatchNormalization(),
                Dropout(0.3),
                Dense(128, activation='relu'),
                Dropout(0.2),
                Dense(1, activation='linear'),
            ]),
            # Smaller network with L1/L2 penalties on the first two layers.
            'Regularized_Net': Sequential([
                Dense(128, activation='relu', input_shape=in_shape,
                      kernel_regularizer=weight_penalty()),
                BatchNormalization(),
                Dropout(0.3),
                Dense(64, activation='relu', kernel_regularizer=weight_penalty()),
                BatchNormalization(),
                Dropout(0.2),
                Dense(32, activation='relu'),
                Dense(1, activation='linear'),
            ]),
            # ELU-activated variant of the deep stack.
            'Ensemble_Net': Sequential([
                Dense(256, activation='elu', input_shape=in_shape),
                BatchNormalization(),
                Dropout(0.25),
                Dense(128, activation='elu'),
                BatchNormalization(),
                Dropout(0.25),
                Dense(64, activation='elu'),
                Dropout(0.15),
                Dense(32, activation='relu'),
                Dense(1, activation='linear'),
            ]),
        }
    
    def get_traditional_models(self):
        """Return freshly constructed, unfitted scikit-learn candidate regressors.

        Returns:
            dict mapping model name -> estimator. The Gaussian-process, linear
            and SVR models are wrapped in a pipeline that standardizes the
            features first; the tree ensembles are returned bare.
        """

        def standardized(estimator):
            # Prefix the estimator with its own StandardScaler step.
            return make_pipeline(StandardScaler(), estimator)

        def gaussian_process(base_kernel):
            # Kernel = constant * base kernel + white-noise term.
            full_kernel = C(1.0, (1e-3, 1e3)) * base_kernel + WhiteKernel(noise_level=1e-3)
            return GaussianProcessRegressor(
                kernel=full_kernel,
                n_restarts_optimizer=10,
                random_state=42,
                alpha=1e-6,
            )

        # Random forest and extra trees share the same hyper-parameters.
        forest_params = dict(
            n_estimators=200,
            max_depth=15,
            min_samples_split=5,
            min_samples_leaf=2,
            random_state=42,
            n_jobs=-1,
        )

        # Insertion order is the order in which callers iterate the models.
        candidates = {}
        candidates['Enhanced_GP_RBF'] = standardized(
            gaussian_process(RBF(length_scale=2.0))
        )
        candidates['Enhanced_GP_Matern'] = standardized(
            gaussian_process(Matern(length_scale=2.0, nu=2.5))
        )
        candidates['Enhanced_RF'] = RandomForestRegressor(**forest_params)
        candidates['Extra_Trees'] = ExtraTreesRegressor(**forest_params)
        candidates['Gradient_Boosting'] = GradientBoostingRegressor(
            n_estimators=200,
            learning_rate=0.1,
            max_depth=8,
            min_samples_split=5,
            min_samples_leaf=2,
            random_state=42,
        )
        candidates['Enhanced_ElasticNet'] = standardized(
            ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=42, max_iter=2000)
        )
        candidates['Ridge_Regression'] = standardized(
            Ridge(alpha=1.0, random_state=42)
        )
        candidates['Enhanced_SVR_RBF'] = standardized(
            SVR(kernel='rbf', C=10.0, epsilon=0.1, gamma='scale')
        )
        candidates['Enhanced_SVR_Poly'] = standardized(
            SVR(kernel='poly', C=1.0, epsilon=0.1, degree=3)
        )
        return candidates
    
    def evaluate_model_performance(self, model, X, y, model_name, property_name):
        """Score a scikit-learn style regressor by cross-validation and on a hold-out split.

        The model is first scored with 5-fold cross-validation (MAPE). It is
        then fitted on the training part of a fixed 80/20 split
        (random_state=42) and scored on the held-out 20%.

        Args:
            model: Estimator with fit/predict, usable with cross_val_score.
            X: Feature matrix.
            y: Target values aligned with X.
            model_name: Label used in log messages.
            property_name: Target property name, used in log messages.

        Returns:
            dict with keys 'cv_mape', 'cv_std', 'test_mape', 'test_mae',
            'test_r2' and 'model' (the estimator fitted on the training
            split), or None if any step raised (the error is logged).
        """

        try:
            # The scorer returns negated MAPE, so flip the sign of the mean.
            cv_scores = cross_val_score(
                model, X, y, cv=5,
                scoring='neg_mean_absolute_percentage_error', n_jobs=-1
            )
            cv_mape = -cv_scores.mean()
            cv_std = cv_scores.std()

            # Hold-out split for the detailed metrics.
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            mape = mean_absolute_percentage_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)

            # The "±" and "R²" below replace mis-encoded characters that
            # previously appeared in the log output.
            logger.info(f"")
            logger.info(f"{'='*80}")
            logger.info(f"MODEL EVALUATION: {model_name} for {property_name}")
            logger.info(f"{'='*80}")
            logger.info(f"Cross-Validation MAPE: {cv_mape:.4f} ± {cv_std:.4f}")
            logger.info(f"Test Set MAPE: {mape:.4f}")
            logger.info(f"Test Set MAE: {mae:.4f}")
            logger.info(f"Test Set R²: {r2:.4f}")
            logger.info(f"{'='*80}")

            return {
                'cv_mape': cv_mape,
                'cv_std': cv_std,
                'test_mape': mape,
                'test_mae': mae,
                'test_r2': r2,
                'model': model
            }

        except Exception as e:
            # Best effort: one failing candidate must not abort the model
            # search, so log the error and signal failure with None.
            logger.error(f"Error evaluating {model_name} for {property_name}: {str(e)}")
            return None
    
    def evaluate_neural_network(self, architecture, X, y, model_name, property_name):
        """Train a Keras model on an 80/20 split and score it on the held-out part.

        Features are standardized with a scaler fitted on the training split
        only. The network is compiled with Adam (lr=0.001) and MSE loss, and
        trained for up to 200 epochs with early stopping and learning-rate
        reduction, both monitoring the validation loss.

        Args:
            architecture: Uncompiled Keras model; compiled and trained in place.
            X: Feature matrix.
            y: Target values aligned with X.
            model_name: Label used in log messages.
            property_name: Target property name, used in log messages.

        Returns:
            dict with keys 'cv_mape', 'cv_std', 'test_mape', 'test_mae',
            'test_r2', 'model' and 'history', or None if any step raised (the
            error is logged). No cross-validation is run: 'cv_mape' repeats
            the test MAPE and 'cv_std' is 0.0. 'model' is a wrapper that
            scales its input before predicting and whose fit() does nothing.
        """

        try:
            # Prepare data
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Fit the scaler on the training split only.
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            # Compile model
            architecture.compile(
                optimizer=Adam(learning_rate=0.001),
                loss='mse',
                metrics=['mae']
            )

            # Callbacks
            callbacks = [
                EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
                ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7)
            ]

            # Train model
            logger.info(f"Training {model_name} for {property_name}...")
            history = architecture.fit(
                X_train_scaled, y_train,
                validation_split=0.2,
                epochs=200,
                batch_size=32,
                callbacks=callbacks,
                verbose=0
            )

            # verbose=0 keeps prediction as quiet as training; otherwise Keras
            # prints a progress bar for every predict call.
            y_pred = architecture.predict(X_test_scaled, verbose=0).flatten()

            # Calculate metrics
            mape = mean_absolute_percentage_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)

            # Minimal fit/predict wrapper so callers can treat the network
            # like the scikit-learn candidates.
            class NeuralNetworkPipeline:
                def __init__(self, scaler, model):
                    self.scaler = scaler
                    self.model = model

                def fit(self, X, y):
                    # Deliberate no-op: the wrapped network is already trained,
                    # so a later "retrain" call leaves it unchanged.
                    pass  # Already trained

                def predict(self, X):
                    X_scaled = self.scaler.transform(X)
                    return self.model.predict(X_scaled, verbose=0).flatten()

            pipeline = NeuralNetworkPipeline(scaler, architecture)

            # The "R²" below replaces a mis-encoded character that previously
            # appeared in the log output.
            logger.info(f"")
            logger.info(f"{'='*80}")
            logger.info(f"NEURAL NETWORK EVALUATION: {model_name} for {property_name}")
            logger.info(f"{'='*80}")
            logger.info(f"Final Training Loss: {history.history['loss'][-1]:.4f}")
            logger.info(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
            logger.info(f"Test Set MAPE: {mape:.4f}")
            logger.info(f"Test Set MAE: {mae:.4f}")
            logger.info(f"Test Set R²: {r2:.4f}")
            logger.info(f"Training Epochs: {len(history.history['loss'])}")
            logger.info(f"{'='*80}")

            return {
                'cv_mape': mape,  # Using test MAPE as proxy
                'cv_std': 0.0,
                'test_mape': mape,
                'test_mae': mae,
                'test_r2': r2,
                'model': pipeline,
                'history': history
            }

        except Exception as e:
            # Best effort: one failing architecture must not abort the search.
            logger.error(f"Error training {model_name} for {property_name}: {str(e)}")
            return None
    
    def get_best_model_for_property(self, X, y, property_name):
        """Evaluate every candidate model and pick the one with the lowest test MAPE.

        All traditional models are evaluated first, then all neural network
        architectures. Candidates whose evaluation returned None are skipped.
        On success the full comparison is stored in
        self.model_performance[property_name].

        Args:
            X: Feature matrix; X.shape[1] sets the network input width.
            y: Target values aligned with X.
            property_name: Target property name, used for logging and as the
                key in self.model_performance.

        Returns:
            Tuple (best_model, best_model_name, best_result), where
            best_result is that candidate's metrics dict. Returns
            (None, None, None) when no candidate could be evaluated.
        """

        # The emoji in the log strings below replace mis-encoded characters
        # that previously appeared in the output.
        logger.info(f"\n🚀 Starting comprehensive model evaluation for {property_name}")
        logger.info(f"Data shape: {X.shape}, Target shape: {y.shape}")

        all_results = {}

        # Evaluate traditional models
        logger.info(f"\n📊 Evaluating Traditional ML Models for {property_name}")
        traditional_models = self.get_traditional_models()

        for model_name, model in traditional_models.items():
            logger.info(f"\nEvaluating {model_name}...")
            result = self.evaluate_model_performance(model, X, y, model_name, property_name)
            # The evaluators signal failure with None; test for that explicitly.
            if result is not None:
                all_results[model_name] = result

        # Evaluate neural networks
        logger.info(f"\n🧠 Evaluating Neural Network Architectures for {property_name}")
        neural_architectures = self.create_neural_architectures(X.shape[1], property_name)

        for arch_name, architecture in neural_architectures.items():
            logger.info(f"\nEvaluating {arch_name}...")
            result = self.evaluate_neural_network(architecture, X, y, arch_name, property_name)
            if result is not None:
                all_results[arch_name] = result

        if not all_results:
            logger.error(f"No models successfully trained for {property_name}")
            return None, None, None

        # Lowest hold-out MAPE wins; ties go to the candidate evaluated first.
        best_model_name = min(all_results, key=lambda name: all_results[name]['test_mape'])
        best_result = all_results[best_model_name]

        logger.info(f"\n🏆 BEST MODEL FOR {property_name}: {best_model_name}")
        logger.info(f"Best Test MAPE: {best_result['test_mape']:.4f}")
        logger.info(f"Best Test R²: {best_result['test_r2']:.4f}")

        # Store results
        self.model_performance[property_name] = {
            'best_model': best_model_name,
            'all_results': all_results,
            'best_performance': best_result
        }

        return best_result['model'], best_model_name, best_result

# Initialize the enhanced trainer (module-level; its model_performance is
# read again by the analysis cell further down).
trainer = EnhancedModelTrainer()

# Load data
logger.info("Loading training and test data...")
try:
    # Load training data
    df = pd.read_csv("/Users/MacbookPro/LocalStorage/Developer/ShellAi/dataset/train.csv")

    # Load test data and the submission template
    test_df = pd.read_csv("/Users/MacbookPro/LocalStorage/Developer/ShellAi/dataset/test.csv")
    sample_submission = pd.read_csv("/Users/MacbookPro/LocalStorage/Developer/ShellAi/dataset/sample_solution.csv")

    # Separate the ID column from the model input features.
    test_ids = test_df['ID']
    test_df_features = test_df.drop(columns=['ID'])

    logger.info(f"Training data shape: {df.shape}")
    logger.info(f"Test data shape: {test_df.shape}")

except FileNotFoundError as e:
    # Only a missing file is handled here; execution continues, and the
    # processing step below checks that `df` and `test_df_features` exist.
    logger.error(f"Data files not found: {e}")
    print("Please ensure all data files are in the correct location.")

# Process each property
# At module level locals() is globals(), so this guard only checks that the
# loading step above defined both names.
if 'df' in locals() and 'test_df_features' in locals():
    final_predictions = {}
    submission_df = sample_submission.copy()

    # Log overall process start. The emoji and "R²" in the strings below
    # replace mis-encoded characters that previously appeared in the output.
    logger.info(f"\n🎯 STARTING COMPREHENSIVE MODEL TRAINING FOR ALL PROPERTIES")
    logger.info(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    logger.info(f"{'='*100}")

    for i in range(1, 11):
        property_name = f'BlendProperty{i}'

        logger.info(f"\n\n🔄 PROCESSING {property_name} ({i}/10)")
        logger.info(f"{'='*100}")

        # Features for this property: the five component fractions plus each
        # component's value of property i.
        features = ['Component1_fraction', 'Component2_fraction', 'Component3_fraction',
                   'Component4_fraction', 'Component5_fraction'] + \
                  [f'Component{j}_Property{i}' for j in range(1, 6)]

        # Extract training data
        X = df[features]
        y = df[property_name]

        logger.info(f"Features for {property_name}: {len(features)} features")
        logger.info(f"Target statistics - Mean: {y.mean():.4f}, Std: {y.std():.4f}")

        # Find best model (the timing covers the whole model search)
        start_time = time.time()
        best_model, best_model_name, best_result = trainer.get_best_model_for_property(X, y, property_name)
        training_time = time.time() - start_time

        # Compare against None explicitly: truth-testing an estimator calls
        # its __len__ (pipelines and ensembles define one), which is not a
        # reliable "was a model found" check.
        if best_model is not None and best_model_name is not None:
            # Retrain on full dataset (a no-op for the neural network wrapper,
            # whose fit() does nothing).
            logger.info(f"\n🔧 Retraining {best_model_name} on full dataset for {property_name}...")
            best_model.fit(X, y)

            # Make predictions
            test_predictions = best_model.predict(test_df_features[features])
            submission_df[property_name] = test_predictions

            # Store results
            final_predictions[property_name] = {
                'model_name': best_model_name,
                'model': best_model,
                'performance': best_result,
                'training_time': training_time,
                'predictions': test_predictions
            }

            # Final logging for this property
            logger.info(f"✅ {property_name} COMPLETE")
            logger.info(f"Best Model: {best_model_name}")
            logger.info(f"Performance: MAPE={best_result['test_mape']:.4f}, R²={best_result['test_r2']:.4f}")
            logger.info(f"Training Time: {training_time:.2f} seconds")
            logger.info(f"Prediction Range: [{test_predictions.min():.4f}, {test_predictions.max():.4f}]")

        else:
            # The submission column for this property keeps the value from
            # the sample submission.
            logger.error(f"❌ Failed to train models for {property_name}")

    # Save final submission
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    submission_filename = f'enhanced_submission_{timestamp}.csv'
    submission_df.to_csv(submission_filename, index=False)

    # Final summary logging
    logger.info(f"\n\n🎉 TRAINING COMPLETE FOR ALL PROPERTIES")
    logger.info(f"{'='*100}")
    logger.info(f"Submission saved as: {submission_filename}")

    # Print summary table
    print(f"\n{'='*120}")
    print(f"FINAL MODEL SUMMARY")
    print(f"{'='*120}")
    print(f"{'Property':<15} {'Best Model':<20} {'MAPE':<10} {'R²':<10} {'Time (s)':<10}")
    print(f"{'-'*120}")

    for prop, details in final_predictions.items():
        print(f"{prop:<15} {details['model_name']:<20} {details['performance']['test_mape']:<10.4f} "
              f"{details['performance']['test_r2']:<10.4f} {details['training_time']:<10.2f}")

    print(f"{'='*120}")
    print(f"Enhanced submission file saved: {submission_filename}")

else:
    logger.error("Failed to load required data files")


# In [None]:
# Advanced Model Analysis and Ensemble Generation
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import learning_curve
import pickle
import json

class ModelAnalyzer:
    def __init__(self, trainer):
        """Keep a reference to the trainer whose results will be analyzed.

        Args:
            trainer: Object exposing a `model_performance` dict; the methods
                of this class read `best_model`, `best_performance` and
                `all_results` from each of its entries.
        """
        self.trainer = trainer
        
    def plot_performance_comparison(self):
        """Draw a 2x2 figure comparing all evaluated models across properties.

        Panels: MAPE heatmap (property x model), R² heatmap, pie chart of how
        often each model was the best one, and a bar chart of average MAPE
        per model sorted ascending.

        Returns:
            DataFrame with one row per (property, model) and columns
            'Property', 'Model', 'MAPE', 'R2'; or None when the trainer holds
            no performance data (a warning is logged).
        """

        if not self.trainer.model_performance:
            logger.warning("No model performance data available for plotting")
            return None

        # Flatten the nested results into one record per (property, model).
        records = []
        for prop, data in self.trainer.model_performance.items():
            for model_name, results in data['all_results'].items():
                records.append({
                    'Property': prop,
                    'Model': model_name,
                    'MAPE': results['test_mape'],
                    'R2': results['test_r2'],
                })

        # Explicit columns keep the frame layout fixed even with no records.
        plot_df = pd.DataFrame(records, columns=['Property', 'Model', 'MAPE', 'R2'])

        plt.figure(figsize=(20, 12))

        # Plot 1: MAPE comparison across properties
        plt.subplot(2, 2, 1)
        pivot_mape = plot_df.pivot(index='Property', columns='Model', values='MAPE')
        sns.heatmap(pivot_mape, annot=True, fmt='.3f', cmap='RdYlBu_r')
        plt.title('MAPE Scores by Property and Model')
        plt.xticks(rotation=45, ha='right')

        # Plot 2: R² comparison across properties (the title previously
        # contained a mis-encoded "²")
        plt.subplot(2, 2, 2)
        pivot_r2 = plot_df.pivot(index='Property', columns='Model', values='R2')
        sns.heatmap(pivot_r2, annot=True, fmt='.3f', cmap='RdYlBu')
        plt.title('R² Scores by Property and Model')
        plt.xticks(rotation=45, ha='right')

        # Plot 3: Best model distribution
        plt.subplot(2, 2, 3)
        best_models = [data['best_model'] for data in self.trainer.model_performance.values()]
        model_counts = pd.Series(best_models).value_counts()
        plt.pie(model_counts.values, labels=model_counts.index, autopct='%1.1f%%')
        plt.title('Distribution of Best Models')

        # Plot 4: Average performance by model type (only MAPE is drawn)
        plt.subplot(2, 2, 4)
        avg_performance = plot_df.groupby('Model')[['MAPE', 'R2']].mean().sort_values('MAPE')

        x = range(len(avg_performance))
        plt.bar(x, avg_performance['MAPE'], alpha=0.7, label='MAPE')
        plt.xlabel('Model')
        plt.ylabel('Average MAPE')
        plt.title('Average Performance by Model Type')
        plt.xticks(x, avg_performance.index, rotation=45, ha='right')
        plt.legend()

        plt.tight_layout()
        plt.show()

        return plot_df
    
    def create_ensemble_models(self, X_dict, y_dict, test_features):
        """Fit a VotingRegressor per property from its best scikit-learn models.

        For each BlendProperty1..10 present in the trainer's results, the
        three models with the lowest test MAPE are taken. Those that are
        scikit-learn estimators are combined in a VotingRegressor, which is
        fitted on the full training data for that property. Properties with
        fewer than two suitable models are skipped.

        Args:
            X_dict: property name -> training feature matrix.
            y_dict: property name -> training target values.
            test_features: property name -> test feature matrix.

        Returns:
            dict mapping property name -> ensemble predictions on the test
            features, containing only properties where an ensemble was built.
        """

        # The emoji replaces mis-encoded characters in the original message.
        logger.info("\n🎯 Creating Ensemble Models")
        ensemble_predictions = {}

        for i in range(1, 11):
            property_name = f'BlendProperty{i}'

            if property_name not in self.trainer.model_performance:
                continue

            logger.info(f"\nCreating ensemble for {property_name}...")

            # Get top 3 models for this property
            all_results = self.trainer.model_performance[property_name]['all_results']
            top_models = sorted(all_results.items(), key=lambda item: item[1]['test_mape'])[:3]

            # VotingRegressor clones its members, which requires the
            # scikit-learn estimator API (get_params). Checking the object
            # rather than looking for 'Net' in the name keeps
            # 'Enhanced_ElasticNet' eligible and keeps out the 'Deep_Dense'
            # network wrapper, which has no get_params.
            ensemble_models = [
                (model_name, results['model'])
                for model_name, results in top_models
                if hasattr(results['model'], 'get_params')
            ]
            model_names = [model_name for model_name, _ in ensemble_models]

            if len(ensemble_models) >= 2:
                # Create voting regressor
                voting_regressor = VotingRegressor(ensemble_models)

                # Train on full dataset
                X = X_dict[property_name]
                y = y_dict[property_name]
                voting_regressor.fit(X, y)

                # Make predictions
                ensemble_pred = voting_regressor.predict(test_features[property_name])
                ensemble_predictions[property_name] = ensemble_pred

                logger.info(f"Ensemble for {property_name}: {model_names}")
                logger.info(f"Prediction range: [{ensemble_pred.min():.4f}, {ensemble_pred.max():.4f}]")

            else:
                logger.info(f"Not enough suitable models for ensemble in {property_name}")

        return ensemble_predictions
    
    def save_model_artifacts(self, final_predictions, ensemble_predictions=None):
        """Write a JSON performance summary and pickle the scikit-learn models.

        Files are written to the current working directory and share one
        timestamp generated inside this call:
        `model_performance_summary_<ts>.json` and, for each property whose
        chosen model is a scikit-learn estimator,
        `model_<property>_<model name>_<ts>.pkl`.

        Args:
            final_predictions: property name -> dict with at least
                'model_name' and 'model'.
            ensemble_predictions: Accepted for interface compatibility; not
                used by this method.

        Returns:
            dict mapping property name -> summary with keys 'best_model',
            'best_mape', 'best_r2' and 'all_models_mape'.
        """

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Cast metrics to plain floats so the JSON encoder never meets a
        # NumPy scalar type it cannot serialize.
        performance_summary = {}
        for prop, data in self.trainer.model_performance.items():
            performance_summary[prop] = {
                'best_model': data['best_model'],
                'best_mape': float(data['best_performance']['test_mape']),
                'best_r2': float(data['best_performance']['test_r2']),
                'all_models_mape': {name: float(results['test_mape'])
                                   for name, results in data['all_results'].items()}
            }

        # Save performance summary as JSON
        with open(f'model_performance_summary_{timestamp}.json', 'w') as f:
            json.dump(performance_summary, f, indent=2)

        # Save individual scikit-learn models. The check is on the object
        # (estimators expose get_params), not on 'Net' in the name: that
        # substring test skipped 'Enhanced_ElasticNet' and tried to pickle
        # the 'Deep_Dense' network wrapper.
        saved_models = {}
        for prop, details in final_predictions.items():
            model_name = details['model_name']
            if hasattr(details['model'], 'get_params'):
                try:
                    model_filename = f'model_{prop}_{model_name}_{timestamp}.pkl'
                    with open(model_filename, 'wb') as f:
                        pickle.dump(details['model'], f)
                    saved_models[prop] = model_filename
                except Exception as e:
                    # Best effort: one unpicklable model must not stop the
                    # remaining models from being saved.
                    logger.warning(f"Could not save model for {prop}: {e}")

        # The emoji replaces mis-encoded characters in the original message.
        logger.info(f"\n💾 Model artifacts saved:")
        logger.info(f"Performance summary: model_performance_summary_{timestamp}.json")
        logger.info(f"Saved {len(saved_models)} traditional ML models")

        return performance_summary

# Run advanced analysis if models have been trained
# At module level locals() is globals(); the guard checks that the training
# cell created `trainer` and stored at least one property's results.
if 'trainer' in locals() and trainer.model_performance:

    # Initialize analyzer
    analyzer = ModelAnalyzer(trainer)

    # Create performance visualizations. The emoji in the strings below
    # replace mis-encoded characters that previously appeared in the output.
    logger.info("\n📊 Creating performance visualizations...")
    plot_data = analyzer.plot_performance_comparison()

    # Create ensemble models if we have trained models
    if 'final_predictions' in locals():
        logger.info("\n🔮 Creating ensemble predictions...")

        # Prepare data dictionaries for ensemble
        X_dict = {}
        y_dict = {}
        test_features_dict = {}

        for i in range(1, 11):
            property_name = f'BlendProperty{i}'
            # Same feature set as the training cell: five component fractions
            # plus each component's value of property i.
            features = ['Component1_fraction', 'Component2_fraction', 'Component3_fraction',
                       'Component4_fraction', 'Component5_fraction'] + \
                      [f'Component{j}_Property{i}' for j in range(1, 6)]

            X_dict[property_name] = df[features]
            y_dict[property_name] = df[property_name]
            test_features_dict[property_name] = test_df_features[features]

        # Create ensemble predictions
        ensemble_predictions = analyzer.create_ensemble_models(X_dict, y_dict, test_features_dict)

        # Create ensemble submission if we have predictions
        if ensemble_predictions:
            ensemble_submission = sample_submission.copy()
            for prop, predictions in ensemble_predictions.items():
                ensemble_submission[prop] = predictions

            # Properties without an ensemble would otherwise keep the
            # placeholder values from the sample submission; use the best
            # single model's predictions for them instead.
            for prop, details in final_predictions.items():
                if prop not in ensemble_predictions:
                    ensemble_submission[prop] = details['predictions']
                    logger.info(f"No ensemble for {prop}; using {details['model_name']} predictions instead")

            # `timestamp` comes from the training cell, so this file name
            # pairs with the enhanced submission written there.
            ensemble_filename = f'ensemble_submission_{timestamp}.csv'
            ensemble_submission.to_csv(ensemble_filename, index=False)
            logger.info(f"Ensemble submission saved: {ensemble_filename}")

        # Save model artifacts
        logger.info("\n💾 Saving model artifacts...")
        performance_summary = analyzer.save_model_artifacts(final_predictions, ensemble_predictions)

        # Print final comprehensive summary
        print(f"\n{'='*140}")
        print(f"COMPREHENSIVE MODEL ANALYSIS COMPLETE")
        print(f"{'='*140}")
        print(f"📊 Performance data plotted and analyzed")
        print(f"🔮 Ensemble models created for applicable properties")
        # NOTE(review): save_model_artifacts stamps its files with its own
        # timestamp, so the value printed here (from the training cell) may
        # not match the artifact file names.
        print(f"💾 Model artifacts saved with timestamp: {timestamp}")
        print(f"📈 Best overall models by property:")

        for prop, summary in performance_summary.items():
            print(f"   {prop:<15}: {summary['best_model']:<20} (MAPE: {summary['best_mape']:.4f})")

        print(f"{'='*140}")

else:
    logger.info("No trained models available for advanced analysis")
    print("Please run the model training cell first to generate models for analysis.")