In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import Ridge, ElasticNet, HuberRegressor, BayesianRidge, Lasso, LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from catboost import CatBoostRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, Matern, WhiteKernel
from scipy.stats import skew, kurtosis
from sklearn.metrics import mean_absolute_percentage_error
try:
    from scikeras.wrappers import KerasRegressor
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout
    KERAS_AVAILABLE = True
except ImportError:
    KERAS_AVAILABLE = False
    print("Warning: TensorFlow/Keras not available. Neural network will be skipped.")
import logging
import time
from datetime import datetime

#########FIX###########
# KFold is imported again here even though the import block at the top of the
# file already brings it in; the repeat is harmless.
from sklearn.model_selection import KFold

# Shuffled 5-fold splitter with a fixed seed so the fold assignment is
# reproducible between runs. An identical splitter is constructed again
# right before the training loop further down.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
# n_splits = 5
#######################

# Load Data
print("Loading data...")
# NOTE(review): both paths are absolute and machine-specific, so this only
# runs unmodified on the original author's machine.
train = pd.read_csv('/Users/MacbookPro/LocalStorage/Developer/ShellAi/dataset/train.csv')
test = pd.read_csv('/Users/MacbookPro/LocalStorage/Developer/ShellAi/dataset/test.csv')

# Breakthrough Feature Engineering
def create_breakthrough_features(df, pca_model=None, scaler=None, fit_transformers=True,
                                 include_weighted_features=False):
    """Build the engineered blend features for a train or test frame.

    The input must contain ``Component{i}_fraction`` (i = 1..5) and
    ``Component{i}_Property{j}`` (i = 1..5, j = 1..10) columns.

    Args:
        df: Source frame. It is NOT modified; the engineered columns are
            attached to a new frame that is returned.
        pca_model: Fitted object exposing ``transform``; required when
            ``fit_transformers`` is False, ignored otherwise.
        scaler: Unused. Kept only so existing call sites keep working.
        fit_transformers: When True a 12-component PCA is fitted on the raw
            property columns; when False ``pca_model`` is reused.
        include_weighted_features: When True, the per-property weighted
            mean/variance, harmonic mean, geometric mean and dominant
            component columns are appended to the returned feature list.
            They are always present in the returned frame; by default the
            feature list is left as it was before this flag existed.

    Returns:
        ``(frame, features, pca)``: the frame with engineered columns, the
        ordered list of model feature names, and the PCA object used.

    Raises:
        ValueError: if ``fit_transformers`` is False and no ``pca_model``
            was supplied.
    """
    if not fit_transformers and pca_model is None:
        raise ValueError("pca_model must be provided when fit_transformers=False")

    component_ids = range(1, 6)
    property_ids = range(1, 11)
    n_pca = 12

    frac_cols = [f'Component{i}_fraction' for i in component_ids]
    prop_features = [f'Component{i}_Property{j}' for i in component_ids for j in property_ids]
    features = frac_cols + prop_features

    row_index = df.index
    frac_block = df[frac_cols]
    fracs = [df[col] for col in frac_cols]
    frac_std = frac_block.std(axis=1)
    frac_mean = frac_block.mean(axis=1)

    # Every new column is collected here and attached with a single concat at
    # the end; inserting hundreds of columns one at a time fragments the
    # frame and triggers pandas' PerformanceWarning.
    new_cols = {}

    def as_series(values):
        # Wrap a plain array so it lines up with the input rows.
        return pd.Series(np.asarray(values), index=row_index)

    # Enhanced interaction features with non-linear transformations
    for i in component_ids:
        frac = df[f'Component{i}_fraction']
        for j in property_ids:
            prop = df[f'Component{i}_Property{j}']
            base = f'frac{i}_prop{j}'
            new_cols[base] = frac * prop
            new_cols[f'{base}_sqrt'] = frac * np.sqrt(np.abs(prop))
            new_cols[f'{base}_log'] = frac * np.log(np.abs(prop) + 1)
            new_cols[f'{base}_square'] = frac * (prop ** 2)
            features.extend([base, f'{base}_sqrt', f'{base}_log', f'{base}_square'])

    # Advanced weighted features with multiple aggregation methods.
    # Position (0..4) of the largest fraction in each row; looked up by row
    # position so frames without a default RangeIndex work too.
    dominant_pos = np.argmax(frac_block.to_numpy(), axis=1)
    row_pos = np.arange(len(df))
    weighted_names = []
    safe_props_by_j = {}
    for j in property_ids:
        prop_cols = [f'Component{i}_Property{j}' for i in component_ids]
        props = [df[col] for col in prop_cols]
        # Clamp to a small positive value so the reciprocal and log are defined.
        safe_props = [np.maximum(p, 1e-6) for p in props]
        safe_props_by_j[j] = safe_props

        weighted_mean = sum(f * p for f, p in zip(fracs, props))
        new_cols[f'weighted_mean_prop{j}'] = weighted_mean
        new_cols[f'weighted_var_prop{j}'] = sum(
            f * (p - weighted_mean) ** 2 for f, p in zip(fracs, props)
        )

        # Harmonic mean (important for fuel properties)
        new_cols[f'harmonic_mean_prop{j}'] = 1 / sum(f / s for f, s in zip(fracs, safe_props))

        # Geometric mean (for multiplicative properties)
        new_cols[f'geometric_mean_prop{j}'] = np.exp(
            sum(f * np.log(s) for f, s in zip(fracs, safe_props))
        )

        # Property value of the component with the largest fraction
        new_cols[f'dominant_prop{j}'] = as_series(df[prop_cols].to_numpy()[row_pos, dominant_pos])

        # Blend balance and diversity. These depend on the fractions only, so
        # the value is the same for every j; the per-property names are kept
        # for compatibility with the original column naming.
        new_cols[f'blend_balance_prop{j}'] = 1 - frac_std
        new_cols[f'blend_diversity_prop{j}'] = frac_std / (frac_mean + 1e-8)

        weighted_names.extend([
            f'weighted_mean_prop{j}', f'weighted_var_prop{j}',
            f'harmonic_mean_prop{j}', f'geometric_mean_prop{j}',
            f'dominant_prop{j}'
        ])

    # Advanced statistics across the five components of each property
    for j in property_ids:
        block = df[[f'Component{i}_Property{j}' for i in component_ids]]
        values = block.to_numpy()
        low = block.min(axis=1)
        high = block.max(axis=1)
        new_cols[f'min_prop{j}'] = low
        new_cols[f'max_prop{j}'] = high
        new_cols[f'mean_prop{j}'] = block.mean(axis=1)
        new_cols[f'std_prop{j}'] = block.std(axis=1)
        new_cols[f'median_prop{j}'] = block.median(axis=1)
        # Vectorised over rows instead of a Python-level apply per row.
        new_cols[f'skew_prop{j}'] = as_series(skew(values, axis=1))
        new_cols[f'kurtosis_prop{j}'] = as_series(kurtosis(values, axis=1))
        new_cols[f'range_prop{j}'] = high - low
        new_cols[f'iqr_prop{j}'] = block.quantile(0.75, axis=1) - block.quantile(0.25, axis=1)

        features.extend([
            f'min_prop{j}', f'max_prop{j}', f'mean_prop{j}',
            f'std_prop{j}', f'median_prop{j}', f'skew_prop{j}', f'kurtosis_prop{j}',
            f'range_prop{j}', f'iqr_prop{j}'
        ])

    # Shell-specific advanced features
    for j in property_ids:
        safe_props = safe_props_by_j[j]

        # RON-like blending (non-linear octane)
        new_cols[f'ron_like_blend_prop{j}'] = sum(
            f * (r ** 1.5) for f, r in zip(fracs, safe_props)
        ) ** (1 / 1.5)

        # Viscosity-like blending (logarithmic)
        new_cols[f'log_visc_blend_prop{j}'] = sum(f * np.log(r) for f, r in zip(fracs, safe_props))

        # Density-like blending (linear, on the clamped property values)
        new_cols[f'density_blend_prop{j}'] = sum(f * r for f, r in zip(fracs, safe_props))

        # Reid vapor pressure-like (exponential)
        new_cols[f'rvp_blend_prop{j}'] = sum(f * np.exp(r / 100) for f, r in zip(fracs, safe_props))

        features.extend([
            f'ron_like_blend_prop{j}', f'log_visc_blend_prop{j}',
            f'density_blend_prop{j}', f'rvp_blend_prop{j}'
        ])

    # Cross-property interactions (pairs among the first six weighted means)
    for j1 in range(1, 6):
        for j2 in range(j1 + 1, 7):
            left = new_cols[f'weighted_mean_prop{j1}']
            right = new_cols[f'weighted_mean_prop{j2}']
            new_cols[f'prop{j1}_prop{j2}_interaction'] = left * right
            new_cols[f'prop{j1}_prop{j2}_ratio'] = left / (right + 1e-8)
            features.extend([f'prop{j1}_prop{j2}_interaction', f'prop{j1}_prop{j2}_ratio'])

    # Enhanced PCA with more components
    if fit_transformers:
        pca = PCA(n_components=n_pca, random_state=42)
        pca_feats = pca.fit_transform(df[prop_features])
    else:
        pca = pca_model
        pca_feats = pca.transform(df[prop_features])

    for k in range(n_pca):
        new_cols[f'pca_prop_{k+1}'] = as_series(pca_feats[:, k])
        features.append(f'pca_prop_{k+1}')

    # Fraction-based advanced features
    frac_values = frac_block.to_numpy()
    new_cols['frac_sum'] = frac_block.sum(axis=1)
    new_cols['frac_std'] = frac_std
    new_cols['frac_skew'] = as_series(skew(frac_values, axis=1))
    new_cols['frac_kurtosis'] = as_series(kurtosis(frac_values, axis=1))
    new_cols['frac_entropy'] = -sum(f * np.log(f + 1e-8) for f in fracs)
    new_cols['frac_gini'] = 1 - sum(f ** 2 for f in fracs)

    features.extend(['frac_sum', 'frac_std', 'frac_skew', 'frac_kurtosis', 'frac_entropy', 'frac_gini'])

    # Opt-in extras go last so the default feature order is a stable prefix.
    if include_weighted_features:
        features.extend(weighted_names)

    engineered = pd.DataFrame(new_cols, index=row_index)
    # If the frame was already processed once, replace the stale columns
    # rather than producing duplicate column names.
    stale = [col for col in engineered.columns if col in df.columns]
    df = pd.concat([df.drop(columns=stale), engineered], axis=1)

    return df, features, pca

# Apply feature engineering. The PCA is fitted on the training frame and
# reused unchanged for the test frame.
print("Creating breakthrough features...")
train, feat_cols, pca_model = create_breakthrough_features(train, fit_transformers=True)
test, _, _ = create_breakthrough_features(test, pca_model=pca_model, fit_transformers=False)

# Prepare Data
TARGETS = [f'BlendProperty{i}' for i in range(1, 11)]
X_train = train[feat_cols]
y_train = train[TARGETS]
X_test = test[feat_cols]

# Handle NaN values (e.g. skew/kurtosis of constant rows) by zero-filling.
print("Handling NaN values...")
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)

# Feature scaling for different models.
# NOTE(review): both scalers and the selector below are fitted on the full
# training set before cross-validation, so validation folds influence the
# preprocessing; confirm this is acceptable for the reported CV scores.
scaler_robust = RobustScaler()
X_train_robust = scaler_robust.fit_transform(X_train)
X_test_robust = scaler_robust.transform(X_test)

scaler_standard = StandardScaler()
X_train_standard = scaler_standard.fit_transform(X_train)
X_test_standard = scaler_standard.transform(X_test)

# Feature selection for some models: keep features whose random-forest
# importance is at or above the median importance.
print("Performing feature selection...")
selector = SelectFromModel(
    RandomForestRegressor(n_estimators=200, random_state=42),
    prefit=False,
    threshold='median'
)

# The selector is fitted against the first target only and the resulting
# subset is reused for every target.
X_train_selected = selector.fit_transform(X_train, y_train.iloc[:, 0])
X_test_selected = selector.transform(X_test)

# Evaluate the support mask once instead of once per feature.
support_mask = selector.get_support()
selected_features = [name for name, keep in zip(feat_cols, support_mask) if keep]
print(f"Original features: {len(feat_cols)}")
print(f"Selected features: {len(selected_features)}")

# Cross-Validation Setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
final_preds = np.zeros((X_test.shape[0], len(TARGETS)))

print("Training Breakthrough Ensemble...")
print(f"Features: {len(feat_cols)} (selected: {len(selected_features)})")

# Define a simple Keras ANN model builder (if available)
if KERAS_AVAILABLE:
    def build_ann(input_dim):
        """Return a compiled 128-64-1 dense regression network.

        Args:
            input_dim: Number of input features per sample.

        Returns:
            A compiled ``Sequential`` model (Adam optimiser, MAE loss).
        """
        # Imported locally so the guarded import block at the top of the
        # file does not need to change.
        from tensorflow.keras.layers import Input

        # An explicit Input layer replaces Dense(..., input_dim=...), which
        # newer Keras versions warn about when used in a Sequential model.
        model = Sequential([
            Input(shape=(input_dim,)),
            Dense(128, activation='relu'),
            Dropout(0.2),
            Dense(64, activation='relu'),
            Dropout(0.1),
            Dense(1, activation='linear'),
        ])
        model.compile(optimizer='adam', loss='mae')
        return model

# Training matrices each base model is fitted on, keyed by view name:
# raw DataFrame, robust-scaled, standard-scaled, or the selected-feature subset.
_DATA_VIEWS = {
    'raw': (X_train, X_test),
    'robust': (X_train_robust, X_test_robust),
    'standard': (X_train_standard, X_test_standard),
    'selected': (X_train_selected, X_test_selected),
}

# Order and labels of the per-model report lines.
_REPORT_ORDER = [
    ('rf', 'Random Forest'),
    ('rf2', 'Random Forest 2'),
    ('et', 'Extra Trees'),
    ('et2', 'Extra Trees 2'),
    ('gb', 'Gradient Boosting'),
    ('ridge', 'Ridge'),
    ('elastic', 'Elastic Net'),
    ('huber', 'Huber'),
    ('gpr', 'Gaussian Process'),
    ('gpr2', 'GPR2 (Custom)'),
    ('svr', 'SVR'),
    ('knn', 'KNeighbors'),
    ('knn2', 'KNeighbors 2'),
    ('br', 'Bayesian Ridge'),
    ('stacking', 'StackingRegressor'),
    ('ann', 'ANN'),
]


def _take_rows(matrix, idx):
    """Select rows by position from either a DataFrame or a NumPy array."""
    return matrix.iloc[idx] if hasattr(matrix, 'iloc') else matrix[idx]


def _build_base_models(fold, n_features):
    """Return this fold's base learners as (key, estimator, data_view) tuples.

    The list order is the order used for the ensemble weight vector.
    ``fold`` seeds every estimator that accepts a random_state (except the
    second Gaussian process, which keeps its fixed seed of 42).
    """
    gpr_kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-2, 1e2)) + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e+1))
    gpr2_kernel = C(1.0, (1e-3, 1e3)) * RBF(length_scale=2.0) + WhiteKernel(noise_level=1e-3)
    stacker = StackingRegressor(
        estimators=[
            ('rf', RandomForestRegressor(n_estimators=200, random_state=fold)),
            ('ridge', Ridge(alpha=0.03, random_state=fold)),
            ('gb', GradientBoostingRegressor(n_estimators=100, random_state=fold)),
            ('et', ExtraTreesRegressor(n_estimators=100, random_state=fold))
        ],
        final_estimator=Ridge(alpha=0.01),
        n_jobs=-1
    )
    models = [
        ('rf', RandomForestRegressor(
            n_estimators=800, max_depth=20, min_samples_split=5,
            min_samples_leaf=2, random_state=fold, n_jobs=-1), 'raw'),
        ('et', ExtraTreesRegressor(
            n_estimators=600, max_depth=18, min_samples_split=3,
            min_samples_leaf=1, random_state=fold, n_jobs=-1), 'raw'),
        ('gb', GradientBoostingRegressor(
            n_estimators=500, learning_rate=0.01, max_depth=6,
            min_samples_split=5, min_samples_leaf=2, random_state=fold), 'raw'),
        ('ridge', Ridge(alpha=0.03, random_state=fold), 'robust'),
        ('elastic', ElasticNet(alpha=0.001, l1_ratio=0.3, random_state=fold, max_iter=2000), 'standard'),
        # max_iter raised from the default of 100: the solver was stopping at
        # the iteration limit on every fold before converging.
        ('huber', HuberRegressor(alpha=0.01, epsilon=1.35, max_iter=1000), 'robust'),
        # Gaussian processes use the selected-feature subset to limit cost.
        ('gpr', GaussianProcessRegressor(
            kernel=gpr_kernel, alpha=1e-6, n_restarts_optimizer=3,
            random_state=fold), 'selected'),
        ('svr', SVR(C=1.0, epsilon=0.1), 'standard'),
        ('knn', KNeighborsRegressor(n_neighbors=5), 'standard'),
        ('br', BayesianRidge(), 'standard'),
        ('rf2', RandomForestRegressor(
            n_estimators=500, max_depth=15, min_samples_split=10,
            min_samples_leaf=4, random_state=fold, n_jobs=-1), 'raw'),
        ('et2', ExtraTreesRegressor(
            n_estimators=400, max_depth=25, min_samples_split=5,
            min_samples_leaf=2, random_state=fold, n_jobs=-1), 'raw'),
        ('knn2', KNeighborsRegressor(n_neighbors=10), 'standard'),
        ('stacking', stacker, 'raw'),
        ('gpr2', GaussianProcessRegressor(
            kernel=gpr2_kernel, n_restarts_optimizer=10,
            random_state=42, alpha=1e-6), 'selected'),
    ]
    if KERAS_AVAILABLE:
        models.append((
            'ann',
            KerasRegressor(build_ann, input_dim=n_features, epochs=30, batch_size=32, verbose=0),
            'standard',
        ))
    return models


n_splits = kf.get_n_splits()

for i, target in enumerate(TARGETS):
    print(f"\nTraining for {target}...")
    y_target = y_train[target]

    # Out-of-fold and fold-averaged test predictions, one entry per model key.
    # Insertion order follows _build_base_models, i.e. the ensemble order.
    oof_preds = {}
    test_preds = {}

    for fold, (tr_idx, val_idx) in enumerate(kf.split(X_train)):
        for key, model, view in _build_base_models(fold, X_train_standard.shape[1]):
            fit_matrix, test_matrix = _DATA_VIEWS[view]
            if key not in oof_preds:
                oof_preds[key] = np.zeros(X_train.shape[0])
                test_preds[key] = np.zeros(X_test.shape[0])

            model.fit(_take_rows(fit_matrix, tr_idx), y_target.iloc[tr_idx])
            oof_preds[key][val_idx] = model.predict(_take_rows(fit_matrix, val_idx))
            # Each fold contributes an equal share of the test prediction.
            test_preds[key] += model.predict(test_matrix) / n_splits

    # Individual model MAPE on the out-of-fold predictions
    model_keys = list(oof_preds)
    mape_scores = [mean_absolute_percentage_error(y_target, oof_preds[key]) for key in model_keys]
    test_preds_list = [test_preds[key] for key in model_keys]
    oof_preds_list = [oof_preds[key] for key in model_keys]

    # Advanced ensemble: exponential weighting based on validation performance.
    # Subtracting the best score first leaves the normalised weights unchanged
    # but keeps the best model's weight at exp(0) = 1, so the weights can never
    # all underflow to zero when every MAPE is large.
    best_score = min(mape_scores)
    weights = [np.exp(-(score - best_score) * 10) for score in mape_scores]
    total_weight = sum(weights)
    weights = [w / total_weight for w in weights]

    # Final predictions
    final_preds[:, i] = sum(w * pred for w, pred in zip(weights, test_preds_list))

    # Ensemble validation score
    ensemble_oof = sum(w * pred for w, pred in zip(weights, oof_preds_list))
    ensemble_mape = mean_absolute_percentage_error(y_target, ensemble_oof)

    # Report by model key instead of by hard-coded list position.
    mape_by_key = dict(zip(model_keys, mape_scores))
    weight_by_key = dict(zip(model_keys, weights))
    for key, label in _REPORT_ORDER:
        if key in mape_by_key:
            print(f"{label} MAPE: {mape_by_key[key]:.4f} (weight: {weight_by_key[key]:.3f})")
    print(f"Ensemble MAPE: {ensemble_mape:.4f}")

# Create Submission: one row per test sample, an ID column followed by the
# ten predicted blend properties. IDs come from the test frame when it has
# an 'ID' column, otherwise they are numbered from 1.
test_ids = test.get('ID', np.arange(1, len(test) + 1))
submission = pd.DataFrame(final_preds, columns=TARGETS)
submission.insert(0, 'ID', test_ids)
submission.to_csv('submission_breakthrough_mega_ensemble_fixed.csv', index=False)

print("\nSubmission file created: submission_breakthrough_mega_ensemble_fixed.csv")
print("\nBreakthrough Mega Ensemble Summary:")

# The neural network only counts when Keras could be imported.
model_count = 15 + int(KERAS_AVAILABLE)
model_list = "Random Forest (x2), Extra Trees (x2), Gradient Boosting, Ridge, Elastic Net, Huber, Gaussian Process (x2), SVR, KNeighbors (x2), Bayesian Ridge, Stacking Regressor"
model_list = model_list + ", Neural Network" if KERAS_AVAILABLE else model_list

for summary_line in (
    f"Features: {len(feat_cols)} (selected: {len(selected_features)})",
    "Cross-validation: 5-fold",
    f"Models ({model_count}): {model_list}",
    "Ensemble: Exponential weighting based on validation performance",
    "Scaling: Robust and Standard scaling for different models",
    "Target: 90+ score with breakthrough features and mega ensemble",
):
    print(summary_line)

Loading data...
Creating breakthrough features...


  df[f'frac{i}_prop{j}_square'] = df[f'Component{i}_fraction'] * (df[f'Component{i}_Property{j}'] ** 2)
  df[f'frac{i}_prop{j}'] = df[f'Component{i}_fraction'] * df[f'Component{i}_Property{j}']
  df[f'frac{i}_prop{j}_sqrt'] = df[f'Component{i}_fraction'] * np.sqrt(np.abs(df[f'Component{i}_Property{j}']))
  df[f'frac{i}_prop{j}_log'] = df[f'Component{i}_fraction'] * np.log(np.abs(df[f'Component{i}_Property{j}']) + 1)
  df[f'frac{i}_prop{j}_square'] = df[f'Component{i}_fraction'] * (df[f'Component{i}_Property{j}'] ** 2)
  df[f'frac{i}_prop{j}'] = df[f'Component{i}_fraction'] * df[f'Component{i}_Property{j}']
  df[f'frac{i}_prop{j}_sqrt'] = df[f'Component{i}_fraction'] * np.sqrt(np.abs(df[f'Component{i}_Property{j}']))
  df[f'frac{i}_prop{j}_log'] = df[f'Component{i}_fraction'] * np.log(np.abs(df[f'Component{i}_Property{j}']) + 1)
  df[f'frac{i}_prop{j}_square'] = df[f'Component{i}_fraction'] * (df[f'Component{i}_Property{j}'] ** 2)
  df[f'frac{i}_prop{j}'] = df[f'Component{i}_fraction'] 

Handling NaN values...
Handling NaN values...
Performing feature selection...
Original features: 433
Selected features: 217
Training Breakthrough Ensemble...
Features: 433 (selected: 217)

Training for BlendProperty1...
Original features: 433
Selected features: 217
Training Breakthrough Ensemble...
Features: 433 (selected: 217)

Training for BlendProperty1...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  

Random Forest MAPE: 6.5194 (weight: 0.000)
Random Forest 2 MAPE: 6.7153 (weight: 0.000)
Extra Trees MAPE: 7.7260 (weight: 0.000)
Extra Trees 2 MAPE: 6.6252 (weight: 0.000)
Gradient Boosting MAPE: 2.5464 (weight: 0.000)
Ridge MAPE: 0.0402 (weight: 0.276)
Elastic Net MAPE: 0.0269 (weight: 0.316)
Huber MAPE: 2.8509 (weight: 0.000)
Gaussian Process MAPE: 1.9927 (weight: 0.000)
GPR2 (Custom) MAPE: 1.6634 (weight: 0.000)
SVR MAPE: 1.4575 (weight: 0.000)
KNeighbors MAPE: 4.9352 (weight: 0.000)
KNeighbors 2 MAPE: 4.6202 (weight: 0.000)
Bayesian Ridge MAPE: 0.0015 (weight: 0.407)
StackingRegressor MAPE: 0.5394 (weight: 0.002)
ANN MAPE: 6.2663 (weight: 0.000)
Ensemble MAPE: 0.0149

Training for BlendProperty2...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  

Random Forest MAPE: 1.7459 (weight: 0.000)
Random Forest 2 MAPE: 1.7552 (weight: 0.000)
Extra Trees MAPE: 1.5134 (weight: 0.000)
Extra Trees 2 MAPE: 1.5091 (weight: 0.000)
Gradient Boosting MAPE: 1.2271 (weight: 0.000)
Ridge MAPE: 0.2432 (weight: 0.245)
Elastic Net MAPE: 0.2200 (weight: 0.309)
Huber MAPE: 2.0006 (weight: 0.000)
Gaussian Process MAPE: 1.4230 (weight: 0.000)
GPR2 (Custom) MAPE: 2.0062 (weight: 0.000)
SVR MAPE: 1.0374 (weight: 0.000)
KNeighbors MAPE: 2.2993 (weight: 0.000)
KNeighbors 2 MAPE: 1.8629 (weight: 0.000)
Bayesian Ridge MAPE: 0.2256 (weight: 0.292)
StackingRegressor MAPE: 0.2904 (weight: 0.153)
ANN MAPE: 0.9952 (weight: 0.000)
Ensemble MAPE: 0.2226

Training for BlendProperty3...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  

Random Forest MAPE: 1.7066 (weight: 0.000)
Random Forest 2 MAPE: 1.7131 (weight: 0.000)
Extra Trees MAPE: 1.4699 (weight: 0.002)
Extra Trees 2 MAPE: 1.5651 (weight: 0.001)
Gradient Boosting MAPE: 1.3260 (weight: 0.009)
Ridge MAPE: 1.1278 (weight: 0.062)
Elastic Net MAPE: 0.9481 (weight: 0.372)
Huber MAPE: 1.8455 (weight: 0.000)
Gaussian Process MAPE: 2.7593 (weight: 0.000)
GPR2 (Custom) MAPE: 1.9424 (weight: 0.000)
SVR MAPE: 1.4808 (weight: 0.002)
KNeighbors MAPE: 2.4926 (weight: 0.000)
KNeighbors 2 MAPE: 2.0600 (weight: 0.000)
Bayesian Ridge MAPE: 1.0191 (weight: 0.183)
StackingRegressor MAPE: 0.9540 (weight: 0.351)
ANN MAPE: 1.2466 (weight: 0.019)
Ensemble MAPE: 0.8889

Training for BlendProperty4...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
