In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler, StandardScaler, PolynomialFeatures, PowerTransformer
from sklearn.feature_selection import SelectFromModel, RFE, SelectKBest, f_regression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.linear_model import Ridge, ElasticNet, HuberRegressor, Lasso, BayesianRidge
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from lightgbm import LGBMRegressor, early_stopping, log_evaluation
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from scipy.stats import skew, kurtosis, boxcox
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score
import optuna
from optuna.samplers import TPESampler
import warnings
warnings.filterwarnings('ignore')

# For neural networks
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.base import BaseEstimator, RegressorMixin

# --- Cross-validation setup ---------------------------------------------
from sklearn.model_selection import KFold

# Module-level 5-fold splitter; shuffling with a fixed seed keeps the fold
# assignment reproducible between runs.
kf = KFold(random_state=42, shuffle=True, n_splits=5)
# -------------------------------------------------------------------------

# Read the training and test tables from the working directory.
print("Loading data...")
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Enhanced Oil Properties Feature Engineering
def create_enhanced_oil_features(df, pca_model=None, poly_features=None, power_transformer=None,
                                 fit_transformers=True):
    """
    Create advanced features specific to oil blending and properties.

    New columns are written into ``df`` in place, and ``df`` is also returned.
    The frame must contain ``Component{i}_fraction`` (i = 1..5) and
    ``Component{i}_Property{j}`` (i = 1..5, j = 1..10).

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; mutated in place.
    pca_model : fitted PCA, optional
        Required when ``fit_transformers`` is False; ignored otherwise.
    poly_features : fitted PolynomialFeatures, optional
        Used when ``fit_transformers`` is False. ``None`` skips the
        polynomial features.
    power_transformer : fitted PowerTransformer, optional
        Used when ``fit_transformers`` is False. ``None`` skips the power
        transformed features. It must have been fitted on a DataFrame so that
        ``feature_names_in_`` records which columns it expects.
    fit_transformers : bool, default True
        If True, fit new PCA / polynomial / power transformers on ``df``.
        If False, apply the transformers passed in.

    Returns
    -------
    (df, features, pca, poly, pt)
        The frame, the list of feature column names, and the three
        transformers (``poly`` / ``pt`` may be ``None``).

    Raises
    ------
    TypeError
        If ``pca_model`` does not look like a fitted PCA (no ``components_``),
        e.g. because transformers were passed positionally in the wrong order.
    ValueError
        If ``fit_transformers`` is False and ``pca_model`` is missing, or the
        power transformer does not expose ``feature_names_in_``.

    Notes
    -----
    Several helper columns are added to ``df`` but deliberately left out of
    ``features``, exactly as before: the per-component ``viscosity_index`` /
    ``density_est`` / ``octane_est`` composites and the per-property
    ``weighted_mean`` / ``weighted_var`` / ``harmonic_mean`` /
    ``geometric_mean`` / ``dominant`` / ``blend_balance`` / ``blend_diversity``
    columns.
    """
    # Guard against the transformers being passed positionally in a different
    # order: PolynomialFeatures / PowerTransformer have no `components_`.
    if pca_model is not None and not hasattr(pca_model, 'components_'):
        raise TypeError(
            "pca_model must be a fitted PCA-like object exposing `components_`; "
            f"got {type(pca_model).__name__}"
        )

    components = range(1, 6)
    properties = range(1, 11)
    frac_cols = [f'Component{i}_fraction' for i in components]
    prop_features = [f'Component{i}_Property{j}' for i in components for j in properties]
    features = frac_cols + prop_features

    # Oil-specific domain knowledge features
    print("Creating oil-specific features...")

    # Fixed-weight composites per component. The groupings (properties 1-3,
    # 4-6, 7-10) and their labels are heuristic choices, not derived from data.
    composite_weights = {
        'viscosity_index': {1: 0.4, 2: 0.4, 3: 0.2},
        'density_est': {4: 0.5, 5: 0.3, 6: 0.2},
        'octane_est': {7: 0.3, 8: 0.3, 9: 0.2, 10: 0.2},
    }
    for composite, weights in composite_weights.items():
        for i in components:
            df[f'Component{i}_{composite}'] = sum(
                df[f'Component{i}_Property{j}'] * w for j, w in weights.items()
            )

    # Fraction x (non-linear transform of property) interactions
    for i in components:
        frac = df[f'Component{i}_fraction']
        for j in properties:
            prop = df[f'Component{i}_Property{j}']
            base = f'frac{i}_prop{j}'
            interactions = {
                base: frac * prop,
                f'{base}_sqrt': frac * np.sqrt(np.abs(prop)),
                f'{base}_log': frac * np.log(np.abs(prop) + 1),
                f'{base}_square': frac * (prop ** 2),
                f'{base}_cube': frac * (prop ** 3),
                f'{base}_exp': frac * np.exp(prop / 100),
                f'{base}_inv': frac / (np.abs(prop) + 1e-8),
            }
            for col, values in interactions.items():
                df[col] = values
            features.extend(interactions)

    # Quantities that depend only on the fractions are computed once.
    fractions = [df[c] for c in frac_cols]
    frac_matrix = df[frac_cols].to_numpy()
    frac_std = df[frac_cols].std(axis=1)
    frac_mean = df[frac_cols].mean(axis=1)
    # Position (0..4) of the largest fraction in each row. This is positional,
    # so it works for any DataFrame index, not only a default RangeIndex.
    dominant_pos = frac_matrix.argmax(axis=1)
    row_pos = np.arange(len(df))

    # Per-property aggregations across the five components
    for j in properties:
        prop_cols = [f'Component{i}_Property{j}' for i in components]
        props = [df[c] for c in prop_cols]
        prop_matrix = df[prop_cols].to_numpy()
        # Clamp to a small positive value so 1/x and log(x) are defined.
        safe_props = [np.maximum(p, 1e-6) for p in props]

        weighted_mean = sum(f * p for f, p in zip(fractions, props))
        df[f'weighted_mean_prop{j}'] = weighted_mean
        df[f'weighted_var_prop{j}'] = sum(
            f * (p - weighted_mean) ** 2 for f, p in zip(fractions, props)
        )

        # Fraction-weighted harmonic and geometric means of the clamped values
        df[f'harmonic_mean_prop{j}'] = 1 / sum(f / sp for f, sp in zip(fractions, safe_props))
        df[f'geometric_mean_prop{j}'] = np.exp(
            sum(f * np.log(sp) for f, sp in zip(fractions, safe_props))
        )

        # Property value of the component with the largest fraction
        df[f'dominant_prop{j}'] = prop_matrix[row_pos, dominant_pos]

        # These two depend only on the fractions, so they are identical for
        # every j; the per-property column names are kept for compatibility.
        df[f'blend_balance_prop{j}'] = 1 - frac_std
        df[f'blend_diversity_prop{j}'] = frac_std / (frac_mean + 1e-8)

        # Row-wise descriptive statistics over the five component values
        df[f'min_prop{j}'] = df[prop_cols].min(axis=1)
        df[f'max_prop{j}'] = df[prop_cols].max(axis=1)
        df[f'mean_prop{j}'] = df[prop_cols].mean(axis=1)
        df[f'std_prop{j}'] = df[prop_cols].std(axis=1)
        df[f'median_prop{j}'] = df[prop_cols].median(axis=1)
        df[f'skew_prop{j}'] = skew(prop_matrix, axis=1)
        df[f'kurtosis_prop{j}'] = kurtosis(prop_matrix, axis=1)
        df[f'range_prop{j}'] = df[f'max_prop{j}'] - df[f'min_prop{j}']
        df[f'iqr_prop{j}'] = df[prop_cols].quantile(0.75, axis=1) - df[prop_cols].quantile(0.25, axis=1)

        features.extend([
            f'min_prop{j}', f'max_prop{j}', f'mean_prop{j}',
            f'std_prop{j}', f'median_prop{j}', f'skew_prop{j}', f'kurtosis_prop{j}',
            f'range_prop{j}', f'iqr_prop{j}'
        ])

    # Alternative non-linear blending rules; the names are heuristic labels.
    for j in properties:
        safe_props = [np.maximum(df[f'Component{i}_Property{j}'], 1e-6) for i in components]
        pairs = list(zip(fractions, safe_props))

        blends = {
            f'ron_like_blend_prop{j}': sum(f * (r ** 1.5) for f, r in pairs) ** (1 / 1.5),
            f'log_visc_blend_prop{j}': sum(f * np.log(r) for f, r in pairs),
            f'density_blend_prop{j}': sum(f * r for f, r in pairs),
            f'rvp_blend_prop{j}': sum(f * np.exp(r / 100) for f, r in pairs),
            f'cetane_like_blend_prop{j}': sum(f * np.sqrt(r) for f, r in pairs),
            f'flash_like_blend_prop{j}': sum(f * (r ** 0.7) for f, r in pairs),
        }
        for col, values in blends.items():
            df[col] = values
        features.extend(blends)

    # Pairwise interactions between the weighted-mean properties
    for j1 in properties:
        wm1 = df[f'weighted_mean_prop{j1}']
        for j2 in range(j1 + 1, 11):
            wm2 = df[f'weighted_mean_prop{j2}']
            pair = f'prop{j1}_prop{j2}'
            df[f'{pair}_interaction'] = wm1 * wm2
            df[f'{pair}_ratio'] = wm1 / (wm2 + 1e-8)
            df[f'{pair}_diff'] = wm1 - wm2
            # NOTE(review): only the second reciprocal is epsilon-guarded, so
            # this is inf/NaN when wm1 is exactly 0. Left unchanged to keep
            # feature values identical.
            df[f'{pair}_harmonic'] = 2 / (1 / wm1 + 1 / (wm2 + 1e-8))
            features.extend([
                f'{pair}_interaction', f'{pair}_ratio',
                f'{pair}_diff', f'{pair}_harmonic'
            ])

    # PCA over the 50 raw property columns
    if fit_transformers:
        pca = PCA(n_components=15, random_state=42)
        pca_feats = pca.fit_transform(df[prop_features])
    else:
        if pca_model is None:
            raise ValueError("pca_model is required when fit_transformers=False")
        pca = pca_model
        pca_feats = pca.transform(df[prop_features])

    for k in range(pca_feats.shape[1]):
        df[f'pca_prop_{k+1}'] = pca_feats[:, k]
        features.append(f'pca_prop_{k+1}')

    # Degree-2 interaction terms between fractions and weighted means
    key_features = frac_cols + [f'weighted_mean_prop{j}' for j in properties]
    poly_feats = None
    if fit_transformers:
        poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
        poly_feats = poly.fit_transform(df[key_features])
    else:
        poly = poly_features
        if poly is not None:
            poly_feats = poly.transform(df[key_features])

    if poly_feats is not None:
        poly_feature_names = poly.get_feature_names_out(key_features)
        for idx, name in enumerate(poly_feature_names):
            # Keep every term that involves a weighted mean (this includes the
            # degree-1 weighted-mean terms themselves).
            if 'weighted_mean_prop' in name:
                df[f'poly_{name}'] = poly_feats[:, idx]
                features.append(f'poly_{name}')

    # Yeo-Johnson transform of the skewed raw property columns
    pt = None
    transformed_feats = None
    skewed_features = []
    if fit_transformers:
        skewed_features = [
            col for col in prop_features if abs(skew(df[col].dropna())) > 0.5
        ]
        if skewed_features:
            pt = PowerTransformer(method='yeo-johnson')
            transformed_feats = pt.fit_transform(df[skewed_features])
    else:
        pt = power_transformer
        if pt is not None:
            # Reuse exactly the columns the transformer was fitted on. Which
            # columns are skewed must not be re-decided on the new data, or
            # the column count/order could differ from the fit.
            fitted_cols = getattr(pt, 'feature_names_in_', None)
            if fitted_cols is None:
                raise ValueError(
                    "power_transformer must be fitted on a DataFrame so that "
                    "`feature_names_in_` is available"
                )
            skewed_features = list(fitted_cols)
            transformed_feats = pt.transform(df[skewed_features])

    if transformed_feats is not None:
        for idx, col in enumerate(skewed_features):
            df[f'power_transform_{col}'] = transformed_feats[:, idx]
            features.append(f'power_transform_{col}')

    # Summary statistics of the blend fractions
    df['frac_sum'] = df[frac_cols].sum(axis=1)
    df['frac_std'] = frac_std
    df['frac_skew'] = skew(frac_matrix, axis=1)
    df['frac_kurtosis'] = kurtosis(frac_matrix, axis=1)
    df['frac_entropy'] = -sum(f * np.log(f + 1e-8) for f in fractions)
    df['frac_gini'] = 1 - sum(f ** 2 for f in fractions)
    df['frac_max'] = df[frac_cols].max(axis=1)
    df['frac_min'] = df[frac_cols].min(axis=1)
    df['frac_range'] = df['frac_max'] - df['frac_min']

    features.extend(['frac_sum', 'frac_std', 'frac_skew', 'frac_kurtosis', 'frac_entropy', 'frac_gini',
                     'frac_max', 'frac_min', 'frac_range'])

    return df, features, pca, poly, pt

# Custom Neural Network Regressor
class KerasRegressor(BaseEstimator, RegressorMixin):
    """Dense Keras network exposed through the scikit-learn ``fit``/``predict`` interface.

    The network is a stack of Dense -> BatchNormalization -> Dropout blocks
    followed by a single linear output unit. It is built inside ``fit``.
    """

    def __init__(self, input_dim=100, hidden_layers=3, neurons=128, dropout=0.3, learning_rate=0.001, epochs=100):
        # Hyper-parameters are stored under their own names.
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.dropout = dropout
        self.neurons = neurons
        self.hidden_layers = hidden_layers
        self.input_dim = input_dim
        # Set by fit(); None until then.
        self.model = None

    def build_model(self):
        """Build and compile a fresh network from the current hyper-parameters."""
        net = Sequential()

        def append_block(dense_layer):
            # One block = dense layer, then batch norm, then dropout.
            net.add(dense_layer)
            net.add(BatchNormalization())
            net.add(Dropout(self.dropout))

        # The first block carries the input dimension and is always added;
        # the remaining hidden_layers - 1 blocks follow.
        append_block(Dense(self.neurons, input_dim=self.input_dim, activation='relu'))
        remaining_blocks = self.hidden_layers - 1
        for _ in range(remaining_blocks):
            append_block(Dense(self.neurons, activation='relu'))

        net.add(Dense(1, activation='linear'))
        optimizer = Adam(learning_rate=self.learning_rate)
        net.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
        return net

    def fit(self, X, y):
        """Train a new network on ``X``/``y`` and return ``self``.

        ``input_dim`` is overwritten with the number of columns in ``X``
        before the network is built.
        """
        self.input_dim = X.shape[1]
        self.model = self.build_model()

        # Both callbacks watch the validation loss computed on the
        # validation_split passed to Keras below.
        training_callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6),
        ]

        self.model.fit(
            X,
            y,
            epochs=self.epochs,
            batch_size=32,
            validation_split=0.2,
            callbacks=training_callbacks,
            verbose=0,
        )
        return self

    def predict(self, X):
        """Return the network output for ``X`` flattened to one dimension."""
        raw_output = self.model.predict(X, verbose=0)
        return raw_output.flatten()

# Apply enhanced feature engineering
print("Creating enhanced oil-specific features...")

# Training frame: request freshly fitted transformers and keep them.
(train, feat_cols,
 pca_model, poly_model, pt_model) = create_enhanced_oil_features(train, fit_transformers=True)

# Test frame: pass the transformers obtained above back in instead of refitting.
# NOTE(review): they are passed positionally, so their order must match the
# parameter order of create_enhanced_oil_features - confirm against its definition.
test, _, _, _, _ = create_enhanced_oil_features(
    test, pca_model, poly_model, pt_model, fit_transformers=False
)

print(f"Created {len(feat_cols)} features for oil property prediction")

Loading data...
Creating breakthrough features...


  df[f'frac{i}_prop{j}_square'] = df[f'Component{i}_fraction'] * (df[f'Component{i}_Property{j}'] ** 2)
  df[f'frac{i}_prop{j}'] = df[f'Component{i}_fraction'] * df[f'Component{i}_Property{j}']
  df[f'frac{i}_prop{j}_sqrt'] = df[f'Component{i}_fraction'] * np.sqrt(np.abs(df[f'Component{i}_Property{j}']))
  df[f'frac{i}_prop{j}_log'] = df[f'Component{i}_fraction'] * np.log(np.abs(df[f'Component{i}_Property{j}']) + 1)
  df[f'frac{i}_prop{j}_square'] = df[f'Component{i}_fraction'] * (df[f'Component{i}_Property{j}'] ** 2)
  df[f'frac{i}_prop{j}'] = df[f'Component{i}_fraction'] * df[f'Component{i}_Property{j}']
  df[f'frac{i}_prop{j}_sqrt'] = df[f'Component{i}_fraction'] * np.sqrt(np.abs(df[f'Component{i}_Property{j}']))
  df[f'frac{i}_prop{j}_log'] = df[f'Component{i}_fraction'] * np.log(np.abs(df[f'Component{i}_Property{j}']) + 1)
  df[f'frac{i}_prop{j}_square'] = df[f'Component{i}_fraction'] * (df[f'Component{i}_Property{j}'] ** 2)
  df[f'frac{i}_prop{j}'] = df[f'Component{i}_fraction'] 

Handling NaN values...
Handling NaN values...
Performing feature selection...
Original features: 433
Selected features: 227
Training Breakthrough Ensemble...
Features: 433 (selected: 227)

Training for BlendProperty1...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.658822
[400]	valid_0's l1: 0.555859
[600]	valid_0's l1: 0.481634
[800]	valid_0's l1: 0.426586
[1000]	valid_0's l1: 0.383944
[1200]	valid_0's l1: 0.350733
[1400]	valid_0's l1: 0.325188
[1600]	valid_0's l1: 0.304542
[1800]	valid_0's l1: 0.287788
[2000]	valid_0's l1: 0.274215
[2200]	valid_0's l1: 0.2629
[2400]	valid_0's l1: 0.253431
[2600]	valid_0's l1: 0.245593
[2800]	valid_0's l1: 0.238928
[3000]	valid_0's l1: 0.233089
[3200]	valid_0's l1: 0.228454
[3400]	valid_0's l1: 0.224477
[3600]	valid_0's l1: 0.221075
[3800]	valid_0's l1: 0.218047
[4000]	valid_0's l1: 0.215134
[4200]	valid_0's l1: 0.212573
[4400]	valid_0's l1: 0.210522
[4600]	valid_0's l1: 0.207972
[4800]	valid_0's l1: 0.205602
[500

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.660942
[400]	valid_0's l1: 0.558553
[600]	valid_0's l1: 0.480034
[800]	valid_0's l1: 0.417343
[1000]	valid_0's l1: 0.370788
[1200]	valid_0's l1: 0.335246
[1400]	valid_0's l1: 0.307613
[1600]	valid_0's l1: 0.285916
[1800]	valid_0's l1: 0.268543
[2000]	valid_0's l1: 0.255056
[2200]	valid_0's l1: 0.24484
[2400]	valid_0's l1: 0.236367
[2600]	valid_0's l1: 0.229232
[2800]	valid_0's l1: 0.223278
[3000]	valid_0's l1: 0.218231
[3200]	valid_0's l1: 0.214045
[3400]	valid_0's l1: 0.210303
[3600]	valid_0's l1: 0.207113
[3800]	valid_0's l1: 0.204117
[4000]	valid_0's l1: 0.201305
[4200]	valid_0's l1: 0.198914
[4400]	valid_0's l1: 0.1969
[4600]	valid_0's l1: 0.194861
[4800]	valid_0's l1: 0.193161
[5000]	valid_0's l1: 0.191085
[5200]	valid_0's l1: 0.18947
[5400]	valid_0's l1: 0.188112
[5600]	valid_0's l1: 0.187045
[5800]	valid_0's l1: 0.185829
[6000]	valid_0's l1: 0.18466
[6200]	valid_0's l1: 0.183463
[6400]	valid_0's 

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.648012
[400]	valid_0's l1: 0.544679
[600]	valid_0's l1: 0.469677
[800]	valid_0's l1: 0.413261
[1000]	valid_0's l1: 0.36942
[1200]	valid_0's l1: 0.335057
[1400]	valid_0's l1: 0.308407
[1600]	valid_0's l1: 0.287026
[1800]	valid_0's l1: 0.269679
[2000]	valid_0's l1: 0.256134
[2200]	valid_0's l1: 0.244856
[2400]	valid_0's l1: 0.235281
[2600]	valid_0's l1: 0.227239
[2800]	valid_0's l1: 0.2204
[3000]	valid_0's l1: 0.214932
[3200]	valid_0's l1: 0.21042
[3400]	valid_0's l1: 0.206518
[3600]	valid_0's l1: 0.203244
[3800]	valid_0's l1: 0.20057
[4000]	valid_0's l1: 0.197554
[4200]	valid_0's l1: 0.194402
[4400]	valid_0's l1: 0.192159
[4600]	valid_0's l1: 0.190085
[4800]	valid_0's l1: 0.188226
[5000]	valid_0's l1: 0.186321
[5200]	valid_0's l1: 0.184659
[5400]	valid_0's l1: 0.183415
[5600]	valid_0's l1: 0.182489
[5800]	valid_0's l1: 0.181362
[6000]	valid_0's l1: 0.17945
[6200]	valid_0's l1: 0.177482
[6400]	valid_0's l

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.712716
[400]	valid_0's l1: 0.607226
[600]	valid_0's l1: 0.530774
[800]	valid_0's l1: 0.472251
[1000]	valid_0's l1: 0.425682
[1200]	valid_0's l1: 0.390274
[1400]	valid_0's l1: 0.362332
[1600]	valid_0's l1: 0.340518
[1800]	valid_0's l1: 0.321997
[2000]	valid_0's l1: 0.306671
[2200]	valid_0's l1: 0.293699
[2400]	valid_0's l1: 0.282756
[2600]	valid_0's l1: 0.273584
[2800]	valid_0's l1: 0.265954
[3000]	valid_0's l1: 0.259879
[3200]	valid_0's l1: 0.254421
[3400]	valid_0's l1: 0.249973
[3600]	valid_0's l1: 0.245513
[3800]	valid_0's l1: 0.241097
[4000]	valid_0's l1: 0.237331
[4200]	valid_0's l1: 0.234615
[4400]	valid_0's l1: 0.231754
[4600]	valid_0's l1: 0.229085
[4800]	valid_0's l1: 0.226868
[5000]	valid_0's l1: 0.225154
[5200]	valid_0's l1: 0.22357
[5400]	valid_0's l1: 0.222055
[5600]	valid_0's l1: 0.220208
[5800]	valid_0's l1: 0.217375
[6000]	valid_0's l1: 0.214984
[6200]	valid_0's l1: 0.213422
[6400]	valid_

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.709438
[400]	valid_0's l1: 0.597026
[600]	valid_0's l1: 0.515192
[800]	valid_0's l1: 0.453945
[1000]	valid_0's l1: 0.405469
[1200]	valid_0's l1: 0.368286
[1400]	valid_0's l1: 0.339386
[1600]	valid_0's l1: 0.317356
[1800]	valid_0's l1: 0.299241
[2000]	valid_0's l1: 0.284846
[2200]	valid_0's l1: 0.272585
[2400]	valid_0's l1: 0.262263
[2600]	valid_0's l1: 0.253795
[2800]	valid_0's l1: 0.246671
[3000]	valid_0's l1: 0.240753
[3200]	valid_0's l1: 0.235564
[3400]	valid_0's l1: 0.231049
[3600]	valid_0's l1: 0.227361
[3800]	valid_0's l1: 0.224279
[4000]	valid_0's l1: 0.221627
[4200]	valid_0's l1: 0.218024
[4400]	valid_0's l1: 0.214724
[4600]	valid_0's l1: 0.211932
[4800]	valid_0's l1: 0.209414
[5000]	valid_0's l1: 0.207498
[5200]	valid_0's l1: 0.205717
[5400]	valid_0's l1: 0.203958
[5600]	valid_0's l1: 0.202707
[5800]	valid_0's l1: 0.201553
[6000]	valid_0's l1: 0.200567
[6200]	valid_0's l1: 0.199652
[6400]	valid

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 1.4942 (weight: 0.000)
Random Forest MAPE: 6.5267 (weight: 0.000)
Extra Trees MAPE: 7.5550 (weight: 0.000)
Gradient Boosting MAPE: 2.5302 (weight: 0.000)
Ridge MAPE: 0.0402 (weight: 0.467)
Elastic Net MAPE: 0.0269 (weight: 0.533)
Huber MAPE: 2.6134 (weight: 0.000)
Ensemble MAPE: 0.0227

Training for BlendProperty2...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.651718
[400]	valid_0's l1: 0.548617
[600]	valid_0's l1: 0.471194
[800]	valid_0's l1: 0.411585
[1000]	valid_0's l1: 0.365949
[1200]	valid_0's l1: 0.331945
[1400]	valid_0's l1: 0.305387
[1600]	valid_0's l1: 0.284471
[1800]	valid_0's l1: 0.266589
[2000]	valid_0's l1: 0.25287
[2200]	valid_0's l1: 0.242125
[2400]	valid_0's l1: 0.233402
[2600]	valid_0's l1: 0.226293
[2800]	valid_0's l1: 0.220329
[3000]	valid_0's l1: 0.214876
[3200]	valid_0's l1: 0.210475
[3400]	valid_0's l1: 0.206732
[3600]	valid_0's l1: 0.203553
[3800]	valid_0's l1: 0.200407
[4000]	valid_0's l1: 0.197539
[4200]	va

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.687827
[400]	valid_0's l1: 0.580542
[600]	valid_0's l1: 0.499776
[800]	valid_0's l1: 0.437794
[1000]	valid_0's l1: 0.389428
[1200]	valid_0's l1: 0.351115
[1400]	valid_0's l1: 0.32161
[1600]	valid_0's l1: 0.298017
[1800]	valid_0's l1: 0.278932
[2000]	valid_0's l1: 0.263555
[2200]	valid_0's l1: 0.250433
[2400]	valid_0's l1: 0.239452
[2600]	valid_0's l1: 0.230264
[2800]	valid_0's l1: 0.222607
[3000]	valid_0's l1: 0.216403
[3200]	valid_0's l1: 0.211032
[3400]	valid_0's l1: 0.20651
[3600]	valid_0's l1: 0.202597
[3800]	valid_0's l1: 0.19903
[4000]	valid_0's l1: 0.195823
[4200]	valid_0's l1: 0.193147
[4400]	valid_0's l1: 0.190635
[4600]	valid_0's l1: 0.188354
[4800]	valid_0's l1: 0.186328
[5000]	valid_0's l1: 0.184619
[5200]	valid_0's l1: 0.183111
[5400]	valid_0's l1: 0.180964
[5600]	valid_0's l1: 0.178797
[5800]	valid_0's l1: 0.176965
[6000]	valid_0's l1: 0.175318
[6200]	valid_0's l1: 0.17395
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.633863
[400]	valid_0's l1: 0.536823
[600]	valid_0's l1: 0.466136
[800]	valid_0's l1: 0.411686
[1000]	valid_0's l1: 0.369295
[1200]	valid_0's l1: 0.335891
[1400]	valid_0's l1: 0.308269
[1600]	valid_0's l1: 0.286284
[1800]	valid_0's l1: 0.268654
[2000]	valid_0's l1: 0.254767
[2200]	valid_0's l1: 0.243427
[2400]	valid_0's l1: 0.234066
[2600]	valid_0's l1: 0.226152
[2800]	valid_0's l1: 0.219331
[3000]	valid_0's l1: 0.21352
[3200]	valid_0's l1: 0.208594
[3400]	valid_0's l1: 0.204453
[3600]	valid_0's l1: 0.20079
[3800]	valid_0's l1: 0.197483
[4000]	valid_0's l1: 0.194709
[4200]	valid_0's l1: 0.192466
[4400]	valid_0's l1: 0.190216
[4600]	valid_0's l1: 0.18802
[4800]	valid_0's l1: 0.186297
[5000]	valid_0's l1: 0.184663
[5200]	valid_0's l1: 0.182773
[5400]	valid_0's l1: 0.180799
[5600]	valid_0's l1: 0.179235
[5800]	valid_0's l1: 0.177831
[6000]	valid_0's l1: 0.17668
[6200]	valid_0's l1: 0.175566
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.720632
[400]	valid_0's l1: 0.615343
[600]	valid_0's l1: 0.53594
[800]	valid_0's l1: 0.474622
[1000]	valid_0's l1: 0.428984
[1200]	valid_0's l1: 0.392854
[1400]	valid_0's l1: 0.363027
[1600]	valid_0's l1: 0.338941
[1800]	valid_0's l1: 0.319457
[2000]	valid_0's l1: 0.303113
[2200]	valid_0's l1: 0.289517
[2400]	valid_0's l1: 0.278062
[2600]	valid_0's l1: 0.268061
[2800]	valid_0's l1: 0.260205
[3000]	valid_0's l1: 0.253354
[3200]	valid_0's l1: 0.246994
[3400]	valid_0's l1: 0.241879
[3600]	valid_0's l1: 0.237662
[3800]	valid_0's l1: 0.233553
[4000]	valid_0's l1: 0.229606
[4200]	valid_0's l1: 0.226114
[4400]	valid_0's l1: 0.223479
[4600]	valid_0's l1: 0.220827
[4800]	valid_0's l1: 0.218354
[5000]	valid_0's l1: 0.215909
[5200]	valid_0's l1: 0.213978
[5400]	valid_0's l1: 0.212245
[5600]	valid_0's l1: 0.2104
[5800]	valid_0's l1: 0.20822
[6000]	valid_0's l1: 0.206082
[6200]	valid_0's l1: 0.204069
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.708894
[400]	valid_0's l1: 0.600324
[600]	valid_0's l1: 0.520889
[800]	valid_0's l1: 0.457792
[1000]	valid_0's l1: 0.408543
[1200]	valid_0's l1: 0.370549
[1400]	valid_0's l1: 0.340378
[1600]	valid_0's l1: 0.316177
[1800]	valid_0's l1: 0.297097
[2000]	valid_0's l1: 0.282112
[2200]	valid_0's l1: 0.269501
[2400]	valid_0's l1: 0.258974
[2600]	valid_0's l1: 0.250249
[2800]	valid_0's l1: 0.242705
[3000]	valid_0's l1: 0.236267
[3200]	valid_0's l1: 0.23095
[3400]	valid_0's l1: 0.226318
[3600]	valid_0's l1: 0.221717
[3800]	valid_0's l1: 0.218076
[4000]	valid_0's l1: 0.214664
[4200]	valid_0's l1: 0.211729
[4400]	valid_0's l1: 0.208828
[4600]	valid_0's l1: 0.206388
[4800]	valid_0's l1: 0.203686
[5000]	valid_0's l1: 0.20128
[5200]	valid_0's l1: 0.199173
[5400]	valid_0's l1: 0.197097
[5600]	valid_0's l1: 0.195443
[5800]	valid_0's l1: 0.194632
[6000]	valid_0's l1: 0.193262
[6200]	valid_0's l1: 0.191561
[6400]	valid_0

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 1.0923 (weight: 0.000)
Random Forest MAPE: 1.7452 (weight: 0.000)
Extra Trees MAPE: 1.5231 (weight: 0.000)
Gradient Boosting MAPE: 1.2285 (weight: 0.000)
Ridge MAPE: 0.2432 (weight: 0.442)
Elastic Net MAPE: 0.2200 (weight: 0.558)
Huber MAPE: 1.9676 (weight: 0.000)
Ensemble MAPE: 0.2198

Training for BlendProperty3...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.623789
[400]	valid_0's l1: 0.515291
[600]	valid_0's l1: 0.4388
[800]	valid_0's l1: 0.382125
[1000]	valid_0's l1: 0.339082
[1200]	valid_0's l1: 0.306785
[1400]	valid_0's l1: 0.281187
[1600]	valid_0's l1: 0.262326
[1800]	valid_0's l1: 0.248328
[2000]	valid_0's l1: 0.236711
[2200]	valid_0's l1: 0.227458
[2400]	valid_0's l1: 0.219789
[2600]	valid_0's l1: 0.213948
[2800]	valid_0's l1: 0.208953
[3000]	valid_0's l1: 0.205105
[3200]	valid_0's l1: 0.201808
[3400]	valid_0's l1: 0.198918
[3600]	valid_0's l1: 0.196534
[3800]	valid_0's l1: 0.194416
[4000]	valid_0's l1: 0.192732
[4200]	val

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.680391
[400]	valid_0's l1: 0.552707
[600]	valid_0's l1: 0.459154
[800]	valid_0's l1: 0.390298
[1000]	valid_0's l1: 0.341386
[1200]	valid_0's l1: 0.305272
[1400]	valid_0's l1: 0.278047
[1600]	valid_0's l1: 0.256346
[1800]	valid_0's l1: 0.240645
[2000]	valid_0's l1: 0.228807
[2200]	valid_0's l1: 0.219422
[2400]	valid_0's l1: 0.212496
[2600]	valid_0's l1: 0.20699
[2800]	valid_0's l1: 0.202656
[3000]	valid_0's l1: 0.19911
[3200]	valid_0's l1: 0.196161
[3400]	valid_0's l1: 0.193619
[3600]	valid_0's l1: 0.191349
[3800]	valid_0's l1: 0.189144
[4000]	valid_0's l1: 0.186913
[4200]	valid_0's l1: 0.185007
[4400]	valid_0's l1: 0.183751
[4600]	valid_0's l1: 0.182505
[4800]	valid_0's l1: 0.181488
[5000]	valid_0's l1: 0.180764
[5200]	valid_0's l1: 0.180136
[5400]	valid_0's l1: 0.179546
[5600]	valid_0's l1: 0.179115
[5800]	valid_0's l1: 0.178745
[6000]	valid_0's l1: 0.178438
[6200]	valid_0's l1: 0.177862
[6400]	valid_0

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.620619
[400]	valid_0's l1: 0.513633
[600]	valid_0's l1: 0.438717
[800]	valid_0's l1: 0.384622
[1000]	valid_0's l1: 0.34419
[1200]	valid_0's l1: 0.315203
[1400]	valid_0's l1: 0.292526
[1600]	valid_0's l1: 0.27459
[1800]	valid_0's l1: 0.259848
[2000]	valid_0's l1: 0.248292
[2200]	valid_0's l1: 0.238664
[2400]	valid_0's l1: 0.231059
[2600]	valid_0's l1: 0.224286
[2800]	valid_0's l1: 0.218061
[3000]	valid_0's l1: 0.212379
[3200]	valid_0's l1: 0.207885
[3400]	valid_0's l1: 0.204127
[3600]	valid_0's l1: 0.201326
[3800]	valid_0's l1: 0.198654
[4000]	valid_0's l1: 0.196516
[4200]	valid_0's l1: 0.19486
[4400]	valid_0's l1: 0.193109
[4600]	valid_0's l1: 0.191872
[4800]	valid_0's l1: 0.190863
[5000]	valid_0's l1: 0.189952
[5200]	valid_0's l1: 0.189247
[5400]	valid_0's l1: 0.188611
[5600]	valid_0's l1: 0.188101
[5800]	valid_0's l1: 0.18758
[6000]	valid_0's l1: 0.186791
[6200]	valid_0's l1: 0.185178
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.640596
[400]	valid_0's l1: 0.53408
[600]	valid_0's l1: 0.45762
[800]	valid_0's l1: 0.400289
[1000]	valid_0's l1: 0.357043
[1200]	valid_0's l1: 0.324695
[1400]	valid_0's l1: 0.299983
[1600]	valid_0's l1: 0.280762
[1800]	valid_0's l1: 0.265548
[2000]	valid_0's l1: 0.253733
[2200]	valid_0's l1: 0.244631
[2400]	valid_0's l1: 0.237308
[2600]	valid_0's l1: 0.231304
[2800]	valid_0's l1: 0.226444
[3000]	valid_0's l1: 0.222497
[3200]	valid_0's l1: 0.219023
[3400]	valid_0's l1: 0.215909
[3600]	valid_0's l1: 0.213091
[3800]	valid_0's l1: 0.210511
[4000]	valid_0's l1: 0.207419
[4200]	valid_0's l1: 0.204313
[4400]	valid_0's l1: 0.201648
[4600]	valid_0's l1: 0.200197
[4800]	valid_0's l1: 0.198613
[5000]	valid_0's l1: 0.196905
[5200]	valid_0's l1: 0.195697
[5400]	valid_0's l1: 0.194663
[5600]	valid_0's l1: 0.193694
[5800]	valid_0's l1: 0.192931
[6000]	valid_0's l1: 0.192122
[6200]	valid_0's l1: 0.19148
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.618109
[400]	valid_0's l1: 0.502389
[600]	valid_0's l1: 0.42047
[800]	valid_0's l1: 0.36017
[1000]	valid_0's l1: 0.316987
[1200]	valid_0's l1: 0.283558
[1400]	valid_0's l1: 0.260216
[1600]	valid_0's l1: 0.240868
[1800]	valid_0's l1: 0.225728
[2000]	valid_0's l1: 0.214374
[2200]	valid_0's l1: 0.205176
[2400]	valid_0's l1: 0.198456
[2600]	valid_0's l1: 0.193265
[2800]	valid_0's l1: 0.189219
[3000]	valid_0's l1: 0.185897
[3200]	valid_0's l1: 0.183109
[3400]	valid_0's l1: 0.180769
[3600]	valid_0's l1: 0.178754
[3800]	valid_0's l1: 0.177133
[4000]	valid_0's l1: 0.175803
[4200]	valid_0's l1: 0.174163
[4400]	valid_0's l1: 0.172812
[4600]	valid_0's l1: 0.171525
[4800]	valid_0's l1: 0.170592
[5000]	valid_0's l1: 0.169823
[5200]	valid_0's l1: 0.16917
[5400]	valid_0's l1: 0.168627
[5600]	valid_0's l1: 0.167998
[5800]	valid_0's l1: 0.16729
[6000]	valid_0's l1: 0.167062
[6200]	valid_0's l1: 0.166347
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 1.3395 (weight: 0.016)
Random Forest MAPE: 1.7078 (weight: 0.000)
Extra Trees MAPE: 1.5256 (weight: 0.003)
Gradient Boosting MAPE: 1.3284 (weight: 0.018)
Ridge MAPE: 1.1278 (weight: 0.137)
Elastic Net MAPE: 0.9481 (weight: 0.825)
Huber MAPE: 1.8197 (weight: 0.000)
Ensemble MAPE: 0.9468

Training for BlendProperty4...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.665573
[400]	valid_0's l1: 0.567144
[600]	valid_0's l1: 0.494174
[800]	valid_0's l1: 0.437712
[1000]	valid_0's l1: 0.395124
[1200]	valid_0's l1: 0.362003
[1400]	valid_0's l1: 0.335874
[1600]	valid_0's l1: 0.314341
[1800]	valid_0's l1: 0.296501
[2000]	valid_0's l1: 0.28121
[2200]	valid_0's l1: 0.268376
[2400]	valid_0's l1: 0.258103
[2600]	valid_0's l1: 0.2492
[2800]	valid_0's l1: 0.241683
[3000]	valid_0's l1: 0.235391
[3200]	valid_0's l1: 0.230038
[3400]	valid_0's l1: 0.225538
[3600]	valid_0's l1: 0.221665
[3800]	valid_0's l1: 0.218442
[4000]	valid_0's l1: 0.215312
[4200]	vali

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.68282
[400]	valid_0's l1: 0.574099
[600]	valid_0's l1: 0.491617
[800]	valid_0's l1: 0.42886
[1000]	valid_0's l1: 0.381929
[1200]	valid_0's l1: 0.344799
[1400]	valid_0's l1: 0.315638
[1600]	valid_0's l1: 0.292311
[1800]	valid_0's l1: 0.273742
[2000]	valid_0's l1: 0.259311
[2200]	valid_0's l1: 0.247153
[2400]	valid_0's l1: 0.237456
[2600]	valid_0's l1: 0.229327
[2800]	valid_0's l1: 0.222517
[3000]	valid_0's l1: 0.216841
[3200]	valid_0's l1: 0.212036
[3400]	valid_0's l1: 0.20823
[3600]	valid_0's l1: 0.204685
[3800]	valid_0's l1: 0.201561
[4000]	valid_0's l1: 0.198796
[4200]	valid_0's l1: 0.195908
[4400]	valid_0's l1: 0.193349
[4600]	valid_0's l1: 0.191282
[4800]	valid_0's l1: 0.189617
[5000]	valid_0's l1: 0.188137
[5200]	valid_0's l1: 0.186908
[5400]	valid_0's l1: 0.184931
[5600]	valid_0's l1: 0.182938
[5800]	valid_0's l1: 0.181292
[6000]	valid_0's l1: 0.179943
[6200]	valid_0's l1: 0.178945
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.668244
[400]	valid_0's l1: 0.561587
[600]	valid_0's l1: 0.482398
[800]	valid_0's l1: 0.420882
[1000]	valid_0's l1: 0.374868
[1200]	valid_0's l1: 0.340241
[1400]	valid_0's l1: 0.31384
[1600]	valid_0's l1: 0.29249
[1800]	valid_0's l1: 0.275855
[2000]	valid_0's l1: 0.261811
[2200]	valid_0's l1: 0.250661
[2400]	valid_0's l1: 0.241214
[2600]	valid_0's l1: 0.233665
[2800]	valid_0's l1: 0.227149
[3000]	valid_0's l1: 0.221509
[3200]	valid_0's l1: 0.216898
[3400]	valid_0's l1: 0.213011
[3600]	valid_0's l1: 0.209548
[3800]	valid_0's l1: 0.206422
[4000]	valid_0's l1: 0.20367
[4200]	valid_0's l1: 0.201126
[4400]	valid_0's l1: 0.199356
[4600]	valid_0's l1: 0.197974
[4800]	valid_0's l1: 0.196828
[5000]	valid_0's l1: 0.195697
[5200]	valid_0's l1: 0.193616
[5400]	valid_0's l1: 0.190933
[5600]	valid_0's l1: 0.188711
[5800]	valid_0's l1: 0.186556
[6000]	valid_0's l1: 0.184809
[6200]	valid_0's l1: 0.183314
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.672863
[400]	valid_0's l1: 0.56789
[600]	valid_0's l1: 0.490275
[800]	valid_0's l1: 0.429687
[1000]	valid_0's l1: 0.383421
[1200]	valid_0's l1: 0.349085
[1400]	valid_0's l1: 0.32258
[1600]	valid_0's l1: 0.301705
[1800]	valid_0's l1: 0.284772
[2000]	valid_0's l1: 0.270663
[2200]	valid_0's l1: 0.25908
[2400]	valid_0's l1: 0.249386
[2600]	valid_0's l1: 0.241155
[2800]	valid_0's l1: 0.233489
[3000]	valid_0's l1: 0.227251
[3200]	valid_0's l1: 0.222184
[3400]	valid_0's l1: 0.217931
[3600]	valid_0's l1: 0.214314
[3800]	valid_0's l1: 0.211488
[4000]	valid_0's l1: 0.208312
[4200]	valid_0's l1: 0.205698
[4400]	valid_0's l1: 0.203008
[4600]	valid_0's l1: 0.200456
[4800]	valid_0's l1: 0.198765
[5000]	valid_0's l1: 0.197368
[5200]	valid_0's l1: 0.195426
[5400]	valid_0's l1: 0.193375
[5600]	valid_0's l1: 0.191045
[5800]	valid_0's l1: 0.189088
[6000]	valid_0's l1: 0.187483
[6200]	valid_0's l1: 0.186283
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.72373
[400]	valid_0's l1: 0.614909
[600]	valid_0's l1: 0.53236
[800]	valid_0's l1: 0.46767
[1000]	valid_0's l1: 0.41744
[1200]	valid_0's l1: 0.378487
[1400]	valid_0's l1: 0.347344
[1600]	valid_0's l1: 0.323796
[1800]	valid_0's l1: 0.304979
[2000]	valid_0's l1: 0.289108
[2200]	valid_0's l1: 0.275785
[2400]	valid_0's l1: 0.264501
[2600]	valid_0's l1: 0.255394
[2800]	valid_0's l1: 0.247359
[3000]	valid_0's l1: 0.240818
[3200]	valid_0's l1: 0.235309
[3400]	valid_0's l1: 0.230527
[3600]	valid_0's l1: 0.226352
[3800]	valid_0's l1: 0.223052
[4000]	valid_0's l1: 0.219784
[4200]	valid_0's l1: 0.215679
[4400]	valid_0's l1: 0.212395
[4600]	valid_0's l1: 0.210219
[4800]	valid_0's l1: 0.208393
[5000]	valid_0's l1: 0.206352
[5200]	valid_0's l1: 0.204205
[5400]	valid_0's l1: 0.202228
[5600]	valid_0's l1: 0.200585
[5800]	valid_0's l1: 0.199132
[6000]	valid_0's l1: 0.197909
[6200]	valid_0's l1: 0.197072
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 1.1984 (weight: 0.000)
Random Forest MAPE: 2.1203 (weight: 0.000)
Extra Trees MAPE: 1.5720 (weight: 0.000)
Gradient Boosting MAPE: 1.4058 (weight: 0.000)
Ridge MAPE: 0.4059 (weight: 0.474)
Elastic Net MAPE: 0.3954 (weight: 0.526)
Huber MAPE: 2.2123 (weight: 0.000)
Ensemble MAPE: 0.3922

Training for BlendProperty5...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.611771
[400]	valid_0's l1: 0.493539
[600]	valid_0's l1: 0.401977
[800]	valid_0's l1: 0.330681
[1000]	valid_0's l1: 0.273453
[1200]	valid_0's l1: 0.229959
[1400]	valid_0's l1: 0.196342
[1600]	valid_0's l1: 0.169574
[1800]	valid_0's l1: 0.148179
[2000]	valid_0's l1: 0.130275
[2200]	valid_0's l1: 0.117432
[2400]	valid_0's l1: 0.110027
[2600]	valid_0's l1: 0.105121
[2800]	valid_0's l1: 0.0990846
[3000]	valid_0's l1: 0.0951496
[3200]	valid_0's l1: 0.0934141
[3400]	valid_0's l1: 0.090317
[3600]	valid_0's l1: 0.0878347
[3800]	valid_0's l1: 0.0863475
[4000]	valid_0's l1: 0.0850046
[4

  model = cd_fast.enet_coordinate_descent(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.533294
[400]	valid_0's l1: 0.41781
[600]	valid_0's l1: 0.332429
[800]	valid_0's l1: 0.264371
[1000]	valid_0's l1: 0.212193
[1200]	valid_0's l1: 0.170946
[1400]	valid_0's l1: 0.14138
[1600]	valid_0's l1: 0.12169
[1800]	valid_0's l1: 0.10703
[2000]	valid_0's l1: 0.0971026
[2200]	valid_0's l1: 0.0896061
[2400]	valid_0's l1: 0.0842111
[2600]	valid_0's l1: 0.0796162
[2800]	valid_0's l1: 0.0759127
[3000]	valid_0's l1: 0.0715096
[3200]	valid_0's l1: 0.0692332
[3400]	valid_0's l1: 0.066374
[3600]	valid_0's l1: 0.0649769
[3800]	valid_0's l1: 0.0635328
[4000]	valid_0's l1: 0.0621285
[4200]	valid_0's l1: 0.0613165
[4400]	valid_0's l1: 0.0596078
[4600]	valid_0's l1: 0.058395
[4800]	valid_0's l1: 0.0573634
[5000]	valid_0's l1: 0.056336
[5200]	valid_0's l1: 0.0559584
[5400]	valid_0's l1: 0.0556751
[5600]	valid_0's l1: 0.055452
[5800]	valid_0's l1: 0.0552675
[6000]	valid_0's l1: 0.0551133
[6200]	valid_0's l1: 0.054977

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.598448
[400]	valid_0's l1: 0.473428
[600]	valid_0's l1: 0.37544
[800]	valid_0's l1: 0.298688
[1000]	valid_0's l1: 0.241291
[1200]	valid_0's l1: 0.196935
[1400]	valid_0's l1: 0.164095
[1600]	valid_0's l1: 0.140802
[1800]	valid_0's l1: 0.122037
[2000]	valid_0's l1: 0.107583
[2200]	valid_0's l1: 0.096161
[2400]	valid_0's l1: 0.0894682
[2600]	valid_0's l1: 0.0850559
[2800]	valid_0's l1: 0.0810627
[3000]	valid_0's l1: 0.0781545
[3200]	valid_0's l1: 0.0751487
[3400]	valid_0's l1: 0.0719574
[3600]	valid_0's l1: 0.0702144
[3800]	valid_0's l1: 0.0688443
[4000]	valid_0's l1: 0.0675931
[4200]	valid_0's l1: 0.0656595
[4400]	valid_0's l1: 0.0641684
[4600]	valid_0's l1: 0.063438
[4800]	valid_0's l1: 0.0630413
[5000]	valid_0's l1: 0.062822
[5200]	valid_0's l1: 0.0626795
[5400]	valid_0's l1: 0.0626046
[5600]	valid_0's l1: 0.0625062
[5800]	valid_0's l1: 0.0623007
[6000]	valid_0's l1: 0.0620416
[6200]	valid_0's l1: 0.061

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.572372
[400]	valid_0's l1: 0.452757
[600]	valid_0's l1: 0.364499
[800]	valid_0's l1: 0.295368
[1000]	valid_0's l1: 0.241663
[1200]	valid_0's l1: 0.19815
[1400]	valid_0's l1: 0.165651
[1600]	valid_0's l1: 0.143136
[1800]	valid_0's l1: 0.125401
[2000]	valid_0's l1: 0.112915
[2200]	valid_0's l1: 0.104431
[2400]	valid_0's l1: 0.0973732
[2600]	valid_0's l1: 0.0927327
[2800]	valid_0's l1: 0.0890161
[3000]	valid_0's l1: 0.0856277
[3200]	valid_0's l1: 0.0826132
[3400]	valid_0's l1: 0.0796467
[3600]	valid_0's l1: 0.0756813
[3800]	valid_0's l1: 0.0723282
[4000]	valid_0's l1: 0.070312
[4200]	valid_0's l1: 0.0684994
[4400]	valid_0's l1: 0.067508
[4600]	valid_0's l1: 0.0658933
[4800]	valid_0's l1: 0.0649448
[5000]	valid_0's l1: 0.0639251
[5200]	valid_0's l1: 0.0634846
[5400]	valid_0's l1: 0.0623235
[5600]	valid_0's l1: 0.0614938
[5800]	valid_0's l1: 0.0607703
[6000]	valid_0's l1: 0.0597228
[6200]	valid_0's l1: 0.059

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.522753
[400]	valid_0's l1: 0.404103
[600]	valid_0's l1: 0.3147
[800]	valid_0's l1: 0.246433
[1000]	valid_0's l1: 0.196868
[1200]	valid_0's l1: 0.162111
[1400]	valid_0's l1: 0.139009
[1600]	valid_0's l1: 0.122946
[1800]	valid_0's l1: 0.111041
[2000]	valid_0's l1: 0.102119
[2200]	valid_0's l1: 0.0951275
[2400]	valid_0's l1: 0.0894873
[2600]	valid_0's l1: 0.0851027
[2800]	valid_0's l1: 0.0822718
[3000]	valid_0's l1: 0.0799195
[3200]	valid_0's l1: 0.0773565
[3400]	valid_0's l1: 0.075532
[3600]	valid_0's l1: 0.0741353
[3800]	valid_0's l1: 0.0726722
[4000]	valid_0's l1: 0.0717999
[4200]	valid_0's l1: 0.0713613
[4400]	valid_0's l1: 0.0708258
[4600]	valid_0's l1: 0.0702616
[4800]	valid_0's l1: 0.0694932
[5000]	valid_0's l1: 0.0690592
Early stopping, best iteration is:
[4936]	valid_0's l1: 0.0690378


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 0.1832 (weight: 0.096)
Random Forest MAPE: 0.0658 (weight: 0.311)
Extra Trees MAPE: 0.0816 (weight: 0.265)
Gradient Boosting MAPE: 0.0603 (weight: 0.328)
Ridge MAPE: 4.3565 (weight: 0.000)
Elastic Net MAPE: 4.3156 (weight: 0.000)
Huber MAPE: 2.9261 (weight: 0.000)
Ensemble MAPE: 0.0656

Training for BlendProperty6...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.656722
[400]	valid_0's l1: 0.558793
[600]	valid_0's l1: 0.485053
[800]	valid_0's l1: 0.427165
[1000]	valid_0's l1: 0.380827
[1200]	valid_0's l1: 0.344885
[1400]	valid_0's l1: 0.316387
[1600]	valid_0's l1: 0.293969
[1800]	valid_0's l1: 0.276859
[2000]	valid_0's l1: 0.263312
[2200]	valid_0's l1: 0.252625
[2400]	valid_0's l1: 0.243367
[2600]	valid_0's l1: 0.235555
[2800]	valid_0's l1: 0.228794
[3000]	valid_0's l1: 0.223271
[3200]	valid_0's l1: 0.218358
[3400]	valid_0's l1: 0.214306
[3600]	valid_0's l1: 0.210805
[3800]	valid_0's l1: 0.207711
[4000]	valid_0's l1: 0.205033
[4200]	v

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.693585
[400]	valid_0's l1: 0.588784
[600]	valid_0's l1: 0.511281
[800]	valid_0's l1: 0.452666
[1000]	valid_0's l1: 0.406549
[1200]	valid_0's l1: 0.369218
[1400]	valid_0's l1: 0.340919
[1600]	valid_0's l1: 0.318568
[1800]	valid_0's l1: 0.299438
[2000]	valid_0's l1: 0.284389
[2200]	valid_0's l1: 0.271475
[2400]	valid_0's l1: 0.260664
[2600]	valid_0's l1: 0.251869
[2800]	valid_0's l1: 0.244497
[3000]	valid_0's l1: 0.237897
[3200]	valid_0's l1: 0.232506
[3400]	valid_0's l1: 0.227904
[3600]	valid_0's l1: 0.224106
[3800]	valid_0's l1: 0.219883
[4000]	valid_0's l1: 0.216189
[4200]	valid_0's l1: 0.21398
[4400]	valid_0's l1: 0.211567
[4600]	valid_0's l1: 0.209333
[4800]	valid_0's l1: 0.207614
[5000]	valid_0's l1: 0.206266
[5200]	valid_0's l1: 0.202801
[5400]	valid_0's l1: 0.199937
[5600]	valid_0's l1: 0.197078
[5800]	valid_0's l1: 0.194772
[6000]	valid_0's l1: 0.192718
[6200]	valid_0's l1: 0.191292
[6400]	valid_

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.643151
[400]	valid_0's l1: 0.545318
[600]	valid_0's l1: 0.471642
[800]	valid_0's l1: 0.416609
[1000]	valid_0's l1: 0.372926
[1200]	valid_0's l1: 0.33956
[1400]	valid_0's l1: 0.313325
[1600]	valid_0's l1: 0.291833
[1800]	valid_0's l1: 0.274275
[2000]	valid_0's l1: 0.2596
[2200]	valid_0's l1: 0.246822
[2400]	valid_0's l1: 0.236522
[2600]	valid_0's l1: 0.228108
[2800]	valid_0's l1: 0.220931
[3000]	valid_0's l1: 0.214246
[3200]	valid_0's l1: 0.208562
[3400]	valid_0's l1: 0.203258
[3600]	valid_0's l1: 0.198982
[3800]	valid_0's l1: 0.195481
[4000]	valid_0's l1: 0.192796
[4200]	valid_0's l1: 0.190795
[4400]	valid_0's l1: 0.188824
[4600]	valid_0's l1: 0.186769
[4800]	valid_0's l1: 0.184845
[5000]	valid_0's l1: 0.18245
[5200]	valid_0's l1: 0.180027
[5400]	valid_0's l1: 0.178488
[5600]	valid_0's l1: 0.177405
[5800]	valid_0's l1: 0.176356
[6000]	valid_0's l1: 0.175244
[6200]	valid_0's l1: 0.173441
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.713847
[400]	valid_0's l1: 0.612139
[600]	valid_0's l1: 0.533211
[800]	valid_0's l1: 0.47155
[1000]	valid_0's l1: 0.423107
[1200]	valid_0's l1: 0.383962
[1400]	valid_0's l1: 0.351637
[1600]	valid_0's l1: 0.326184
[1800]	valid_0's l1: 0.305703
[2000]	valid_0's l1: 0.288868
[2200]	valid_0's l1: 0.275147
[2400]	valid_0's l1: 0.263481
[2600]	valid_0's l1: 0.253693
[2800]	valid_0's l1: 0.245872
[3000]	valid_0's l1: 0.239224
[3200]	valid_0's l1: 0.233708
[3400]	valid_0's l1: 0.228724
[3600]	valid_0's l1: 0.224827
[3800]	valid_0's l1: 0.221442
[4000]	valid_0's l1: 0.218365
[4200]	valid_0's l1: 0.214155
[4400]	valid_0's l1: 0.210624
[4600]	valid_0's l1: 0.208129
[4800]	valid_0's l1: 0.205778
[5000]	valid_0's l1: 0.203768
[5200]	valid_0's l1: 0.201849
[5400]	valid_0's l1: 0.200366
[5600]	valid_0's l1: 0.198144
[5800]	valid_0's l1: 0.196238
[6000]	valid_0's l1: 0.194494
[6200]	valid_0's l1: 0.192942
[6400]	valid_

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.673659
[400]	valid_0's l1: 0.573798
[600]	valid_0's l1: 0.495282
[800]	valid_0's l1: 0.436398
[1000]	valid_0's l1: 0.391165
[1200]	valid_0's l1: 0.35574
[1400]	valid_0's l1: 0.328055
[1600]	valid_0's l1: 0.304887
[1800]	valid_0's l1: 0.286385
[2000]	valid_0's l1: 0.271559
[2200]	valid_0's l1: 0.259782
[2400]	valid_0's l1: 0.250172
[2600]	valid_0's l1: 0.242386
[2800]	valid_0's l1: 0.235749
[3000]	valid_0's l1: 0.230382
[3200]	valid_0's l1: 0.225625
[3400]	valid_0's l1: 0.221206
[3600]	valid_0's l1: 0.217709
[3800]	valid_0's l1: 0.214523
[4000]	valid_0's l1: 0.211504
[4200]	valid_0's l1: 0.208546
[4400]	valid_0's l1: 0.205684
[4600]	valid_0's l1: 0.203268
[4800]	valid_0's l1: 0.201167
[5000]	valid_0's l1: 0.199185
[5200]	valid_0's l1: 0.196742
[5400]	valid_0's l1: 0.194831
[5600]	valid_0's l1: 0.193334
[5800]	valid_0's l1: 0.191531
[6000]	valid_0's l1: 0.189576
[6200]	valid_0's l1: 0.188037
[6400]	valid_

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 0.9761 (weight: 0.000)
Random Forest MAPE: 1.3239 (weight: 0.000)
Extra Trees MAPE: 0.9889 (weight: 0.000)
Gradient Boosting MAPE: 1.1097 (weight: 0.000)
Ridge MAPE: 0.0124 (weight: 0.521)
Elastic Net MAPE: 0.0207 (weight: 0.479)
Huber MAPE: 2.0396 (weight: 0.000)
Ensemble MAPE: 0.0148

Training for BlendProperty7...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.622243
[400]	valid_0's l1: 0.515958
[600]	valid_0's l1: 0.440921
[800]	valid_0's l1: 0.384849
[1000]	valid_0's l1: 0.342614
[1200]	valid_0's l1: 0.310425
[1400]	valid_0's l1: 0.285642
[1600]	valid_0's l1: 0.266833
[1800]	valid_0's l1: 0.253459
[2000]	valid_0's l1: 0.24212
[2200]	valid_0's l1: 0.232585
[2400]	valid_0's l1: 0.22493
[2600]	valid_0's l1: 0.218814
[2800]	valid_0's l1: 0.214061
[3000]	valid_0's l1: 0.209699
[3200]	valid_0's l1: 0.206039
[3400]	valid_0's l1: 0.203006
[3600]	valid_0's l1: 0.200578
[3800]	valid_0's l1: 0.198395
[4000]	valid_0's l1: 0.196571
[4200]	val

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.672036
[400]	valid_0's l1: 0.546605
[600]	valid_0's l1: 0.454782
[800]	valid_0's l1: 0.387549
[1000]	valid_0's l1: 0.340035
[1200]	valid_0's l1: 0.305383
[1400]	valid_0's l1: 0.277429
[1600]	valid_0's l1: 0.256502
[1800]	valid_0's l1: 0.242001
[2000]	valid_0's l1: 0.229862
[2200]	valid_0's l1: 0.220404
[2400]	valid_0's l1: 0.213472
[2600]	valid_0's l1: 0.20782
[2800]	valid_0's l1: 0.203366
[3000]	valid_0's l1: 0.199481
[3200]	valid_0's l1: 0.196375
[3400]	valid_0's l1: 0.193686
[3600]	valid_0's l1: 0.191435
[3800]	valid_0's l1: 0.18949
[4000]	valid_0's l1: 0.187045
[4200]	valid_0's l1: 0.184572
[4400]	valid_0's l1: 0.182474
[4600]	valid_0's l1: 0.180484
[4800]	valid_0's l1: 0.179288
[5000]	valid_0's l1: 0.178065
[5200]	valid_0's l1: 0.176748
[5400]	valid_0's l1: 0.17557
[5600]	valid_0's l1: 0.174591
[5800]	valid_0's l1: 0.173813
[6000]	valid_0's l1: 0.173273
[6200]	valid_0's l1: 0.172785
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.615476
[400]	valid_0's l1: 0.511269
[600]	valid_0's l1: 0.437903
[800]	valid_0's l1: 0.38512
[1000]	valid_0's l1: 0.345578
[1200]	valid_0's l1: 0.317028
[1400]	valid_0's l1: 0.295235
[1600]	valid_0's l1: 0.277244
[1800]	valid_0's l1: 0.263135
[2000]	valid_0's l1: 0.251419
[2200]	valid_0's l1: 0.241494
[2400]	valid_0's l1: 0.233948
[2600]	valid_0's l1: 0.227165
[2800]	valid_0's l1: 0.2205
[3000]	valid_0's l1: 0.214823
[3200]	valid_0's l1: 0.210974
[3400]	valid_0's l1: 0.20759
[3600]	valid_0's l1: 0.204832
[3800]	valid_0's l1: 0.202299
[4000]	valid_0's l1: 0.200012
[4200]	valid_0's l1: 0.198125
[4400]	valid_0's l1: 0.196206
[4600]	valid_0's l1: 0.194739
[4800]	valid_0's l1: 0.193251
[5000]	valid_0's l1: 0.191934
[5200]	valid_0's l1: 0.190835
[5400]	valid_0's l1: 0.189948
[5600]	valid_0's l1: 0.189066
[5800]	valid_0's l1: 0.187586
[6000]	valid_0's l1: 0.185673
[6200]	valid_0's l1: 0.183959
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.650274
[400]	valid_0's l1: 0.54558
[600]	valid_0's l1: 0.470607
[800]	valid_0's l1: 0.415078
[1000]	valid_0's l1: 0.372063
[1200]	valid_0's l1: 0.340447
[1400]	valid_0's l1: 0.316068
[1600]	valid_0's l1: 0.29717
[1800]	valid_0's l1: 0.282465
[2000]	valid_0's l1: 0.270693
[2200]	valid_0's l1: 0.261382
[2400]	valid_0's l1: 0.254051
[2600]	valid_0's l1: 0.248101
[2800]	valid_0's l1: 0.243092
[3000]	valid_0's l1: 0.238952
[3200]	valid_0's l1: 0.235357
[3400]	valid_0's l1: 0.23196
[3600]	valid_0's l1: 0.229264
[3800]	valid_0's l1: 0.226987
[4000]	valid_0's l1: 0.224718
[4200]	valid_0's l1: 0.22224
[4400]	valid_0's l1: 0.219444
[4600]	valid_0's l1: 0.216149
[4800]	valid_0's l1: 0.213229
[5000]	valid_0's l1: 0.21117
[5200]	valid_0's l1: 0.209315
[5400]	valid_0's l1: 0.207939
[5600]	valid_0's l1: 0.206797
[5800]	valid_0's l1: 0.206105
[6000]	valid_0's l1: 0.204823
[6200]	valid_0's l1: 0.203417
[6400]	valid_0's 

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.614416
[400]	valid_0's l1: 0.501101
[600]	valid_0's l1: 0.420182
[800]	valid_0's l1: 0.360921
[1000]	valid_0's l1: 0.318801
[1200]	valid_0's l1: 0.286597
[1400]	valid_0's l1: 0.263347
[1600]	valid_0's l1: 0.244347
[1800]	valid_0's l1: 0.229565
[2000]	valid_0's l1: 0.218111
[2200]	valid_0's l1: 0.209156
[2400]	valid_0's l1: 0.202437
[2600]	valid_0's l1: 0.197031
[2800]	valid_0's l1: 0.192803
[3000]	valid_0's l1: 0.189623
[3200]	valid_0's l1: 0.18703
[3400]	valid_0's l1: 0.184789
[3600]	valid_0's l1: 0.182666
[3800]	valid_0's l1: 0.180784
[4000]	valid_0's l1: 0.179472
[4200]	valid_0's l1: 0.17751
[4400]	valid_0's l1: 0.175395
[4600]	valid_0's l1: 0.173513
[4800]	valid_0's l1: 0.172224
[5000]	valid_0's l1: 0.171119
[5200]	valid_0's l1: 0.170131
[5400]	valid_0's l1: 0.16947
[5600]	valid_0's l1: 0.168846
[5800]	valid_0's l1: 0.168124
[6000]	valid_0's l1: 0.167397
[6200]	valid_0's l1: 0.167114
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 1.8017 (weight: 0.128)
Random Forest MAPE: 2.9278 (weight: 0.000)
Extra Trees MAPE: 2.6188 (weight: 0.000)
Gradient Boosting MAPE: 2.2762 (weight: 0.001)
Ridge MAPE: 1.8681 (weight: 0.066)
Elastic Net MAPE: 1.6173 (weight: 0.806)
Huber MAPE: 3.1646 (weight: 0.000)
Ensemble MAPE: 1.6104

Training for BlendProperty8...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.660295
[400]	valid_0's l1: 0.573394
[600]	valid_0's l1: 0.508361
[800]	valid_0's l1: 0.457803
[1000]	valid_0's l1: 0.417956
[1200]	valid_0's l1: 0.386557
[1400]	valid_0's l1: 0.362385
[1600]	valid_0's l1: 0.342409
[1800]	valid_0's l1: 0.326093
[2000]	valid_0's l1: 0.312518
[2200]	valid_0's l1: 0.301525
[2400]	valid_0's l1: 0.292372
[2600]	valid_0's l1: 0.284715
[2800]	valid_0's l1: 0.278162
[3000]	valid_0's l1: 0.272373
[3200]	valid_0's l1: 0.26742
[3400]	valid_0's l1: 0.262986
[3600]	valid_0's l1: 0.258826
[3800]	valid_0's l1: 0.255093
[4000]	valid_0's l1: 0.251614
[4200]	va

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.693916
[400]	valid_0's l1: 0.58918
[600]	valid_0's l1: 0.51384
[800]	valid_0's l1: 0.455401
[1000]	valid_0's l1: 0.413216
[1200]	valid_0's l1: 0.380473
[1400]	valid_0's l1: 0.354666
[1600]	valid_0's l1: 0.33374
[1800]	valid_0's l1: 0.317715
[2000]	valid_0's l1: 0.305123
[2200]	valid_0's l1: 0.294943
[2400]	valid_0's l1: 0.286303
[2600]	valid_0's l1: 0.279139
[2800]	valid_0's l1: 0.272801
[3000]	valid_0's l1: 0.267359
[3200]	valid_0's l1: 0.262972
[3400]	valid_0's l1: 0.259186
[3600]	valid_0's l1: 0.254988
[3800]	valid_0's l1: 0.250995
[4000]	valid_0's l1: 0.247831
[4200]	valid_0's l1: 0.244532
[4400]	valid_0's l1: 0.241767
[4600]	valid_0's l1: 0.239312
[4800]	valid_0's l1: 0.237411
[5000]	valid_0's l1: 0.235731
[5200]	valid_0's l1: 0.234408
[5400]	valid_0's l1: 0.233298
[5600]	valid_0's l1: 0.231398
[5800]	valid_0's l1: 0.227956
[6000]	valid_0's l1: 0.225036
[6200]	valid_0's l1: 0.222678
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.638737
[400]	valid_0's l1: 0.54896
[600]	valid_0's l1: 0.483347
[800]	valid_0's l1: 0.436031
[1000]	valid_0's l1: 0.401583
[1200]	valid_0's l1: 0.373951
[1400]	valid_0's l1: 0.351097
[1600]	valid_0's l1: 0.332623
[1800]	valid_0's l1: 0.317068
[2000]	valid_0's l1: 0.303604
[2200]	valid_0's l1: 0.293009
[2400]	valid_0's l1: 0.284288
[2600]	valid_0's l1: 0.277336
[2800]	valid_0's l1: 0.271344
[3000]	valid_0's l1: 0.265869
[3200]	valid_0's l1: 0.261386
[3400]	valid_0's l1: 0.257347
[3600]	valid_0's l1: 0.253913
[3800]	valid_0's l1: 0.249935
[4000]	valid_0's l1: 0.247273
[4200]	valid_0's l1: 0.244779
[4400]	valid_0's l1: 0.242061
[4600]	valid_0's l1: 0.239799
[4800]	valid_0's l1: 0.237831
[5000]	valid_0's l1: 0.235956
[5200]	valid_0's l1: 0.234392
[5400]	valid_0's l1: 0.233016
[5600]	valid_0's l1: 0.231058
[5800]	valid_0's l1: 0.229315
[6000]	valid_0's l1: 0.227579
[6200]	valid_0's l1: 0.226182
[6400]	valid_

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.693653
[400]	valid_0's l1: 0.596927
[600]	valid_0's l1: 0.525412
[800]	valid_0's l1: 0.469883
[1000]	valid_0's l1: 0.427463
[1200]	valid_0's l1: 0.394687
[1400]	valid_0's l1: 0.368898
[1600]	valid_0's l1: 0.347935
[1800]	valid_0's l1: 0.330733
[2000]	valid_0's l1: 0.316469
[2200]	valid_0's l1: 0.304672
[2400]	valid_0's l1: 0.294891
[2600]	valid_0's l1: 0.286606
[2800]	valid_0's l1: 0.280063
[3000]	valid_0's l1: 0.274626
[3200]	valid_0's l1: 0.270198
[3400]	valid_0's l1: 0.26626
[3600]	valid_0's l1: 0.262594
[3800]	valid_0's l1: 0.259007
[4000]	valid_0's l1: 0.256161
[4200]	valid_0's l1: 0.253737
[4400]	valid_0's l1: 0.25099
[4600]	valid_0's l1: 0.248121
[4800]	valid_0's l1: 0.246004
[5000]	valid_0's l1: 0.244278
[5200]	valid_0's l1: 0.242762
[5400]	valid_0's l1: 0.241525
[5600]	valid_0's l1: 0.2399
[5800]	valid_0's l1: 0.237516
[6000]	valid_0's l1: 0.235182
[6200]	valid_0's l1: 0.233241
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.683181
[400]	valid_0's l1: 0.581694
[600]	valid_0's l1: 0.504309
[800]	valid_0's l1: 0.444869
[1000]	valid_0's l1: 0.401235
[1200]	valid_0's l1: 0.368625
[1400]	valid_0's l1: 0.342456
[1600]	valid_0's l1: 0.321385
[1800]	valid_0's l1: 0.304442
[2000]	valid_0's l1: 0.291138
[2200]	valid_0's l1: 0.279738
[2400]	valid_0's l1: 0.270412
[2600]	valid_0's l1: 0.262487
[2800]	valid_0's l1: 0.255881
[3000]	valid_0's l1: 0.250221
[3200]	valid_0's l1: 0.245437
[3400]	valid_0's l1: 0.241595
[3600]	valid_0's l1: 0.238057
[3800]	valid_0's l1: 0.23511
[4000]	valid_0's l1: 0.23251
[4200]	valid_0's l1: 0.230204
[4400]	valid_0's l1: 0.22773
[4600]	valid_0's l1: 0.224423
[4800]	valid_0's l1: 0.221732
[5000]	valid_0's l1: 0.218832
[5200]	valid_0's l1: 0.216394
[5400]	valid_0's l1: 0.21454
[5600]	valid_0's l1: 0.212994
[5800]	valid_0's l1: 0.211708
[6000]	valid_0's l1: 0.210464
[6200]	valid_0's l1: 0.209489
[6400]	valid_0's

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 1.3618 (weight: 0.001)
Random Forest MAPE: 1.9416 (weight: 0.000)
Extra Trees MAPE: 1.8005 (weight: 0.000)
Gradient Boosting MAPE: 1.6468 (weight: 0.000)
Ridge MAPE: 0.8145 (weight: 0.273)
Elastic Net MAPE: 0.7168 (weight: 0.726)
Huber MAPE: 1.8409 (weight: 0.000)
Ensemble MAPE: 0.7374

Training for BlendProperty9...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.689085
[400]	valid_0's l1: 0.589239
[600]	valid_0's l1: 0.513146
[800]	valid_0's l1: 0.457975
[1000]	valid_0's l1: 0.418915
[1200]	valid_0's l1: 0.390011
[1400]	valid_0's l1: 0.36749
[1600]	valid_0's l1: 0.349758
[1800]	valid_0's l1: 0.334967
[2000]	valid_0's l1: 0.322878
[2200]	valid_0's l1: 0.312673
[2400]	valid_0's l1: 0.304101
[2600]	valid_0's l1: 0.297143
[2800]	valid_0's l1: 0.2913
[3000]	valid_0's l1: 0.286514
[3200]	valid_0's l1: 0.282799
[3400]	valid_0's l1: 0.280086
[3600]	valid_0's l1: 0.277677
[3800]	valid_0's l1: 0.275772
[4000]	valid_0's l1: 0.271706
[4200]	vali

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.700115
[400]	valid_0's l1: 0.604847
[600]	valid_0's l1: 0.532577
[800]	valid_0's l1: 0.476191
[1000]	valid_0's l1: 0.432016
[1200]	valid_0's l1: 0.39728
[1400]	valid_0's l1: 0.370158
[1600]	valid_0's l1: 0.348646
[1800]	valid_0's l1: 0.331621
[2000]	valid_0's l1: 0.317634
[2200]	valid_0's l1: 0.305756
[2400]	valid_0's l1: 0.295981
[2600]	valid_0's l1: 0.287402
[2800]	valid_0's l1: 0.280382
[3000]	valid_0's l1: 0.274188
[3200]	valid_0's l1: 0.269346
[3400]	valid_0's l1: 0.265145
[3600]	valid_0's l1: 0.261582
[3800]	valid_0's l1: 0.258969
[4000]	valid_0's l1: 0.256076
[4200]	valid_0's l1: 0.252336
[4400]	valid_0's l1: 0.248661
[4600]	valid_0's l1: 0.245446
[4800]	valid_0's l1: 0.243031
[5000]	valid_0's l1: 0.241109
[5200]	valid_0's l1: 0.239916
[5400]	valid_0's l1: 0.238744
[5600]	valid_0's l1: 0.237783
[5800]	valid_0's l1: 0.236608
[6000]	valid_0's l1: 0.235503
[6200]	valid_0's l1: 0.234192
[6400]	valid_

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.662247
[400]	valid_0's l1: 0.561333
[600]	valid_0's l1: 0.486603
[800]	valid_0's l1: 0.430875
[1000]	valid_0's l1: 0.389607
[1200]	valid_0's l1: 0.357301
[1400]	valid_0's l1: 0.333113
[1600]	valid_0's l1: 0.314391
[1800]	valid_0's l1: 0.300379
[2000]	valid_0's l1: 0.289312
[2200]	valid_0's l1: 0.279824
[2400]	valid_0's l1: 0.271889
[2600]	valid_0's l1: 0.265098
[2800]	valid_0's l1: 0.259373
[3000]	valid_0's l1: 0.254271
[3200]	valid_0's l1: 0.250375
[3400]	valid_0's l1: 0.246536
[3600]	valid_0's l1: 0.244415
[3800]	valid_0's l1: 0.242948
[4000]	valid_0's l1: 0.241845
[4200]	valid_0's l1: 0.240736
[4400]	valid_0's l1: 0.23959
[4600]	valid_0's l1: 0.237196
[4800]	valid_0's l1: 0.23463
[5000]	valid_0's l1: 0.232092
[5200]	valid_0's l1: 0.229532
[5400]	valid_0's l1: 0.227641
[5600]	valid_0's l1: 0.22645
[5800]	valid_0's l1: 0.225973
[6000]	valid_0's l1: 0.225503
[6200]	valid_0's l1: 0.224962
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.640313
[400]	valid_0's l1: 0.539897
[600]	valid_0's l1: 0.468107
[800]	valid_0's l1: 0.412837
[1000]	valid_0's l1: 0.373263
[1200]	valid_0's l1: 0.342251
[1400]	valid_0's l1: 0.318694
[1600]	valid_0's l1: 0.300933
[1800]	valid_0's l1: 0.287068
[2000]	valid_0's l1: 0.275683
[2200]	valid_0's l1: 0.266745
[2400]	valid_0's l1: 0.258254
[2600]	valid_0's l1: 0.251017
[2800]	valid_0's l1: 0.245189
[3000]	valid_0's l1: 0.240396
[3200]	valid_0's l1: 0.236684
[3400]	valid_0's l1: 0.233018
[3600]	valid_0's l1: 0.229398
[3800]	valid_0's l1: 0.226477
[4000]	valid_0's l1: 0.223857
[4200]	valid_0's l1: 0.221732
[4400]	valid_0's l1: 0.21994
[4600]	valid_0's l1: 0.218288
[4800]	valid_0's l1: 0.216907
[5000]	valid_0's l1: 0.214637
[5200]	valid_0's l1: 0.212156
[5400]	valid_0's l1: 0.209926
[5600]	valid_0's l1: 0.207872
[5800]	valid_0's l1: 0.20678
[6000]	valid_0's l1: 0.205627
[6200]	valid_0's l1: 0.204659
[6400]	valid_0

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.675909
[400]	valid_0's l1: 0.580201
[600]	valid_0's l1: 0.508759
[800]	valid_0's l1: 0.453433
[1000]	valid_0's l1: 0.413886
[1200]	valid_0's l1: 0.380985
[1400]	valid_0's l1: 0.355235
[1600]	valid_0's l1: 0.334187
[1800]	valid_0's l1: 0.317434
[2000]	valid_0's l1: 0.303385
[2200]	valid_0's l1: 0.292332
[2400]	valid_0's l1: 0.282756
[2600]	valid_0's l1: 0.274537
[2800]	valid_0's l1: 0.26789
[3000]	valid_0's l1: 0.262611
[3200]	valid_0's l1: 0.257719
[3400]	valid_0's l1: 0.253271
[3600]	valid_0's l1: 0.249522
[3800]	valid_0's l1: 0.246265
[4000]	valid_0's l1: 0.243773
[4200]	valid_0's l1: 0.241541
[4400]	valid_0's l1: 0.238912
[4600]	valid_0's l1: 0.236397
[4800]	valid_0's l1: 0.23471
[5000]	valid_0's l1: 0.232833
[5200]	valid_0's l1: 0.23118
[5400]	valid_0's l1: 0.230344
[5600]	valid_0's l1: 0.229295
[5800]	valid_0's l1: 0.227855
[6000]	valid_0's l1: 0.226293
[6200]	valid_0's l1: 0.225031
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


LightGBM MAPE: 0.9990 (weight: 0.991)
Random Forest MAPE: 1.9522 (weight: 0.000)
Extra Trees MAPE: 1.4699 (weight: 0.009)
Gradient Boosting MAPE: 1.9927 (weight: 0.000)
Ridge MAPE: 1.9583 (weight: 0.000)
Elastic Net MAPE: 2.2114 (weight: 0.000)
Huber MAPE: 1.8533 (weight: 0.000)
Ensemble MAPE: 0.9952

Training for BlendProperty10...
Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.677219
[400]	valid_0's l1: 0.561473
[600]	valid_0's l1: 0.474324
[800]	valid_0's l1: 0.409344
[1000]	valid_0's l1: 0.36365
[1200]	valid_0's l1: 0.329616
[1400]	valid_0's l1: 0.302537
[1600]	valid_0's l1: 0.281484
[1800]	valid_0's l1: 0.264699
[2000]	valid_0's l1: 0.25113
[2200]	valid_0's l1: 0.239702
[2400]	valid_0's l1: 0.230561
[2600]	valid_0's l1: 0.223428
[2800]	valid_0's l1: 0.217141
[3000]	valid_0's l1: 0.211803
[3200]	valid_0's l1: 0.207595
[3400]	valid_0's l1: 0.203956
[3600]	valid_0's l1: 0.200799
[3800]	valid_0's l1: 0.197697
[4000]	valid_0's l1: 0.195031
[4200]	va

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.655121
[400]	valid_0's l1: 0.546537
[600]	valid_0's l1: 0.461492
[800]	valid_0's l1: 0.397903
[1000]	valid_0's l1: 0.352474
[1200]	valid_0's l1: 0.317112
[1400]	valid_0's l1: 0.288005
[1600]	valid_0's l1: 0.265649
[1800]	valid_0's l1: 0.248182
[2000]	valid_0's l1: 0.234294
[2200]	valid_0's l1: 0.222691
[2400]	valid_0's l1: 0.213556
[2600]	valid_0's l1: 0.206483
[2800]	valid_0's l1: 0.200604
[3000]	valid_0's l1: 0.195508
[3200]	valid_0's l1: 0.191253
[3400]	valid_0's l1: 0.18774
[3600]	valid_0's l1: 0.184678
[3800]	valid_0's l1: 0.181843
[4000]	valid_0's l1: 0.179647
[4200]	valid_0's l1: 0.177916
[4400]	valid_0's l1: 0.176242
[4600]	valid_0's l1: 0.174674
[4800]	valid_0's l1: 0.173332
[5000]	valid_0's l1: 0.17226
[5200]	valid_0's l1: 0.170104
[5400]	valid_0's l1: 0.16759
[5600]	valid_0's l1: 0.165735
[5800]	valid_0's l1: 0.164272
[6000]	valid_0's l1: 0.163036
[6200]	valid_0's l1: 0.162061
[6400]	valid_0'

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.62454
[400]	valid_0's l1: 0.509581
[600]	valid_0's l1: 0.4268
[800]	valid_0's l1: 0.367313
[1000]	valid_0's l1: 0.326378
[1200]	valid_0's l1: 0.294806
[1400]	valid_0's l1: 0.270286
[1600]	valid_0's l1: 0.251339
[1800]	valid_0's l1: 0.236209
[2000]	valid_0's l1: 0.223967
[2200]	valid_0's l1: 0.213231
[2400]	valid_0's l1: 0.204624
[2600]	valid_0's l1: 0.197645
[2800]	valid_0's l1: 0.191668
[3000]	valid_0's l1: 0.186671
[3200]	valid_0's l1: 0.182564
[3400]	valid_0's l1: 0.179035
[3600]	valid_0's l1: 0.176076
[3800]	valid_0's l1: 0.17351
[4000]	valid_0's l1: 0.17085
[4200]	valid_0's l1: 0.168622
[4400]	valid_0's l1: 0.166918
[4600]	valid_0's l1: 0.165488
[4800]	valid_0's l1: 0.164004
[5000]	valid_0's l1: 0.162669
[5200]	valid_0's l1: 0.161222
[5400]	valid_0's l1: 0.160074
[5600]	valid_0's l1: 0.158998
[5800]	valid_0's l1: 0.15801
[6000]	valid_0's l1: 0.15708
[6200]	valid_0's l1: 0.156286
[6400]	valid_0's l1

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.649794
[400]	valid_0's l1: 0.537043
[600]	valid_0's l1: 0.452215
[800]	valid_0's l1: 0.388429
[1000]	valid_0's l1: 0.340973
[1200]	valid_0's l1: 0.305124
[1400]	valid_0's l1: 0.277213
[1600]	valid_0's l1: 0.255683
[1800]	valid_0's l1: 0.238048
[2000]	valid_0's l1: 0.223482
[2200]	valid_0's l1: 0.211581
[2400]	valid_0's l1: 0.201559
[2600]	valid_0's l1: 0.193432
[2800]	valid_0's l1: 0.18705
[3000]	valid_0's l1: 0.181866
[3200]	valid_0's l1: 0.177519
[3400]	valid_0's l1: 0.173907
[3600]	valid_0's l1: 0.170605
[3800]	valid_0's l1: 0.167774
[4000]	valid_0's l1: 0.165794
[4200]	valid_0's l1: 0.163952
[4400]	valid_0's l1: 0.162378
[4600]	valid_0's l1: 0.16038
[4800]	valid_0's l1: 0.157629
[5000]	valid_0's l1: 0.155334
[5200]	valid_0's l1: 0.153407
[5400]	valid_0's l1: 0.151904
[5600]	valid_0's l1: 0.150661
[5800]	valid_0's l1: 0.149798
[6000]	valid_0's l1: 0.149391
[6200]	valid_0's l1: 0.148655
[6400]	valid_0

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Training until validation scores don't improve for 150 rounds
[200]	valid_0's l1: 0.679962
[400]	valid_0's l1: 0.5699
[600]	valid_0's l1: 0.485704
[800]	valid_0's l1: 0.421537
[1000]	valid_0's l1: 0.373856
[1200]	valid_0's l1: 0.338054
[1400]	valid_0's l1: 0.309879
[1600]	valid_0's l1: 0.286716
[1800]	valid_0's l1: 0.267424
[2000]	valid_0's l1: 0.251963
[2200]	valid_0's l1: 0.239712
[2400]	valid_0's l1: 0.229936
[2600]	valid_0's l1: 0.221885
[2800]	valid_0's l1: 0.214788
[3000]	valid_0's l1: 0.208992
[3200]	valid_0's l1: 0.204049
[3400]	valid_0's l1: 0.199841
[3600]	valid_0's l1: 0.196187
[3800]	valid_0's l1: 0.192944
[4000]	valid_0's l1: 0.190412
[4200]	valid_0's l1: 0.1878
[4400]	valid_0's l1: 0.185421
[4600]	valid_0's l1: 0.183098
[4800]	valid_0's l1: 0.180818
[5000]	valid_0's l1: 0.178856
[5200]	valid_0's l1: 0.177203
[5400]	valid_0's l1: 0.17609
[5600]	valid_0's l1: 0.174358
[5800]	valid_0's l1: 0.17233
[6000]	valid_0's l1: 0.171016
[6200]	valid_0's l1: 0.169774
[6400]	valid_0's l

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [None]:
# Prepare Data
# Ten regression targets (BlendProperty1..BlendProperty10); the feature matrix
# is restricted to the columns named in feat_cols.
TARGETS = [f'BlendProperty{i}' for i in range(1, 11)]
X_train = train[feat_cols]
y_train = train[TARGETS]
X_test = test[feat_cols]

# Handle NaN values and infinite values
print("Handling NaN and infinite values...")
# Treat +/-inf as missing so they are imputed together with genuine NaNs.
X_train = X_train.replace([np.inf, -np.inf], np.nan)
X_test = X_test.replace([np.inf, -np.inf], np.nan)
# Compute the per-column medians once, from the raw training rows only, and
# impute both splits with the same statistics (no test-set information used).
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Feature scaling for different models
# NOTE(review): both scalers are fitted on the full training matrix, i.e.
# before the K-fold split used later, so validation rows contribute to the
# scaling statistics.
scaler_robust = RobustScaler()
X_train_robust = scaler_robust.fit_transform(X_train)
X_test_robust = scaler_robust.transform(X_test)

scaler_standard = StandardScaler()
X_train_standard = scaler_standard.fit_transform(X_train)
X_test_standard = scaler_standard.transform(X_test)

# Advanced feature selection
print("Performing advanced feature selection...")

# Multiple feature selection methods: each selector keeps the features whose
# importance is at or above the median importance of its own model.
selector_lgb = SelectFromModel(
    LGBMRegressor(n_estimators=200, random_state=42, verbose=-1),
    prefit=False,
    threshold='median'
)

selector_rf = SelectFromModel(
    RandomForestRegressor(n_estimators=200, random_state=42),
    prefit=False,
    threshold='median'
)

# Fit selectors on first target
# NOTE(review): the resulting feature subset is reused for every target even
# though only the first target informs the selection.
first_target = y_train.iloc[:, 0]
selector_lgb.fit(X_train, first_target)
selector_rf.fit(X_train, first_target)

# Combine selected features: keep a column if either selector kept it.
lgb_selected = selector_lgb.get_support()
rf_selected = selector_rf.get_support()
combined_selected = lgb_selected | rf_selected

selected_features = [name for name, keep in zip(feat_cols, combined_selected) if keep]
X_train_selected = X_train.iloc[:, combined_selected]
X_test_selected = X_test.iloc[:, combined_selected]

print(f"Original features: {len(feat_cols)}")
print(f"Selected features: {len(selected_features)}")

# Hyperparameter optimization with Optuna
def optimize_lightgbm(trial, X, y):
    """Optuna objective for LightGBM.

    Samples one configuration from ``trial``, evaluates it with 3-fold
    cross-validation on ``(X, y)`` and returns the mean of scikit-learn's
    ``neg_mean_absolute_percentage_error`` scores (higher is better).
    """
    # Keyword order matters: it fixes the order in which the trial is sampled.
    search_space = dict(
        n_estimators=trial.suggest_int('n_estimators', 500, 3000),
        learning_rate=trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        num_leaves=trial.suggest_int('num_leaves', 15, 100),
        subsample=trial.suggest_float('subsample', 0.6, 1.0),
        colsample_bytree=trial.suggest_float('colsample_bytree', 0.6, 1.0),
        reg_alpha=trial.suggest_float('reg_alpha', 0.0, 1.0),
        reg_lambda=trial.suggest_float('reg_lambda', 0.0, 1.0),
        min_child_samples=trial.suggest_int('min_child_samples', 5, 50),
        # Fixed (not tuned) settings.
        random_state=42,
        verbose=-1,
    )

    regressor = LGBMRegressor(**search_space)
    fold_scores = cross_val_score(
        regressor, X, y,
        cv=3,
        scoring='neg_mean_absolute_percentage_error',
    )
    return fold_scores.mean()

def optimize_xgboost(trial, X, y):
    """Optuna objective for XGBoost.

    Samples one configuration from ``trial``, evaluates it with 3-fold
    cross-validation on ``(X, y)`` and returns the mean of scikit-learn's
    ``neg_mean_absolute_percentage_error`` scores (higher is better).
    """
    # Keyword order matters: it fixes the order in which the trial is sampled.
    search_space = dict(
        n_estimators=trial.suggest_int('n_estimators', 500, 2000),
        learning_rate=trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        max_depth=trial.suggest_int('max_depth', 3, 12),
        subsample=trial.suggest_float('subsample', 0.6, 1.0),
        colsample_bytree=trial.suggest_float('colsample_bytree', 0.6, 1.0),
        reg_alpha=trial.suggest_float('reg_alpha', 0.0, 1.0),
        reg_lambda=trial.suggest_float('reg_lambda', 0.0, 1.0),
        # Fixed (not tuned) settings.
        random_state=42,
        verbosity=0,
    )

    regressor = XGBRegressor(**search_space)
    fold_scores = cross_val_score(
        regressor, X, y,
        cv=3,
        scoring='neg_mean_absolute_percentage_error',
    )
    return fold_scores.mean()

def optimize_catboost(trial, X, y):
    """Optuna objective for CatBoost.

    Samples one configuration from ``trial``, evaluates it with 3-fold
    cross-validation on ``(X, y)`` and returns the mean of scikit-learn's
    ``neg_mean_absolute_percentage_error`` scores (higher is better).
    """
    # Keyword order matters: it fixes the order in which the trial is sampled.
    search_space = dict(
        iterations=trial.suggest_int('iterations', 500, 2000),
        learning_rate=trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        depth=trial.suggest_int('depth', 4, 10),
        l2_leaf_reg=trial.suggest_float('l2_leaf_reg', 1.0, 10.0),
        # Fixed (not tuned) settings.
        random_seed=42,
        verbose=False,
    )

    regressor = CatBoostRegressor(**search_space)
    fold_scores = cross_val_score(
        regressor, X, y,
        cv=3,
        scoring='neg_mean_absolute_percentage_error',
    )
    return fold_scores.mean()

def optimize_neural_network(trial, X, y):
    """Optuna objective for the Keras-based regressor.

    Samples an architecture/optimizer configuration from ``trial``, evaluates
    it with 3-fold cross-validation on ``(X, y)`` and returns the mean of
    scikit-learn's ``neg_mean_absolute_percentage_error`` scores (higher is
    better). The number of input units is taken from ``X.shape[1]``.
    """
    # Keyword order matters: it fixes the order in which the trial is sampled.
    search_space = dict(
        hidden_layers=trial.suggest_int('hidden_layers', 2, 5),
        neurons=trial.suggest_int('neurons', 64, 512),
        dropout=trial.suggest_float('dropout', 0.1, 0.5),
        learning_rate=trial.suggest_float('learning_rate', 0.0001, 0.01, log=True),
        # Fixed (not tuned) training length.
        epochs=100,
    )

    n_inputs = X.shape[1]
    regressor = KerasRegressor(input_dim=n_inputs, **search_space)
    fold_scores = cross_val_score(
        regressor, X, y,
        cv=3,
        scoring='neg_mean_absolute_percentage_error',
    )
    return fold_scores.mean()

# Optimize hyperparameters for key models
# Every study *maximizes* its objective because the objectives return
# scikit-learn's negated MAPE (values closer to zero are better).
print("Optimizing hyperparameters...")
best_params = {}

# Use first target for optimization to save time
# NOTE(review): the tuned settings are then reused for all other targets.
target_for_optimization = TARGETS[0]
y_opt = y_train[target_for_optimization]

# Optimize LightGBM
print("Optimizing LightGBM...")
study_lgb = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study_lgb.optimize(lambda trial: optimize_lightgbm(trial, X_train_selected, y_opt), n_trials=30)
best_params['lgb'] = study_lgb.best_params

# Optimize XGBoost
print("Optimizing XGBoost...")
study_xgb = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study_xgb.optimize(lambda trial: optimize_xgboost(trial, X_train_selected, y_opt), n_trials=30)
best_params['xgb'] = study_xgb.best_params

# Optimize CatBoost
print("Optimizing CatBoost...")
study_cat = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study_cat.optimize(lambda trial: optimize_catboost(trial, X_train_selected, y_opt), n_trials=30)
best_params['cat'] = study_cat.best_params

# Optimize Neural Network
# The network is tuned on the standardized full feature matrix rather than on
# the selected-feature subset used by the tree models.
print("Optimizing Neural Network...")
study_nn = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study_nn.optimize(lambda trial: optimize_neural_network(trial, X_train_standard, y_opt), n_trials=20)
best_params['nn'] = study_nn.best_params

print("Hyperparameter optimization completed!")
print("Best parameters:")
for model_name, params in best_params.items():
    print(f"{model_name}: {params}")

# Cross-Validation Setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
# One column of averaged test predictions per target.
final_preds = np.zeros((X_test.shape[0], len(TARGETS)))

print("\nTraining Enhanced Oil Property Prediction Ensemble...")
print(f"Total features: {len(feat_cols)}")
print(f"Selected features: {len(selected_features)}")
# Keep this list in sync with the models trained in the ensemble loop
# (13 models in total).
print("Models: LightGBM, XGBoost, CatBoost, Random Forest, Extra Trees, Gradient Boosting, "
      "SVR, Neural Network, Ridge, Elastic Net, Huber, Lasso, Bayesian Ridge")

In [None]:
# Number of CV folds; used to average per-fold test predictions and in the
# progress banner (instead of a hard-coded 5).
n_folds = kf.get_n_splits()
# Lower bound applied to MAPE before inverting it, so a perfect model cannot
# trigger a division by zero in the inverse-MAPE weighting.
MAPE_FLOOR = 1e-12

# Train every model with K-fold CV for each target, then blend the models with
# weights derived from their out-of-fold (OOF) MAPE.
for i, target in enumerate(TARGETS):
    print(f"\n{'='*60}")
    print(f"Training for {target} ({i+1}/{len(TARGETS)})")
    print(f"{'='*60}")

    y_target = y_train[target]

    # Out-of-fold predictions for each model
    models_oof = {}
    models_test_preds = {}

    model_names = ['lgb', 'xgb', 'cat', 'rf', 'et', 'gb', 'svr', 'nn', 'ridge', 'elastic', 'huber', 'lasso', 'bayesian']

    for model_name in model_names:
        models_oof[model_name] = np.zeros(X_train.shape[0])
        models_test_preds[model_name] = np.zeros(X_test.shape[0])

    for fold, (tr_idx, val_idx) in enumerate(kf.split(X_train)):
        print(f"\nFold {fold + 1}/{n_folds}")

        # Fold-specific slices, computed once and shared by all models.
        y_tr = y_target.iloc[tr_idx]
        y_val = y_target.iloc[val_idx]
        X_sel_tr = X_train_selected.iloc[tr_idx]
        X_sel_val = X_train_selected.iloc[val_idx]
        X_std_tr, X_std_val = X_train_standard[tr_idx], X_train_standard[val_idx]
        X_rob_tr, X_rob_val = X_train_robust[tr_idx], X_train_robust[val_idx]

        # NOTE(review): the boosted models early-stop on the same validation
        # fold that is later scored, so their OOF MAPE is optimistically biased.

        # Model 1: Optimized LightGBM
        # best_params holds only the values Optuna sampled; the fixed settings
        # used during tuning are restored here as defaults.
        model_lgb = LGBMRegressor(**{'random_state': 42, 'verbose': -1, **best_params['lgb']})
        model_lgb.fit(
            X_sel_tr, y_tr,
            eval_set=[(X_sel_val, y_val)],
            callbacks=[early_stopping(stopping_rounds=100), log_evaluation(500)]
        )
        models_oof['lgb'][val_idx] = model_lgb.predict(X_sel_val)
        models_test_preds['lgb'] += model_lgb.predict(X_test_selected) / n_folds

        # Model 2: Optimized XGBoost
        # early_stopping_rounds is an estimator parameter; recent XGBoost
        # releases no longer accept it as a fit() argument.
        model_xgb = XGBRegressor(**{
            'random_state': 42,
            'verbosity': 0,
            'early_stopping_rounds': 100,
            **best_params['xgb'],
        })
        model_xgb.fit(
            X_sel_tr, y_tr,
            eval_set=[(X_sel_val, y_val)],
            verbose=False
        )
        models_oof['xgb'][val_idx] = model_xgb.predict(X_sel_val)
        models_test_preds['xgb'] += model_xgb.predict(X_test_selected) / n_folds

        # Model 3: Optimized CatBoost
        model_cat = CatBoostRegressor(**{'random_seed': 42, **best_params['cat']})
        model_cat.fit(
            X_sel_tr, y_tr,
            eval_set=[(X_sel_val, y_val)],
            early_stopping_rounds=100, verbose=False
        )
        models_oof['cat'][val_idx] = model_cat.predict(X_sel_val)
        models_test_preds['cat'] += model_cat.predict(X_test_selected) / n_folds

        # Model 4: Enhanced Random Forest
        model_rf = RandomForestRegressor(
            n_estimators=1000, max_depth=25, min_samples_split=3,
            min_samples_leaf=1, max_features='sqrt', random_state=fold, n_jobs=-1
        )
        model_rf.fit(X_sel_tr, y_tr)
        models_oof['rf'][val_idx] = model_rf.predict(X_sel_val)
        models_test_preds['rf'] += model_rf.predict(X_test_selected) / n_folds

        # Model 5: Enhanced Extra Trees
        model_et = ExtraTreesRegressor(
            n_estimators=800, max_depth=22, min_samples_split=2,
            min_samples_leaf=1, max_features='sqrt', random_state=fold, n_jobs=-1
        )
        model_et.fit(X_sel_tr, y_tr)
        models_oof['et'][val_idx] = model_et.predict(X_sel_val)
        models_test_preds['et'] += model_et.predict(X_test_selected) / n_folds

        # Model 6: Enhanced Gradient Boosting
        model_gb = GradientBoostingRegressor(
            n_estimators=800, learning_rate=0.005, max_depth=7,
            min_samples_split=4, min_samples_leaf=2, max_features='sqrt',
            random_state=fold
        )
        model_gb.fit(X_sel_tr, y_tr)
        models_oof['gb'][val_idx] = model_gb.predict(X_sel_val)
        models_test_preds['gb'] += model_gb.predict(X_test_selected) / n_folds

        # Model 7: Support Vector Regression (standardized, all features)
        model_svr = SVR(
            kernel='rbf', C=10.0, gamma='scale', epsilon=0.01
        )
        model_svr.fit(X_std_tr, y_tr)
        models_oof['svr'][val_idx] = model_svr.predict(X_std_val)
        models_test_preds['svr'] += model_svr.predict(X_test_standard) / n_folds

        # Model 8: Optimized Neural Network
        # epochs was fixed at 100 during tuning and is therefore absent from
        # best_params['nn']; restore it so the tuned settings are used as tuned.
        model_nn = KerasRegressor(
            input_dim=X_train_standard.shape[1],
            **{'epochs': 100, **best_params['nn']}
        )
        model_nn.fit(X_std_tr, y_tr)
        models_oof['nn'][val_idx] = model_nn.predict(X_std_val)
        models_test_preds['nn'] += model_nn.predict(X_test_standard) / n_folds

        # Model 9: Enhanced Ridge (robust-scaled, all features)
        model_ridge = Ridge(alpha=0.1, random_state=fold)
        model_ridge.fit(X_rob_tr, y_tr)
        models_oof['ridge'][val_idx] = model_ridge.predict(X_rob_val)
        models_test_preds['ridge'] += model_ridge.predict(X_test_robust) / n_folds

        # Model 10: Enhanced Elastic Net
        model_elastic = ElasticNet(alpha=0.005, l1_ratio=0.2, random_state=fold, max_iter=3000)
        model_elastic.fit(X_std_tr, y_tr)
        models_oof['elastic'][val_idx] = model_elastic.predict(X_std_val)
        models_test_preds['elastic'] += model_elastic.predict(X_test_standard) / n_folds

        # Model 11: Enhanced Huber
        model_huber = HuberRegressor(alpha=0.005, epsilon=1.2)
        model_huber.fit(X_rob_tr, y_tr)
        models_oof['huber'][val_idx] = model_huber.predict(X_rob_val)
        models_test_preds['huber'] += model_huber.predict(X_test_robust) / n_folds

        # Model 12: Lasso Regression
        model_lasso = Lasso(alpha=0.01, random_state=fold, max_iter=3000)
        model_lasso.fit(X_std_tr, y_tr)
        models_oof['lasso'][val_idx] = model_lasso.predict(X_std_val)
        models_test_preds['lasso'] += model_lasso.predict(X_test_standard) / n_folds

        # Model 13: Bayesian Ridge
        model_bayesian = BayesianRidge(alpha_1=1e-6, alpha_2=1e-6, lambda_1=1e-6, lambda_2=1e-6)
        model_bayesian.fit(X_std_tr, y_tr)
        models_oof['bayesian'][val_idx] = model_bayesian.predict(X_std_val)
        models_test_preds['bayesian'] += model_bayesian.predict(X_test_standard) / n_folds

    # Calculate individual model MAPE scores
    mape_scores = {
        model_name: mean_absolute_percentage_error(y_target, models_oof[model_name])
        for model_name in model_names
    }

    # Advanced ensemble with multiple weighting strategies

    # Strategy 1: Exponential weighting based on validation performance
    # Subtracting the best score before exponentiating leaves the normalized
    # weights unchanged but prevents every term from underflowing to 0 (and
    # the normalization from becoming 0/0) when all MAPE values are large.
    best_score = min(mape_scores.values())
    exp_weights = {name: np.exp(-(score - best_score) * 5) for name, score in mape_scores.items()}
    total_exp_weight = sum(exp_weights.values())
    exp_weights = {name: w / total_exp_weight for name, w in exp_weights.items()}

    # Strategy 2: Inverse MAPE weighting (floored to avoid division by zero)
    inv_weights = {name: 1 / max(score, MAPE_FLOOR) for name, score in mape_scores.items()}
    total_inv_weight = sum(inv_weights.values())
    inv_weights = {name: w / total_inv_weight for name, w in inv_weights.items()}

    # Strategy 3: Rank-based weighting (best model gets the largest share;
    # the shares sum to 1)
    sorted_models = sorted(mape_scores.items(), key=lambda x: x[1])
    rank_total = sum(range(1, len(model_names) + 1))
    rank_weights = {}
    for rank, (model_name, _) in enumerate(sorted_models):
        rank_weights[model_name] = (len(model_names) - rank) / rank_total

    # Combine strategies (0.4 + 0.4 + 0.2 = 1, so the blend is still a
    # convex combination of the models)
    final_weights = {}
    for model_name in model_names:
        final_weights[model_name] = (
            0.4 * exp_weights[model_name] +
            0.4 * inv_weights[model_name] +
            0.2 * rank_weights[model_name]
        )

    # Final ensemble prediction
    final_preds[:, i] = sum(
        final_weights[model_name] * models_test_preds[model_name]
        for model_name in model_names
    )

    # Ensemble validation score
    # NOTE(review): the weights are derived from the same OOF predictions that
    # are scored here, so this estimate is slightly optimistic.
    ensemble_oof = sum(
        final_weights[model_name] * models_oof[model_name]
        for model_name in model_names
    )
    ensemble_mape = mean_absolute_percentage_error(y_target, ensemble_oof)

    # Print detailed results
    print(f"\nIndividual Model Performance for {target}:")
    print("-" * 80)
    for model_name in sorted(mape_scores.keys(), key=lambda x: mape_scores[x]):
        print(f"{model_name.upper():>12}: MAPE={mape_scores[model_name]:.6f}, Weight={final_weights[model_name]:.4f}")

    print("-" * 80)
    print(f"{'ENSEMBLE':>12}: MAPE={ensemble_mape:.6f}")
    print(f"Best Single Model: {min(mape_scores.keys(), key=lambda x: mape_scores[x]).upper()}")
    print(f"Improvement: {min(mape_scores.values()) - ensemble_mape:.6f}")

# Assemble the submission table: one column per target, with the row ID first.
submission = pd.DataFrame(final_preds, columns=TARGETS)
# Use the test file's 'ID' column when present, otherwise sequential 1-based IDs.
row_ids = test.get('ID', np.arange(1, len(test) + 1))
submission.insert(0, 'ID', row_ids)

# Post-processing: clip every target column to the 1st-99th percentile range of
# its own predicted values, which trims the most extreme predictions.
for target in TARGETS:
    column = submission[target]
    lower_bound = column.quantile(0.01)
    upper_bound = column.quantile(0.99)
    submission[target] = column.clip(lower=lower_bound, upper=upper_bound)

submission.to_csv('submission_enhanced_oil_properties.csv', index=False)

# Console recap of the run. Purely informational: nothing below changes the
# predictions or the file that was written.
print(f"\n{'='*80}")
print("ENHANCED OIL PROPERTY PREDICTION ENSEMBLE SUMMARY")
print(f"{'='*80}")
print(f"Total features engineered: {len(feat_cols)}")
print(f"Selected features used: {len(selected_features)}")
print(f"Models in ensemble: {len(model_names)}")
print("Models: LightGBM, XGBoost, CatBoost, Random Forest, Extra Trees,")
print("        Gradient Boosting, SVR, Neural Network, Ridge, Elastic Net,")
print("        Huber, Lasso, Bayesian Ridge")
# NOTE(review): the trial count below is hard-coded text; confirm it matches the
# Optuna configuration used earlier in the file.
print("Hyperparameter optimization: Optuna with 30+ trials per model")
print("Feature engineering: Oil-specific domain knowledge + polynomial features")
print("Ensemble strategy: Multi-weighted combination (exponential + inverse + rank)")
# The splitter is a plain shuffled KFold (no stratification), so report it as
# such and take the fold count from the splitter instead of hard-coding it.
print(f"Cross-validation: {kf.get_n_splits()}-fold KFold (shuffled)")
print("Post-processing: Outlier clipping")
print("Submission file: submission_enhanced_oil_properties.csv")
print(f"{'='*80}")

# Feature importance analysis
print("\nFeature Importance Analysis:")
print("-" * 40)

# Refit a LightGBM model with the tuned 'lgb' parameters on the first target
# column only, then rank the selected features by that model's importances.
temp_lgb = LGBMRegressor(**best_params['lgb'])
temp_lgb.fit(X_train_selected, y_train.iloc[:, 0])
feature_importance = pd.DataFrame({
    'feature': selected_features,
    'importance': temp_lgb.feature_importances_
})
feature_importance = feature_importance.sort_values('importance', ascending=False)

print("Top 20 Most Important Features:")
top_rows = feature_importance.head(20)
for rank, (name, score) in enumerate(zip(top_rows['feature'], top_rows['importance']), start=1):
    print(f"{rank:2d}. {name:30s} {score:8.4f}")

print("\nModel training completed successfully!")
print("Enhanced submission ready for oil property prediction competition.")

In [None]:
# Model Performance Analysis and Validation
print("Performing cross-validation analysis...")

# Quick validation on a subset to estimate performance.
# The subset is sized from the frame that is actually indexed below, and drawn
# from a seeded generator so repeated runs score the same rows (every model in
# this check is seeded as well).
sample_size = min(1000, len(X_train_selected))
sample_idx = np.random.default_rng(42).choice(len(X_train_selected), sample_size, replace=False)

X_sample = X_train_selected.iloc[sample_idx]
y_sample = y_train.iloc[sample_idx, 0]  # Use first target for quick validation

# Test a few models quickly
quick_models = {
    'LightGBM': LGBMRegressor(n_estimators=100, random_state=42, verbose=-1),
    'XGBoost': XGBRegressor(n_estimators=100, random_state=42, verbosity=0),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Neural Network': MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=200, random_state=42)
}

print(f"\nQuick validation on {sample_size} samples:")
print("-" * 50)

for name, model in quick_models.items():
    # The scorer returns negated MAPE (higher is better), so flip the sign of
    # the mean for display; the spread is shown as two standard deviations.
    scores = cross_val_score(model, X_sample, y_sample, cv=3, scoring='neg_mean_absolute_percentage_error')
    print(f"{name:15s}: CV MAPE = {-scores.mean():.4f} (+/- {scores.std() * 2:.4f})")

# Size summary of the data used in this run
print("\nDataset info:")
for label, formatted_count in (
    ("Training samples", f"{len(X_train):,}"),
    ("Test samples", f"{len(X_test):,}"),
    ("Total features", f"{len(feat_cols):,}"),
    ("Targets to predict", f"{len(TARGETS)}"),
):
    print(f"{label}: {formatted_count}")

# Memory usage estimate: total bytes across columns, converted to MB (1024**2 bytes)
total_bytes = X_train.memory_usage(deep=True).sum()
memory_usage = total_bytes / 1024**2
print(f"Training data memory usage: {memory_usage:.1f} MB")

20.033333333333335

In [None]:
# Advanced Ensemble Analysis and Meta-Learning
# The statements that follow define analysis helpers and print status messages.
print("Setting up advanced ensemble optimization...")

# Meta-features for stacking (if you want to implement stacking later)
def create_meta_features(X, models, cv_folds=3, y=None):
    """Build out-of-fold prediction columns, one per model, for stacking.

    Each model is fitted on the training part of every shuffled K-fold split
    and predicts the held-out part, so every row's meta-feature comes from a
    fit that did not see that row.

    Args:
        X: Feature table; rows are selected positionally through ``.iloc``.
        models: Mapping of display name -> estimator exposing ``fit`` and
            ``predict``. The estimators are refitted in place.
        cv_folds: Number of K-fold splits.
        y: Optional target values, one per row of ``X``. When omitted, the
            first column of the module-level ``y_train`` is used, pairing row
            ``k`` of ``X`` with row ``k`` of ``y_train``; pass ``y`` explicitly
            when ``X`` is a subset or a different target is wanted.

    Returns:
        ``numpy.ndarray`` of shape ``(X.shape[0], len(models))`` whose columns
        follow the iteration order of ``models``.

    Raises:
        ValueError: If an explicit ``y`` does not have one value per row of ``X``.
    """
    if y is None:
        target = y_train.iloc[:, 0]  # Use first target
    else:
        target = pd.Series(y)
        if len(target) != X.shape[0]:
            raise ValueError(
                f"y has {len(target)} values but X has {X.shape[0]} rows"
            )

    meta_features = np.zeros((X.shape[0], len(models)))

    kf_meta = KFold(n_splits=cv_folds, shuffle=True, random_state=42)
    # With a fixed integer seed the splits depend only on the row count, so
    # compute them once and reuse the same folds for every model.
    folds = list(kf_meta.split(X))

    for i, (name, model) in enumerate(models.items()):
        for train_idx, val_idx in folds:
            model.fit(X.iloc[train_idx], target.iloc[train_idx])
            meta_features[val_idx, i] = model.predict(X.iloc[val_idx])

        print(f"Generated meta-features for {name}")

    return meta_features

# Oil property-specific validation
def oil_property_validation(predictions, targets):
    """Summarise the predicted distribution of every target column.

    Args:
        predictions: 2-D array indexed as ``predictions[:, i]``; column ``i``
            holds the predictions for ``targets[i]``.
        targets: Sequence of target names, in column order.

    Returns:
        Dict mapping each target name to a dict with keys ``'mean'``,
        ``'std'``, ``'min'``, ``'max'`` and ``'outliers_count'`` (number of
        values more than three standard deviations away from the mean).
    """

    def _describe(values):
        # Centre and spread are reused by the 3-sigma outlier count below.
        centre = np.mean(values)
        spread = np.std(values)
        far_from_centre = np.abs(values - centre) > 3 * spread
        return {
            'mean': centre,
            'std': spread,
            'min': np.min(values),
            'max': np.max(values),
            'outliers_count': np.sum(far_from_centre),
        }

    # No physical range checks are applied here; only descriptive statistics.
    return {
        target_name: _describe(predictions[:, column])
        for column, target_name in enumerate(targets)
    }

# Model diversity analysis
def analyze_model_diversity(predictions_dict):
    """Report how strongly the models' predictions correlate with each other.

    Prints the mean, minimum and maximum pairwise Pearson correlation. With a
    single model there are no pairs, so a short notice is printed instead.

    Args:
        predictions_dict: Mapping of model name -> 1-D array of predictions;
            all arrays must have the same length.

    Returns:
        Square 2-D ``numpy.ndarray`` of correlations, with rows and columns in
        the iteration order of ``predictions_dict`` (shape ``(1, 1)`` for a
        single model).

    Raises:
        ValueError: If ``predictions_dict`` is empty.
    """
    if not predictions_dict:
        raise ValueError("predictions_dict must contain at least one model")

    pred_matrix = np.column_stack(list(predictions_dict.values()))

    # Correlation between models. np.corrcoef collapses a single variable to a
    # 0-d result, so force the matrix back to 2-D before indexing it.
    correlation_matrix = np.atleast_2d(np.corrcoef(pred_matrix.T))

    print("Model Diversity Analysis:")
    print("-" * 40)

    # Strict upper triangle = each unordered model pair exactly once.
    pairwise = correlation_matrix[np.triu_indices_from(correlation_matrix, k=1)]
    if pairwise.size == 0:
        print("Fewer than two models supplied; pairwise correlations are undefined.")
        return correlation_matrix

    print(f"Average correlation between models: {np.mean(pairwise):.4f}")
    print(f"Min correlation: {np.min(pairwise):.4f}")
    print(f"Max correlation: {np.max(pairwise):.4f}")

    return correlation_matrix

# Feature interaction analysis
def analyze_feature_interactions(X, y, top_features=10):
    """Rank features by random-forest importance and print the leaders.

    Despite the name, no interaction terms are computed: a single
    ``RandomForestRegressor`` is fitted and its per-feature importances are
    reported.

    Args:
        X: Feature table exposing ``.columns``; the column labels are sliced
            to 50 characters for display, so they must be strings.
        y: Target values aligned with the rows of ``X``.
        top_features: Number of top-ranked features to print.

    Returns:
        ``pandas.DataFrame`` with columns ``'feature'`` and ``'importance'``,
        sorted by importance in descending order (all features, not only the
        printed ones).
    """
    # Use a simple model to get feature importance
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(X, y)

    ranking = pd.DataFrame({
        'feature': X.columns,
        'importance': forest.feature_importances_
    })
    feature_importance = ranking.sort_values('importance', ascending=False)

    print(f"\nTop {top_features} Features for Oil Property Prediction:")
    print("-" * 60)

    leaders = feature_importance.head(top_features)
    for position, (name, score) in enumerate(zip(leaders['feature'], leaders['importance']), start=1):
        print(f"{position:2d}. {name[:50]:50s} {score:8.6f}")

    return feature_importance

# Residual analysis
def analyze_residuals(y_true, y_pred, target_name):
    """Print summary statistics of the residuals ``y_true - y_pred``.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, compatible with ``y_true`` for subtraction.
        target_name: Label used in the printed header.

    Returns:
        The residuals, i.e. the result of ``y_true - y_pred``.
    """
    residuals = y_true - y_pred

    # Each entry pairs the exact printed label with the statistic to evaluate;
    # statistics are computed one at a time, right before their line is printed.
    report_rows = (
        ("Mean residual: ", np.mean),
        ("Std residual:  ", np.std),
        ("Max residual:  ", lambda values: np.max(np.abs(values))),
        ("Skewness:      ", skew),
        ("Kurtosis:      ", kurtosis),
    )

    print(f"\nResidual Analysis for {target_name}:")
    print("-" * 40)
    for label, statistic in report_rows:
        print(f"{label}{statistic(residuals):8.6f}")

    return residuals

# Oil blending physics validation
def validate_blending_physics(component_fractions, predicted_properties):
    """Check how closely each row of component fractions adds up to one.

    Args:
        component_fractions: 2-D table (one row per blend, one column per
            component) supporting ``.sum(axis=1)``.
        predicted_properties: Accepted for interface compatibility; the
            current checks do not use it.

    Returns:
        Tuple ``(fraction_violations, conservation_score)``: the number of rows
        whose fraction sum differs from 1.0 by more than 0.01, and the mean
        absolute difference from 1.0 over all rows.
    """
    fraction_sums = component_fractions.sum(axis=1)

    # Both metrics are derived from the same per-row distance to unity.
    distance_from_unity = np.abs(fraction_sums - 1.0)
    fraction_violations = np.sum(distance_from_unity > 0.01)
    conservation_score = np.mean(distance_from_unity)

    print("Blending Physics Validation:")
    print("-" * 30)
    print(f"Fraction sum violations: {fraction_violations} out of {len(fraction_sums)}")
    print(f"Average deviation from unity: {conservation_score:.6f}")

    return fraction_violations, conservation_score

# Usage hints for the analysis helpers defined above
print("Advanced analysis functions ready!")
print("\nTo use these functions after training:")
print("1. oil_property_validation(final_preds, TARGETS)")
print("2. analyze_feature_interactions(X_train_selected, y_train.iloc[:, 0])")
# final_preds holds the test-set predictions, so the hint pairs it with the
# component fractions of the test rows rather than the training rows.
print("3. validate_blending_physics(test[[f'Component{i}_fraction' for i in range(1, 6)]], final_preds)")

# Optuna study continuation (for further optimization)
def continue_optimization(study, objective_func, n_additional_trials=50):
    """Run extra optimization trials on an existing study.

    Args:
        study: Object exposing ``optimize(func, n_trials=...)``, ``best_value``
            and ``best_params`` (for example an Optuna study).
        objective_func: Objective passed straight through to ``study.optimize``.
        n_additional_trials: Number of further trials to run.

    Returns:
        ``study.best_params`` as reported after the additional trials.
    """
    extra_trials = n_additional_trials
    print(f"Continuing optimization with {extra_trials} additional trials...")

    study.optimize(objective_func, n_trials=extra_trials)

    best_score = study.best_value
    print(f"Best score after additional trials: {best_score:.6f}")
    return study.best_params

# Closing status messages
for closing_line in (
    "\nReady for enhanced oil property prediction!",
    "All advanced features, models, and analysis tools are prepared.",
):
    print(closing_line)