In [17]:
pip install lightgbm matplotlib seaborn


Collecting lightgbm
  Using cached lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Collecting matplotlib
  Using cached matplotlib-3.10.3-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.3.2-cp310-cp310-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Using cached fonttools-4.58.5-cp310-cp310-win_amd64.whl.metadata (109 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.8-cp310-cp310-win_amd64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib)
  Using cached pillow-11.3.0-cp310-cp310-win_amd64.whl.metadata (9.2 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)
Using cached lightgbm-4.6.0-py3

In [1]:
pip install pandas numpy xgboost scikit-learn

Collecting pandas
  Using cached pandas-2.3.0-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting numpy
  Using cached numpy-2.2.6-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting xgboost
  Using cached xgboost-3.0.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.7.0-cp310-cp310-win_amd64.whl.metadata (14 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy (from xgboost)
  Using cached scipy-1.15.3-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached pandas-2.3.0-cp310-cp310-win_amd64.whl (11.1 MB)
Using cached numpy-2.2.6-cp31

In [14]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split, RandomizedSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import VotingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import warnings

# Suppress warnings for cleaner output.
# NOTE: with no further arguments this hides every warning category from here on.
warnings.filterwarnings('ignore')

print("--- 1. Data Loading ---")
# Load the datasets
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
sample_submission_df = pd.read_csv('sample_solution.csv')

print("Train data shape:", train_df.shape)
print("Test data shape:", test_df.shape)
print("Train columns:", train_df.columns.tolist())
print("Test columns:", test_df.columns.tolist())
print("\n")

print("--- 2. Feature Engineering ---")

# Fraction columns that receive the degree-2 polynomial expansion
# (Component1 .. Component4). Any other column in the data is kept as a raw
# feature only; widen the range to expand more components.
expected_components = [f'Component{i}_fraction' for i in range(1, 5)]

# Defensive check: both frames must provide every column that is expanded,
# otherwise the transform below fails with a far less helpful KeyError.
for frame_name, frame in (("training", train_df), ("test", test_df)):
    missing = [col for col in expected_components if col not in frame.columns]
    if missing:
        raise ValueError(f"Missing expected component columns in {frame_name} data: {missing}")

# Create Polynomial Features
poly = PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)

# Fit on the training data; reuse the source frame's index so the join below
# lines rows up by label instead of relying on both frames having a default index.
poly_features_train = poly.fit_transform(train_df[expected_components])
poly_feature_names = poly.get_feature_names_out(expected_components)
poly_features_df_train = pd.DataFrame(
    poly_features_train, columns=poly_feature_names, index=train_df.index
)

# Transform the test data with the already-fitted expansion
poly_features_test = poly.transform(test_df[expected_components])
poly_features_df_test = pd.DataFrame(
    poly_features_test, columns=poly_feature_names, index=test_df.index
)

# Combine polynomial features with the remaining columns
X = train_df.drop(columns=expected_components).join(poly_features_df_train)
X_test = test_df.drop(columns=expected_components, errors='ignore').join(poly_features_df_test)

# Select the target column to predict
TARGET = 'BlendProperty1'
y = train_df[TARGET]

# Drop non-feature columns (identifier and all target columns) from the training matrix
X = X.drop(columns=['BlendID'] + [f'BlendProperty{i}' for i in range(1, 11)], errors='ignore')

# Columns that exist only in the test frame (kept for inspection; they are
# removed by the alignment below).
extra_cols = set(X_test.columns) - set(X.columns)

# Give the test matrix exactly the training columns, in the training order,
# and fail loudly if the test data cannot provide one of them.
absent_in_test = [col for col in X.columns if col not in X_test.columns]
if absent_in_test:
    raise ValueError(f"Test data is missing feature columns used for training: {absent_in_test}")
X_test = X_test[X.columns]

print("Feature engineering complete.")
print("New training data shape:", X.shape)
print("\n")


print("--- 3. Machine Learning Modeling with Cross-Validation (XGBoost) ---")

# Shuffled 5-fold split with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Out-of-fold predictions for the training rows and the fold-averaged
# predictions for the test rows.
oof_predictions = np.zeros(X.shape[0])
test_predictions = np.zeros(X_test.shape[0])

# One set of hyperparameters shared by every fold's model.
xgb_params = dict(
    objective='reg:squarederror',
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    early_stopping_rounds=50,
    random_state=42,
    n_jobs=-1,
)

for fold, (train_index, val_index) in enumerate(kf.split(X, y)):
    print(f"--- Fold {fold+1} ---")
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_val, y_val = X.iloc[val_index], y.iloc[val_index]

    # A fresh model per fold; the validation fold is the early-stopping
    # evaluation set and is also the fold that gets scored.
    model = xgb.XGBRegressor(**xgb_params)
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

    val_preds = model.predict(X_val)
    oof_predictions[val_index] = val_preds

    # Each fold contributes an equal share to the test prediction.
    test_predictions += model.predict(X_test) / kf.n_splits

# RMSE over all out-of-fold predictions.
cv_rmse = np.sqrt(mean_squared_error(y, oof_predictions))
print(f"\nOverall Cross-Validation RMSE for {TARGET}: {cv_rmse:.5f}")
print("\n")


print("--- 4. Hyperparameter Tuning (RandomizedSearchCV) ---")

# Hold out 20% of the rows; the search below is fitted on the other 80% only.
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate values the randomized search samples from.
param_grid = dict(
    n_estimators=[500, 1000, 1500],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[3, 5, 7],
    subsample=[0.7, 0.8, 0.9],
    colsample_bytree=[0.7, 0.8, 0.9],
)

base_estimator = xgb.XGBRegressor(objective='reg:squarederror', random_state=42, n_jobs=-1)

# 10 sampled configurations, each scored by 3-fold CV on (negated) RMSE.
search_settings = dict(
    n_iter=10,
    scoring='neg_root_mean_squared_error',
    cv=3,
    verbose=1,
    random_state=42,
)
random_search = RandomizedSearchCV(
    estimator=base_estimator,
    param_distributions=param_grid,
    **search_settings,
)

# No early stopping here: the estimator is built without early_stopping_rounds
# and the search is fitted without an eval_set.
random_search.fit(X_train_split, y_train_split)

print("\nBest Hyperparameters found:")
print(random_search.best_params_)
print(f"Best CV score (Negative RMSE): {random_search.best_score_:.5f}")

print("--- 5. Ensemble Modeling (Voting Regressor) ---")

# XGBoost member: the best sampled hyperparameters plus the fixed settings.
tuned_xgb_params = {
    **random_search.best_params_,
    'objective': 'reg:squarederror',
    'random_state': 42,
    'n_jobs': -1,
}

model1 = RandomForestRegressor(random_state=42, n_jobs=-1)
model2 = xgb.XGBRegressor(**tuned_xgb_params)
model3 = LinearRegression(n_jobs=-1)

# The voting regressor averages the predictions of its members.
base_learners = [('rf', model1), ('xgb', model2), ('lr', model3)]
ensemble_model = VotingRegressor(estimators=base_learners, n_jobs=-1)

# Fit on the 80% split and score on the 20% hold-out.
# NOTE: this ensemble is only evaluated here; the submission written below is
# built from `test_predictions` of the cross-validated XGBoost models.
ensemble_model.fit(X_train_split, y_train_split)
ensemble_preds = ensemble_model.predict(X_val_split)
ensemble_rmse = np.sqrt(mean_squared_error(y_val_split, ensemble_preds))
print(f"Ensemble Model Validation RMSE: {ensemble_rmse:.5f}")
print("\n")


print("--- 6. Generating Submission File ---")

blend_columns = [f'BlendProperty{i}' for i in range(1, 11)]

# Identifier first, then the fold-averaged prediction for BlendProperty1.
submission_df = pd.DataFrame({'ID': test_df['ID']})
submission_df['BlendProperty1'] = test_predictions

# BlendProperty2..BlendProperty10 are not modelled: every test row receives the
# training-set mean of the respective column.
constant_fill = {col: train_df[col].mean() for col in blend_columns[1:]}
submission_df = submission_df.assign(**constant_fill)

# Enforce the column order ID, BlendProperty1, ..., BlendProperty10.
expected_columns = ['ID'] + blend_columns
submission_df = submission_df[expected_columns]

# Write without the index column.
submission_df.to_csv('submission.csv', index=False)
print("Submission file 'submission.csv' created successfully.")


--- 1. Data Loading ---
Train data shape: (2000, 65)
Test data shape: (500, 56)
Train columns: ['Component1_fraction', 'Component2_fraction', 'Component3_fraction', 'Component4_fraction', 'Component5_fraction', 'Component1_Property1', 'Component2_Property1', 'Component3_Property1', 'Component4_Property1', 'Component5_Property1', 'Component1_Property2', 'Component2_Property2', 'Component3_Property2', 'Component4_Property2', 'Component5_Property2', 'Component1_Property3', 'Component2_Property3', 'Component3_Property3', 'Component4_Property3', 'Component5_Property3', 'Component1_Property4', 'Component2_Property4', 'Component3_Property4', 'Component4_Property4', 'Component5_Property4', 'Component1_Property5', 'Component2_Property5', 'Component3_Property5', 'Component4_Property5', 'Component5_Property5', 'Component1_Property6', 'Component2_Property6', 'Component3_Property6', 'Component4_Property6', 'Component5_Property6', 'Component1_Property7', 'Component2_Property7', 'Component3_Property7

Approach 2: extended feature engineering, multi-model cross-validation, and a stacked ensemble

In [21]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split, RandomizedSearchCV
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, RobustScaler
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.feature_selection import SelectKBest, f_regression
import lightgbm as lgb
import warnings
import matplotlib.pyplot as plt
import seaborn as sns

# Suppress warnings for cleaner output.
# NOTE: called without category/module arguments, so every warning raised after
# this point is hidden, not only the noisy ones.
warnings.filterwarnings('ignore')

# Custom MAPE function that handles zero values
def safe_mape(y_true, y_pred):
    """Mean absolute percentage error, in percent, ignoring zero-valued targets.

    Both inputs are converted to flat float arrays and compared position by
    position, so lists, NumPy arrays, column vectors and pandas Series (whatever
    their index labels) are all accepted.

    Parameters
    ----------
    y_true : array-like
        Observed values. Entries equal to 0 are excluded because the relative
        error is undefined for them.
    y_pred : array-like
        Predicted values; must contain as many elements as ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero targets,
        or 0 when no target is non-zero. NaN entries are not filtered and
        therefore yield NaN.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {actual.size} and {predicted.size}"
        )

    nonzero = actual != 0
    if not nonzero.any():
        return 0.0
    relative_error = np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
    return np.mean(relative_error) * 100

print("--- 1. Data Loading and Exploration ---")
# Read the three CSV files in the same order as before: train, test, sample solution.
train_df, test_df, sample_submission_df = (
    pd.read_csv(path) for path in ('train.csv', 'test.csv', 'sample_solution.csv')
)

for label, value in (
    ("Train data shape:", train_df.shape),
    ("Test data shape:", test_df.shape),
    ("Train columns:", train_df.columns.tolist()),
    ("Test columns:", test_df.columns.tolist()),
):
    print(label, value)

# Data quality: total count of missing cells and of fully duplicated rows.
print("\n--- Data Quality Analysis ---")
for label, value in (
    ("Missing values in train:", train_df.isnull().sum().sum()),
    ("Missing values in test:", test_df.isnull().sum().sum()),
    ("Duplicate rows in train:", train_df.duplicated().sum()),
    ("Duplicate rows in test:", test_df.duplicated().sum()),
):
    print(label, value)

# Summary statistics of the single target that is modelled.
TARGET = 'BlendProperty1'
target_values = train_df[TARGET]
print(f"\nTarget variable ({TARGET}) statistics:")
print(target_values.describe())
print(f"Target skewness: {target_values.skew():.3f}")
print(f"Target kurtosis: {target_values.kurtosis():.3f}")

print("\n--- 2. Advanced Feature Engineering ---")

# Component fraction columns: every training column whose name contains both
# 'Component' and 'fraction'.
component_cols = [
    name for name in train_df.columns
    if all(token in name for token in ('Component', 'fraction'))
]
print(f"Found component columns: {component_cols}")

# Create additional engineered features
def create_advanced_features(df, component_cols):
    """Return a new frame: ``df`` plus features derived from ``component_cols``.

    Added columns, in this order:

    * ``poly_<name>``: degree-2 polynomial expansion of the component columns
      (no bias column), named after scikit-learn's output feature names.
    * ``ratio_/diff_/prod_<a>_<b>``: for every unordered pair of component
      columns, ``a / (b + 1e-8)``, ``a - b`` and ``a * b``.
    * ``component_sum/mean/std/var/max/min/range/skew/kurt``: row-wise
      statistics across the component columns.
    * ``dominant_value``, ``second_dominant``, ``dominance_ratio``: largest and
      second-largest component per row and their ratio (with a 1e-8 offset in
      the denominator).
    * ``is_dominant_<col>``: 0/1 indicator of the column holding the row maximum.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    component_cols : sequence of str
        Names of the component columns in ``df``.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by the engineered ones, on ``df``'s index.
        With fewer than two component columns there are no pair features and
        ``second_dominant`` / ``dominance_ratio`` are NaN.
    """
    component_cols = list(component_cols)
    component_data = df[component_cols]

    # Every new column is collected here and attached with a single concat at
    # the end; inserting hundreds of columns one by one fragments the frame.
    new_features = {}

    # Basic polynomial features (degree 2)
    poly = PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)
    poly_features = poly.fit_transform(component_data)
    poly_feature_names = poly.get_feature_names_out(component_cols)
    for idx, name in enumerate(poly_feature_names):
        new_features[f'poly_{name}'] = poly_features[:, idx]

    # Component ratios and relationships for every unordered pair of columns
    # (the loops simply do nothing when there are fewer than two columns).
    for pos, col1 in enumerate(component_cols):
        for col2 in component_cols[pos + 1:]:
            # The small offset keeps the ratio finite when the denominator is 0.
            new_features[f'ratio_{col1}_{col2}'] = df[col1] / (df[col2] + 1e-8)
            new_features[f'diff_{col1}_{col2}'] = df[col1] - df[col2]
            new_features[f'prod_{col1}_{col2}'] = df[col1] * df[col2]

    # Row-wise statistical features
    new_features['component_sum'] = component_data.sum(axis=1)
    new_features['component_mean'] = component_data.mean(axis=1)
    new_features['component_std'] = component_data.std(axis=1)
    new_features['component_var'] = component_data.var(axis=1)
    new_features['component_max'] = component_data.max(axis=1)
    new_features['component_min'] = component_data.min(axis=1)
    new_features['component_range'] = new_features['component_max'] - new_features['component_min']
    new_features['component_skew'] = component_data.skew(axis=1)
    new_features['component_kurt'] = component_data.kurtosis(axis=1)

    # Dominant component features
    dominant_component_names = component_data.idxmax(axis=1)
    new_features['dominant_value'] = component_data.max(axis=1)
    if len(component_cols) >= 2:
        new_features['second_dominant'] = component_data.apply(
            lambda row: row.nlargest(2).iloc[1], axis=1
        )
    else:
        # A single component has no runner-up; report NaN instead of failing
        # on the out-of-bounds positional lookup.
        new_features['second_dominant'] = pd.Series(np.nan, index=df.index)
    new_features['dominance_ratio'] = (
        new_features['dominant_value'] / (new_features['second_dominant'] + 1e-8)
    )

    # One-hot encode the dominant component (instead of storing string names)
    for col in component_cols:
        new_features[f'is_dominant_{col}'] = (dominant_component_names == col).astype(int)

    # Plain arrays are placed positionally on df's index, so no label alignment
    # takes place while the engineered frame is assembled.
    engineered = pd.DataFrame(
        {name: np.asarray(values) for name, values in new_features.items()},
        index=df.index,
    )

    # If the input already carries a column with an engineered name, the fresh
    # values replace it so that column names stay unique.
    clashing = [name for name in engineered.columns if name in df.columns]
    return pd.concat([df.drop(columns=clashing), engineered], axis=1)

# Apply feature engineering
train_enhanced = create_advanced_features(train_df, component_cols)
test_enhanced = create_advanced_features(test_df, component_cols)

# Feature columns: everything in the enhanced training frame except the
# identifier and the ten target columns.
non_feature_cols = set(['BlendID'] + [f'BlendProperty{i}' for i in range(1, 11)])
feature_cols = [col for col in train_enhanced.columns if col not in non_feature_cols]

# Explicit copy: X is modified below and must not be a view of train_enhanced.
X = train_enhanced[feature_cols].copy()
y = train_enhanced[TARGET]

# Give the test matrix exactly the training columns in the training order;
# a feature that is absent from the test frame becomes a column of zeros.
X_test = test_enhanced.reindex(columns=X.columns, fill_value=0)

print(f"Total features after engineering: {X.shape[1]}")

# Handle missing values and infinite values
X = X.replace([np.inf, -np.inf], np.nan)
X_test = X_test.replace([np.inf, -np.inf], np.nan)

# Fill missing numeric values with the TRAINING median in both matrices, so the
# test rows are imputed with the same statistics the models are trained on.
numeric_cols = X.select_dtypes(include=[np.number]).columns
train_medians = X[numeric_cols].median()
X[numeric_cols] = X[numeric_cols].fillna(train_medians)
X_test[numeric_cols] = X_test[numeric_cols].fillna(train_medians)

# Handle any remaining non-numeric columns (shouldn't happen now, but just in case)
for col in X.columns:
    if X[col].dtype == 'object':
        X[col] = X[col].fillna('missing')
        X_test[col] = X_test[col].fillna('missing')

print("\n--- 3. Feature Selection ---")
# Keep the features with the highest univariate F-statistic against the target:
# at most 100, or all of them when fewer are available.
# NOTE(review): the selector is fitted on every training row, so the
# cross-validation that follows scores features that were chosen with the
# validation rows already seen.
n_keep = min(100, X.shape[1])
selector = SelectKBest(f_regression, k=n_keep)
selector.fit(X, y)

selected_features = X.columns[selector.get_support()]

# The transformer returns plain arrays; wrap them so the column names survive.
X_selected = pd.DataFrame(selector.transform(X), columns=selected_features)
X_test_selected = pd.DataFrame(selector.transform(X_test), columns=selected_features)

print(f"Selected {len(selected_features)} features out of {X.shape[1]}")

print("\n--- 4. Advanced Cross-Validation with Multiple Models ---")

# Settings that the XGBoost and LightGBM models share verbatim.
# NOTE(review): LightGBM applies `subsample` only when `subsample_freq` is
# positive, and it is not set here. Confirm whether row subsampling was
# intended for the LightGBM model.
shared_boosting_params = dict(
    n_estimators=2000,
    learning_rate=0.03,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=0.1,
    reg_lambda=0.1,
    random_state=42,
    n_jobs=-1,
)

# Candidate models, keyed by the short name used in all reports below.
models = {
    'xgb': xgb.XGBRegressor(
        objective='reg:squarederror',
        early_stopping_rounds=100,
        **shared_boosting_params,
    ),
    'lgb': lgb.LGBMRegressor(
        objective='regression',
        verbose=-1,
        **shared_boosting_params,
    ),
    'rf': RandomForestRegressor(
        n_estimators=500,
        max_depth=10,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1,
    ),
    'gbr': GradientBoostingRegressor(
        n_estimators=1000,
        learning_rate=0.05,
        max_depth=5,
        subsample=0.8,
        random_state=42,
    ),
}

# Shuffled 10-fold split with a fixed seed.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Per-model containers filled by the training loop.
model_results = {}
oof_predictions = {}
test_predictions = {}

# Cross-validate every model: collect out-of-fold predictions for the training
# rows, fold-averaged predictions for the test rows, and per-fold RMSE / MAPE.
for name, model in models.items():
    print(f"\n--- Training {name.upper()} ---")

    oof_preds = np.zeros(X_selected.shape[0])
    test_preds = np.zeros(X_test_selected.shape[0])

    fold_scores = []
    fold_mapes = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(X_selected, y)):
        X_train_fold = X_selected.iloc[train_idx]
        X_val_fold = X_selected.iloc[val_idx]
        y_train_fold = y.iloc[train_idx]
        y_val_fold = y.iloc[val_idx]
        # Test features as this fold's model must see them (replaced by the
        # scaled version in the branch below).
        X_test_fold = X_test_selected

        # Scale features for linear models. The scaler is fitted on the
        # training fold only and applied to validation AND test features, so
        # the model never predicts on data in a different scale than it was
        # fitted on.
        if name in ['ridge', 'lasso', 'elastic']:
            scaler = RobustScaler()
            X_train_fold = pd.DataFrame(scaler.fit_transform(X_train_fold),
                                        columns=X_train_fold.columns, index=X_train_fold.index)
            X_val_fold = pd.DataFrame(scaler.transform(X_val_fold),
                                      columns=X_val_fold.columns, index=X_val_fold.index)
            X_test_fold = pd.DataFrame(scaler.transform(X_test_fold),
                                       columns=X_test_fold.columns, index=X_test_fold.index)

        # Fit model with proper API usage.
        # NOTE: for 'xgb' and 'lgb' the validation fold is also the
        # early-stopping evaluation set, so their fold scores are measured on
        # data that influenced the number of boosting rounds.
        if name == 'xgb':
            model.fit(X_train_fold, y_train_fold,
                      eval_set=[(X_val_fold, y_val_fold)],
                      verbose=False)
        elif name == 'lgb':
            model.fit(X_train_fold, y_train_fold,
                      eval_set=[(X_val_fold, y_val_fold)],
                      callbacks=[lgb.early_stopping(100), lgb.log_evaluation(0)])
        else:
            model.fit(X_train_fold, y_train_fold)

        # Predict the held-out fold
        val_pred = model.predict(X_val_fold)
        oof_preds[val_idx] = val_pred

        # Calculate metrics
        fold_rmse = np.sqrt(mean_squared_error(y_val_fold, val_pred))
        fold_mape = safe_mape(y_val_fold, val_pred)

        fold_scores.append(fold_rmse)
        fold_mapes.append(fold_mape)

        # Each fold contributes an equal share to the test prediction
        test_preds += model.predict(X_test_fold) / kf.n_splits

        print(f"Fold {fold+1}: RMSE = {fold_rmse:.5f}, MAPE = {fold_mape:.3f}%")

    # Store results (metrics over all out-of-fold predictions of this model)
    cv_rmse = np.sqrt(mean_squared_error(y, oof_preds))
    cv_mape = safe_mape(y, oof_preds)

    model_results[name] = {
        'cv_rmse': cv_rmse,
        'cv_mape': cv_mape,
        'fold_scores': fold_scores,
        'fold_mapes': fold_mapes
    }

    oof_predictions[name] = oof_preds
    test_predictions[name] = test_preds

    print(f"{name.upper()} CV RMSE: {cv_rmse:.5f}")
    print(f"{name.upper()} CV MAPE: {cv_mape:.3f}%")

print("\n--- 5. Model Performance Summary ---")
# One entry per model: overall CV metric and, in parentheses, the standard
# deviation of the per-fold values.
for name, results in model_results.items():
    rmse_spread = np.std(results['fold_scores'])
    mape_spread = np.std(results['fold_mapes'])
    print(f"{name.upper()}:")
    print(f"  CV RMSE: {results['cv_rmse']:.5f} (±{rmse_spread:.5f})")
    print(f"  CV MAPE: {results['cv_mape']:.3f}% (±{mape_spread:.3f}%)")

print("\n--- 6. Advanced Ensemble (Stacking) ---")

# Create meta-features from out-of-fold predictions: one column per base model.
meta_features = np.column_stack([oof_predictions[name] for name in models.keys()])
meta_features_test = np.column_stack([test_predictions[name] for name in models.keys()])

# Score the meta-model on rows it was NOT fitted on. Predicting the very rows
# used for fitting would report an in-sample error and overstate the gain over
# the single models.
meta_cv = KFold(n_splits=5, shuffle=True, random_state=42)
final_oof_preds = np.zeros(meta_features.shape[0])
for meta_fit_idx, meta_holdout_idx in meta_cv.split(meta_features):
    fold_meta_model = Ridge(alpha=0.1)
    fold_meta_model.fit(meta_features[meta_fit_idx], y.iloc[meta_fit_idx])
    final_oof_preds[meta_holdout_idx] = fold_meta_model.predict(meta_features[meta_holdout_idx])

# The meta-model used for the test predictions is fitted on all training rows.
meta_model = Ridge(alpha=0.1)
meta_model.fit(meta_features, y)
final_test_preds = meta_model.predict(meta_features_test)

# Calculate final metrics from the held-out meta predictions
final_rmse = np.sqrt(mean_squared_error(y, final_oof_preds))
final_mape = safe_mape(y, final_oof_preds)

print(f"Final Ensemble RMSE: {final_rmse:.5f}")
print(f"Final Ensemble MAPE: {final_mape:.3f}%")

# Find best single model (lowest cross-validated MAPE)
best_model_name = min(model_results.keys(), key=lambda x: model_results[x]['cv_mape'])
best_single_mape = model_results[best_model_name]['cv_mape']

print(f"\nImprovement over best single model ({best_model_name.upper()}):")
print(f"MAPE improvement: {best_single_mape - final_mape:.3f} percentage points")

print("\n--- 7. Generating Enhanced Submission ---")

blend_columns = [f'BlendProperty{i}' for i in range(1, 11)]

# Identifier first, then the stacked prediction for BlendProperty1.
submission_df = pd.DataFrame({'ID': test_df['ID']})
submission_df['BlendProperty1'] = final_test_preds

# The other blend properties are not modelled: each one present in the training
# data is filled with that column's training median for every test row.
median_fill = {
    col: train_df[col].median()
    for col in blend_columns[1:]
    if col in train_df.columns
}
submission_df = submission_df.assign(**median_fill)

# Enforce the column order ID, BlendProperty1, ..., BlendProperty10.
expected_columns = ['ID'] + blend_columns
submission_df = submission_df[expected_columns]

# Write without the index column.
submission_df.to_csv('enhanced_submission.csv', index=False)
print("Enhanced submission file 'enhanced_submission.csv' created successfully.")

print("\n--- 8. Model Insights ---")
print("Key improvements implemented:")
for summary_line in (
    "1. Advanced feature engineering with ratios, differences, and statistical features",
    "2. Feature selection to reduce overfitting",
    "3. Multiple algorithms with different strengths",
    "4. Increased cross-validation folds for better validation",
    "5. Stacking ensemble for improved predictions",
    "6. MAPE-focused optimization",
    "7. Robust handling of missing values and outliers",
):
    print(summary_line)

# Feature importance analysis for XGBoost. The estimator stored in `models` is
# re-fitted in every CV fold, so these importances describe its most recent fit.
if 'xgb' in models:
    xgb_importances = models['xgb'].feature_importances_
    feature_importance = pd.DataFrame(
        {'feature': selected_features, 'importance': xgb_importances}
    ).sort_values('importance', ascending=False)

    print("\nTop 10 most important features:")
    print(feature_importance.head(10).to_string(index=False))

print(f"\nFinal validation MAPE: {final_mape:.3f}%")
print("Expected significant improvement in competition score!")


--- 1. Data Loading and Exploration ---
Train data shape: (2000, 65)
Test data shape: (500, 56)
Train columns: ['Component1_fraction', 'Component2_fraction', 'Component3_fraction', 'Component4_fraction', 'Component5_fraction', 'Component1_Property1', 'Component2_Property1', 'Component3_Property1', 'Component4_Property1', 'Component5_Property1', 'Component1_Property2', 'Component2_Property2', 'Component3_Property2', 'Component4_Property2', 'Component5_Property2', 'Component1_Property3', 'Component2_Property3', 'Component3_Property3', 'Component4_Property3', 'Component5_Property3', 'Component1_Property4', 'Component2_Property4', 'Component3_Property4', 'Component4_Property4', 'Component5_Property4', 'Component1_Property5', 'Component2_Property5', 'Component3_Property5', 'Component4_Property5', 'Component5_Property5', 'Component1_Property6', 'Component2_Property6', 'Component3_Property6', 'Component4_Property6', 'Component5_Property6', 'Component1_Property7', 'Component2_Property7', 'Comp