# In [None]:
# ==== ULTRA-FAST KAGGLE SUBMISSION VERSION ==== #
# Optimized for <480 minute time limit
import os
import numpy as np
import pandas as pd
import polars as pl
from pathlib import Path
import lightgbm as lgb
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr
from sklearn.base import BaseEstimator, TransformerMixin
import pickle
import json
from typing import Dict, List, Optional, Tuple
import warnings
warnings.filterwarnings('ignore')

# Kaggle evaluation - ENABLED FOR SUBMISSION
import kaggle_evaluation.mitsui_inference_server

# ==== ULTRA-FAST Configuration for Kaggle ==== #
class FastConfig:
    """Speed-oriented settings shared by the submission pipeline.

    Every value is a class attribute and both helpers are static methods, so
    they can be read from the class itself or from an instance.
    """
    NUM_TARGET_COLUMNS = 424
    RANDOM_STATE = 42
    CV_FOLDS = 1  # Minimal cross-validation for speed

    # Ultra-fast model parameters
    LGBM_PARAMS_FAST = {
        'n_estimators': 15,         # Drastically reduced
        'learning_rate': 0.3,       # Higher for faster convergence
        'num_leaves': 10,           # Minimal complexity
        'max_depth': 3,             # Shallow trees
        'random_state': RANDOM_STATE,
        'verbose': -1,
        'n_jobs': 1,                # Single thread to avoid overhead
        'force_row_wise': True,
        'min_child_samples': 100,   # Faster training
        'subsample': 0.7,           # Row subsampling to speed up training
        # LightGBM only applies `subsample` when the bagging frequency is
        # positive; without this entry the 0.7 above is silently ignored.
        'subsample_freq': 1,
        'feature_fraction': 0.7,
    }

    # Minimal feature engineering
    ROLLING_WINDOWS = [5]  # Single window only
    LAG_PERIODS = [1]      # Single lag only

    # Ultra-fast training configuration
    MAX_COMPLEX_MODELS = 0       # No stacking - too slow
    MAX_SIMPLE_MODELS = 8        # Minimal LightGBM models
    MIN_SAMPLES_REQUIRED = 200   # Higher threshold

    # Minimal validation
    EARLY_STOPPING_ROUNDS = 5
    VALIDATION_SPLIT = 0.1       # Smaller validation

    @staticmethod
    def get_data_path():
        """Return the competition data directory.

        The Kaggle input directory is returned when it exists on disk;
        otherwise the relative ``dataset`` directory is returned. A one-line
        notice about the detected environment is printed either way.
        """
        kaggle_path = Path('/kaggle/input/mitsui-commodity-prediction-challenge')
        local_path = Path("dataset")

        if kaggle_path.exists():
            print("🚀 Kaggle environment detected - SPEED MODE")
            return kaggle_path

        print("🚀 Local development - SPEED MODE")
        return local_path

    @staticmethod
    def get_lgbm_params():
        """Return a shallow copy of ``LGBM_PARAMS_FAST``.

        A copy is handed out so callers can adjust their parameters without
        touching the shared class-level dictionary.
        """
        return FastConfig.LGBM_PARAMS_FAST.copy()

# Shared configuration handle used by the rest of the script.
CFG = FastConfig()
# Resolved once at import time; get_data_path() is a static method, so calling
# it on the class is the same call as going through the instance.
data_path = FastConfig.get_data_path()

# ==== MINIMAL Feature Engineering Pipeline ==== #
class FastFeatureEngineer(BaseEstimator, TransformerMixin):
    """Append three derived columns to every non-``date_id`` input column.

    For a column ``c`` the transform adds ``c_ma5`` (mean over a 5-row
    window), ``c_pct`` (row-to-row percent change) and ``c_lag1`` (previous
    row's value). Missing values in the result are replaced by 0.

    The derived values depend on row order and on preceding rows: rows with
    no sufficient history get NaN and therefore 0, so a one-row frame yields
    derived features that are all 0.
    """

    def __init__(self):
        self.feature_names_ = []

    def fit(self, X, y=None):
        """Record the non-``date_id`` column names of ``X``; ``y`` is ignored."""
        self.feature_names_ = [c for c in X.columns if c != 'date_id']
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the derived columns added."""
        return self._create_minimal_features(X.copy())

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its transformed copy."""
        return self.fit(X, y).transform(X)

    def _create_minimal_features(self, df):
        """Create only essential features for speed.

        Columns whose derived features cannot be computed are skipped; any
        feature already computed for such a column before the failure is
        kept. Prints how many columns were added and returns the new frame.
        """
        n_input_cols = df.shape[1]
        feature_cols = [c for c in df.columns if c != 'date_id']

        # Collect the new columns first and attach them in one step; inserting
        # them one at a time fragments the frame and is much slower.
        derived = {}
        for col in feature_cols:
            source = df[col]
            try:
                derived[f'{col}_ma5'] = source.rolling(5).mean()
                # A zero in the previous row makes the percent change +/-inf,
                # which fillna() below would not remove; map it to NaN here.
                derived[f'{col}_pct'] = (
                    source.pct_change().replace([np.inf, -np.inf], np.nan)
                )
                derived[f'{col}_lag1'] = source.shift(1)
            except Exception:
                # Best effort: e.g. non-numeric columns cannot be averaged.
                continue

        if derived:
            # A derived name that already exists overwrites that column in
            # place instead of creating a duplicate label.
            taken = set(df.columns)
            clashing = [name for name in derived if name in taken]
            for name in clashing:
                df[name] = derived.pop(name)
            if derived:
                df = pd.concat([df, pd.DataFrame(derived)], axis=1)

        df = df.fillna(0)
        print(f"⚡ Fast features: {df.shape[1] - n_input_cols} added")
        return df

# ==== ULTRA-FAST Model Management ==== #
class FastModelManager:
    """Train and store one regressor per target column.

    ``models`` maps a target name to its fitted estimator and
    ``feature_columns`` maps the same name to the list of feature column
    names the estimator was fitted on.
    """

    def __init__(self):
        self.models = {}
        self.feature_columns = {}

    def calculate_target_importance_fast(self, train_labels_df):
        """Score every ``target_*`` column as variance times non-null ratio.

        Returns a list of floats in the order the target columns appear in
        ``train_labels_df``; a score that cannot be computed (NaN) becomes 0.
        """
        target_columns = [col for col in train_labels_df.columns if col.startswith('target_')]
        targets = train_labels_df[target_columns]

        variances = targets.var()
        non_null_ratios = targets.notna().mean()
        return (variances * non_null_ratios).fillna(0).tolist()

    def select_fast_strategies(self, target_columns, importance_scores):
        """Assign ``'lightgbm'`` to the top-scoring targets, ``'linear'`` to the rest.

        Targets are ranked by descending score (ties keep their input order)
        and the first ``CFG.MAX_SIMPLE_MODELS`` of them get LightGBM.
        Returns a dict mapping target name to strategy name.
        """
        ranked = sorted(
            zip(target_columns, importance_scores),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return {
            target: 'lightgbm' if rank < CFG.MAX_SIMPLE_MODELS else 'linear'
            for rank, (target, _score) in enumerate(ranked)
        }

    def train_fast_lightgbm(self, X, y, target_name):
        """Fit a small LightGBM regressor for ``target_name`` and store it.

        The model is fitted on all of ``X``/``y``. Early stopping monitors a
        subset of at most 100 of those same training rows, so it measures
        in-sample fit rather than held-out performance. If anything fails, a
        notice is printed and a Ridge model is trained instead.

        Returns the fitted model (LightGBM, or Ridge after a fallback).
        """
        try:
            model = lgb.LGBMRegressor(**CFG.get_lgbm_params())

            # Pick the evaluation rows by position and use the SAME positions
            # for features and labels, so each feature row keeps its label.
            n_eval = min(100, len(X))
            rng = np.random.RandomState(CFG.RANDOM_STATE)
            eval_pos = rng.choice(len(X), size=n_eval, replace=False)

            model.fit(
                X, y,
                eval_set=[(X.iloc[eval_pos], y.iloc[eval_pos])],
                callbacks=[lgb.early_stopping(CFG.EARLY_STOPPING_ROUNDS),
                           lgb.log_evaluation(0)],
            )

            self.models[target_name] = model
            self.feature_columns[target_name] = X.columns.tolist()
            return model

        except Exception as exc:
            print(f"⚠️ LightGBM failed for {target_name} ({exc}); using Ridge instead")
            return self.train_linear_model(X, y, target_name)

    def train_linear_model(self, X, y, target_name):
        """Fit a Ridge regressor (alpha=0.1) for ``target_name`` and store it.

        Exceptions raised by the fit are not caught here.
        Returns the fitted model.
        """
        model = Ridge(alpha=0.1, random_state=CFG.RANDOM_STATE)
        model.fit(X, y)

        self.models[target_name] = model
        self.feature_columns[target_name] = X.columns.tolist()
        return model

# ==== Minimal Stabilization ==== #
def fast_stabilize_predictions(out_df, date_ids=None):
    """Return a finite float32 copy of ``out_df`` with no constant rows.

    NaN and +/-inf become 0. Any row whose standard deviation is below 1e-10
    then receives Gaussian noise (std 1e-6) so that the row is not flat.
    The input frame is not modified; index and column labels are preserved.

    Args:
        out_df: Numeric prediction frame, one row per sample.
        date_ids: Accepted for interface compatibility; not used.

    Returns:
        A new DataFrame whose columns are all float32.
    """
    values = np.nan_to_num(
        out_df.to_numpy(dtype=np.float32, copy=True),
        nan=0.0, posinf=0.0, neginf=0.0,
    )

    # Quick flat row detection
    flat_mask = values.std(axis=1) < 1e-10
    n_flat = int(flat_mask.sum())

    if n_flat:
        # Draw noise only for the rows that need it and cast it to float32
        # before adding, so the result keeps a single, consistent dtype.
        noise = np.random.normal(0, 1e-6, (n_flat, values.shape[1]))
        values[flat_mask] += noise.astype(np.float32)

    return pd.DataFrame(values, index=out_df.index, columns=out_df.columns)

# ==== ULTRA-FAST Pipeline Predictor ==== #
class UltraFastPredictor:
    """End-to-end pipeline: feature engineering, per-target models, prediction.

    Targets without a trained model are predicted with ``global_mean``, the
    mean of the per-target means of the training labels.
    """

    def __init__(self):
        self.feature_pipeline = None
        self.model_manager = FastModelManager()
        self.is_fitted = False
        self.global_mean = 0.0

    def fit(self, train_df, train_labels_df):
        """Fit the feature pipeline and one model per ``target_*`` column.

        Features and labels are matched on their row index. Targets with
        fewer than ``CFG.MIN_SAMPLES_REQUIRED`` non-null labels are skipped.
        A target whose training raises is skipped as well; the number of such
        failures and the first error are printed at the end.
        """
        print("🚀 Ultra-fast pipeline fitting...")

        # Minimal feature engineering
        self.feature_pipeline = FastFeatureEngineer()
        X_train = self.feature_pipeline.fit_transform(train_df)
        # Fill missing values once here instead of once per target below.
        X_train = X_train.drop(columns=['date_id'], errors='ignore').fillna(0)

        # Calculate global fallback
        target_columns = [col for col in train_labels_df.columns if col.startswith('target_')]
        self.global_mean = train_labels_df[target_columns].mean().mean()

        # Fast target selection
        importance_scores = self.model_manager.calculate_target_importance_fast(train_labels_df)
        strategies = self.model_manager.select_fast_strategies(target_columns, importance_scores)

        print(f"📊 Training {len(target_columns)} targets:")
        print(f"   - LightGBM: {sum(1 for s in strategies.values() if s == 'lightgbm')}")
        print(f"   - Linear: {sum(1 for s in strategies.values() if s == 'linear')}")

        trained = 0
        failures = []  # (target, error text) for targets whose training raised
        for target in target_columns:
            strategy = strategies.get(target, 'linear')

            y = train_labels_df[target].dropna()
            if len(y) < CFG.MIN_SAMPLES_REQUIRED:
                continue

            # Keep only the rows that have a label for this target.
            common_idx = X_train.index.intersection(y.index)
            X_aligned = X_train.loc[common_idx]
            y_aligned = y.loc[common_idx]

            try:
                if strategy == 'lightgbm':
                    self.model_manager.train_fast_lightgbm(X_aligned, y_aligned, target)
                else:
                    self.model_manager.train_linear_model(X_aligned, y_aligned, target)
                trained += 1
            except Exception as exc:
                failures.append((target, str(exc)))

        print(f"✅ Trained {trained} models in minimal time")
        if failures:
            first_target, first_error = failures[0]
            print(f"⚠️ {len(failures)} targets failed to train and will use the "
                  f"global mean (first: {first_target}: {first_error})")
        self.is_fitted = True

    def predict(self, test: pl.DataFrame, *label_lags) -> pl.DataFrame:
        """Predict all ``CFG.NUM_TARGET_COLUMNS`` targets for ``test``.

        Args:
            test: Feature rows as a polars frame.
            *label_lags: Accepted for interface compatibility; not used.

        Returns:
            A polars frame with one row per test row and columns
            ``target_0`` .. ``target_{N-1}``.

        Raises:
            ValueError: If called before ``fit``.
        """
        if not self.is_fitted:
            raise ValueError("Must fit before predict")

        test_df = test.to_pandas()
        X_test = self.feature_pipeline.transform(test_df)
        # Fill once; the per-target loop below only selects columns.
        X_test = X_test.drop(columns=['date_id'], errors='ignore').fillna(0)

        # Start from the fallback value; trained models overwrite their column.
        predictions = np.full((len(test_df), CFG.NUM_TARGET_COLUMNS), self.global_mean)

        # Reuse the column-selected frame while consecutive targets share the
        # same feature list, instead of rebuilding it for every target.
        cached_cols = None
        cached_frame = None
        for i in range(CFG.NUM_TARGET_COLUMNS):
            target_name = f"target_{i}"
            if target_name not in self.model_manager.models:
                continue

            try:
                model = self.model_manager.models[target_name]
                feature_cols = self.model_manager.feature_columns[target_name]
                if cached_frame is None or feature_cols != cached_cols:
                    cached_frame = X_test[feature_cols]
                    cached_cols = feature_cols
                predictions[:, i] = model.predict(cached_frame)
            except Exception:
                # Deliberate best effort: this column keeps the global mean.
                pass

        # Fast output creation
        column_names = [f"target_{i}" for i in range(CFG.NUM_TARGET_COLUMNS)]
        out_df = pd.DataFrame(predictions, columns=column_names)
        out_df = fast_stabilize_predictions(out_df)

        # Internal sanity checks on the output shape.
        assert out_df.shape[1] == CFG.NUM_TARGET_COLUMNS
        assert out_df.shape[0] == len(test_df)

        return pl.DataFrame(out_df)

# ==== ULTRA-FAST Submission Manager ==== #
class UltraFastSubmissionManager:
    """Lazily build a predictor on first use and serve predictions from it.

    Initialization is attempted at most once. If it fails for any reason the
    manager falls back to a predictor that returns small random values.
    """

    # Largest number of training rows used; longer inputs are cut down to
    # their last MAX_TRAIN_ROWS rows.
    MAX_TRAIN_ROWS = 5000

    def __init__(self):
        self.predictor = None
        self.initialization_attempted = False

    def initialize_for_submission(self):
        """Load the training CSVs and fit the predictor (runs only once).

        Any exception is reported with a printed message and replaced by the
        random fallback predictor, so this method itself does not raise for
        loading or fitting errors.
        """
        if self.initialization_attempted:
            return

        try:
            print("🚀 Ultra-fast initialization starting...")
            self.predictor = UltraFastPredictor()

            # Load data with minimal processing
            train_df = pd.read_csv(data_path / 'train.csv')
            train_labels_df = pd.read_csv(data_path / 'train_labels.csv')

            if len(train_df) > self.MAX_TRAIN_ROWS:
                # Keep one contiguous block of the last rows. The rolling, lag
                # and percent-change features computed during fit() depend on
                # neighbouring rows, so a shuffled random sample would feed
                # them unrelated neighbours.
                # NOTE(review): assumes both CSVs are row-aligned and that the
                # last rows are the ones worth keeping — confirm.
                train_df = train_df.iloc[-self.MAX_TRAIN_ROWS:]
                train_labels_df = train_labels_df.iloc[-self.MAX_TRAIN_ROWS:]
                print(f"📊 Sampled to {len(train_df)} rows for speed")

            self.predictor.fit(train_df, train_labels_df)
            print("✅ Ultra-fast initialization completed")

        except Exception as e:
            print(f"❌ Init failed: {e}")
            self.predictor = self._create_minimal_fallback()

        finally:
            # Set even on failure so a broken setup is not retried per call.
            self.initialization_attempted = True

    def _create_minimal_fallback(self):
        """Return a predictor that emits N(0, 0.01) noise for every target."""
        class MinimalFallback:
            def predict(self, test, *args):
                n_samples = len(test)
                # Simple random predictions
                preds = np.random.normal(0, 0.01, (n_samples, CFG.NUM_TARGET_COLUMNS))
                columns = [f"target_{i}" for i in range(CFG.NUM_TARGET_COLUMNS)]
                return pl.DataFrame(pd.DataFrame(preds, columns=columns))

        return MinimalFallback()

    def predict(self, test, *label_lags):
        """Return predictions for ``test``, initializing first if needed.

        ``label_lags`` are forwarded unchanged to the underlying predictor.
        The result's shape is checked against the expected number of target
        columns and the number of rows in ``test``.
        """
        if not self.initialization_attempted:
            self.initialize_for_submission()

        result = self.predictor.predict(test, *label_lags)

        # Final verification
        assert result.shape[1] == CFG.NUM_TARGET_COLUMNS
        assert result.shape[0] == len(test)

        return result

# Global ultra-fast manager: one module-level instance shared by every call to
# predict(). Constructing it does no work beyond setting its predictor to None
# and clearing its initialization flag.
submission_manager = UltraFastSubmissionManager()

def predict(test: pl.DataFrame,
           label_lags_1_batch: pl.DataFrame,
           label_lags_2_batch: pl.DataFrame,
           label_lags_3_batch: pl.DataFrame,
           label_lags_4_batch: pl.DataFrame) -> pl.DataFrame:
    """Entry point handed to the inference server.

    Delegates to the module-level ``submission_manager`` and then makes sure
    the returned frame has exactly ``CFG.NUM_TARGET_COLUMNS`` columns: missing
    trailing ``target_*`` columns are filled with 0.0, and a frame that is too
    wide is reduced to ``target_0`` .. ``target_{N-1}``.
    """
    lag_batches = (
        label_lags_1_batch,
        label_lags_2_batch,
        label_lags_3_batch,
        label_lags_4_batch,
    )
    result = submission_manager.predict(test, *lag_batches)

    # Safety net: enforce the exact column count.
    expected = CFG.NUM_TARGET_COLUMNS
    n_cols = result.shape[1]
    if n_cols < expected:
        padding = pl.DataFrame({
            f"target_{idx}": [0.0] * len(result)
            for idx in range(n_cols, expected)
        })
        result = pl.concat([result, padding], how="horizontal")
    elif n_cols > expected:
        wanted = [f"target_{i}" for i in range(expected)]
        result = result.select(wanted)

    print(f"⚡ Ultra-fast prediction: {result.shape[0]} × {result.shape[1]}")
    return result

# Kaggle Inference Server - ENABLED
# Wrap predict() in the Mitsui inference server from kaggle_evaluation.
inference_server = kaggle_evaluation.mitsui_inference_server.MitsuiInferenceServer(predict)

# When KAGGLE_IS_COMPETITION_RERUN is unset (or empty), run the local gateway
# against data_path; otherwise start serving.
if not os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
    inference_server.run_local_gateway((str(data_path),))
else:
    inference_server.serve()