In [6]:
# Test robust import system for ultra-fast version
import os
import sys

print("🔧 Testing Ultra-Fast Kaggle Submission Imports...")

# Check if running on Kaggle
is_kaggle = os.path.exists('/kaggle')
print(f"🌐 Running on Kaggle: {is_kaggle}")

# Test Kaggle evaluation import with proper error handling
try:
    if is_kaggle:
        import kaggle_evaluation.mitsui_inference_server
        print("✅ Kaggle evaluation module imported successfully")
    else:
        print("⚠️ Local environment - kaggle_evaluation not needed")
        # Mock for local testing only
        class MockInferenceServer:
            def __init__(self): pass
        sys.modules['kaggle_evaluation'] = type('Module', (), {})()
        sys.modules['kaggle_evaluation.mitsui_inference_server'] = MockInferenceServer()
        print("✅ Mock inference server created for local testing")
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("This is expected in local environment - will work on Kaggle")

print("🚀 Import system ready!")

🔧 Testing Ultra-Fast Kaggle Submission Imports...
🌐 Running on Kaggle: False
⚠️ Local environment - kaggle_evaluation not needed
✅ Mock inference server created for local testing
🚀 Import system ready!


In [7]:
# # Test the ultra-fast Kaggle submission version
# print("🧪 Testing Ultra-Fast Kaggle Submission")
# print("=" * 50)

# # Test configuration
# print(f"📊 Target columns: {CFG.NUM_TARGET_COLUMNS}")
# print(f"🚀 Data path: {data_path}")
# print(f"🔧 Kaggle environment: {CFG.is_kaggle_environment()}")

# # Test LightGBM parameters
# lgbm_params = CFG.get_lgbm_params()
# print(f"⚡ LightGBM estimators: {lgbm_params['n_estimators']}")
# print(f"⚡ Learning rate: {lgbm_params['learning_rate']}")

# # Test feature engineering
# print(f"🛠️ Rolling windows: {CFG.ROLLING_WINDOWS}")
# print(f"🛠️ Lag periods: {CFG.LAG_PERIODS}")

# # Test manager initialization
# print(f"📦 Manager class: {type(submission_manager).__name__}")
# print(f"🎯 Initialization attempted: {submission_manager.initialization_attempted}")

# print("✅ All tests passed - ready for Kaggle submission!")

In [8]:
# ===== ULTRA-FAST MITSUI FUSION KAGGLE SUBMISSION =====
# Optimized for <480 minute constraint with 15 estimators (vs 80)
# Minimal feature engineering for speed: 3 features per column

import numpy as np
import pandas as pd
import polars as pl
from pathlib import Path
import gc
import warnings
from typing import List, Dict, Tuple, Optional, Union
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from scipy import stats
from scipy.stats import skew, kurtosis
import joblib
import os
import sys
import time
warnings.filterwarnings('ignore')

# Robust Kaggle evaluation import for deployment
is_kaggle = os.path.exists('/kaggle')
print(f"🌐 Environment: {'Kaggle' if is_kaggle else 'Local'}")

try:
    if is_kaggle:
        import kaggle_evaluation.mitsui_inference_server
        print("✅ Kaggle evaluation imported")
    else:
        # Mock for local testing only
        class MockInferenceServer:
            def __init__(self): pass
        if 'kaggle_evaluation' not in sys.modules:
            sys.modules['kaggle_evaluation'] = type('Module', (), {})()
            sys.modules['kaggle_evaluation.mitsui_inference_server'] = MockInferenceServer()
        print("⚠️ Local mode - mock server ready")
except ImportError as e:
    print(f"Import note: {e}")
    print("This will work correctly when deployed to Kaggle")

# ==== ULTRA-FAST Configuration for Kaggle ==== #
class FastConfig:
    """Ultra-fast configuration to meet 480-minute constraint"""
    
    # Model parameters - ULTRA FAST
    N_ESTIMATORS = 15      # Reduced from 80 for speed
    LEARNING_RATE = 0.3    # Higher LR to compensate
    MAX_DEPTH = 6          # Controlled depth
    MIN_CHILD_SAMPLES = 20
    SUBSAMPLE = 0.8
    COLSAMPLE_BYTREE = 0.8
    N_JOBS = 1            # Single thread for Kaggle
    
    # Feature engineering - MINIMAL
    MAX_FEATURES_PER_COL = 3  # Reduced from 5
    N_FOLDS = 3               # Reduced from 5
    
    # Data processing
    RANDOM_STATE = 42
    VALIDATION_SIZE = 0.1
    
    # Target guarantee
    REQUIRED_TARGETS = 424
    
    print(f"🚀 ULTRA-FAST CONFIG: {N_ESTIMATORS} estimators, {MAX_FEATURES_PER_COL} features/col")

# ==== FAST Data Paths for Kaggle ==== #
def get_data_path():
    """Get correct data path for environment"""
    if is_kaggle:
        return Path('/kaggle/input')
    else:
        # Try common local paths
        possible_paths = [
            Path('./data'),
            Path('../data'),
            Path('../../data'),
        ]
        for p in possible_paths:
            if p.exists():
                return p
        
        # Create mock path for testing
        mock_path = Path('./mock_data')
        mock_path.mkdir(exist_ok=True)
        print(f"⚠️ Created mock data path: {mock_path}")
        return mock_path

DATA_PATH = get_data_path()
print(f"📁 Data path: {DATA_PATH}")

# ==== ULTRA-FAST Feature Engineering (Minimal) ==== #
class FastFeatureEngineer:
    """Ultra-fast feature engineering with only 3 features per column"""
    
    def __init__(self, max_features=FastConfig.MAX_FEATURES_PER_COL):
        self.max_features = max_features
        self.feature_names = []
        
    def create_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Create minimal but effective features"""
        print(f"🔧 Fast feature engineering: {self.max_features} features per column...")
        
        result_df = df.copy()
        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        
        for col in numeric_cols:
            if col.startswith('target_'):
                continue
                
            # Only 3 fastest features
            # 1. Rolling mean (7 days)
            result_df[f'{col}_ma7'] = df[col].rolling(window=7, min_periods=1).mean()
            
            # 2. Lag 1
            result_df[f'{col}_lag1'] = df[col].shift(1)
            
            # 3. Simple ratio
            result_df[f'{col}_ratio'] = df[col] / (df[col].rolling(window=7, min_periods=1).mean() + 1e-8)
            
            self.feature_names.extend([f'{col}_ma7', f'{col}_lag1', f'{col}_ratio'])
        
        result_df = result_df.fillna(0)
        print(f"✅ Created {len(self.feature_names)} features")
        return result_df

# ==== ULTRA-FAST Predictor ==== #
class UltraFastPredictor:
    """Ultra-fast predictor optimized for Kaggle constraints"""
    
    def __init__(self):
        self.models = {}
        self.feature_engineer = FastFeatureEngineer()
        self.feature_importance = {}
        
    def prepare_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fast feature preparation"""
        print("🔧 Preparing features (ultra-fast mode)...")
        
        # Basic feature engineering
        df = self.feature_engineer.create_features(df)
        
        # Get feature columns (exclude targets)
        feature_cols = [col for col in df.columns if not col.startswith('target_')]
        
        return df[feature_cols]
    
    def train_single_target(self, X: pd.DataFrame, y: pd.Series, target_name: str) -> lgb.LGBMRegressor:
        """Train single target with ultra-fast settings"""
        
        # Quick validation split
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=FastConfig.VALIDATION_SIZE, 
            random_state=FastConfig.RANDOM_STATE
        )
        
        # Ultra-fast LightGBM
        model = lgb.LGBMRegressor(
            n_estimators=FastConfig.N_ESTIMATORS,
            learning_rate=FastConfig.LEARNING_RATE,
            max_depth=FastConfig.MAX_DEPTH,
            min_child_samples=FastConfig.MIN_CHILD_SAMPLES,
            subsample=FastConfig.SUBSAMPLE,
            colsample_bytree=FastConfig.COLSAMPLE_BYTREE,
            n_jobs=FastConfig.N_JOBS,
            random_state=FastConfig.RANDOM_STATE,
            verbose=-1
        )
        
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            callbacks=[lgb.early_stopping(10), lgb.log_evaluation(0)]
        )
        
        # Store feature importance
        self.feature_importance[target_name] = dict(zip(X.columns, model.feature_importances_))
        
        return model
    
    def train(self, df: pd.DataFrame):
        """Ultra-fast training pipeline"""
        print("🚀 Starting ULTRA-FAST training...")
        start_time = time.time()
        
        # Prepare features
        X = self.prepare_features(df)
        
        # Get target columns
        target_cols = [col for col in df.columns if col.startswith('target_')]
        
        # Ensure we have exactly 424 targets
        if len(target_cols) != FastConfig.REQUIRED_TARGETS:
            print(f"⚠️ Warning: Found {len(target_cols)} targets, expected {FastConfig.REQUIRED_TARGETS}")
        
        print(f"🎯 Training {len(target_cols)} targets with {len(X.columns)} features...")
        
        # Train all targets
        for i, target_col in enumerate(target_cols):
            if i % 50 == 0:
                elapsed = time.time() - start_time
                print(f"Progress: {i+1}/{len(target_cols)} targets ({elapsed:.1f}s)")
            
            y = df[target_col]
            self.models[target_col] = self.train_single_target(X, y, target_col)
            
            # Memory management
            if i % 100 == 0:
                gc.collect()
        
        total_time = time.time() - start_time
        print(f"✅ ULTRA-FAST training completed in {total_time:.1f}s")
        print(f"📊 Trained {len(self.models)} models")
    
    def predict(self, df: pd.DataFrame) -> pd.DataFrame:
        """Ultra-fast prediction"""
        print("🔮 Making ultra-fast predictions...")
        
        X = self.prepare_features(df)
        predictions = {}
        
        for target_col, model in self.models.items():
            predictions[target_col] = model.predict(X)
        
        result_df = pd.DataFrame(predictions, index=df.index)
        
        # Ensure we have exactly 424 columns
        if len(result_df.columns) != FastConfig.REQUIRED_TARGETS:
            print(f"⚠️ Prediction shape: {result_df.shape}, expected 424 columns")
        
        print(f"✅ Predictions shape: {result_df.shape}")
        return result_df

# ==== FAST Model Manager ==== #
class FastModelManager:
    """Manage ultra-fast model lifecycle"""
    
    def __init__(self):
        self.predictor = UltraFastPredictor()
        self.is_trained = False
        
    def train_on_data(self, df: pd.DataFrame):
        """Train with ultra-fast settings"""
        print("🚀 FastModelManager: Ultra-fast training...")
        self.predictor.train(df)
        self.is_trained = True
        print("✅ Ultra-fast model ready!")
        
    def predict_batch(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fast batch prediction"""
        if not self.is_trained:
            raise ValueError("Model not trained yet!")
            
        return self.predictor.predict(df)
    
    def get_model_info(self) -> Dict:
        """Get ultra-fast model information"""
        return {
            'model_type': 'ultra_fast_lgb',
            'n_estimators': FastConfig.N_ESTIMATORS,
            'n_targets': len(self.predictor.models),
            'n_features': len(self.predictor.feature_engineer.feature_names),
            'is_trained': self.is_trained
        }

print("🚀 ULTRA-FAST Mitsui Fusion System Ready!")
print(f"⚡ Configuration: {FastConfig.N_ESTIMATORS} estimators, {FastConfig.MAX_FEATURES_PER_COL} features/col")
print(f"🎯 Target guarantee: {FastConfig.REQUIRED_TARGETS} columns")

🌐 Environment: Local
⚠️ Local mode - mock server ready
🚀 ULTRA-FAST CONFIG: 15 estimators, 3 features/col
⚠️ Created mock data path: mock_data
📁 Data path: mock_data
🚀 ULTRA-FAST Mitsui Fusion System Ready!
⚡ Configuration: 15 estimators, 3 features/col
🎯 Target guarantee: 424 columns


In [9]:
# Test the ultra-fast system with mock data
print("🧪 Testing Ultra-Fast System...")

# Create mock training data
np.random.seed(42)
n_samples = 100
n_features = 10

# Create mock data with some basic patterns
mock_data = {}
for i in range(n_features):
    mock_data[f'feature_{i}'] = np.random.randn(n_samples) + np.sin(np.arange(n_samples) * 0.1)

# Add 424 target columns (required)
for i in range(424):
    # Targets correlated with features but with noise
    target_val = (mock_data['feature_0'] * 0.3 + 
                  mock_data['feature_1'] * 0.2 + 
                  np.random.randn(n_samples) * 0.5)
    mock_data[f'target_{i}'] = target_val

mock_df = pd.DataFrame(mock_data)
print(f"📊 Mock data shape: {mock_df.shape}")

# Test the ultra-fast model manager
manager = FastModelManager()
print(f"📋 Model info before training: {manager.get_model_info()}")

# Quick training test (on small subset for speed)
small_df = mock_df.iloc[:20].copy()  # Very small for quick test
print(f"🏃‍♂️ Quick training on {small_df.shape[0]} samples...")

start_time = time.time()
manager.train_on_data(small_df)
training_time = time.time() - start_time

print(f"⏱️ Training time: {training_time:.2f}s")
print(f"📋 Model info after training: {manager.get_model_info()}")

# Test prediction
print("🔮 Testing prediction...")
pred_df = manager.predict_batch(small_df.iloc[:5])
print(f"✅ Prediction shape: {pred_df.shape}")
print(f"🎯 Has 424 columns: {pred_df.shape[1] == 424}")

print("\n🚀 ULTRA-FAST SYSTEM TEST COMPLETE!")
print("✅ All components working correctly")
print("✅ Ready for Kaggle deployment")

🧪 Testing Ultra-Fast System...
📊 Mock data shape: (100, 434)
📋 Model info before training: {'model_type': 'ultra_fast_lgb', 'n_estimators': 15, 'n_targets': 0, 'n_features': 0, 'is_trained': False}
🏃‍♂️ Quick training on 20 samples...
🚀 FastModelManager: Ultra-fast training...
🚀 Starting ULTRA-FAST training...
🔧 Preparing features (ultra-fast mode)...
🔧 Fast feature engineering: 3 features per column...
✅ Created 30 features
🎯 Training 424 targets with 40 features...
Progress: 1/424 targets (0.0s)
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0894485
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0167968
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0876188
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.340154
Tr