In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, optimizers, callbacks
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_percentage_error
import warnings
import time
import os
import gc
import optuna

# --- Environment Setup ---
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Set up GPU if available
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"TensorFlow is using GPU: {gpus[0].name}")
        TF_DEVICE = "/GPU:0"
    except RuntimeError as e:
        print(e)
        TF_DEVICE = "/CPU:0"
else:
    print("TensorFlow is using CPU.")
    TF_DEVICE = "/CPU:0"

# =========================================================================
# The Standalone TabTransformerPredictor
# =========================================================================
class TabTransformerPredictor:
    
    # --- Custom Keras Layers for the Architecture ---
    class CLSToken(layers.Layer):
        def __init__(self, embed_dim, **kwargs):
            super().__init__(**kwargs)
            self.embed_dim = embed_dim
            self.cls_token = self.add_weight(shape=[1, 1, self.embed_dim], initializer='random_normal', name='cls_token')
        def call(self, inputs):
            batch_size = tf.shape(inputs)[0]
            cls_broadcasted = tf.tile(self.cls_token, [batch_size, 1, 1])
            return tf.concat([cls_broadcasted, inputs], axis=1)

    class TransformerBlock(layers.Layer):
        def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
            super().__init__()
            self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
            self.ffn = tf.keras.Sequential([layers.Dense(ff_dim, activation="gelu"), layers.Dense(embed_dim)])
            self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
            self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
            self.dropout1 = layers.Dropout(rate)
            self.dropout2 = layers.Dropout(rate)
        def call(self, inputs, training=None):
            attn_output = self.att(inputs, inputs)
            attn_output = self.dropout1(attn_output, training=training)
            out1 = self.layernorm1(inputs + attn_output)
            ffn_output = self.ffn(out1)
            ffn_output = self.dropout2(ffn_output, training=training)
            return self.layernorm2(out1 + ffn_output)

    def __init__(self, best_params=None):
        self.model = None
        self.scalers = {}
        self.best_features = None
        self.target_names = None
        self.is_fitted = False
        self.params = best_params if best_params else {}

    def _create_features(self, df):
        print("  - Generating features...")
        df_features = df.copy()
        fractions = [f'Component{i}_fraction' for i in range(1, 6)]
        for p in range(1, 11):
            prop_cols = [f'Component{c}_Property{p}' for c in range(1, 6)]
            df_features[f'prop_{p}_weighted_avg'] = np.sum(df[fractions].values * df[prop_cols].values, axis=1)
        for col in df_features.columns:
            if df_features[col].dtype == 'float64':
                df_features[col] = df_features[col].astype(np.float32)
        return df_features

    def _create_model(self, input_shape, output_shape):
        # Use tuned parameters if available, otherwise use defaults
        EMBED_DIM = self.params.get('embed_dim', 64)
        NUM_HEADS = self.params.get('num_heads', 8)
        FF_DIM = self.params.get('ff_dim', 128)
        NUM_TRANSFORMER_BLOCKS = self.params.get('num_blocks', 4)
        MLP_UNITS = self.params.get('mlp_units', [256, 128])
        DROPOUT_RATE = self.params.get('dropout_rate', 0.2)
        
        inputs = layers.Input(shape=(input_shape,))
        x = layers.Reshape((input_shape, 1))(inputs)
        x = layers.Dense(EMBED_DIM)(x)
        x = self.CLSToken(EMBED_DIM)(x)
        for _ in range(NUM_TRANSFORMER_BLOCKS):
            x = self.TransformerBlock(EMBED_DIM, NUM_HEADS, FF_DIM, DROPOUT_RATE)(x)
        representation = x[:, 0, :]
        for dim in MLP_UNITS:
            representation = layers.Dense(dim, activation="gelu")(representation)
            representation = layers.Dropout(DROPOUT_RATE)(representation)
        outputs = layers.Dense(output_shape, name="output")(representation)
        return Model(inputs=inputs, outputs=outputs)

    def fit(self, X, y):
        print("--- Fitting Final TabTransformerPredictor with Best Parameters ---")
        start_time = time.time()
        self.target_names = y.columns.tolist()
        X_featured = self._create_features(X)
        self.best_features = X_featured.columns.tolist()
        self.scalers['feature_scaler'] = RobustScaler()
        X_scaled = pd.DataFrame(self.scalers['feature_scaler'].fit_transform(X_featured), columns=self.best_features, dtype=np.float32)
        self.scalers['target_scaler'] = RobustScaler()
        y_scaled = pd.DataFrame(self.scalers['target_scaler'].fit_transform(y), columns=y.columns, dtype=np.float32)
        del X_featured; gc.collect()

        with tf.device(TF_DEVICE):
            self.model = self._create_model(X_scaled.shape[1], y_scaled.shape[1])
            lr = self.params.get('learning_rate', 1e-3)
            optimizer = optimizers.AdamW(learning_rate=lr, weight_decay=1e-5)
            self.model.compile(loss='mean_squared_error', optimizer=optimizer)
            
            # For the final fit, we don't need early stopping as we use the whole dataset
            self.model.fit(X_scaled, y_scaled, epochs=200, batch_size=64, verbose=1)
        
        self.is_fitted = True
        print(f"\nCompleted final model fitting in {time.time() - start_time:.2f}s\n")
        return self

    def predict(self, X):
        if not self.is_fitted: raise RuntimeError("You must fit the model before predicting.")
        print("  - Predicting with Final TabTransformer...")
        X_featured = self._create_features(X)
        X_scaled = pd.DataFrame(self.scalers['feature_scaler'].transform(X_featured), columns=self.best_features, dtype=np.float32)
        with tf.device(TF_DEVICE):
            preds_scaled = self.model.predict(X_scaled, verbose=0)
        preds_orig = self.scalers['target_scaler'].inverse_transform(preds_scaled)
        return pd.DataFrame(preds_orig, columns=self.target_names)

# =========================================================================
# Main Execution Block with Optuna Optimization
# =========================================================================
def main():
    print("🚀 Starting Fuel Blending ML Pipeline: Stage 3 - TabTransformer with Optuna Tuning")
    try:
        train_df = pd.read_csv('/kaggle/input/training/train.csv')
        test_df = pd.read_csv('/kaggle/input/testing/test.csv')
        for df in [train_df, test_df]:
            for col in df.select_dtypes(include=['float64']).columns:
                df[col] = df[col].astype(np.float32)
    except FileNotFoundError as e:
        print(f"Error loading data: {e}. Please check file paths.")
        return
    
    target_columns = [col for col in train_df.columns if 'BlendProperty' in col]
    feature_columns = [col for col in train_df.columns if col not in target_columns and 'ID' not in col]
    
    X, y = train_df[feature_columns], train_df[target_columns]
    X_test = test_df[feature_columns]

    # --- Pre-computation outside the Optuna loop for efficiency ---
    temp_predictor = TabTransformerPredictor()
    X_featured = temp_predictor._create_features(X)
    feature_scaler = RobustScaler().fit(X_featured)
    X_scaled = pd.DataFrame(feature_scaler.transform(X_featured), columns=X_featured.columns, dtype=np.float32)
    target_scaler = RobustScaler().fit(y)
    y_scaled = pd.DataFrame(target_scaler.transform(y), columns=y.columns, dtype=np.float32)
    X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
    del X_featured; gc.collect()

    def objective(trial):
        params = {
            'embed_dim': trial.suggest_categorical('embed_dim', [32, 64, 128]),
            'num_heads': trial.suggest_categorical('num_heads', [4, 8]),
            'ff_dim': trial.suggest_categorical('ff_dim', [128, 256]),
            'num_blocks': trial.suggest_int('num_blocks', 2, 6),
            'mlp_units': trial.suggest_categorical('mlp_units', [[128, 64], [256, 128]]),
            'dropout_rate': trial.suggest_float('dropout_rate', 0.1, 0.4),
            'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True),
        }
        
        with tf.device(TF_DEVICE):
            # Build and train a temporary model for this trial
            trial_predictor = TabTransformerPredictor(best_params=params)
            model = trial_predictor._create_model(X_train.shape[1], y_train.shape[1])
            optimizer = optimizers.AdamW(learning_rate=params['learning_rate'], weight_decay=1e-5)
            model.compile(loss='mean_squared_error', optimizer=optimizer)
            
            callbacks_list = [callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)]
            
            model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=200, batch_size=64,
                      callbacks=callbacks_list, verbose=0) # verbose=0 to keep logs clean
            
            # Evaluate and return MAPE on the original scale
            preds_scaled = model.predict(X_val, verbose=0)
            preds_orig = target_scaler.inverse_transform(preds_scaled)
            y_val_orig = target_scaler.inverse_transform(y_val)
            mape = mean_absolute_percentage_error(y_val_orig, preds_orig)
        
        return mape

    # --- Run the Optuna Study ---
    print("\n--- Starting Hyperparameter Optimization with Optuna ---")
    study = optuna.create_study(direction='minimize')
    # Run for 50 trials. For a quick test, use 5-10. For a serious run, use 50-100.
    study.optimize(objective, n_trials=50) 
    
    print("\n--- Optimization Complete ---")
    print(f"Best validation MAPE: {study.best_value:.6f}")
    print("Best parameters found:")
    print(study.best_params)

    # --- Train Final Model and Predict ---
    final_model = TabTransformerPredictor(best_params=study.best_params)
    final_model.fit(X, y) # Fit on ALL data with the best params
    
    predictions = final_model.predict(X_test)

    submission = pd.DataFrame({'ID': test_df.get('ID', test_df.index)})
    submission = pd.concat([submission, predictions], axis=1)
    submission.to_csv('submission.csv', index=False)
    
    print("\n💾 Final submission file 'submission.csv' saved successfully.")
    print(submission.head())

if __name__ == "__main__":
    main()

2025-07-20 04:27:50.764326: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752985670.936111      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752985670.991555      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


TensorFlow is using GPU: /physical_device:GPU:0
🚀 Starting Fuel Blending ML Pipeline: Stage 3 - TabTransformer with Optuna Tuning
  - Generating features...

--- Starting Hyperparameter Optimization with Optuna ---


I0000 00:00:1752985684.027865      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
I0000 00:00:1752985704.194667      58 service.cc:148] XLA service 0x7dfb50002500 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1752985704.195439      58 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1752985705.749067      58 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1752985714.165552      58 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.



--- Optimization Complete ---
Best validation MAPE: 1.045577
Best parameters found:
{'embed_dim': 128, 'num_heads': 8, 'ff_dim': 128, 'num_blocks': 3, 'mlp_units': [128, 64], 'dropout_rate': 0.1823351772550523, 'learning_rate': 0.00928836234214192}
--- Fitting Final TabTransformerPredictor with Best Parameters ---
  - Generating features...
Epoch 1/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 258ms/step - loss: 1.0544
Epoch 2/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.5521
Epoch 3/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.5328
Epoch 4/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.5460
Epoch 5/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.5405
Epoch 6/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.5456
Epoch 7/200
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━

In [2]:
! pip install optuna

