In [1]:
!pip install /kaggle/input/libraries/joblib-1.4.2-py3-none-any.whl
!pip install /kaggle/input/libraries/lightgbm-4.3.0-py3-none-manylinux_2_28_x86_64.whl
!pip install /kaggle/input/libraries/numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install /kaggle/input/libraries/optuna-3.5.0-py3-none-any.whl
!pip install /kaggle/input/libraries/pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install /kaggle/input/libraries/scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install /kaggle/input/libraries/torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl
!pip install /kaggle/input/libraries/scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install /kaggle/input/libraries/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl

Processing /kaggle/input/libraries/joblib-1.4.2-py3-none-any.whl
Installing collected packages: joblib
  Attempting uninstall: joblib
    Found existing installation: joblib 1.5.1
    Uninstalling joblib-1.5.1:
      Successfully uninstalled joblib-1.5.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
preprocessing 0.1.13 requires nltk==3.2.4, but you have nltk 3.9.1 which is incompatible.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
imbalanced-learn 0.13.0 requires scikit-learn<2,>=1.3.2, but you have scikit-learn 1.2.2 which is incompatible.
mlxtend 0.23.4 requires scikit-learn>=1.3.1, but you have scikit-learn 1.2.2 which is incompatible.[0m[31m
[0mSuccessfully installed joblib-1.4.2
Processing /kaggle/input/libraries/lightgbm-4.3.0-py3-none-manylinux_2_28_x86_64.whl
Installing collected packag

In [2]:
# Import libraries
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem, MACCSkeys, Descriptors, DataStructs
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import lightgbm as lgb
import optuna
import torch
import torch.nn as nn
import torch.utils.data
import joblib


train_df = pd.read_csv('/kaggle/input/neurips-open-polymer-prediction-2025/train.csv')
test_df = pd.read_csv('/kaggle/input/neurips-open-polymer-prediction-2025/test.csv')

targets = ['Tg', 'FFV', 'Tc', 'Density', 'Rg']

def featurize_combo(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return np.zeros(2048 + 167)
    morgan_fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
    morgan_arr = np.zeros((2048,), dtype=int)
    DataStructs.ConvertToNumpyArray(morgan_fp, morgan_arr)
    maccs_fp = MACCSkeys.GenMACCSKeys(mol)
    maccs_arr = np.zeros((167,), dtype=int)
    DataStructs.ConvertToNumpyArray(maccs_fp, maccs_arr)
    return np.concatenate([morgan_arr, maccs_arr])

def calc_extended_descriptors(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return np.zeros(12)
    desc = [
        Descriptors.MolWt(mol),
        Descriptors.MolLogP(mol),
        Descriptors.TPSA(mol),
        Descriptors.NumHDonors(mol),
        Descriptors.NumHAcceptors(mol),
        Descriptors.RingCount(mol),
        Descriptors.NumRotatableBonds(mol),
        Descriptors.FpDensityMorgan1(mol),
        Descriptors.FpDensityMorgan2(mol),
        Descriptors.FpDensityMorgan3(mol),
        Descriptors.HeavyAtomCount(mol),
        Descriptors.NumValenceElectrons(mol),
    ]
    return np.array(desc)

print("Featurizing train data...")
fps_train = [featurize_combo(smi) for smi in train_df['SMILES']]
desc_train = [calc_extended_descriptors(smi) for smi in train_df['SMILES']]
X_train_full = np.hstack([fps_train, desc_train])

print("Featurizing test data...")
fps_test = [featurize_combo(smi) for smi in test_df['SMILES']]
desc_test = [calc_extended_descriptors(smi) for smi in test_df['SMILES']]
X_test_full = np.hstack([fps_test, desc_test])



feature_scaler = StandardScaler()
feature_scaler.fit(X_train_full)
joblib.dump(feature_scaler, 'feature_scaler.pkl')
print("Saved global feature scaler as feature_scaler.pkl")

X_train_scaled = feature_scaler.transform(X_train_full)
X_test_scaled = feature_scaler.transform(X_test_full)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class SimpleNN(nn.Module):
    def __init__(self, input_dim, output_dim=1, hidden_dims=[512, 128], dropout_rates=[0.3, 0.2]):
        super().__init__()
        layers = []
        prev_dim = input_dim
        for hdim, drop in zip(hidden_dims, dropout_rates):
            layers.append(nn.Linear(prev_dim, hdim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(drop))
            prev_dim = hdim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

def train_nn(X_train, y_train, X_val, y_val, params, epochs=30, batch_size=64):
    model = SimpleNN(X_train.shape[1], output_dim=1, hidden_dims=params['hidden_dims'], dropout_rates=params['dropout_rates']).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])
    loss_fn = nn.MSELoss()

    train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                                                   torch.tensor(y_train, dtype=torch.float32).unsqueeze(1))
    val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32),
                                                 torch.tensor(y_val, dtype=torch.float32).unsqueeze(1))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    best_val_loss = float('inf')
    best_model_state = None
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = loss_fn(preds, yb)
            loss.backward()
            optimizer.step()

        model.eval()
        val_losses = []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb)
                loss = loss_fn(preds, yb)
                val_losses.append(loss.item())
        avg_val_loss = np.mean(val_losses)
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_state = model.state_dict()
    model.load_state_dict(best_model_state)
    return model


def tune_lgbm(X, y):
    def objective(trial):
        params = {
            'objective': 'regression',
            'metric': 'rmse',
            'boosting_type': 'gbdt',
            'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.05, log=True),
            'num_leaves': trial.suggest_int('num_leaves', 31, 256),
            'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
            'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
            'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
            'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
            'seed': 42,
            'verbose': -1,
        }
        cv = KFold(n_splits=3, shuffle=True, random_state=42)
        rmses = []
        for train_idx, val_idx in cv.split(X):
            X_tr, X_val = X[train_idx], X[val_idx]
            y_tr, y_val = y[train_idx], y[val_idx]
            train_data = lgb.Dataset(X_tr, label=y_tr)
            val_data = lgb.Dataset(X_val, label=y_val)
            model = lgb.train(
                params,
                train_data,
                valid_sets=[val_data],
                num_boost_round=1000,
                callbacks=[
                    lgb.early_stopping(50),
                    lgb.log_evaluation(0)
                ],
            )
            preds = model.predict(X_val, num_iteration=model.best_iteration)
            rmse = np.sqrt(mean_squared_error(y_val, preds))
            rmses.append(rmse)
        return np.mean(rmses)

    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=20, show_progress_bar=True)
    return study.best_params

def tune_nn_params(X_train, y_train, X_val, y_val):
    def objective(trial):
        params = {
            'lr': trial.suggest_float('lr', 1e-4, 1e-2, log=True),
            'hidden_dims': [trial.suggest_int('hd1', 256, 512), trial.suggest_int('hd2', 64, 256)],
            'dropout_rates': [trial.suggest_float('drop1', 0.1, 0.5), trial.suggest_float('drop2', 0.1, 0.5)],
        }
        model = train_nn(X_train, y_train, X_val, y_val, params, epochs=10)
        model.eval()
        with torch.no_grad():
            preds = model(torch.tensor(X_val, dtype=torch.float32).to(device)).cpu().numpy().reshape(-1)
        rmse = np.sqrt(mean_squared_error(y_val, preds))
        return rmse

    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=15, show_progress_bar=True)
    return study.best_params


models = {}
nn_models = {}
scalers = {}

print("\n--- Training models per target ---")
for target in targets:
    print(f"\nProcessing target: {target}")
    mask = train_df[target].notnull()
    X_target = X_train_scaled[mask.values]
    y_target = train_df.loc[mask, target].values


    print("Tuning LightGBM...")
    best_params = tune_lgbm(X_target, y_target)
    best_params.update({'objective': 'regression', 'metric': 'rmse', 'verbose': -1, 'seed': 42})
    train_data = lgb.Dataset(X_target, label=y_target)
    model_lgb = lgb.train(best_params, train_data, num_boost_round=1000)
    models[target] = model_lgb

    print("Tuning Neural Network...")
    X_tr_nn, X_val_nn, y_tr_nn, y_val_nn = train_test_split(X_target, y_target, test_size=0.2, random_state=42)
    best_nn_params = tune_nn_params(X_tr_nn, y_tr_nn, X_val_nn, y_val_nn)
    print(f"Training NN with best params: {best_nn_params}")
    model_nn = train_nn(
        X_tr_nn, y_tr_nn, X_val_nn, y_val_nn,
        params={
            'lr': best_nn_params['lr'],
            'hidden_dims': [best_nn_params['hd1'], best_nn_params['hd2']],
            'dropout_rates': [best_nn_params['drop1'], best_nn_params['drop2']]
        },
        epochs=20
    )
    nn_models[target] = model_nn


print("\n--- Preparing stacking training data ---")
train_preds = pd.DataFrame(index=train_df.index, columns=targets)

for target in targets:
    mask = train_df[target].notnull()
    X_target = X_train_scaled[mask.values]
    model_lgb = models[target]
    model_nn = nn_models[target]

    preds_lgb = np.full(train_df.shape[0], np.nan)
    preds_lgb[mask.values] = model_lgb.predict(X_target)

    model_nn.eval()
    with torch.no_grad():
        preds_nn = np.full(train_df.shape[0], np.nan)
        preds_nn[mask.values] = model_nn(torch.tensor(X_target, dtype=torch.float32).to(device)).cpu().numpy().reshape(-1)

    avg_preds = np.nanmean(np.vstack([preds_lgb, preds_nn]), axis=0)
    train_preds[target] = avg_preds


train_preds_filled = train_preds.fillna(train_df[targets].median())


stack_model = LinearRegression()
stack_model.fit(train_preds_filled.values, train_df[targets].fillna(train_df[targets].median()).values)


final_train_preds = stack_model.predict(train_preds_filled.values)
true_train_targets = train_df[targets].fillna(train_df[targets].median()).values

rmses = np.sqrt(np.mean((true_train_targets - final_train_preds) ** 2, axis=0))
r2s = [r2_score(true_train_targets[:, i], final_train_preds[:, i]) for i in range(len(targets))]

print("\n=== Final Model Performance on Training Data ===")
for i, target in enumerate(targets):
    print(f"{target}: RMSE = {rmses[i]:.4f}, R2 = {r2s[i]:.4f}")
print(f"Overall RMSE (mean across targets): {rmses.mean():.4f}")
print(f"Overall R2 (mean across targets): {np.mean(r2s):.4f}")


class FullStackedModel:
    def __init__(self, models, nn_models, stack_model, feature_scaler, targets):
        self.models = models
        self.nn_models = nn_models
        self.stack_model = stack_model
        self.feature_scaler = feature_scaler
        self.targets = targets
        self.device = device

    def predict(self, X_raw):
        X_scaled = self.feature_scaler.transform(X_raw)
        preds_list = []
        for target in self.targets:
            model = self.models[target]
            model_nn = self.nn_models[target]

            lgb_pred = model.predict(X_scaled)

            model_nn.eval()
            with torch.no_grad():
                nn_pred = model_nn(torch.tensor(X_scaled, dtype=torch.float32).to(self.device)).cpu().numpy().reshape(-1)

            preds_avg = (lgb_pred + nn_pred) / 2
            preds_list.append(preds_avg)

        preds_stack_input = np.vstack(preds_list).T  
        final_preds = self.stack_model.predict(preds_stack_input)
        return final_preds


full_model = FullStackedModel(models, nn_models, stack_model, feature_scaler, targets)


joblib.dump(full_model, 'full_stacked_model.pkl')
print("Saved full combined model as full_stacked_model.pkl")

def create_submission(test_df, model, scaler, submission_path='submission.csv'):
    print("Featurizing test data for submission...")
    fps_test = [featurize_combo(smi) for smi in test_df['SMILES']]
    desc_test = [calc_extended_descriptors(smi) for smi in test_df['SMILES']]
    X_test_full = np.hstack([fps_test, desc_test])

    print("Predicting test data targets...")
    preds = model.predict(X_test_full) 

    submission_df = pd.DataFrame({
        'id': test_df['id'],
        'Tg': preds[:, 0],
        'FFV': preds[:, 1],
        'Tc': preds[:, 2],
        'Density': preds[:, 3],
        'Rg': preds[:, 4]
    })

    submission_df.to_csv(submission_path, index=False)
    print(f"Submission file saved as {submission_path}")


create_submission(test_df, full_model, feature_scaler)

Featurizing train data...




Featurizing test data...


[I 2025-07-22 19:02:23,377] A new study created in memory with name: no-name-de532dc3-8047-42c0-8c26-f5f15b64c1ce


Saved global feature scaler as feature_scaler.pkl

--- Training models per target ---

Processing target: Tg
Tuning LightGBM...


  0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[910]	valid_0's rmse: 68.5708
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[964]	valid_0's rmse: 69.9114
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[535]	valid_0's rmse: 65.1928
[I 2025-07-22 19:02:24,228] Trial 0 finished with value: 67.89168468945515 and parameters: {'learning_rate': 0.009227527128136353, 'num_leaves': 203, 'feature_fraction': 0.8332987858629131, 'bagging_fraction': 0.6540157228422425, 'bagging_freq': 7, 'min_data_in_leaf': 39}. Best is trial 0 with value: 67.89168468945515.
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 75.7545
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 80.304
Training 

[I 2025-07-22 19:02:41,532] A new study created in memory with name: no-name-b7b7b626-a19d-455a-aa41-66b4149dd2f6


Tuning Neural Network...


  0%|          | 0/15 [00:00<?, ?it/s]

[I 2025-07-22 19:02:48,201] Trial 0 finished with value: 90.47791308988248 and parameters: {'lr': 0.0011041481508456755, 'hd1': 421, 'hd2': 220, 'drop1': 0.2543134988442336, 'drop2': 0.13425625100722827}. Best is trial 0 with value: 90.47791308988248.
[I 2025-07-22 19:02:48,800] Trial 1 finished with value: 126.77093747189814 and parameters: {'lr': 0.0001164620736151061, 'hd1': 410, 'hd2': 152, 'drop1': 0.12681188326560938, 'drop2': 0.3619163472068848}. Best is trial 0 with value: 90.47791308988248.
[I 2025-07-22 19:02:49,394] Trial 2 finished with value: 88.10486965099126 and parameters: {'lr': 0.0021648840552993876, 'hd1': 398, 'hd2': 151, 'drop1': 0.23839631850220688, 'drop2': 0.37982636036025763}. Best is trial 2 with value: 88.10486965099126.
[I 2025-07-22 19:02:50,026] Trial 3 finished with value: 83.94834165994268 and parameters: {'lr': 0.0026485423610182906, 'hd1': 441, 'hd2': 144, 'drop1': 0.37657090696344964, 'drop2': 0.13161703031969135}. Best is trial 3 with value: 83.94834

[I 2025-07-22 19:02:56,922] A new study created in memory with name: no-name-93725d55-2d57-4a83-a76c-a42b5dacbf22



Processing target: FFV
Tuning LightGBM...


  0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 0.0146559
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 0.0122508
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 0.0142165
[I 2025-07-22 19:03:22,875] Trial 0 finished with value: 0.013707715602824216 and parameters: {'learning_rate': 0.006441386685619307, 'num_leaves': 63, 'feature_fraction': 0.7726839870584742, 'bagging_fraction': 0.9396162923991245, 'bagging_freq': 5, 'min_data_in_leaf': 30}. Best is trial 0 with value: 0.013707715602824216.
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[999]	valid_0's rmse: 0.0140476
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is

[I 2025-07-22 19:13:06,939] A new study created in memory with name: no-name-c70232fb-50f7-4aec-a98e-6b4a7092fc38


Tuning Neural Network...


  0%|          | 0/15 [00:00<?, ?it/s]

[I 2025-07-22 19:13:12,278] Trial 0 finished with value: 0.021271750502386173 and parameters: {'lr': 0.001361823234493184, 'hd1': 257, 'hd2': 84, 'drop1': 0.20586327882514072, 'drop2': 0.42017826844126505}. Best is trial 0 with value: 0.021271750502386173.
[I 2025-07-22 19:13:19,417] Trial 1 finished with value: 0.022260572886745874 and parameters: {'lr': 0.005560376157456649, 'hd1': 331, 'hd2': 188, 'drop1': 0.36543100346959334, 'drop2': 0.2694116750351472}. Best is trial 0 with value: 0.021271750502386173.
[I 2025-07-22 19:13:26,739] Trial 2 finished with value: 0.029418289133150897 and parameters: {'lr': 0.0070900871924759155, 'hd1': 387, 'hd2': 82, 'drop1': 0.4791444277010979, 'drop2': 0.4554595907578075}. Best is trial 0 with value: 0.021271750502386173.
[I 2025-07-22 19:13:35,488] Trial 3 finished with value: 0.026066149575540782 and parameters: {'lr': 0.0015359355377118614, 'hd1': 471, 'hd2': 245, 'drop1': 0.29151665058342724, 'drop2': 0.339509128272815}. Best is trial 0 with va

[I 2025-07-22 19:14:57,812] A new study created in memory with name: no-name-cbb5c5aa-67ef-4977-abb4-97fbd1ffa71a



Processing target: Tc
Tuning LightGBM...


  0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[990]	valid_0's rmse: 0.0520005
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[563]	valid_0's rmse: 0.0453452
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 0.0448724
[I 2025-07-22 19:14:58,500] Trial 0 finished with value: 0.047406052279771606 and parameters: {'learning_rate': 0.007875302017071735, 'num_leaves': 255, 'feature_fraction': 0.8365798913733791, 'bagging_fraction': 0.7392569579643302, 'bagging_freq': 9, 'min_data_in_leaf': 94}. Best is trial 0 with value: 0.047406052279771606.
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[549]	valid_0's rmse: 0.0423454
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[297]	valid_0's rmse: 0.0376266
Traini

[I 2025-07-22 19:15:21,091] A new study created in memory with name: no-name-a1df860f-46ba-4fcb-bf3c-886aaa1b1b00


Tuning Neural Network...


  0%|          | 0/15 [00:00<?, ?it/s]

[I 2025-07-22 19:15:21,798] Trial 0 finished with value: 0.06397512125318662 and parameters: {'lr': 0.0005852448143254734, 'hd1': 310, 'hd2': 173, 'drop1': 0.10272037893509466, 'drop2': 0.22057604636208317}. Best is trial 0 with value: 0.06397512125318662.
[I 2025-07-22 19:15:22,764] Trial 1 finished with value: 0.06698062204845148 and parameters: {'lr': 0.0023458712952065097, 'hd1': 492, 'hd2': 188, 'drop1': 0.1844297372099363, 'drop2': 0.41215746173047485}. Best is trial 0 with value: 0.06397512125318662.
[I 2025-07-22 19:15:23,481] Trial 2 finished with value: 0.05881497780772329 and parameters: {'lr': 0.0003923326487259841, 'hd1': 337, 'hd2': 105, 'drop1': 0.14043989351583308, 'drop2': 0.1122652987654505}. Best is trial 2 with value: 0.05881497780772329.
[I 2025-07-22 19:15:24,261] Trial 3 finished with value: 0.06610602795586269 and parameters: {'lr': 0.00013228218196723276, 'hd1': 389, 'hd2': 65, 'drop1': 0.19480340147173258, 'drop2': 0.36159053990114376}. Best is trial 2 with va

[I 2025-07-22 19:15:34,452] A new study created in memory with name: no-name-1fd4974c-3e89-46e7-b07b-46aa9d50ba85



Processing target: Density
Tuning LightGBM...


  0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 0.086451
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[592]	valid_0's rmse: 0.0700861
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[544]	valid_0's rmse: 0.0622267
[I 2025-07-22 19:15:35,546] Trial 0 finished with value: 0.0729212568915973 and parameters: {'learning_rate': 0.012431570897195949, 'num_leaves': 128, 'feature_fraction': 0.6895499319926279, 'bagging_fraction': 0.5803157983445958, 'bagging_freq': 4, 'min_data_in_leaf': 26}. Best is trial 0 with value: 0.0729212568915973.
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[996]	valid_0's rmse: 0.10214
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[712]	valid_0's rmse: 0.0791788
Training unti

[I 2025-07-22 19:16:02,915] A new study created in memory with name: no-name-f4b44d04-ec01-4a77-a52e-a84c2bf10069


Tuning Neural Network...


  0%|          | 0/15 [00:00<?, ?it/s]

[I 2025-07-22 19:16:03,598] Trial 0 finished with value: 0.4552287804983046 and parameters: {'lr': 0.005221456736963104, 'hd1': 390, 'hd2': 148, 'drop1': 0.3867643715777008, 'drop2': 0.33978933159303853}. Best is trial 0 with value: 0.4552287804983046.
[I 2025-07-22 19:16:04,320] Trial 1 finished with value: 0.32338129485599226 and parameters: {'lr': 0.0012063029727427874, 'hd1': 432, 'hd2': 200, 'drop1': 0.4491211708693734, 'drop2': 0.32468437192949073}. Best is trial 1 with value: 0.32338129485599226.
[I 2025-07-22 19:16:04,945] Trial 2 finished with value: 0.2553934233596681 and parameters: {'lr': 0.0010473536435877614, 'hd1': 341, 'hd2': 149, 'drop1': 0.2735996286733482, 'drop2': 0.29488819622747986}. Best is trial 2 with value: 0.2553934233596681.
[I 2025-07-22 19:16:05,713] Trial 3 finished with value: 0.3319414016372996 and parameters: {'lr': 0.0024040049356716165, 'hd1': 456, 'hd2': 238, 'drop1': 0.3167426531503905, 'drop2': 0.26536325616539475}. Best is trial 2 with value: 0.2

[I 2025-07-22 19:16:14,682] A new study created in memory with name: no-name-4006652d-000d-4ddf-81b6-810235d41948



Processing target: Rg
Tuning LightGBM...


  0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[448]	valid_0's rmse: 2.83658
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[399]	valid_0's rmse: 2.72572
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[332]	valid_0's rmse: 2.87571
[I 2025-07-22 19:16:15,116] Trial 0 finished with value: 2.8126712407657464 and parameters: {'learning_rate': 0.027425410354416813, 'num_leaves': 78, 'feature_fraction': 0.6652489638811658, 'bagging_fraction': 0.6569890429590748, 'bagging_freq': 4, 'min_data_in_leaf': 70}. Best is trial 0 with value: 2.8126712407657464.
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[588]	valid_0's rmse: 2.75772
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[505]	valid_0's rmse: 2.60645
Training until validation scores don't improve f

[I 2025-07-22 19:16:40,808] A new study created in memory with name: no-name-02745e8f-ff74-479c-bfd3-e762c59ee4e8


Tuning Neural Network...


  0%|          | 0/15 [00:00<?, ?it/s]

[I 2025-07-22 19:16:41,516] Trial 0 finished with value: 5.954339872851981 and parameters: {'lr': 0.004535874242635379, 'hd1': 431, 'hd2': 118, 'drop1': 0.20215299916543522, 'drop2': 0.19671013304498974}. Best is trial 0 with value: 5.954339872851981.
[I 2025-07-22 19:16:42,080] Trial 1 finished with value: 5.734598499459601 and parameters: {'lr': 0.005973819670098687, 'hd1': 286, 'hd2': 219, 'drop1': 0.4422847553525948, 'drop2': 0.23347861549102264}. Best is trial 1 with value: 5.734598499459601.
[I 2025-07-22 19:16:42,640] Trial 2 finished with value: 5.799972708742293 and parameters: {'lr': 0.0005924538346884702, 'hd1': 278, 'hd2': 148, 'drop1': 0.4643820687187411, 'drop2': 0.4527816106467245}. Best is trial 1 with value: 5.734598499459601.
[I 2025-07-22 19:16:43,421] Trial 3 finished with value: 6.0582471875370985 and parameters: {'lr': 0.009724928426603434, 'hd1': 506, 'hd2': 146, 'drop1': 0.4213145441532521, 'drop2': 0.48247486488785685}. Best is trial 1 with value: 5.73459849945

  avg_preds = np.nanmean(np.vstack([preds_lgb, preds_nn]), axis=0)



=== Final Model Performance on Training Data ===
Tg: RMSE = 6.4099, R2 = 0.9500
FFV: RMSE = 0.0074, R2 = 0.9298
Tc: RMSE = 0.0062, R2 = 0.9497
Density: RMSE = 0.0121, R2 = 0.9156
Rg: RMSE = 0.4542, R2 = 0.8832
Overall RMSE (mean across targets): 1.3780
Overall R2 (mean across targets): 0.9257
Saved full combined model as full_stacked_model.pkl
Featurizing test data for submission...
Predicting test data targets...
Submission file saved as submission.csv


