In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.linear_model import RidgeClassifier,Ridge,LogisticRegression
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from catboost import CatBoostClassifier,CatBoostRegressor
from category_encoders import TargetEncoder
from lightgbm import LGBMClassifier
import os
import joblib
from sklearn.decomposition import PCA
from sklearn.inspection import permutation_importance

In [2]:
import os
import joblib
import numpy as np
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

In [3]:
# Folder that receives the artifacts this notebook saves (scaler, model weights, ...).
OUTDIR = "model_outputs"
# exist_ok=True keeps this cell safe to re-run once the folder exists.
os.makedirs(OUTDIR, exist_ok=True)

In [4]:
def _load_indexed_csv(stem):
    """Read ``data/<stem>.csv`` and use its ROW_ID column as the index."""
    return pd.read_csv(f"data/{stem}.csv", index_col="ROW_ID")


X = _load_indexed_csv("X_train")
X_test_final = _load_indexed_csv("X_test")
y = _load_indexed_csv("y_train")
sample_submission = _load_indexed_csv("sample_submission")

# Binarise the outcome (1 when strictly positive, 0 otherwise) so the task is
# treated as classification rather than regression.
y_bin = y.gt(0).astype(int)

In [5]:
# Autoencoder hyper-parameters, gathered in one place.
embed_dim = 6      # size of the encoder output (bottleneck)
max_epochs = 50    # upper bound on training epochs
batch_size = 128   # mini-batch size (capped by the split size when loaders are built)
lr = 1e-3          # Adam learning rate
patience = 5       # epochs without validation improvement before stopping
random_state = 0   # seed for the train/validation shuffle

In [6]:

ret_cols = [f"RET_{day}" for day in range(1, 21)]
scaler_ae = StandardScaler()


def _ret_matrix(frame):
    """Select the RET_* columns, replace missing values with 0 and cast to float32."""
    return frame[ret_cols].fillna(0).astype(np.float32)


# The scaler is fitted on the training frame only and then reused for the test frame.
X_ret = scaler_ae.fit_transform(_ret_matrix(X))
X_test_ret = scaler_ae.transform(_ret_matrix(X_test_final))

# Shuffled 90 % / 10 % train / validation split of the row positions.
n = X_ret.shape[0]
rng = np.random.default_rng(random_state)
idx = np.arange(n)
rng.shuffle(idx)
split = int(n * 0.9)
train_idx = idx[:split]
val_idx = idx[split:]

# Contiguous float32 arrays, wrapped as torch tensors without copying.
X_train_arr = np.ascontiguousarray(X_ret[train_idx].astype(np.float32))
X_val_arr = np.ascontiguousarray(X_ret[val_idx].astype(np.float32))
X_train_tensor = torch.from_numpy(X_train_arr)
X_val_tensor = torch.from_numpy(X_val_arr)

In [7]:
# Autoencoder datasets: each sample is paired with itself, so input and target are the same tensor.
train_ds = TensorDataset(X_train_tensor, X_train_tensor)
val_ds   = TensorDataset(X_val_tensor, X_val_tensor)

In [8]:

# Never request a batch larger than the split it is drawn from.
train_batch = min(batch_size, len(train_idx))
val_batch = min(batch_size, len(val_idx))

# Only the training batches are shuffled; validation keeps its order.
train_loader = DataLoader(dataset=train_ds, batch_size=train_batch, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=val_batch, shuffle=False)

In [9]:
class AE(nn.Module):
    """Fully connected autoencoder with a mirrored encoder and decoder.

    Layer widths are ``input_dim -> 32 -> max(16, 2 * embed_dim) -> embed_dim``
    for the encoder and the reverse for the decoder, with a ReLU between
    consecutive linear layers and no activation after the last one.
    """

    def __init__(self, input_dim, embed_dim):
        super().__init__()
        widths = [input_dim, 32, max(16, embed_dim * 2), embed_dim]
        # The encoder is built first so parameter creation order is unchanged.
        self.encoder = self._stack(widths)
        self.decoder = self._stack(widths[::-1])

    @staticmethod
    def _stack(widths):
        """Chain Linear layers for consecutive widths, separated by ReLU."""
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Drop the trailing ReLU: the final layer is purely linear.
        return nn.Sequential(*layers[:-1])

    def forward(self, x):
        """Return the reconstruction of ``x`` after passing through the bottleneck."""
        latent = self.encoder(x)
        return self.decoder(latent)

In [10]:
# Number of input features of the autoencoder = number of columns of the scaled return matrix.
input_dim = X_ret.shape[1]

In [11]:
# Seed torch's global RNG so the weight initialisation (and anything else that
# draws from it) is repeatable across "Restart & Run All".
torch.manual_seed(random_state)

# Pick the best available backend instead of hard-coding "mps", which raises
# on machines without Apple-silicon support: MPS first, then CUDA, then CPU.
_mps_backend = getattr(torch.backends, "mps", None)  # absent on old torch builds
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = AE(input_dim, embed_dim).to(device)

In [12]:
# Adam over every autoencoder parameter, using the learning rate from the config cell.
opt = torch.optim.Adam(model.parameters(), lr=lr)

In [13]:
# Mean-squared error; the training cell applies it between reconstructions and their inputs.
loss_fn = nn.MSELoss()

In [14]:
# Early-stopping bookkeeping used by the training loop:
# best validation loss seen so far, and epochs elapsed since it last improved.
best_val = np.inf
patience_counter = 0

In [15]:
# Reset the early-stopping bookkeeping here as well, so re-running this cell
# alone does not compare against a stale best_val from a previous run.
best_val = np.inf
best_state = None
patience_counter = 0

for epoch in range(max_epochs):
    # ---- one pass over the training batches ----
    model.train()
    for xb, _ in train_loader:
        xb = xb.to(device)
        opt.zero_grad()
        loss = loss_fn(model(xb), xb)  # reconstruction error against the input itself
        loss.backward()
        opt.step()

    # ---- validation: mean of the per-batch losses ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, _ in val_loader:
            xb = xb.to(device)
            val_loss += loss_fn(model(xb), xb).item()
    val_loss /= len(val_loader)

    # An improvement must beat the best loss by more than 1e-6 to reset patience.
    if val_loss < best_val - 1e-6:
        best_val = val_loss
        # .cpu() returns the very same tensor when the model already lives on
        # the CPU, so clone explicitly; otherwise the "best" snapshot would
        # keep changing as training continues.
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        patience_counter = 0
    else:
        patience_counter += 1

    if epoch % 5 == 0 or patience_counter == patience:
        print(f"Epoch {epoch}: val_loss={val_loss:.6f} patience={patience_counter}")

    if patience_counter >= patience:
        print("Early stopping reached.")
        break

: 

In [None]:
# Restore the weights captured at the best validation epoch and switch to eval mode.
model.load_state_dict(best_state)
model.eval()


# Encoding
@torch.no_grad()
def encode(X_input):
    """Run ``X_input`` through the encoder on ``device`` and return a NumPy array."""
    batch = torch.tensor(X_input, dtype=torch.float32).to(device)
    latent = model.encoder(batch)
    return latent.cpu().numpy()


emb_train = encode(X_ret)
emb_test = encode(X_test_ret)

# Persist the fitted scaler and the autoencoder weights.
scaler_path = os.path.join(OUTDIR, "ae_scaler.joblib")
weights_path = os.path.join(OUTDIR, "ae_state_dict.pt")
joblib.dump(scaler_ae, scaler_path)
torch.save(model.state_dict(), weights_path)

print("Autoencoder OK. Embeddings:", emb_train.shape, emb_test.shape)


Autoencoder OK. Embeddings: (180245, 6) (7735, 6)


In [None]:
# One RET_EMB_<k> column per embedding dimension (1-based), on both frames.
for k, (train_col, test_col) in enumerate(zip(emb_train.T, emb_test.T), start=1):
    X[f"RET_EMB_{k}"] = train_col
    X_test_final[f"RET_EMB_{k}"] = test_col

In [None]:

# NOTE: range(1, 20) stops at 19, so RET_20 / SIGNED_VOLUME_20 are NOT part of
# these lists. They are left as-is because later cells depend on their length
# (e.g. the 19 weights used for RET_WEIGHTED_MOMENTUM).
RET_features = [f'RET_{i}' for i in range(1, 20)]
SIGNED_VOLUME_features = [f'SIGNED_VOLUME_{i}' for i in range(1, 20)]
TURNOVER_features = ['AVG_DAILY_TURNOVER']

# The trailing averages need all 20 returns: slicing the 19-element
# RET_features with [:20] made AVERAGE_PERF_20 an exact copy of AVERAGE_PERF_19.
_perf_cols = [f'RET_{i}' for i in range(1, 21)]


def _add_average_perf(frame):
    """Add AVERAGE_PERF_i and its per-TS mean for i = 2..20, in place.

    AVERAGE_PERF_i is the row-wise mean of RET_1..RET_i;
    ALLOCATIONS_AVERAGE_PERF_i is the mean of AVERAGE_PERF_i over all rows
    that share the same TS value.
    """
    for i in range(2, 21):
        frame[f'AVERAGE_PERF_{i}'] = frame[_perf_cols[:i]].mean(axis=1)
        frame[f'ALLOCATIONS_AVERAGE_PERF_{i}'] = (
            frame.groupby('TS')[f'AVERAGE_PERF_{i}'].transform('mean')
        )


for _frame in (X, X_test_final):
    _add_average_perf(_frame)


In [None]:
# Weights for the smoothed momentum: linearly increasing from 1 to 2, one per
# column of RET_features (length derived instead of hard-coding 19).
weights = np.linspace(1, 2, len(RET_features))


def _add_return_volume_features(frame, weights):
    """Add the row-wise return / volume features to ``frame`` in place.

    The statistics are taken over RET_features / SIGNED_VOLUME_features as
    defined earlier in the notebook. The same function is applied to the
    training and the test frame so both always get identical columns.
    """
    rets = frame[RET_features]
    volumes = frame[SIGNED_VOLUME_features]

    frame["RET_VOLATILITY_20"] = rets.std(axis=1)
    frame["RET_MOMENTUM"] = frame["RET_1"] - rets.mean(axis=1)

    # Performance / volatility ratio (Sharpe-like); 1e-6 avoids division by zero.
    frame["RET_SHARPE"] = frame["RET_MOMENTUM"] / (frame["RET_VOLATILITY_20"] + 1e-6)

    # Liquidity volatility
    frame["SIGNED_VOLUME_VOL"] = volumes.std(axis=1)

    # First run of digits found in TS, as an integer. The raw string avoids the
    # invalid-escape SyntaxWarning that "(\d+)" triggers.
    frame["TS_num"] = frame["TS"].str.extract(r"(\d+)", expand=False).astype(int)

    frame["RET_TREND_5"] = frame["RET_20"] - frame["RET_15"]
    frame["VOL_TREND_5"] = frame["SIGNED_VOLUME_20"] - frame["SIGNED_VOLUME_15"]

    # Copy of RET_1 and its difference with RET_2
    frame["RET_LAG1"] = frame["RET_1"]
    frame["RET_DIFF1"] = frame["RET_1"] - frame["RET_2"]

    # Row-wise skewness and kurtosis of the returns
    frame["RET_SKEW_20"] = rets.skew(axis=1)
    frame["RET_KURT_20"] = rets.kurt(axis=1)

    # Volume / performance ratio (relative liquidity)
    frame["VOL_PERF_RATIO"] = volumes.mean(axis=1) / (rets.std(axis=1) + 1e-6)

    # Weighted average of the returns (smoothed momentum)
    frame["RET_WEIGHTED_MOMENTUM"] = (rets.values * weights).sum(axis=1) / weights.sum()


for _frame in (X, X_test_final):
    _add_return_volume_features(_frame, weights)


  X["TS_num"] = X["TS"].str.extract("(\d+)").astype(int)
  X_test_final["TS_num"] = X_test_final["TS"].str.extract("(\d+)").astype(int)


In [None]:
# Kept so LinearRegression stays importable for later cells that may reference it.
from sklearn.linear_model import LinearRegression

lags = np.arange(1, 21)
_all_ret_cols = [f'RET_{j}' for j in range(1, 21)]


def _add_trend_shape_features(frame):
    """Add slope / short-vs-long / asymmetry / rank features to ``frame`` in place.

    RET_TREND_SLOPE_20 is the ordinary-least-squares slope of RET_1..RET_20
    regressed on 1..20, computed for every row at once in closed form. This
    replaces one sklearn fit per row, does not depend on the index labels
    being 0..n-1, and no longer overwrites the notebook-level ``model``.
    """
    rets = frame[_all_ret_cols]

    # slope = sum((x - mean(x)) * y) / sum((x - mean(x))**2); the centred lags
    # sum to zero, so the row mean of y does not need to be subtracted.
    lag_dev = lags - lags.mean()
    frame["RET_TREND_SLOPE_20"] = rets.to_numpy(dtype=float) @ lag_dev / (lag_dev @ lag_dev)

    # Mean of RET_1..RET_5 minus mean of RET_6..RET_20
    frame["RET_PAST_SKEW"] = rets.iloc[:, :5].mean(axis=1) - rets.iloc[:, 5:].mean(axis=1)

    # Mean of RET_1..RET_4 over mean of RET_5..RET_20 (1e-6 avoids division by zero)
    frame["RET_SHORT_LONG_RATIO"] = (
        rets.iloc[:, :4].mean(axis=1) / (rets.iloc[:, 4:].mean(axis=1) + 1e-6)
    )

    # Dispersion of the positive part minus dispersion of the negative part
    frame["RET_ASYM_VOL"] = rets.clip(lower=0).std(axis=1) - rets.clip(upper=0).std(axis=1)

    # Percentile rank of AVERAGE_PERF_20 among rows sharing the same TS
    frame["rank_perf_20"] = frame.groupby("TS")["AVERAGE_PERF_20"].rank(pct=True)


# Apply to both frames: the feature list used for modelling contains these
# columns, so the final test frame needs them as well.
for _frame in (X, X_test_final):
    _add_trend_shape_features(_frame)


In [None]:
alloc_col = "ALLOCATION"
ret_cols = [f'RET_{i}' for i in range(1, 21)]


def compute_time_features_for_allocation(df_alloc, cols=None):
    """Summarise the column-wise mean return profile of ``df_alloc``.

    Parameters
    ----------
    df_alloc : pandas.DataFrame
        Rows to aggregate; must contain the columns in ``cols``.
    cols : sequence of str, optional
        Ordered return columns. Defaults to the notebook-level ``ret_cols``.

    Returns
    -------
    pandas.Series
        ALLOC_TREND_SLOPE, ALLOC_SHORT_LONG_RATIO, ALLOC_AUTOCORR,
        ALLOC_SPECTRAL_ENERGY, ALLOC_VOLATILITY and ALLOC_SKEW.
    """
    cols = ret_cols if cols is None else list(cols)
    mean_rets = df_alloc[cols].mean()
    values = mean_rets.to_numpy(dtype=float)

    # 1. OLS slope of the mean profile against 1..len(cols), in closed form
    #    (the step count follows the columns instead of a hard-coded 20).
    steps = np.arange(1, len(values) + 1, dtype=float)
    step_dev = steps - steps.mean()
    slope = float(step_dev @ values / (step_dev @ step_dev))

    # 2. Mean of the first five entries over the mean of the remaining ones.
    short_mean = mean_rets.iloc[:5].mean()
    long_mean = mean_rets.iloc[5:].mean()
    short_long_ratio = short_mean / (long_mean + 1e-6)

    # 3. Correlation between the profile and itself shifted by one position.
    autocorr = np.corrcoef(values[:-1], values[1:])[0, 1]

    # 4. Sum of squared magnitudes of the real FFT of the centred profile.
    fft_vals = np.fft.rfft(values - values.mean())
    spectral_energy = np.sum(np.abs(fft_vals) ** 2)

    # 5. Volatility and asymmetry
    volatility = mean_rets.std()
    skew = ((mean_rets - mean_rets.mean()) ** 3).mean() / (volatility ** 3 + 1e-6)

    return pd.Series({
        "ALLOC_TREND_SLOPE": slope,
        "ALLOC_SHORT_LONG_RATIO": short_long_ratio,
        "ALLOC_AUTOCORR": autocorr,
        "ALLOC_SPECTRAL_ENERGY": spectral_energy,
        "ALLOC_VOLATILITY": volatility,
        "ALLOC_SKEW": skew
    })

In [None]:
# Ordered list of model input columns. The allocation-level ALLOC_* features
# (ALLOC_TREND_SLOPE, ALLOC_SHORT_LONG_RATIO, ALLOC_AUTOCORR,
# ALLOC_SPECTRAL_ENERGY, ALLOC_VOLATILITY, ALLOC_SKEW) are deliberately left out.
features = [
    *RET_features,
    *SIGNED_VOLUME_features,
    *TURNOVER_features,
    *(f'AVERAGE_PERF_{i}' for i in range(2, 21)),
    *(f'ALLOCATIONS_AVERAGE_PERF_{i}' for i in range(2, 21)),
    "RET_VOLATILITY_20", "RET_MOMENTUM", "RET_SHARPE",
    "SIGNED_VOLUME_VOL", "VOL_TREND_5", "RET_TREND_5",
    "alloc_enc",
    "rank_perf_20",
    "VOL_PERF_RATIO", "RET_KURT_20", "RET_SKEW_20", "RET_DIFF1", "RET_LAG1",
    "RET_TREND_SLOPE_20", "RET_PAST_SKEW", "RET_SHORT_LONG_RATIO", "RET_ASYM_VOL",
    # One column per autoencoder embedding dimension (1-based suffix).
    *(f"RET_EMB_{i+1}" for i in range(emb_train.shape[1])),
]

For RF (random forest)

In [None]:
# Hold out the last 20 % of the distinct TS values as a local test set.
# NOTE(review): TS holds strings, so np.sort orders them lexicographically
# (e.g. "…10" before "…2") — confirm this matches the intended chronology.
unique_dates = np.sort(X['TS'].unique())
n = len(unique_dates)
cutoff = int(0.8 * n)
train_dates = unique_dates[:cutoff]   # 80% train
test_dates = unique_dates[cutoff:]    # 20% test

# Boolean row masks aligned on X's index.
train_idx = X['TS'].isin(train_dates)
test_idx = X['TS'].isin(test_dates)

# .copy() makes the two feature frames independent of X, so adding columns to
# them later neither raises SettingWithCopyWarning nor risks a silent no-op.
X_train = X.loc[train_idx, :].copy()
y_train_bin, y_train_cont = y_bin.loc[train_idx], y.loc[train_idx]
X_test = X.loc[test_idx, :].copy()
y_test_bin, y_test_cont = y_bin.loc[test_idx], y.loc[test_idx]

In [None]:
# Work on independent copies: the frames may be slices of X, and assigning a
# new column to a slice triggers SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Target-encode ALLOCATION. The encoder is fitted with the training target
# only; the held-out target is NOT passed to transform(), so no label
# information from the evaluation rows can reach their features.
enc = TargetEncoder()
X_train["alloc_enc"] = enc.fit_transform(X_train["ALLOCATION"], y_train_bin)
X_test["alloc_enc"] = enc.transform(X_test["ALLOCATION"])

# Standardise with statistics learned on the training rows; missing values become 0.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[features].fillna(0))
X_test_scaled = scaler.transform(X_test[features].fillna(0))

In [None]:
results_classif = {}

name, model = "RidgeClassifier", RidgeClassifier(alpha=1e-2, fit_intercept=False)
# Flatten the target to 1-D: a single-column frame triggers sklearn's
# DataConversionWarning. NOTE(review): assumes y has exactly one column.
model.fit(X_train_scaled, np.ravel(y_train_bin))
y_pred = model.predict(X_test_scaled)

# Evaluation
acc = accuracy_score(np.ravel(y_test_bin), y_pred)
results_classif[name] = acc
print(f"✅ Test Accuracy ({name}): {acc:.4f}")

# Feature importances
print("\n🔍 Top 10 feature importances:")

if hasattr(model, "feature_importances_"):
    importances = model.feature_importances_
elif hasattr(model, "coef_"):
    importances = np.abs(model.coef_.ravel())  # absolute coefficient for linear models
else:
    # Fail with a clear message instead of a NameError further down.
    raise AttributeError(f"{name} exposes neither feature_importances_ nor coef_")

importance_df = pd.DataFrame({
    "feature": features,
    "importance": importances
}).sort_values("importance", ascending=False)

# Actually show the table announced by the header above.
print(importance_df.head(10).to_string(index=False))

# Keep the better-ranked half of the features for later use.
top_half = importance_df.iloc[:importance_df.shape[0] // 2]["feature"].tolist()
best_ridge = top_half




--- Permutation importance fold 1/5 ---


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret 


--- Permutation importance fold 2/5 ---


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret 


--- Permutation importance fold 3/5 ---


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret 


--- Permutation importance fold 4/5 ---


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret 


--- Permutation importance fold 5/5 ---


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret 

In [None]:
# Disabled code: everything below is a bare string literal, so none of it runs.
# It sketches a final fit on the full training set plus a stacked submission.
# It references features_pruned, base_models, meta and best_thresh, which are
# not defined in the visible part of this notebook — TODO confirm they exist
# before re-enabling.
'''''
X_full = X[features_pruned].fillna(0)
X_test_full = X_test_final[features_pruned].fillna(0)
y_full = (y > 0).astype(int).values.ravel()

# encode alloc on full set
if "alloc_enc" in features_pruned:
    enc = TargetEncoder()
    X_full["alloc_enc"] = enc.fit_transform(X["ALLOCATION"], y_full)
    X_test_full["alloc_enc"] = enc.transform(X_test_final["ALLOCATION"])

preds_test = {}
for name, model in base_models.items():
    if name == "Ridge":
        scaler = StandardScaler()
        X_full_s = scaler.fit_transform(X_full)
        X_test_s = scaler.transform(X_test_full)
        model.fit(X_full_s, y_full)
        preds_test[name] = model.decision_function(X_test_s)
    else:
        model.fit(X_full, y_full)
        preds_test[name] = model.predict_proba(X_test_full)[:, 1]

# meta predict on test
test_meta_df = pd.DataFrame(preds_test)
meta_test_proba = meta.predict_proba(test_meta_df)[:, 1]
meta_test_pred = (meta_test_proba > best_thresh).astype(int)

# save final submission
sub = pd.DataFrame(meta_test_pred, index=sample_submission.index, columns=["target"])
sub.to_csv(os.path.join(OUTDIR, "submission_meta_stack.csv"))
print("Saved stacked submission to:", os.path.join(OUTDIR, "submission_meta_stack.csv"))
'''''

'\'\'\nX_full = X[features_pruned].fillna(0)\nX_test_full = X_test_final[features_pruned].fillna(0)\ny_full = (y > 0).astype(int).values.ravel()\n\n# encode alloc on full set\nif "alloc_enc" in features_pruned:\n    enc = TargetEncoder()\n    X_full["alloc_enc"] = enc.fit_transform(X["ALLOCATION"], y_full)\n    X_test_full["alloc_enc"] = enc.transform(X_test_final["ALLOCATION"])\n\npreds_test = {}\nfor name, model in base_models.items():\n    if name == "Ridge":\n        scaler = StandardScaler()\n        X_full_s = scaler.fit_transform(X_full)\n        X_test_s = scaler.transform(X_test_full)\n        model.fit(X_full_s, y_full)\n        preds_test[name] = model.decision_function(X_test_s)\n    else:\n        model.fit(X_full, y_full)\n        preds_test[name] = model.predict_proba(X_test_full)[:, 1]\n\n# meta predict on test\ntest_meta_df = pd.DataFrame(preds_test)\nmeta_test_proba = meta.predict_proba(test_meta_df)[:, 1]\nmeta_test_pred = (meta_test_proba > best_thresh).astype(int)\