In [None]:
import os, time, warnings, random
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

# Reproducibility
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
)
from sklearn.model_selection import StratifiedKFold, GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import SMOTE

from xgboost import XGBClassifier

import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

import tensorflow as tf
tf.random.set_seed(RANDOM_SEED)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional, Flatten, Input
from tensorflow.keras.optimizers import Adam

# --- Run configuration -------------------------------------------------------
# Location of the engineered-feature workbook, relative to the working directory.
FILE_PATH = os.path.join("Data", "Hybrid_Augmented_TSAFE_Features.xlsx")
file_path = FILE_PATH        # lower-case alias kept for backward compatibility
N_SPLITS   = 3               # number of stratified CV folds
DL_EPOCHS  = 10              # training epochs for each Keras model
PPO_STEPS  = 50_000          # reduce if slow
VAL_SIZE   = 0.2             # not used unless you later add calibration

# The loader previously referenced FILE_PATH while only `file_path` was
# defined, which raises NameError on a fresh kernel.
df = pd.read_excel(FILE_PATH)

# Derive the combined plant/destination key when the workbook does not ship it.
if 'Plant_Destination' not in df.columns:
    if not {'Plant Code', 'Destination Port'}.issubset(df.columns):
        raise ValueError("Missing 'Plant Code' or 'Destination Port' to build Plant_Destination.")
    plant_txt = df['Plant Code'].astype(str)
    dest_txt = df['Destination Port'].astype(str)
    df['Plant_Destination'] = plant_txt + ' | ' + dest_txt

# Categorical inputs (one-hot encoded below).
cat_features = [
    'Origin Port', 'Carrier', 'Plant Code', 'Destination Port', 'Plant_Destination'
]

# Numeric inputs (used as-is after inf/NaN clean-up).
num_features = [
    'Unit quantity','Weight','TPT',
    'TPT_per_Unit','LeadTime_Deviation','Weight_per_Unit','log_UnitQty',
    'carrier_origin_risk','route_cum_late_rate','route_bb_mean','carrier_bb_mean',
    'route_orders_last7d','route_roll10_Weight_q90',
    'congestion_trend','Weight_vsCarrierMean','seq_pos_norm'
]

# Warn about, then drop, any requested column that the data does not contain.
requested_cols = cat_features + num_features
missing = [col for col in requested_cols if col not in df.columns]
if missing:
    print(f"[WARN] Missing columns skipped: {missing}")
    cat_features = [col for col in cat_features if col in df.columns]
    num_features = [col for col in num_features if col in df.columns]

# One-hot encode, turn +/-inf into NaN, then impute NaN with the column median.
X_df = pd.get_dummies(
    df[cat_features + num_features], drop_first=False
).replace([np.inf, -np.inf], np.nan)
X_df = X_df.fillna(X_df.median(numeric_only=True))
X_all = X_df.values

# Binary target: 1 when the shipment has a positive late-day count, else 0.
y_all = (df['Ship Late Day count'] > 0).astype(int).values


# Positional row ids; the CV loop uses them to look rows up in `df` again.
all_indices = np.arange(len(df))

def metrics_dict(y_true, y_score_or_pred):
    """Compute classification metrics from hard labels or probability scores.

    The second argument is flattened. If every value is 0 or 1 it is treated
    as hard predictions (no AUROC can be computed); otherwise it is treated as
    scores, thresholded at 0.5 for the label-based metrics and used directly
    for AUROC.

    Returns a dict with keys 'Accuracy', 'Precision', 'Recall', 'F1-Score'
    (the latter three weighted-averaged with zero_division=1) and 'AUROC'
    (NaN when scores are unavailable, y_true does not contain exactly two
    classes, or roc_auc_score raises).
    """
    flat = np.asarray(y_score_or_pred).reshape(-1)
    looks_binary = set(np.unique(flat)).issubset({0, 1})

    if looks_binary:
        y_prob = None
        y_pred = flat.astype(int)
    else:
        y_prob = flat
        y_pred = (flat >= 0.5).astype(int)

    out = {'Accuracy': accuracy_score(y_true, y_pred)}
    weighted_scorers = (
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1-Score', f1_score),
    )
    for label, scorer in weighted_scorers:
        out[label] = scorer(y_true, y_pred, average='weighted', zero_division=1)

    # AUROC needs real scores and both classes present in the ground truth.
    auroc = np.nan
    if y_prob is not None and len(np.unique(y_true)) == 2:
        try:
            auroc = roc_auc_score(y_true, y_prob)
        except Exception:
            auroc = np.nan
    out['AUROC'] = auroc
    return out


def create_model(model_type, input_dim):
    """Build and compile a small Keras binary classifier.

    The network takes inputs of shape (input_dim, 1), applies the block
    selected by ``model_type`` and ends in a single sigmoid unit. It is
    compiled with Adam (learning rate 1e-3, clipnorm 1.0) and binary
    cross-entropy loss.

    Parameters
    ----------
    model_type : str
        One of 'CNN', 'LSTM', 'Bi-LSTM', 'Stacked LSTM'.
    input_dim : int
        Number of features per sample (length of the pseudo-sequence).

    Returns
    -------
    The compiled ``Sequential`` model.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    """
    supported = ('CNN', 'LSTM', 'Bi-LSTM', 'Stacked LSTM')
    # Fail fast: without this check an unknown name would silently yield
    # Input -> Dense on a 3-D tensor, i.e. one output per timestep rather
    # than one probability per sample.
    if model_type not in supported:
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {supported}."
        )

    model = Sequential()
    model.add(Input(shape=(input_dim, 1)))
    if model_type == 'CNN':
        model.add(Conv1D(64, 2, activation='relu'))
        model.add(Flatten())
    elif model_type == 'LSTM':
        model.add(LSTM(64, activation='tanh'))
    elif model_type == 'Bi-LSTM':
        model.add(Bidirectional(LSTM(64, activation='tanh')))
    else:  # 'Stacked LSTM'
        # First layer must emit the full sequence so the second LSTM can consume it.
        model.add(LSTM(64, activation='tanh', return_sequences=True))
        model.add(LSTM(32, activation='tanh'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=1e-3, clipnorm=1.0), loss='binary_crossentropy')
    return model

class PPOHybridEnv(gym.Env):
    """
    One-pass classification environment over independent rows.

    Observation: one row of ``inputs`` (length D); the declared space is
                 Box(0, 1, (D,)).
    Action:      Discrete(2), compared against the row's label.
    Reward:      ``pos_reward`` when the action equals the label,
                 ``neg_reward`` otherwise (defaults +1 / -5).
    An episode walks the rows in order and terminates after the last one.
    """
    metadata = {"render_modes": []}

    def __init__(self, inputs, labels, pos_reward=1.0, neg_reward=-5.0):
        super().__init__()
        n_features = inputs.shape[1]
        self.inputs = inputs.astype(np.float32)
        self.labels = labels.astype(int)
        self.n = len(labels)
        self.pos_reward = pos_reward
        self.neg_reward = neg_reward
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(n_features,), dtype=np.float32
        )
        self.action_space = spaces.Discrete(2)
        self.idx = 0  # position of the row currently being classified

    def reset(self, seed=None, options=None):
        """Rewind to the first row and return (observation, info)."""
        super().reset(seed=seed)
        self.idx = 0
        first_obs = self.inputs[self.idx]
        return first_obs, {}

    def step(self, action):
        """Score ``action`` against the current label and advance one row."""
        is_correct = action == self.labels[self.idx]
        reward = self.pos_reward if is_correct else self.neg_reward

        self.idx += 1
        terminated = self.idx >= self.n
        if terminated:
            # No next row exists; hand back an all-zero placeholder observation.
            obs = np.zeros(self.inputs.shape[1], dtype=np.float32)
        else:
            obs = self.inputs[self.idx]
        return obs, float(reward), terminated, False, {}

class SequentialPPOEnv(gym.Env):
    """
    Episode-based classification environment with a small memory state.

    Observation = [base_inputs(D), time_sin, time_cos, last_K_actions(K), cum_FP, cum_FN]
    Action      = Discrete(2)
    Reward per step:
        +2 when a label-1 row is predicted 1, +1 when a label-0 row is predicted 0
        -5 for a false negative, -2 for a false positive
        -0.01 step penalty; +0.2 bonus when the running FN-rate is lower than
        at the previous step
    ``episodes`` is a sequence of slices into ``base_inputs`` / ``labels``;
    each ``reset`` moves to the next slice, wrapping around at the end.
    """
    metadata = {"render_modes": []}

    def __init__(self, base_inputs, labels, episodes, K=5):
        super().__init__()
        self.base_inputs = base_inputs.astype(np.float32)
        self.labels = labels.astype(int)
        self.episodes = episodes
        self.K = K
        # D base features + 2 time features + K remembered actions + 2 error counters.
        self.obs_dim = base_inputs.shape[1] + 2 + K + 2
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.obs_dim,), dtype=np.float32
        )
        self.action_space = spaces.Discrete(2)
        # Per-episode state; populated by reset().
        self._ep_idx = -1
        self._indices = None
        self._t = None
        self._last_actions = None
        self._cum_fp = None
        self._cum_fn = None
        self._prev_fn_rate = 0.0

    def _time_features(self, t, T):
        """Encode the relative position t/(T-1) as a (sin, cos) pair."""
        pos = t / max(T - 1, 1)
        angle = 2*np.pi*pos
        return np.array([np.sin(angle), np.cos(angle)], dtype=np.float32)

    def _obs(self):
        """Assemble the observation vector for the current step."""
        episode_len = len(self._indices)
        row = self._indices[self._t]
        parts = [
            self.base_inputs[row],                           # (D,)
            self._time_features(self._t, episode_len),       # (2,)
            self._last_actions.copy(),                       # (K,)
            np.array([self._cum_fp, self._cum_fn], dtype=np.float32),
        ]
        return np.concatenate(parts, axis=0).astype(np.float32)

    def reset(self, seed=None, options=None):
        """Advance to the next episode slice and clear the memory state."""
        super().reset(seed=seed)
        self._ep_idx = (self._ep_idx + 1) % len(self.episodes)
        span = self.episodes[self._ep_idx]
        self._indices = np.arange(span.start, span.stop, dtype=int)
        self._t = 0
        self._last_actions = np.zeros(self.K, dtype=np.float32)
        self._cum_fp = 0.0
        self._cum_fn = 0.0
        self._prev_fn_rate = 0.0
        return self._obs(), {}

    def step(self, action):
        """Reward ``action`` for the current row, update memory, move on."""
        row = self._indices[self._t]
        y = self.labels[row]

        if action == y:
            reward = 2.0 if y == 1 else 1.0
        elif y == 1 and action == 0:
            reward = -5.0
            self._cum_fn += 1.0
        else:
            reward = -2.0
            self._cum_fp += 1.0
        reward -= 0.01  # small step penalty

        # Bonus when the running false-negative rate dropped since last step.
        fn_rate = self._cum_fn / float(self._t + 1)
        if fn_rate < self._prev_fn_rate:
            reward += 0.2
        self._prev_fn_rate = fn_rate

        # Shift the action history left and record the newest action at the end.
        history = np.roll(self._last_actions, -1)
        history[-1] = float(action)
        self._last_actions = history

        self._t += 1
        terminated = self._t >= len(self._indices)
        if terminated:
            obs = np.zeros(self.obs_dim, dtype=np.float32)
        else:
            obs = self._obs()
        return obs, float(reward), terminated, False, {}


# --- Stratified K-fold evaluation of base models and PPO ensembles ----------
# For every fold: train the four Keras models (on SMOTE-resampled, standardised
# data) and a grid-searched XGBoost (on raw data), stack their probabilities,
# then train/evaluate a static and a sequential PPO agent on that stack.
# One metrics row per (model, fold) is appended to `per_fold_rows`.
dl_models = ['CNN', 'LSTM', 'Bi-LSTM', 'Stacked LSTM']
per_fold_rows = []

skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_SEED)

# Columns required to build route episodes for the sequential PPO agent.
cols_for_routes = ['Order Date','Origin Port','Destination Port','Carrier']
# Column order of the stacked base-model probabilities in the route frames.
stack_cols = ['xgb'] + dl_models


def make_episodes(route_vec):
    """Return slices covering each run of consecutive equal route keys."""
    eps, start = [], 0
    for i in range(1, len(route_vec) + 1):
        if i == len(route_vec) or route_vec[i] != route_vec[i-1]:
            eps.append(slice(start, i))
            start = i
    return eps


for fold_id, (train_idx, test_idx) in enumerate(skf.split(X_all, y_all), start=1):
    print(f"\n=== Fold {fold_id}/{N_SPLITS} ===")

    X_tr_raw, X_te_raw = X_all[train_idx], X_all[test_idx]
    y_tr, y_te = y_all[train_idx], y_all[test_idx]
    idx_tr, idx_te = all_indices[train_idx], all_indices[test_idx]

    # Oversample the training split only; the test split is never resampled.
    smote = SMOTE(random_state=RANDOM_SEED)
    X_tr_res, y_tr_res = smote.fit_resample(X_tr_raw, y_tr)

    scaler = StandardScaler()
    X_tr_res_scaled = scaler.fit_transform(X_tr_res)       # fit scaler on resampled (DL fit)
    X_tr_orig_scaled = scaler.transform(X_tr_raw)          # for train preds
    X_te_scaled      = scaler.transform(X_te_raw)          # for test preds

    # Keras models expect (samples, features, 1).
    X_tr_dl_fit  = X_tr_res_scaled.reshape(-1, X_tr_res_scaled.shape[1], 1)
    X_tr_dl_pred = X_tr_orig_scaled.reshape(-1, X_tr_orig_scaled.shape[1], 1)
    X_te_dl_pred = X_te_scaled.reshape(-1, X_te_scaled.shape[1], 1)

    dl_train_probs = {}
    dl_test_probs  = {}
    for m in dl_models:
        mdl = create_model(m, X_tr_res_scaled.shape[1])
        t0 = time.time()
        mdl.fit(X_tr_dl_fit, y_tr_res, epochs=DL_EPOCHS, batch_size=256, verbose=0)
        t1 = time.time()
        p_tr = mdl.predict(X_tr_dl_pred, verbose=0).reshape(-1)
        p_te = mdl.predict(X_te_dl_pred, verbose=0).reshape(-1)
        dl_train_probs[m] = p_tr
        dl_test_probs[m]  = p_te

        met = metrics_dict(y_te, p_te)
        met.update({'Model': m, 'Fold': fold_id, 'Time (s)': round(t1 - t0, 2)})
        per_fold_rows.append(met)

    # `use_label_encoder` is no longer passed: the installed XGBoost ignores it
    # and printed a "Parameters ... are not used" warning on every fit.
    xgb = GridSearchCV(
        estimator=XGBClassifier(
            random_state=RANDOM_SEED,
            eval_metric='logloss'
        ),
        param_grid={'n_estimators':[100], 'max_depth':[3,5], 'learning_rate':[0.1,0.05], 'subsample':[0.8]},
        scoring='roc_auc', cv=3, n_jobs=-1
    )
    t0 = time.time()
    xgb.fit(X_tr_raw, y_tr)
    t1 = time.time()
    xgb_proba_tr = xgb.best_estimator_.predict_proba(X_tr_raw)[:,1]
    xgb_proba_te = xgb.best_estimator_.predict_proba(X_te_raw)[:,1]

    xgb_met = metrics_dict(y_te, xgb_proba_te)
    # Report the measured grid-search time (was a hard-coded 0.0).
    xgb_met.update({'Model': 'XGBoost', 'Fold': fold_id, 'Time (s)': round(t1 - t0, 2)})
    per_fold_rows.append(xgb_met)

    # Stack base-model probabilities column-wise: [xgb, CNN, LSTM, Bi-LSTM, Stacked LSTM].
    ppo_input_tr = np.vstack(
        [xgb_proba_tr] + [dl_train_probs[m] for m in dl_models]
    ).T.astype(np.float32)

    ppo_input_te = np.vstack(
        [xgb_proba_te] + [dl_test_probs[m] for m in dl_models]
    ).T.astype(np.float32)

    # Rescale using ranges learned on the training stack only.
    mm_static = MinMaxScaler().fit(ppo_input_tr)
    ppo_input_tr = mm_static.transform(ppo_input_tr).astype(np.float32)
    ppo_input_te = mm_static.transform(ppo_input_te).astype(np.float32)

    env_static_tr = make_vec_env(lambda: PPOHybridEnv(ppo_input_tr, y_tr), n_envs=1)
    ppo_static = PPO("MlpPolicy", env_static_tr, verbose=0, seed=RANDOM_SEED)
    t0 = time.time()
    ppo_static.learn(total_timesteps=PPO_STEPS)
    t1 = time.time()

    # Single deterministic pass over the test rows.
    eval_env_static = PPOHybridEnv(ppo_input_te, y_te)
    obs, _ = eval_env_static.reset()
    preds_static, done = [], False
    while not done:
        a, _ = ppo_static.predict(obs, deterministic=True)
        preds_static.append(int(a))
        obs, _, done, _, _ = eval_env_static.step(a)
    met_static = metrics_dict(y_te, np.array(preds_static))
    met_static.update({'Model': 'PPO (Static)', 'Fold': fold_id, 'Time (s)': round(t1 - t0, 2)})
    per_fold_rows.append(met_static)

    missing_r = [c for c in cols_for_routes if c not in df.columns]
    if missing_r:
        print(f"[WARN] Sequential PPO skipped (missing {missing_r}).")
    else:
        tr_frame = df.loc[idx_tr, cols_for_routes].copy()
        te_frame = df.loc[idx_te, cols_for_routes].copy()
        tr_frame['Order Date'] = pd.to_datetime(tr_frame['Order Date'])
        te_frame['Order Date'] = pd.to_datetime(te_frame['Order Date'])

        # Attach labels and base-model probabilities so they move with the
        # rows when the frames are re-ordered below.
        tr_frame['y']   = y_tr
        te_frame['y']   = y_te
        tr_frame['xgb'] = xgb_proba_tr
        te_frame['xgb'] = xgb_proba_te
        for m in dl_models:
            tr_frame[m] = dl_train_probs[m]
            te_frame[m] = dl_test_probs[m]

        for fr in (tr_frame, te_frame):
            fr['route_key'] = (
                fr['Origin Port'].astype(str) + ' | ' +
                fr['Destination Port'].astype(str) + ' | ' +
                fr['Carrier'].astype(str)
            )

        # NOTE(review): rows are ordered by 'Order Date' only, so an episode is
        # a run of consecutive same-route rows in date order rather than the
        # full history of one route -- confirm this is the intended grouping.
        tr_frame = tr_frame.sort_values('Order Date').reset_index(drop=True)
        te_frame = te_frame.sort_values('Order Date').reset_index(drop=True)

        base_tr = tr_frame[stack_cols].values.astype(np.float32)
        base_te = te_frame[stack_cols].values.astype(np.float32)
        mm_seq = MinMaxScaler().fit(base_tr)
        base_tr = mm_seq.transform(base_tr).astype(np.float32)
        base_te = mm_seq.transform(base_te).astype(np.float32)
        y_tr_sorted = tr_frame['y'].values.astype(int)
        y_te_sorted = te_frame['y'].values.astype(int)
        rt_tr = tr_frame['route_key'].values
        rt_te = te_frame['route_key'].values

        eps_tr = make_episodes(rt_tr)
        eps_te = make_episodes(rt_te)

        env_seq_tr = make_vec_env(lambda: SequentialPPOEnv(base_tr, y_tr_sorted, eps_tr, K=5), n_envs=1)
        ppo_seq = PPO("MlpPolicy", env_seq_tr, verbose=0, seed=RANDOM_SEED)
        s0 = time.time()
        ppo_seq.learn(total_timesteps= max(PPO_STEPS, 60_000))  # a bit more steps helps sequence
        s1 = time.time()

        # Play every test episode exactly once, in order; the episodes tile the
        # sorted test frame, so predictions line up with y_te_sorted.
        eval_env = SequentialPPOEnv(base_te, y_te_sorted, eps_te, K=5)
        obs, _ = eval_env.reset()
        preds_seq, visited = [], 0
        while True:
            a, _ = ppo_seq.predict(obs, deterministic=True)
            preds_seq.append(int(a))
            obs, _, done, _, _ = eval_env.step(a)
            if done:
                visited += 1
                if visited >= len(eps_te):
                    break
                obs, _ = eval_env.reset()

        preds_seq = np.array(preds_seq[:len(y_te_sorted)])
        met_seq = metrics_dict(y_te_sorted, preds_seq)
        met_seq.update({'Model': 'PPO (Sequential)', 'Fold': fold_id, 'Time (s)': round(s1 - s0, 2)})
        per_fold_rows.append(met_seq)

# Metric columns reported for every model, in display order.
cols = ['Accuracy','Precision','Recall','F1-Score','AUROC','Time (s)']
# One row per (model, fold) collected during cross-validation.
per_fold_df = pd.DataFrame(per_fold_rows)

print("\n=== Per-Fold Results ===")
per_fold_view = per_fold_df[['Fold','Model'] + cols]
print(per_fold_view.sort_values(['Fold','Model']))

# Mean and standard deviation of every metric across folds, per model.
summary = per_fold_df.groupby('Model')[cols].agg(['mean','std'])
def fmt(m, s):
    """Format a mean/std pair as 'mean ± std' with six decimals.

    Returns "nan" when the mean is NaN, and the mean alone when only the
    standard deviation is NaN.
    """
    if np.isnan(m):
        return "nan"
    mean_txt = f"{m:.6f}"
    return mean_txt if np.isnan(s) else f"{mean_txt} ± {s:.6f}"
# Flatten the (metric, stat) summary into one formatted text cell per metric.
summary_out = [
    {'Model': model, **{c: fmt(row[(c,'mean')], row[(c,'std')]) for c in cols}}
    for model, row in summary.iterrows()
]
summary_df = pd.DataFrame(summary_out)

print("\n=== K-Fold Summary (Mean ± Std) ===")
print(summary_df[['Model'] + cols])


# Persist both the raw per-fold rows and the formatted summary.
per_fold_df.to_csv('kfold_per_fold_results.csv', index=False)
summary_df.to_csv('kfold_summary_mean_std.csv', index=False)



=== Fold 1/3 ===


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.




=== Fold 2/3 ===


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.




=== Fold 3/3 ===


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.




=== Per-Fold Results ===
    Fold             Model  Accuracy  Precision    Recall  F1-Score     AUROC  \
2      1           Bi-LSTM  0.986582   0.986535  0.986582  0.986541  0.996556   
0      1               CNN  0.987635   0.987641  0.987635  0.987638  0.998778   
1      1              LSTM  0.985267   0.985243  0.985267  0.985253  0.991359   
6      1  PPO (Sequential)  0.972376   0.974398  0.972376  0.972836       NaN   
5      1      PPO (Static)  0.995264   0.995274  0.995264  0.995249       NaN   
3      1      Stacked LSTM  0.980005   0.980024  0.980005  0.979772  0.994269   
4      1           XGBoost  0.995527   0.995543  0.995527  0.995512  0.999678   
9      2           Bi-LSTM  0.973684   0.973927  0.973684  0.973781  0.991840   
7      2               CNN  0.979474   0.979562  0.979474  0.979512  0.994295   
8      2              LSTM  0.984474   0.984615  0.984474  0.984289  0.988795   
13     2  PPO (Sequential)  0.976316   0.977208  0.976316  0.976559       NaN   
12