In [None]:
import warnings, os, time, random
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd


random.seed(42); np.random.seed(42)

from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             confusion_matrix, matthews_corrcoef, cohen_kappa_score)
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import SMOTE

from xgboost import XGBClassifier

import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

import tensorflow as tf
tf.random.set_seed(42)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional, Flatten, Input
from tensorflow.keras.optimizers import Adam

# ---------------------------------------------------------------------------
# Data loading, feature selection, train/test split, resampling and scaling.
# ---------------------------------------------------------------------------
file_path = os.path.join("Data", "Hybrid_Augmented_TSAFE_Features.xlsx")
df = pd.read_excel(file_path)

# Derive the combined plant/destination key when the workbook does not already carry it.
if 'Plant_Destination' not in df.columns and {'Plant Code','Destination Port'}.issubset(df.columns):
    df['Plant_Destination'] = df['Plant Code'].astype(str) + ' | ' + df['Destination Port'].astype(str)

# Keep only the candidate features that are actually present in the workbook.
cat_features = [c for c in ['Origin Port','Carrier','Plant Code','Destination Port','Plant_Destination'] if c in df.columns]
num_features = [c for c in [
    'Unit quantity','Weight','TPT',
    'TPT_per_Unit','LeadTime_Deviation','Weight_per_Unit','log_UnitQty',
    'carrier_origin_risk','route_cum_late_rate','route_bb_mean','carrier_bb_mean',
    'route_orders_last7d','route_roll10_Weight_q90',
    'congestion_trend','Weight_vsCarrierMean','seq_pos_norm'
] if c in df.columns]


# Binary target: 1 when the shipment has a positive late-day count, else 0.
if 'Ship Late Day count' not in df.columns:
    raise ValueError("Missing target column 'Ship Late Day count'.")
y = (df['Ship Late Day count'] > 0).astype(int)

has_date = 'Order Date' in df.columns
if has_date:
    # Unparseable dates become NaT instead of raising.
    df['Order Date'] = pd.to_datetime(df['Order Date'], errors='coerce')


X = pd.get_dummies(df[cat_features + num_features], drop_first=False)
# Turn infinities into NaN BEFORE computing the medians; otherwise the inf
# values take part in the median and can skew it or even become the fill value.
X = X.replace([np.inf, -np.inf], np.nan)
# NOTE(review): the medians are taken over all rows, i.e. before the
# train/test split below - confirm this is acceptable for the evaluation.
X = X.fillna(X.median(numeric_only=True))
# A column that is entirely NaN has no median and would still contain NaN
# here, which downstream estimators may reject; fall back to zero for it.
X = X.fillna(0)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)
# Oversample the training split only; the test split keeps its original class balance.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# The scaler is fitted on the resampled training data and reused for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_res)
X_test_scaled  = scaler.transform(X_test)


def compute_all_metrics(y_true, y_score_or_pred):
    """Return a dict of binary-classification metrics for one model.

    ``y_score_or_pred`` is flattened first. If every value is 0 or 1 it is
    used directly as hard predictions; otherwise it is treated as scores and
    thresholded at 0.5.

    Precision, recall and F1 are support-weighted averages over both classes
    (``average='weighted'``). FNR is computed for class 1 from the confusion
    matrix and is 0.0 when there are no class-1 samples.
    """
    flat = np.asarray(y_score_or_pred).reshape(-1)
    already_binary = set(np.unique(flat)).issubset({0, 1})
    y_pred = flat.astype(int) if already_binary else (flat >= 0.5).astype(int)

    # Shared keyword arguments for the three support-weighted scores.
    weighted = {'average': 'weighted', 'zero_division': 1}

    metrics = {}
    metrics['Accuracy'] = accuracy_score(y_true, y_pred)
    metrics['Precision'] = precision_score(y_true, y_pred, **weighted)
    metrics['Recall'] = recall_score(y_true, y_pred, **weighted)
    metrics['F1-Score'] = f1_score(y_true, y_pred, **weighted)
    metrics['MCC'] = matthews_corrcoef(y_true, y_pred)
    metrics["Cohen's Kappa"] = cohen_kappa_score(y_true, y_pred)

    # With labels=[0, 1] the flattened matrix is ordered (tn, fp, fn, tp).
    _, _, false_neg, true_pos = confusion_matrix(y_true, y_pred, labels=[0,1]).ravel()
    actual_pos = false_neg + true_pos
    metrics['FNR'] = false_neg / actual_pos if actual_pos > 0 else 0.0
    return metrics

def as_row(name, m, t, kind):
    """Return a copy of metrics dict ``m`` extended with result-table metadata.

    Adds 'Model' (``name``), 'Variant' (``kind``) and 'Time (s)' (``t`` as a
    float rounded to two decimals). ``m`` itself is left unmodified.
    """
    seconds = round(float(t), 2)
    row = m.copy()
    row['Model'] = name
    row['Variant'] = kind
    row['Time (s)'] = seconds
    return row

# Collects one result row (metrics + model name, variant, runtime) per evaluated model.
all_rows = []


# Standalone XGBoost baseline: a small grid search (3-fold CV, scored by ROC AUC).
# `use_label_encoder` is deliberately not passed: the installed XGBoost no
# longer uses it and reports 'Parameters: { "use_label_encoder" } are not used.'
# on every fit when it is supplied.
# NOTE(review): this model is fitted on the un-resampled, unscaled X_train,
# whereas the deep-learning models are trained on the SMOTE-resampled, scaled
# data - confirm the asymmetry is intended.
xgb = GridSearchCV(
    estimator=XGBClassifier(random_state=42, eval_metric='logloss'),
    param_grid={'n_estimators':[200], 'max_depth':[4,6], 'learning_rate':[0.1,0.05], 'subsample':[0.8]},
    scoring='roc_auc', cv=3, n_jobs=-1
)
t0 = time.time()
xgb.fit(X_train, y_train)
xgb_time = time.time() - t0  # covers the whole grid search, not a single fit
# Probability of class 1 from the best estimator found by the search.
xgb_proba = xgb.best_estimator_.predict_proba(X_test)[:, 1]
all_rows.append(as_row('XGBoost', compute_all_metrics(y_test, xgb_proba), xgb_time, 'Standalone'))


def create_model(model_type, input_dim):
    """Build and compile a small Keras binary classifier.

    Args:
        model_type: One of 'CNN', 'LSTM', 'Bi-LSTM' or 'Stacked LSTM'.
        input_dim: Length of the input sequence; the model input has shape
            ``(input_dim, 1)``.

    Returns:
        A compiled ``Sequential`` model ending in a single sigmoid unit,
        trained with binary cross-entropy and Adam (learning rate 1e-3,
        gradient norm clipped to 1.0).

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    supported = ('CNN', 'LSTM', 'Bi-LSTM', 'Stacked LSTM')
    # Validate before building anything: an unrecognised name would otherwise
    # silently yield a network with nothing between the input and the output unit.
    if model_type not in supported:
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {', '.join(supported)}."
        )

    model = Sequential()
    model.add(Input(shape=(input_dim, 1)))
    if model_type == 'CNN':
        model.add(Conv1D(64, 2, activation='relu'))
        model.add(Flatten())
    elif model_type == 'LSTM':
        model.add(LSTM(64, activation='tanh'))
    elif model_type == 'Bi-LSTM':
        model.add(Bidirectional(LSTM(64, activation='tanh')))
    else:  # 'Stacked LSTM'
        # The first layer must emit the full sequence so the second LSTM can consume it.
        model.add(LSTM(64, activation='tanh', return_sequences=True))
        model.add(LSTM(32, activation='tanh'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=1e-3, clipnorm=1.0), loss='binary_crossentropy')
    return model

# The Keras models take (samples, input_dim, 1) input: append a trailing
# singleton axis to the scaled 2-D feature matrices.
X_train_dl = np.expand_dims(X_train_scaled, axis=-1)
X_test_dl  = np.expand_dims(X_test_scaled, axis=-1)

dl_models = ['CNN', 'LSTM', 'Bi-LSTM', 'Stacked LSTM']
dl_outputs = {}   # model name -> flat array of test-set scores
dl_times = {}     # model name -> training wall-clock seconds

# Train each architecture on the resampled training data and score the test split.
for m in dl_models:
    mdl = create_model(m, X_train_dl.shape[1])

    t0 = time.time()
    mdl.fit(X_train_dl, y_train_res, epochs=10, batch_size=256, verbose=0)
    dl_times[m] = time.time() - t0   # fit time only; prediction is not included

    proba = mdl.predict(X_test_dl, verbose=0).reshape(-1)
    dl_outputs[m] = proba

    dl_metrics = compute_all_metrics(y_test, proba)
    all_rows.append(as_row(m, dl_metrics, dl_times[m], 'Standalone'))


# ---------------------------------------------------------------------------
# Assemble the per-row test frame (labels + base-model scores) and cut it
# into per-route episodes for the sequential PPO environment.
# ---------------------------------------------------------------------------
test_idx = X_test.index
cols_for_frame = [c for c in ['Order Date','Origin Port','Destination Port','Carrier'] if c in df.columns]
test_frame = df.loc[test_idx, cols_for_frame].copy()
if has_date:
    test_frame['Order Date'] = pd.to_datetime(test_frame['Order Date'])
else:
    # No date column available: fall back to the row position as the ordering key.
    test_frame['Order Date'] = np.arange(len(test_idx))

# y_test, xgb_proba and the DL outputs are all in X_test row order, which is
# also the row order of test_frame at this point.
test_frame['y']   = y_test.values.astype(int)
test_frame['xgb'] = xgb_proba
for m in dl_models:
    test_frame[m] = dl_outputs[m]


# A route is identified by origin, destination and carrier; without those
# columns every row falls into a single 'ALL' route.
if set(['Origin Port','Destination Port','Carrier']).issubset(test_frame.columns):
    test_frame['route_key'] = (
        test_frame['Origin Port'].astype(str) + ' | ' +
        test_frame['Destination Port'].astype(str) + ' | ' +
        test_frame['Carrier'].astype(str)
    )
else:
    test_frame['route_key'] = 'ALL'

# Group rows by route first and order them by date within each route.
# The segmentation below starts a new episode whenever route_key changes
# between neighbouring rows, so sorting by date alone would interleave the
# routes and split each of them into many fragments.
test_frame = test_frame.sort_values(['route_key', 'Order Date']).reset_index(drop=True)
labels_sorted = test_frame['y'].values.astype(int)
route_sorted  = test_frame['route_key'].values

# One slice per contiguous run of identical route_key; together the slices
# cover rows 0..len(test_frame) in order, without gaps or overlap.
episodes = []
start = 0
for i in range(1, len(test_frame)+1):
    if i == len(test_frame) or route_sorted[i] != route_sorted[i-1]:
        episodes.append(slice(start, i))
        start = i

class SequentialPPOEnv(gym.Env):
    """
    Episodic binary-decision environment with cost-sensitive rewards.

    Each episode walks through one slice of rows; successive ``reset`` calls
    cycle through ``episodes`` in order, wrapping around at the end.

    Observation (float32 vector of length D + 2 + K + 2):
        [base_inputs(D), time_sin, time_cos, last_K_actions(K), cum_FP, cum_FN]
    Action: Discrete(2), compared against the row's 0/1 label.
    """
    metadata = {"render_modes": []}

    def __init__(self, base_inputs, labels, episodes,
                 K=5, reward_correct_pos=2.0, reward_correct_neg=1.0,
                 penalty_FN=-5.0, penalty_FP=-2.0,
                 step_penalty=-0.01, fn_improve_bonus=0.2):
        """Store the data, the reward settings and declare the gym spaces.

        ``base_inputs`` is indexed as a 2-D array (rows x D) and ``labels`` by
        row; ``episodes`` is a sequence of objects with ``start``/``stop``
        attributes selecting the rows of each episode.
        """
        super().__init__()
        self.X = base_inputs.astype(np.float32)
        self.y = labels.astype(int)
        self.episodes = episodes
        self.K = int(K)

        # Reward shaping terms, all stored as plain floats.
        self.reward_correct_pos = float(reward_correct_pos)
        self.reward_correct_neg = float(reward_correct_neg)
        self.penalty_FN = float(penalty_FN)
        self.penalty_FP = float(penalty_FP)
        self.step_penalty = float(step_penalty)
        self.fn_improve_bonus = float(fn_improve_bonus)

        # base features + (sin, cos) + last K actions + (cum_FP, cum_FN)
        n_base = self.X.shape[1]
        self.obs_dim = n_base + 2 + self.K + 2
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.obs_dim,), dtype=np.float32
        )
        self.action_space = spaces.Discrete(2)

        # Per-episode state; populated by reset().
        self._ep_idx = -1          # so that the first reset() selects episode 0
        self._indices = None
        self._t = None
        self._last_actions = None
        self._cum_fp = None
        self._cum_fn = None
        self._prev_fn_rate = 0.0

    def _time_features(self, t, T):
        """Encode step ``t`` of a ``T``-step episode as (sin, cos) of its relative position."""
        # max(..., 1) avoids a division by zero for single-step episodes.
        angle = 2*np.pi*(t / max(T - 1, 1))
        return np.array([np.sin(angle), np.cos(angle)], dtype=np.float32)

    def _obs(self):
        """Build the observation vector for the current step."""
        row = self._indices[self._t]
        clock = self._time_features(self._t, len(self._indices))
        error_counts = np.array([self._cum_fp, self._cum_fn], dtype=np.float32)
        stacked = np.concatenate(
            [self.X[row], clock, self._last_actions, error_counts], axis=0
        )
        return stacked.astype(np.float32)

    def reset(self, seed=None, options=None):
        """Advance to the next episode (cyclically) and return its first observation."""
        super().reset(seed=seed)
        self._ep_idx = (self._ep_idx + 1) % len(self.episodes)
        span = self.episodes[self._ep_idx]
        self._indices = np.arange(span.start, span.stop, dtype=int)
        self._t = 0
        self._last_actions = np.zeros(self.K, dtype=np.float32)
        self._cum_fp = 0.0
        self._cum_fn = 0.0
        self._prev_fn_rate = 0.0
        return self._obs(), {}

    def step(self, action):
        """Score ``action`` against the current label and move to the next row.

        Returns ``(obs, reward, terminated, truncated, info)``; ``truncated``
        is always False and the terminal observation is all zeros.
        """
        label = self.y[self._indices[self._t]]

        # Base reward: asymmetric payoffs for hits, false negatives and false positives.
        if action == label:
            reward = self.reward_correct_pos if label == 1 else self.reward_correct_neg
        elif label == 1 and action == 0:
            reward = self.penalty_FN
            self._cum_fn += 1.0
        else:
            reward = self.penalty_FP
            self._cum_fp += 1.0
        reward += self.step_penalty

        # Bonus whenever the running false-negative rate dropped since the previous step.
        fn_rate = self._cum_fn / float(self._t + 1)
        if fn_rate < self._prev_fn_rate:
            reward += self.fn_improve_bonus
        self._prev_fn_rate = fn_rate

        # Shift the action history left by one and append the newest action.
        self._last_actions = np.roll(self._last_actions, -1)
        self._last_actions[-1] = float(action)

        self._t += 1
        terminated = self._t >= len(self._indices)
        if terminated:
            obs = np.zeros(self.obs_dim, dtype=np.float32)
        else:
            obs = self._obs()
        return obs, float(reward), terminated, False, {}


def train_eval_seqppo(input_mat, name, timesteps=80_000, K=5,
                      reward_correct_pos=2.0, reward_correct_neg=1.0,
                      penalty_FN=-5.0, penalty_FP=-2.0, step_penalty=-0.01, fn_improve_bonus=0.2):
    """Train a PPO policy on ``input_mat`` and record its metrics.

    ``input_mat`` is min-max scaled and used as the base inputs of a
    ``SequentialPPOEnv`` built from the module-level ``labels_sorted`` and
    ``episodes``. After training, the policy is rolled out deterministically
    over every episode once, in order, and the resulting decisions are scored
    against ``labels_sorted``. A 'PPO-Seq' row named ``name`` is appended to
    the module-level ``all_rows``.

    Returns:
        Wall-clock seconds spent in ``model.learn``.

    NOTE(review): the training and evaluation environments are built from the
    same inputs, labels and episodes, so the reported metrics are measured on
    the data the policy was trained on - confirm this is intended.
    """
    scaled_inputs = MinMaxScaler().fit_transform(input_mat).astype(np.float32)

    env_kwargs = {
        'base_inputs': scaled_inputs,
        'labels': labels_sorted,
        'episodes': episodes,
        'K': K,
        'reward_correct_pos': reward_correct_pos,
        'reward_correct_neg': reward_correct_neg,
        'penalty_FN': penalty_FN,
        'penalty_FP': penalty_FP,
        'step_penalty': step_penalty,
        'fn_improve_bonus': fn_improve_bonus,
    }

    def build_env():
        # Fresh environment each call, so every instance starts at episode 0.
        return SequentialPPOEnv(**env_kwargs)

    vec_env = make_vec_env(build_env, n_envs=1)
    model = PPO("MlpPolicy", vec_env, verbose=0, seed=42)
    learn_started = time.time()
    model.learn(total_timesteps=timesteps)
    runtime = time.time() - learn_started

    # Deterministic rollout: one decision per row, episodes visited in order.
    eval_env = build_env()
    obs, _ = eval_env.reset()
    decisions = []
    finished_episodes = 0
    while finished_episodes < len(episodes):
        action, _ = model.predict(obs, deterministic=True)
        decisions.append(int(action))
        obs, _, terminated, _, _ = eval_env.step(action)
        if terminated:
            finished_episodes += 1
            if finished_episodes < len(episodes):
                obs, _ = eval_env.reset()

    preds = np.array(decisions[:len(labels_sorted)])
    ppo_metrics = compute_all_metrics(labels_sorted, preds)
    all_rows.append(as_row(name, ppo_metrics, runtime, 'PPO-Seq'))
    return runtime

# PPO over XGBoost paired with each single deep-learning model:
# (result-row name, two-column score matrix).
pair_specs = [
    (label, test_frame[['xgb', dl_col]].values)
    for label, dl_col in (
        ('PPO-Seq: XGB+CNN',     'CNN'),
        ('PPO-Seq: XGB+LSTM',    'LSTM'),
        ('PPO-Seq: XGB+BiLSTM',  'Bi-LSTM'),
        ('PPO-Seq: XGB+Stacked', 'Stacked LSTM'),
    )
]
for name, mat in pair_specs:
    train_eval_seqppo(mat, name)


# PPO over all deep-learning scores, without and then with the XGBoost score.
all_dl_cols = ['CNN','LSTM','Bi-LSTM','Stacked LSTM']
mat_all_dl = test_frame[all_dl_cols].values
train_eval_seqppo(mat_all_dl, 'PPO-Seq: All DL')

mat_xgb_all = test_frame[['xgb'] + all_dl_cols].values
train_eval_seqppo(mat_xgb_all, 'PPO-Seq: XGB + All DL')

# Final table: fixed column order, grouped by variant, best F1/MCC/recall first.
order = ['Model','Variant','Accuracy','Precision','Recall','F1-Score','MCC',"Cohen's Kappa",'FNR','Time (s)']
results_df = pd.DataFrame(all_rows)[order]
results_df = results_df.sort_values(
    by=['Variant','F1-Score','MCC','Recall'],
    ascending=[True, False, False, False],
)
results_df = results_df.reset_index(drop=True)

print("\n=== Standalone & Sequential PPO (Pairs / All DL / XGB+All DL) ===")
print(results_df.to_string(index=False))

# Persist the same table next to the notebook.
out_csv = 'standalone_and_seqppo_hybrid_results.csv'
results_df.to_csv(out_csv, index=False)
print(f"\nSaved: {out_csv}")


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.




=== Standalone & Sequential PPO (Pairs / All DL / XGB+All DL) ===
                Model    Variant  Accuracy  Precision   Recall  F1-Score      MCC  Cohen's Kappa      FNR  Time (s)
     PPO-Seq: XGB+CNN    PPO-Seq  0.994739   0.994733 0.994739  0.994735 0.984050       0.984046 0.014706     23.36
  PPO-Seq: XGB+BiLSTM    PPO-Seq  0.994301   0.994297 0.994301  0.994299 0.982731       0.982730 0.014706     23.70
PPO-Seq: XGB + All DL    PPO-Seq  0.994301   0.994297 0.994301  0.994299 0.982731       0.982730 0.014706     27.38
    PPO-Seq: XGB+LSTM    PPO-Seq  0.993862   0.993862 0.993862  0.993862 0.981416       0.981416 0.014706     23.73
 PPO-Seq: XGB+Stacked    PPO-Seq  0.993424   0.993419 0.993424  0.993421 0.980074       0.980073 0.016807     29.77
      PPO-Seq: All DL    PPO-Seq  0.986409   0.986360 0.986409  0.986362 0.958631       0.958561 0.042017     28.17
              XGBoost Standalone  0.992986   0.992974 0.992986  0.992969 0.978693       0.978662 0.023109      2.25
     