In [None]:
# ==============================================================
# DataCo on M2 (8GB): PPO (Sequential) ONLY
# - No TensorFlow / Keras
# - No XGBoost / classic DL heads
# - Light features + K-fold Target Mean Encoding
# - Route-aware sequential environment for PPO
# ==============================================================

import os, gc, time, warnings, random
warnings.filterwarnings("ignore")

# ---- keep mac stable; cap threads ----
# Each variable limits one numeric backend's thread pool to 4 threads.
# NOTE(review): these are set before numpy/torch are imported below; presumably
# the libraries read them at import time — keep this ordering.
os.environ["OMP_NUM_THREADS"]="4"
os.environ["OPENBLAS_NUM_THREADS"]="4"
os.environ["MKL_NUM_THREADS"]="4"
os.environ["VECLIB_MAXIMUM_THREADS"]="4"
os.environ["NUMEXPR_NUM_THREADS"]="4"

# Seed NumPy's and Python's global RNGs with 42 right after importing them.
import numpy as np; np.random.seed(42)
import pandas as pd; random.seed(42)

# ================= PyTorch / SB3 (MPS for PPO) =================
import torch
# Use the Apple "mps" backend when torch reports it as available, else the CPU.
rl_device = "mps" if torch.backends.mps.is_available() else "cpu"
print("[INFO] RL device:", rl_device)

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# ================= A) CSV -> Parquet (first run only) =================
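# No hard-coded path to edit: the CSV is located automatically by
# resolve_data_path() (SCAI_DATA env var or common repo folders).
# Portable loader for DataCoSupplyChainDataset.csv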
from pathlib import Path
import os
import pandas as pd

# Default name of the dataset file searched for by resolve_data_path().
FILENAME = "DataCoSupplyChainDataset.csv"


def resolve_data_path(filename=FILENAME):
    """Locate *filename* and return its path.

    Search order:
      1. The directory named by the ``SCAI_DATA`` environment variable
         (``~`` is expanded); used only if the file actually exists there.
      2. ``.``, ``./data``, ``./Data``, ``..``, ``../data``, ``../Data``
         relative to the current working directory, in that order.

    Returns:
        pathlib.Path of the first existing match.

    Raises:
        FileNotFoundError: if no location contains the file. The message
            lists the relative candidates that were tried and the CWD.
    """
    # Explicit override via environment variable takes precedence.
    override_dir = os.getenv("SCAI_DATA")
    if override_dir:
        override = Path(override_dir).expanduser() / filename
        if override.exists():
            return override

    # Works both from the repo root and from a sub-folder such as scripts/.
    here = Path.cwd()
    candidates = []
    for root in (here, here.parent):
        candidates.append(root / filename)
        candidates.extend(root / sub / filename for sub in ("data", "Data"))

    found = next((path for path in candidates if path.exists()), None)
    if found is not None:
        return found

    tried = "\n".join(map(str, candidates))
    raise FileNotFoundError(
        f"Could not find {filename}. Tried:\n{tried}\nCWD={here}\n"
        "Tip: place the file under your repo 'data/' or set SCAI_DATA to its folder."
    )

file_path = resolve_data_path()
print(f"[info] Using data at: {file_path}")

# The CSV is intentionally NOT loaded eagerly here: the frame used downstream
# always comes from the Parquet cache below, so an extra full read would only
# double I/O and peak memory (and would use a different encoding than the
# conversion path).

# Parquet cache lives next to the CSV, same stem with a .parquet extension.
parquet_path = os.path.splitext(file_path)[0] + ".parquet"

t0_all = time.time()  # wall-clock start for the [TOTAL] report at the end
if not os.path.exists(parquet_path):
    print("[INFO] Converting CSV -> Parquet (first run only)...")
    # Read in 20k-row chunks with the tolerant python engine; malformed lines
    # are skipped rather than aborting the conversion.
    chunks = []
    for ch in pd.read_csv(file_path, engine="python", encoding="latin-1",
                          on_bad_lines="skip", chunksize=20000):
        chunks.append(ch)
    df_all = pd.concat(chunks, ignore_index=True)
    df_all.to_parquet(parquet_path, index=False)
    # Free the temporary copies before the Parquet file is read back.
    del chunks, df_all; gc.collect()
    print("[OK] Parquet saved:", parquet_path)

df = pd.read_parquet(parquet_path)
print("[INFO] Loaded Parquet", df.shape)

# ================= B) Column mapping =================
def first_col(d: pd.DataFrame, names):
    """Return the first entry of *names* that is a column of *d*, else None.

    Candidates are checked in the order given, so earlier names win.
    """
    available = d.columns
    return next((candidate for candidate in names if candidate in available), None)

# Resolve the logical columns used by the pipeline. For each one, the first
# candidate name present in the frame wins; None means "not available".
col_real  = first_col(df, ["Days for shipping (real)", "Days for shipping (real)_", "TPT"])
col_sched = first_col(df, ["Days for shipment (scheduled)", "Days for shipment (scheduled)_"])
col_y     = first_col(df, ["Late_delivery_risk", "Is_Late"])
col_qty   = first_col(df, ["Order Item Quantity", "Unit quantity", "Quantity"])
col_org   = first_col(df, ["Order City", "Order Region", "Order Country"])
col_dst   = first_col(df, ["Customer City", "Customer Region", "Customer Country"])
col_car   = first_col(df, ["Shipping Mode", "Carrier", "Shipment Mode", "Ship Mode"])
# NOTE(review): the public DataCo CSV appears to spell the order-date column in
# lower case ("order date (DateOrders)"); that spelling is accepted as well so
# real timestamps are used instead of the synthetic-time fallback. Verify
# against the actual file header.
col_date  = first_col(df, ["Order Date (DateOrders)", "order date (DateOrders)",
                           "Order Date", "DateOrders", "Date"])

# Build target if missing: an order counts as late (1) when its real shipping
# days exceed the scheduled days. Non-numeric values coerce to NaN, which
# compares False and therefore yields 0.
if col_y is None:
    if (col_real is None) or (col_sched is None):
        raise ValueError("Need Late_delivery_risk or both real/scheduled days.")
    df["__y__"] = (pd.to_numeric(df[col_real], errors="coerce") >
                   pd.to_numeric(df[col_sched], errors="coerce")).astype(int)
    col_y = "__y__"

# ================= C) Minimal numeric + light categoricals =================
# Every numeric feature below is derived from these three columns, so fail
# fast with a readable message instead of a KeyError on a None column name.
_missing_core = [label for label, col in (("quantity", col_qty),
                                          ("real shipping days", col_real),
                                          ("scheduled shipping days", col_sched))
                 if col is None]
if _missing_core:
    raise ValueError(f"Missing required column(s): {', '.join(_missing_core)}")

# Keep only the columns the pipeline uses (optional ones may be absent).
keep_cols = [c for c in [col_qty, col_real, col_sched, col_y, col_org, col_dst, col_car, col_date] if c is not None]
df = df[keep_cols].copy()

# Cast numerics; unparseable values become NaN and are dropped just below.
for c in [col_qty, col_real, col_sched]:
    df[c] = pd.to_numeric(df[c], errors="coerce")

# Drop core NA
df = df.dropna(subset=[col_y, col_real, col_sched, col_qty])

# Target
y_all = df[col_y].astype(int).to_numpy()

# Numeric features (compact, strong)
# NOTE(review): tpt_real and lead_dev are only known after delivery. When the
# target is the fallback `real > scheduled` built in the column-mapping step,
# `lead_dev > 0` is exactly the label. Confirm that these features are meant
# to be available at prediction time; otherwise reported metrics are inflated.
num_df = pd.DataFrame(index=df.index)
num_df["qty"]          = df[col_qty]
num_df["tpt_real"]     = df[col_real]
num_df["tpt_sched"]    = df[col_sched]
num_df["lead_dev"]     = num_df["tpt_real"] - num_df["tpt_sched"]
# Quantities below 1 (including 0) are floored at 1 so the ratio stays finite.
# The same floor is used for log_qty. Mapping 0 to NaN first would survive the
# clip and inject NaN into the feature matrix.
_qty_floor = np.clip(num_df["qty"], 1, None)
num_df["tpt_per_unit"] = num_df["tpt_real"] / _qty_floor
num_df["log_qty"]      = np.log1p(_qty_floor)

# Light categoricals (no one-hot explosion); a missing column becomes the
# constant string "NA" so downstream code always sees org/dst/car.
cat_df = pd.DataFrame(index=df.index)
cat_df["org"] = df[col_org].astype(str) if col_org else "NA"
cat_df["dst"] = df[col_dst].astype(str) if col_dst else "NA"
cat_df["car"] = df[col_car].astype(str) if col_car else "NA"

# ================= D) Split first (avoid leakage), then target-mean encode =================
# Numeric block as a float32 matrix; categoricals stay a DataFrame because the
# target-mean encoder needs the raw keys.
X_num_all = num_df.to_numpy(dtype="float32")
X_cat_all = cat_df.copy()

# One stratified 80/20 split applied to all four aligned inputs. The original
# df index labels are carried along (idx_tr / idx_te) so rows can be looked up
# in df again later (route keys, order dates).
X_cat_tr, X_cat_te, X_num_tr, X_num_te, y_tr, y_te, idx_tr, idx_te = train_test_split(
    X_cat_all, X_num_all, y_all, df.index.values,
    test_size=0.2, random_state=42, stratify=y_all
)

def kfold_tme(X_cat: pd.DataFrame, y: np.ndarray, n_splits=5, alpha=10.0, seed=42):
    """K-fold target mean encoding with additive smoothing.

    For every categorical column, each row is encoded with the smoothed target
    mean of its category computed on the *other* folds only (out-of-fold), so
    a row's own label never contributes to its encoding. Categories unseen in
    the fitting folds fall back to the global target mean.

    Smoothed mean per category: (count*mean + alpha*global_mean) / (count + alpha).

    Args:
        X_cat: categorical features, one column per variable.
        y: binary/numeric target aligned positionally with X_cat's rows.
        n_splits: number of stratified folds.
        alpha: smoothing strength (pseudo-count of the global mean).
        seed: random_state for the fold shuffling.

    Returns:
        (X_enc, enc_maps): X_enc is a float32 DataFrame with X_cat's index and
        columns; enc_maps maps each column to ``[category -> smoothed mean
        dict fitted on all rows, global_mean]`` for encoding unseen data.
    """
    # NOTE(review): the prior is the mean over all rows, including each
    # validation fold; only the per-category statistics are out-of-fold.
    global_mean = float(y.mean())

    def _smoothed_means(keys, targets):
        # Per-category smoothed target mean as a Series indexed by category.
        stats = pd.DataFrame({"k": keys, "y": targets}).groupby("k")["y"].agg(["count", "mean"])
        return (stats["count"]*stats["mean"] + alpha*global_mean) / (stats["count"] + alpha)

    # The folds depend only on y, so compute them once and reuse them for
    # every column instead of re-splitting inside the column loop.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    folds = list(skf.split(np.zeros(len(y)), y))

    X_enc = pd.DataFrame(index=X_cat.index, dtype="float32")
    enc_maps = {}
    for col in X_cat.columns:
        keys = X_cat[col]
        # Plain positional buffer: fold indices are positions, so writing into
        # a numpy array avoids any dependence on pandas index alignment.
        oof = np.full(len(keys), np.nan, dtype="float32")
        for tr_idx, va_idx in folds:
            smooth = _smoothed_means(keys.iloc[tr_idx].to_numpy(), y[tr_idx])
            encoded = keys.iloc[va_idx].map(smooth).fillna(global_mean)
            oof[va_idx] = encoded.to_numpy(dtype="float32")
        X_enc[col] = oof
        # full-train map for test
        smooth_full = _smoothed_means(keys.to_numpy(), y)
        enc_maps[col] = [smooth_full.to_dict(), float(global_mean)]
    return X_enc.astype("float32"), enc_maps

def apply_tme(X_cat: pd.DataFrame, maps):
    out = pd.DataFrame(index=X_cat.index, dtype="float32")
    for col in X_cat.columns:
        m, g = maps[col]
        out[col] = X_cat[col].map(m).fillna(g).astype("float32")
    return out

# Fit the target-mean encoder on the training split only; the test split is
# encoded with the maps learned from train.
t0 = time.time()
Xtr_cat_tme, encoders = kfold_tme(X_cat_tr, y_tr, n_splits=5, alpha=10.0)
Xte_cat_tme = apply_tme(X_cat_te, encoders)
print(f"[INFO] TME in {time.time()-t0:.2f}s | train={Xtr_cat_tme.shape}, test={Xte_cat_tme.shape}")

# Final tabular features: numeric columns first, then the encoded categoricals.
Xtr_tab = np.hstack([X_num_tr, Xtr_cat_tme.to_numpy()])
Xte_tab = np.hstack([X_num_te, Xte_cat_tme.to_numpy()])

# Scale (fit on train, apply to test)
scaler = StandardScaler()
Xtr_tab_s = scaler.fit_transform(Xtr_tab).astype("float32")
Xte_tab_s = scaler.transform(Xte_tab).astype("float32")

# ================= E) Build route keys & time for episodes =================
def safe_series(indexes, colname):
    """Return column *colname* of the global ``df`` for *indexes*, as strings.

    When *colname* is empty/None or not a column of ``df``, a Series filled
    with the constant "NA" (indexed by *indexes*) is returned instead.
    """
    column_available = bool(colname) and colname in df.columns
    if not column_available:
        return pd.Series("NA", index=indexes)
    selected = df.loc[indexes, colname]
    return selected.astype(str)

def get_time_series(indexes):
    """Return an order-time Series for the rows labelled by *indexes*.

    Uses the resolved date column of the global ``df`` when one exists;
    unparseable dates become NaT. Without a date column, a synthetic hourly
    timeline starting at 2000-01-01 is returned in the order of *indexes*, so
    the input order is preserved when sorting by time.
    """
    if col_date and col_date in df.columns:
        return pd.to_datetime(df.loc[indexes, col_date], errors="coerce")
    # fallback synthetic time; an explicit Timedelta avoids relying on the "H"
    # frequency alias, which newer pandas releases deprecate.
    synthetic = pd.date_range("2000-01-01", periods=len(indexes), freq=pd.Timedelta(hours=1))
    return pd.Series(synthetic, index=indexes)

def build_ordering_and_routes(indices):
    """Compute the episode ordering and route keys for the given rows.

    A route key is "origin | destination | carrier" (each part falls back to
    "NA" when its column is unavailable). The ordering groups rows by route
    and sorts each route chronologically, so that consecutive rows with the
    same route form one time-ordered episode. Sorting by time alone would
    interleave routes and leave almost every run of equal routes at length 1.

    Args:
        indices: df index labels of the rows to order.

    Returns:
        (ord_idx, routes): ``ord_idx`` is a positional permutation into
        *indices*; ``routes`` is the array of route keys in the ORIGINAL input
        order — the caller applies ``ord_idx`` to it.
    """
    order_time = get_time_series(indices)
    route_key = (safe_series(indices, col_org) + " | " +
                 safe_series(indices, col_dst) + " | " +
                 safe_series(indices, col_car)).astype(str)
    # Built from raw arrays so the frame gets a 0..n-1 index; after sorting,
    # that index is exactly the positional permutation. Multi-column sorts are
    # stable and place NaT timestamps last within their route.
    keys = pd.DataFrame({"route": route_key.values, "time": order_time.values})
    ord_idx = keys.sort_values(["route", "time"], kind="mergesort").index.to_numpy()
    return ord_idx, route_key.values

# Train ordering / episodes
# ord_tr is a positional permutation; the same permutation is applied to the
# scaled features, the labels and the route keys so all three stay aligned.
ord_tr, routes_tr = build_ordering_and_routes(idx_tr)
Xtr_seq = Xtr_tab_s[ord_tr]
ytr_seq = y_tr[ord_tr]
routes_tr = routes_tr[ord_tr]

# Test ordering / episodes (same procedure on the held-out split)
ord_te, routes_te = build_ordering_and_routes(idx_te)
Xte_seq = Xte_tab_s[ord_te]
yte_seq = y_te[ord_te]
routes_te = routes_te[ord_te]

def build_episodes(routes):
    """Split *routes* into maximal runs of equal consecutive values.

    Returns a list of ``slice(start, stop)`` objects, one per run, that
    together cover ``range(len(routes))`` in order. An empty input yields an
    empty list.
    """
    total = len(routes)
    if total == 0:
        return []
    # Positions where a new run begins, plus the end sentinel.
    cuts = [0]
    cuts.extend(pos for pos in range(1, total) if routes[pos] != routes[pos - 1])
    cuts.append(total)
    return [slice(lo, hi) for lo, hi in zip(cuts, cuts[1:])]

# One episode per run of identical consecutive route keys in the ordered data.
episodes_tr = build_episodes(routes_tr)
episodes_te = build_episodes(routes_te)

print(f"[INFO] Episodes: train={len(episodes_tr)}, test={len(episodes_te)}")

# ================= F) PPO Environment (Sequential) =================
class SeqEnv(gym.Env):
    """Sequential binary-decision environment over pre-built episodes.

    Each episode is a contiguous slice of rows of ``X``/``y``. At every step
    the agent sees one row and chooses action 0 or 1; the action is scored
    against the row's label. Episodes are served round-robin, one per reset.

    Observation (float32, length ``X.shape[1] + 2 + K + 2``):
        row features, sin/cos of the relative position in the episode, the
        last K actions (oldest first), and the running FP and FN counts.

    Reward per step:
        +2.0 action 1 on label 1, +1.0 action 0 on label 0,
        -5.0 action 0 on label 1 (miss), -2.0 otherwise (false alarm),
        minus a 0.01 step cost, plus 0.2 when the episode's running FN rate
        dropped compared with the previous step.
    """

    metadata = {"render_modes":[]}

    def __init__(self, X, y, episodes, K=5):
        super().__init__()
        self.X = X
        self.y = y.astype(int)
        self.episodes = episodes
        self.K = K
        # features + 2 positional terms + K past actions + FP/FN counters
        self.obs_dim = X.shape[1] + 2 + K + 2
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.obs_dim,), dtype=np.float32
        )
        self.action_space = spaces.Discrete(2)
        # -1 so that the first reset() starts with episode 0.
        self._ep = -1

    def _tfeat(self, t, T):
        """Sin/cos encoding of step *t* within an episode of length *T*."""
        angle = 2*np.pi*(t/max(T-1,1))
        return np.array([np.sin(angle), np.cos(angle)], dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Advance to the next episode (wrapping around) and clear counters."""
        super().reset(seed=seed)
        self._ep = (self._ep + 1) % len(self.episodes)
        span = self.episodes[self._ep]
        self.idx = np.arange(span.start, span.stop, dtype=int)
        self.t = 0
        self.last = np.zeros(self.K, dtype=np.float32)
        self.fp = 0.0
        self.fn = 0.0
        self.prev_fn = 0.0
        return self._obs(), {}

    def _obs(self):
        """Assemble the observation for the current step."""
        row = self.idx[self.t]
        parts = [
            self.X[row],
            self._tfeat(self.t, len(self.idx)),
            self.last,
            np.array([self.fp, self.fn], dtype=np.float32),
        ]
        return np.concatenate(parts).astype(np.float32)

    def step(self, a):
        """Score action *a* on the current row and move to the next one.

        Returns the gymnasium 5-tuple ``(obs, reward, terminated, truncated,
        info)``; ``truncated`` is always False and the terminal observation is
        all zeros.
        """
        row = self.idx[self.t]
        label = self.y[row]

        # Reward: FN >> FP (missed delays costly)
        if a==1 and label==1:
            r = 2.0
        elif a==0 and label==0:
            r = 1.0
        elif label==1 and a==0:
            r = -5.0
        else:
            r = -2.0
        r -= 0.01  # small step cost

        self.fn += float(label==1 and a==0)
        self.fp += float(label==0 and a==1)

        # Small bonus when the running FN rate improved since the last step.
        steps = float(self.t + 1)
        fn_rate = self.fn/max(steps, 1.0)
        if fn_rate < self.prev_fn:
            r += 0.2
        self.prev_fn = fn_rate

        # Shift the action history left and append the newest action.
        self.last = np.roll(self.last, -1)
        self.last[-1] = float(a)

        self.t += 1
        done = self.t >= len(self.idx)
        if done:
            obs = np.zeros(self.observation_space.shape[0], dtype=np.float32)
        else:
            obs = self._obs()
        return obs, float(r), done, False, {}

# ================= G) Train PPO on TRAIN EPISODES; Evaluate on TEST =================
PPO_STEPS = 20_000       # total environment steps; increase if you want better convergence
PPO_BS    = 4096         # minibatch size passed to PPO
PPO_NST   = 1024         # rollout length per environment between updates
# NOTE(review): batch_size (4096) exceeds n_steps * n_envs (1024 * 1) — confirm
# this is intended; SB3 warnings are silenced by the global warnings filter.

# Single training environment iterating over the TRAIN episodes.
train_env = make_vec_env(lambda: SeqEnv(Xtr_seq, ytr_seq, episodes_tr, K=5), n_envs=1)
ppo = PPO(
    "MlpPolicy", train_env, seed=42, verbose=0, device=rl_device,
    batch_size=PPO_BS, n_steps=PPO_NST, learning_rate=3e-4
)

t0 = time.time()
ppo.learn(total_timesteps=PPO_STEPS)
print(f"[TIME] PPO train: {time.time()-t0:.2f}s")

# Deterministic evaluation on TEST episodes
# A fresh env starts at episode 0 and serves episodes in order, so the
# collected actions line up positionally with yte_seq. `visited` counts
# finished episodes; the loop stops once every test episode was played once.
eval_env = SeqEnv(Xte_seq, yte_seq, episodes_te, K=5)
obs, _ = eval_env.reset(); preds = []; visited = 0
while True:
    a, _ = ppo.predict(obs, deterministic=True)
    preds.append(int(a))
    obs, _, done, _, _ = eval_env.step(a)
    if done:
        visited += 1
        if visited >= len(episodes_te): break
        obs, _ = eval_env.reset()
# Defensive trim so predictions never outnumber the labels.
preds = np.array(preds[:len(yte_seq)])

# ================= H) Metrics =================
def metric_dict(y_true, y_pred_binary):
    """Return accuracy and weighted precision/recall/F1 as plain floats.

    *y_pred_binary* is flattened and cast to int before scoring. Precision,
    recall and F1 use ``average="weighted"`` with ``zero_division=1``.
    The result keys are Accuracy, Precision, Recall and F1, in that order.
    """
    labels = np.asarray(y_pred_binary).reshape(-1).astype(int)
    weighted = {"average": "weighted", "zero_division": 1}
    scores = {"Accuracy": accuracy_score(y_true, labels)}
    for title, scorer in (("Precision", precision_score),
                          ("Recall", recall_score),
                          ("F1", f1_score)):
        scores[title] = scorer(y_true, labels, **weighted)
    return {title: float(value) for title, value in scores.items()}

# One row per model; transpose so metrics become columns and the model name
# moves from the index into a "Model" column.
results = {"PPO (Sequential)": metric_dict(yte_seq, preds)}
final_df = pd.DataFrame(results).T.reset_index().rename(columns={"index":"Model"})
print("\n=== Metrics (Accuracy, Precision, Recall, F1) ===")
print(final_df.to_string(index=False))

# Save the metrics table to the current working directory.
out_path = "metrics_dataco_ppo_only.csv"
final_df.to_csv(out_path, index=False)
print(f"\n[OK] Saved: {out_path}")
# Total wall-clock time since t0_all was taken in the data-loading step.
print(f"[TOTAL] {time.time()-t0_all:.1f}s")


[INFO] RL device: cpu
[INFO] Loaded Parquet (180519, 53)
[INFO] TME in 0.18s | train=(144415, 3), test=(36104, 3)
[INFO] Episodes: train=144390, test=36102
[TIME] PPO train: 3.51s

=== Metrics (Accuracy, Precision, Recall, F1) ===
           Model  Accuracy  Precision   Recall       F1
PPO (Sequential)  0.974019   0.975195 0.974019 0.973935

[OK] Saved: metrics_dataco_ppo_only.csv
[TOTAL] 7.4s
