<a href="https://colab.research.google.com/github/sanjanabayya30/Proj/blob/main/FDRL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install stable_baselines3

Collecting stable_baselines3
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting gymnasium<1.2.0,>=0.29.1 (from stable_baselines3)
  Downloading gymnasium-1.1.1-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable_baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (f

In [None]:
!pip install shimmy>=2.0

In [None]:
# === STEP 1: Upload ZIP ===
from google.colab import files
uploaded = files.upload()  # Upload dataset.zip

import os, zipfile, numpy as np, time, csv
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import DummyVecEnv

# === STEP 2: Extract ===
zip_path = next(iter(uploaded))
extract_path = "/content/fjsp_dataset"
os.makedirs(extract_path, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)
print(f"✅ Extracted {zip_path} into {extract_path}")

# === STEP 3: FJSSP Env ===
class FJSSPEnv(gym.Env):
    def __init__(self, data):
        super().__init__()
        self.jobs = data['jobs']
        self.num_jobs = len(self.jobs)
        all_machines = set(m for job in self.jobs for op in job for m in op[0])
        self.num_machines = max(all_machines) + 1
        self.state_dim = self.num_jobs * 4 + self.num_machines
        self.action_space = spaces.Discrete(self.num_jobs)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim,), dtype=np.float32)
        self.reset()

    def reset(self):
        self.done_jobs = [0] * self.num_jobs
        self.machines_time = [0] * self.num_machines
        self.current_time = 0
        return self._get_obs()

    def _get_obs(self):
        state = []
        for j in range(self.num_jobs):
            op_idx = self.done_jobs[j]
            if op_idx >= len(self.jobs[j]):
                state.extend([j, -1, -1, 0])
                continue
            m_list, p_list = self.jobs[j][op_idx]
            best_m, best_pt, min_ct = -1, -1, float('inf')
            for i, m in enumerate(m_list):
                ct = max(self.current_time, self.machines_time[m]) + p_list[i]
                if ct < min_ct:
                    best_m, best_pt, min_ct = m, p_list[i], ct
            state.extend([j, best_m, best_pt, len(self.jobs[j]) - op_idx])
        state.extend(self.machines_time)
        return np.array(state, dtype=np.float32)

    def step(self, action):
        j = int(action)
        if j < 0 or j >= self.num_jobs or self.done_jobs[j] >= len(self.jobs[j]):
            return self._get_obs(), -100.0, False, {"makespan": max(self.machines_time)}
        op = self.jobs[j][self.done_jobs[j]]
        best_m, best_pt, min_ct = -1, -1, float('inf')
        for i, m in enumerate(op[0]):
            ct = max(self.current_time, self.machines_time[m]) + op[1][i]
            if ct < min_ct:
                best_m, best_pt, min_ct = m, op[1][i], ct
        start = max(self.current_time, self.machines_time[best_m])
        finish = start + best_pt
        self.machines_time[best_m] = finish
        self.done_jobs[j] += 1
        self.current_time = min(self.machines_time)
        done = all(self.done_jobs[j] == len(self.jobs[j]) for j in range(self.num_jobs))
        reward = -1.0
        if done:
            reward += -max(self.machines_time)
        return self._get_obs(), reward, done, {"makespan": max(self.machines_time)}

# === STEP 4: Greedy Baseline ===
def evaluate_greedy(env):
    obs = env.reset()
    done = False
    while not done:
        options = []
        for j in range(env.num_jobs):
            if env.done_jobs[j] < len(env.jobs[j]):
                m_list, p_list = env.jobs[j][env.done_jobs[j]]
                for i, m in enumerate(m_list):
                    ct = max(env.current_time, env.machines_time[m]) + p_list[i]
                    options.append((ct, j))
        if not options:
            break
        _, best_job = min(options)
        obs, _, done, _ = env.step(best_job)
    return max(env.machines_time)

# === STEP 5: Process All Datasets ===
results = []
total_episodes = 10  # 🟢 FAST mode

print("\n--- 🧘 Training PPO on All Datasets ---\n")
for root, _, files_in_dir in os.walk(extract_path):
    for fname in sorted(files_in_dir):
        if not fname.endswith(".npy"):
            continue
        fpath = os.path.join(root, fname)
        print(f"\n📁 Dataset: {fname}")

        try:
            raw = np.load(fpath, allow_pickle=True)
            if raw.ndim != 4 or raw.shape[1] != 2:
                print("⚠️ Skipping: invalid shape")
                continue
            mm, pm = raw[0][0], raw[0][1]
            jobs = [([int(mm[j][o])], [float(pm[j][o])]) for j in range(mm.shape[0]) for o in range(mm.shape[1])]
            job_structure = [[([int(mm[j][o])], [float(pm[j][o])]) for o in range(mm.shape[1])] for j in range(mm.shape[0])]
            data = {'jobs': job_structure}
        except Exception as e:
            print(f"❌ Failed to load: {e}")
            continue

        greedy_env = FJSSPEnv(data)
        greedy_makespan = evaluate_greedy(greedy_env)
        print(f"⚙️  Initial Makespan (Greedy): {greedy_makespan}")

        env = DummyVecEnv([lambda: FJSSPEnv(data)])
        model = PPO("MlpPolicy", env, verbose=0, n_steps=128, batch_size=64)
        for ep in range(total_episodes):
            obs = env.reset()
            done, ep_reward, steps = False, 0, 0
            while not done and steps < 1000:
                action, _ = model.predict(obs, deterministic=False)
                obs, reward, done_vec, info = env.step(action)
                done = done_vec[0]
                ep_reward += reward[0]
                steps += 1
            mk = info[0].get("makespan", 0)
            print(f"🌀 Episode {ep+1:2d}/{total_episodes} | Makespan: {mk} | Reward: {ep_reward:.1f}")
            model.learn(total_timesteps=1000, reset_num_timesteps=False)

        eval_env = FJSSPEnv(data)
        obs = eval_env.reset()
        done, steps = False, 0
        t0 = time.time()
        while not done and steps < 1000:
            action, _ = model.predict(obs, deterministic=True)
            obs, _, done, info = eval_env.step(action)
            steps += 1
        t1 = time.time()
        ppo_makespan = info.get("makespan", 0)
        avg_eval_time = t1 - t0
        gap = 100.0 * (ppo_makespan - greedy_makespan) / greedy_makespan if greedy_makespan > 0 else 0
        print(f"✅ PPO Final Makespan: {ppo_makespan}")
        print(f"⏱ Avg Evaluation Time: {avg_eval_time:.4f} sec")
        print(f"📉 Gap vs Greedy: {gap:.2f}%")

        results.append({
            "dataset": fname,
            "greedy": greedy_makespan,
            "ppo": ppo_makespan,
            "time": avg_eval_time,
            "gap": gap
        })

# === STEP 6: Summary Table ===
print("\n\n=== 📊 Final Summary ===")
print(f"{'Dataset':25} {'Greedy':>8} {'PPO':>8} {'Gap (%)':>9} {'Time (s)':>10}")
print("-" * 60)
for r in results:
    print(f"{r['dataset'][:25]:25} {r['greedy']:8} {r['ppo']:8} {r['gap']:9.2f} {r['time']:10.4f}")

# === STEP 7: Export Summary to CSV ===
csv_path = "/content/fjsp_results_summary.csv"
with open(csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Dataset", "Greedy Makespan", "PPO Makespan", "Gap (%)", "Eval Time (s)"])
    for r in results:
        writer.writerow([r['dataset'], r['greedy'], r['ppo'], f"{r['gap']:.2f}", f"{r['time']:.4f}"])

print(f"\n📁 Results saved to: {csv_path}")
files.download(csv_path)


Saving SELECTED_24_DATASETS.zip to SELECTED_24_DATASETS (1).zip
✅ Extracted SELECTED_24_DATASETS (1).zip into /content/fjsp_dataset

--- 🧘 Training PPO on All Datasets ---


📁 Dataset: dmu20_15.npy
⚙️  Initial Makespan (Greedy): 73.0




🌀 Episode  1/10 | Makespan: 73.0 | Reward: -10073.0
🌀 Episode  2/10 | Makespan: 73.0 | Reward: -10973.0
🌀 Episode  3/10 | Makespan: 73.0 | Reward: -17973.0
🌀 Episode  4/10 | Makespan: 73.0 | Reward: -29473.0
🌀 Episode  5/10 | Makespan: 73.0 | Reward: -27573.0
🌀 Episode  6/10 | Makespan: 73.0 | Reward: -24873.0
🌀 Episode  7/10 | Makespan: 73.0 | Reward: -33973.0
🌀 Episode  8/10 | Makespan: 73.0 | Reward: -26873.0
🌀 Episode  9/10 | Makespan: 73.0 | Reward: -27673.0
🌀 Episode 10/10 | Makespan: 73.0 | Reward: -22973.0
✅ PPO Final Makespan: 24.0
⏱ Avg Evaluation Time: 0.5895 sec
📉 Gap vs Greedy: -67.12%

📁 Dataset: dmu20_20.npy
⚙️  Initial Makespan (Greedy): 76.0




🌀 Episode  1/10 | Makespan: 76.0 | Reward: -17276.0
🌀 Episode  2/10 | Makespan: 76.0 | Reward: -18376.0
🌀 Episode  3/10 | Makespan: 76.0 | Reward: -34176.0
🌀 Episode  4/10 | Makespan: 76.0 | Reward: -51576.0
🌀 Episode  5/10 | Makespan: 76.0 | Reward: -46176.0
🌀 Episode  6/10 | Makespan: 76.0 | Reward: -25876.0
🌀 Episode  7/10 | Makespan: 76.0 | Reward: -33576.0
🌀 Episode  8/10 | Makespan: 76.0 | Reward: -27076.0
🌀 Episode  9/10 | Makespan: 76.0 | Reward: -22276.0
🌀 Episode 10/10 | Makespan: 76.0 | Reward: -46976.0
✅ PPO Final Makespan: 20.0
⏱ Avg Evaluation Time: 0.5146 sec
📉 Gap vs Greedy: -73.68%

📁 Dataset: dmu30_15.npy
⚙️  Initial Makespan (Greedy): 74.0




🌀 Episode  1/10 | Makespan: 74.0 | Reward: -23624.0
🌀 Episode  2/10 | Makespan: 74.0 | Reward: -20724.0
🌀 Episode  3/10 | Makespan: 74.0 | Reward: -55648.0
🌀 Episode  4/10 | Makespan: 74.0 | Reward: -49424.0
🌀 Episode  5/10 | Makespan: 74.0 | Reward: -44324.0
🌀 Episode  6/10 | Makespan: 74.0 | Reward: -21924.0
🌀 Episode  7/10 | Makespan: 74.0 | Reward: -54224.0
🌀 Episode  8/10 | Makespan: 74.0 | Reward: -44824.0
🌀 Episode  9/10 | Makespan: 74.0 | Reward: -36724.0
🌀 Episode 10/10 | Makespan: 74.0 | Reward: -39624.0
✅ PPO Final Makespan: 16.0
⏱ Avg Evaluation Time: 0.5753 sec
📉 Gap vs Greedy: -78.38%

📁 Dataset: dmu30_20.npy
⚙️  Initial Makespan (Greedy): 124.0




🌀 Episode  1/10 | Makespan: 124.0 | Reward: -35424.0
🌀 Episode  2/10 | Makespan: 124.0 | Reward: -40798.0
🌀 Episode  3/10 | Makespan: 124.0 | Reward: -37824.0
🌀 Episode  4/10 | Makespan: 124.0 | Reward: -41293.0
🌀 Episode  5/10 | Makespan: 124.0 | Reward: -40996.0
🌀 Episode  6/10 | Makespan: 108.0 | Reward: -40996.0
🌀 Episode  7/10 | Makespan: 124.0 | Reward: -41491.0
🌀 Episode  8/10 | Makespan: 124.0 | Reward: -41392.0
🌀 Episode  9/10 | Makespan: 124.0 | Reward: -42382.0
🌀 Episode 10/10 | Makespan: 124.0 | Reward: -41194.0
✅ PPO Final Makespan: 20.0
⏱ Avg Evaluation Time: 0.5666 sec
📉 Gap vs Greedy: -83.87%

📁 Dataset: dmu40_15.npy
⚙️  Initial Makespan (Greedy): 92.0




🌀 Episode  1/10 | Makespan: 92.0 | Reward: -28892.0
🌀 Episode  2/10 | Makespan: 92.0 | Reward: -40798.0
🌀 Episode  3/10 | Makespan: 92.0 | Reward: -34592.0
🌀 Episode  4/10 | Makespan: 92.0 | Reward: -40699.0
🌀 Episode  5/10 | Makespan: 92.0 | Reward: -40897.0
🌀 Episode  6/10 | Makespan: 92.0 | Reward: -40592.0
🌀 Episode  7/10 | Makespan: 92.0 | Reward: -40699.0
🌀 Episode  8/10 | Makespan: 92.0 | Reward: -40699.0
🌀 Episode  9/10 | Makespan: 92.0 | Reward: -40897.0
🌀 Episode 10/10 | Makespan: 92.0 | Reward: -41095.0
✅ PPO Final Makespan: 18.0
⏱ Avg Evaluation Time: 0.5858 sec
📉 Gap vs Greedy: -80.43%

📁 Dataset: dmu40_20.npy
⚙️  Initial Makespan (Greedy): 123.0




🌀 Episode  1/10 | Makespan: 122.0 | Reward: -21988.0
🌀 Episode  2/10 | Makespan: 122.0 | Reward: -24661.0
🌀 Episode  3/10 | Makespan: 113.0 | Reward: -23770.0
🌀 Episode  4/10 | Makespan: 122.0 | Reward: -23473.0
🌀 Episode  5/10 | Makespan: 122.0 | Reward: -27928.0
🌀 Episode  6/10 | Makespan: 122.0 | Reward: -29314.0
🌀 Episode  7/10 | Makespan: 122.0 | Reward: -29314.0
🌀 Episode  8/10 | Makespan: 122.0 | Reward: -31195.0
🌀 Episode  9/10 | Makespan: 122.0 | Reward: -32284.0
🌀 Episode 10/10 | Makespan: 122.0 | Reward: -32383.0
✅ PPO Final Makespan: 31.0
⏱ Avg Evaluation Time: 0.5630 sec
📉 Gap vs Greedy: -74.80%

📁 Dataset: dmu50_15.npy
⚙️  Initial Makespan (Greedy): 85.0




🌀 Episode  1/10 | Makespan: 85.0 | Reward: -28126.0
🌀 Episode  2/10 | Makespan: 85.0 | Reward: -27334.0
🌀 Episode  3/10 | Makespan: 85.0 | Reward: -28126.0
🌀 Episode  4/10 | Makespan: 72.0 | Reward: -29314.0
🌀 Episode  5/10 | Makespan: 71.0 | Reward: -29215.0
🌀 Episode  6/10 | Makespan: 70.0 | Reward: -31492.0
🌀 Episode  7/10 | Makespan: 71.0 | Reward: -30700.0
🌀 Episode  8/10 | Makespan: 72.0 | Reward: -31393.0
🌀 Episode  9/10 | Makespan: 72.0 | Reward: -31096.0
🌀 Episode 10/10 | Makespan: 85.0 | Reward: -31591.0
✅ PPO Final Makespan: 15.0
⏱ Avg Evaluation Time: 0.7037 sec
📉 Gap vs Greedy: -82.35%

📁 Dataset: dmu50_20.npy
⚙️  Initial Makespan (Greedy): 136.0




🌀 Episode  1/10 | Makespan: 136.0 | Reward: -9217.0
🌀 Episode  2/10 | Makespan: 134.0 | Reward: -11197.0
🌀 Episode  3/10 | Makespan: 136.0 | Reward: -15553.0
🌀 Episode  4/10 | Makespan: 116.0 | Reward: -17137.0
🌀 Episode  5/10 | Makespan: 136.0 | Reward: -16939.0
🌀 Episode  6/10 | Makespan: 125.0 | Reward: -16246.0
🌀 Episode  7/10 | Makespan: 129.0 | Reward: -14959.0
🌀 Episode  8/10 | Makespan: 136.0 | Reward: -16642.0
🌀 Episode  9/10 | Makespan: 136.0 | Reward: -14365.0
🌀 Episode 10/10 | Makespan: 136.0 | Reward: -15553.0
✅ PPO Final Makespan: 29.0
⏱ Avg Evaluation Time: 0.6142 sec
📉 Gap vs Greedy: -78.68%

📁 Dataset: tai100_20.npy
⚙️  Initial Makespan (Greedy): 302.0




🌀 Episode  1/10 | Makespan: 178.0 | Reward: -1000.0
🌀 Episode  2/10 | Makespan: 194.0 | Reward: -1000.0
🌀 Episode  3/10 | Makespan: 194.0 | Reward: -1198.0
🌀 Episode  4/10 | Makespan: 168.0 | Reward: -1099.0
🌀 Episode  5/10 | Makespan: 202.0 | Reward: -1000.0
🌀 Episode  6/10 | Makespan: 190.0 | Reward: -1000.0
🌀 Episode  7/10 | Makespan: 176.0 | Reward: -1099.0
🌀 Episode  8/10 | Makespan: 202.0 | Reward: -1000.0
🌀 Episode  9/10 | Makespan: 179.0 | Reward: -1297.0
🌀 Episode 10/10 | Makespan: 174.0 | Reward: -1000.0
✅ PPO Final Makespan: 37.0
⏱ Avg Evaluation Time: 1.0198 sec
📉 Gap vs Greedy: -87.75%

📁 Dataset: tai15_15.npy
⚙️  Initial Makespan (Greedy): 64.0




🌀 Episode  1/10 | Makespan: 64.0 | Reward: -16989.0
🌀 Episode  2/10 | Makespan: 64.0 | Reward: -13289.0
🌀 Episode  3/10 | Makespan: 64.0 | Reward: -18289.0
🌀 Episode  4/10 | Makespan: 64.0 | Reward: -13389.0
🌀 Episode  5/10 | Makespan: 64.0 | Reward: -19089.0
🌀 Episode  6/10 | Makespan: 64.0 | Reward: -25389.0
🌀 Episode  7/10 | Makespan: 64.0 | Reward: -22789.0
🌀 Episode  8/10 | Makespan: 64.0 | Reward: -16089.0
🌀 Episode  9/10 | Makespan: 64.0 | Reward: -20689.0
🌀 Episode 10/10 | Makespan: 64.0 | Reward: -22089.0
✅ PPO Final Makespan: 18.0
⏱ Avg Evaluation Time: 0.5422 sec
📉 Gap vs Greedy: -71.88%

📁 Dataset: tai20_15.npy
⚙️  Initial Makespan (Greedy): 58.0




🌀 Episode  1/10 | Makespan: 58.0 | Reward: -22758.0
🌀 Episode  2/10 | Makespan: 58.0 | Reward: -16158.0
🌀 Episode  3/10 | Makespan: 58.0 | Reward: -19358.0
🌀 Episode  4/10 | Makespan: 58.0 | Reward: -16858.0
🌀 Episode  5/10 | Makespan: 58.0 | Reward: -26258.0
🌀 Episode  6/10 | Makespan: 58.0 | Reward: -33658.0
🌀 Episode  7/10 | Makespan: 58.0 | Reward: -26358.0
🌀 Episode  8/10 | Makespan: 58.0 | Reward: -61658.0
🌀 Episode  9/10 | Makespan: 58.0 | Reward: -25158.0
🌀 Episode 10/10 | Makespan: 58.0 | Reward: -40258.0
✅ PPO Final Makespan: 14.0
⏱ Avg Evaluation Time: 0.5392 sec
📉 Gap vs Greedy: -75.86%

📁 Dataset: tai20_20.npy
⚙️  Initial Makespan (Greedy): 88.0




🌀 Episode  1/10 | Makespan: 88.0 | Reward: -11288.0
🌀 Episode  2/10 | Makespan: 88.0 | Reward: -27988.0
🌀 Episode  3/10 | Makespan: 88.0 | Reward: -19788.0
🌀 Episode  4/10 | Makespan: 88.0 | Reward: -34388.0
🌀 Episode  5/10 | Makespan: 88.0 | Reward: -40988.0
🌀 Episode  6/10 | Makespan: 88.0 | Reward: -42188.0
🌀 Episode  7/10 | Makespan: 88.0 | Reward: -35488.0
🌀 Episode  8/10 | Makespan: 88.0 | Reward: -30288.0
🌀 Episode  9/10 | Makespan: 88.0 | Reward: -26488.0
🌀 Episode 10/10 | Makespan: 88.0 | Reward: -47488.0
✅ PPO Final Makespan: 19.0
⏱ Avg Evaluation Time: 0.6023 sec
📉 Gap vs Greedy: -78.41%

📁 Dataset: tai30_15.npy
⚙️  Initial Makespan (Greedy): 82.0




🌀 Episode  1/10 | Makespan: 82.0 | Reward: -34032.0
🌀 Episode  2/10 | Makespan: 82.0 | Reward: -40932.0
🌀 Episode  3/10 | Makespan: 82.0 | Reward: -43732.0
🌀 Episode  4/10 | Makespan: 82.0 | Reward: -51932.0
🌀 Episode  5/10 | Makespan: 82.0 | Reward: -47432.0
🌀 Episode  6/10 | Makespan: 82.0 | Reward: -46432.0
🌀 Episode  7/10 | Makespan: 82.0 | Reward: -55549.0
🌀 Episode  8/10 | Makespan: 82.0 | Reward: -43632.0
🌀 Episode  9/10 | Makespan: 82.0 | Reward: -55549.0
🌀 Episode 10/10 | Makespan: 82.0 | Reward: -48032.0
✅ PPO Final Makespan: 22.0
⏱ Avg Evaluation Time: 0.6974 sec
📉 Gap vs Greedy: -73.17%

📁 Dataset: tai30_20.npy
⚙️  Initial Makespan (Greedy): 138.0




🌀 Episode  1/10 | Makespan: 138.0 | Reward: -24538.0
🌀 Episode  2/10 | Makespan: 138.0 | Reward: -32338.0
🌀 Episode  3/10 | Makespan: 138.0 | Reward: -41095.0
🌀 Episode  4/10 | Makespan: 138.0 | Reward: -41194.0
🌀 Episode  5/10 | Makespan: 138.0 | Reward: -40798.0
🌀 Episode  6/10 | Makespan: 138.0 | Reward: -41293.0
🌀 Episode  7/10 | Makespan: 138.0 | Reward: -41491.0
🌀 Episode  8/10 | Makespan: 138.0 | Reward: -41095.0
🌀 Episode  9/10 | Makespan: 138.0 | Reward: -42382.0
🌀 Episode 10/10 | Makespan: 138.0 | Reward: -41491.0
✅ PPO Final Makespan: 23.0
⏱ Avg Evaluation Time: 0.5991 sec
📉 Gap vs Greedy: -83.33%

📁 Dataset: tai50_15.npy
⚙️  Initial Makespan (Greedy): 95.0




🌀 Episode  1/10 | Makespan: 95.0 | Reward: -26839.0
🌀 Episode  2/10 | Makespan: 93.0 | Reward: -28621.0
🌀 Episode  3/10 | Makespan: 95.0 | Reward: -28621.0
🌀 Episode  4/10 | Makespan: 90.0 | Reward: -29116.0
🌀 Episode  5/10 | Makespan: 93.0 | Reward: -28918.0
🌀 Episode  6/10 | Makespan: 93.0 | Reward: -29116.0
🌀 Episode  7/10 | Makespan: 93.0 | Reward: -32482.0
🌀 Episode  8/10 | Makespan: 90.0 | Reward: -30106.0
🌀 Episode  9/10 | Makespan: 91.0 | Reward: -30997.0
🌀 Episode 10/10 | Makespan: 90.0 | Reward: -33670.0
✅ PPO Final Makespan: 14.0
⏱ Avg Evaluation Time: 0.5976 sec
📉 Gap vs Greedy: -85.26%

📁 Dataset: tai50_20.npy
⚙️  Initial Makespan (Greedy): 184.0




🌀 Episode  1/10 | Makespan: 156.0 | Reward: -10801.0
🌀 Episode  2/10 | Makespan: 168.0 | Reward: -11890.0
🌀 Episode  3/10 | Makespan: 168.0 | Reward: -13078.0
🌀 Episode  4/10 | Makespan: 155.0 | Reward: -14365.0
🌀 Episode  5/10 | Makespan: 168.0 | Reward: -12385.0
🌀 Episode  6/10 | Makespan: 166.0 | Reward: -14464.0
🌀 Episode  7/10 | Makespan: 152.0 | Reward: -14860.0
🌀 Episode  8/10 | Makespan: 165.0 | Reward: -15949.0
🌀 Episode  9/10 | Makespan: 150.0 | Reward: -17137.0
🌀 Episode 10/10 | Makespan: 154.0 | Reward: -14167.0
✅ PPO Final Makespan: 19.0
⏱ Avg Evaluation Time: 0.6041 sec
📉 Gap vs Greedy: -89.67%


=== 📊 Final Summary ===
Dataset                     Greedy      PPO   Gap (%)   Time (s)
------------------------------------------------------------
dmu20_15.npy                  73.0     24.0    -67.12     0.5895
dmu20_20.npy                  76.0     20.0    -73.68     0.5146
dmu30_15.npy                  74.0     16.0    -78.38     0.5753
dmu30_20.npy                 124.0   

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
C2

In [3]:

from google.colab import files
uploaded = files.upload()  # Upload dataset.zip

# Install shimmy to support OpenAI Gym with Stable-Baselines3
!pip install shimmy>=2.0

import os, zipfile, numpy as np, time, csv
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import DummyVecEnv

# === STEP 2: Extract ===
zip_path = next(iter(uploaded))
extract_path = "/content/fjsp_dataset"
os.makedirs(extract_path, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)
print(f"✅ Extracted {zip_path} into {extract_path}")

# === STEP 3: FJSSP Env ===
class FJSSPEnv(gym.Env):
    def __init__(self, data):
        super().__init__()
        self.jobs = data['jobs']
        self.num_jobs = len(self.jobs)
        all_machines = set(m for job in self.jobs for op in job for m in op[0])
        self.num_machines = max(all_machines) + 1
        self.state_dim = self.num_jobs * 4 + self.num_machines
        self.action_space = spaces.Discrete(self.num_jobs)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim,), dtype=np.float32)
        self.reset()

    def reset(self):
        self.done_jobs = [0] * self.num_jobs
        self.machines_time = [0] * self.num_machines
        self.current_time = 0
        return self._get_obs()

    def _get_obs(self):
        state = []
        for j in range(self.num_jobs):
            op_idx = self.done_jobs[j]
            if op_idx >= len(self.jobs[j]):
                state.extend([j, -1, -1, 0])
                continue
            m_list, p_list = self.jobs[j][op_idx]
            best_m, best_pt, min_ct = -1, -1, float('inf')
            for i, m in enumerate(m_list):
                ct = max(self.current_time, self.machines_time[m]) + p_list[i]
                if ct < min_ct:
                    best_m, best_pt, min_ct = m, p_list[i], ct
            state.extend([j, best_m, best_pt, len(self.jobs[j]) - op_idx])
        state.extend(self.machines_time)
        return np.array(state, dtype=np.float32)

    def step(self, action):
        j = int(action)
        if j < 0 or j >= self.num_jobs or self.done_jobs[j] >= len(self.jobs[j]):
            return self._get_obs(), -100.0, False, {"makespan": max(self.machines_time)}
        op = self.jobs[j][self.done_jobs[j]]
        best_m, best_pt, min_ct = -1, -1, float('inf')
        for i, m in enumerate(op[0]):
            ct = max(self.current_time, self.machines_time[m]) + op[1][i]
            if ct < min_ct:
                best_m, best_pt, min_ct = m, op[1][i], ct
        start = max(self.current_time, self.machines_time[best_m])
        finish = start + best_pt
        self.machines_time[best_m] = finish
        self.done_jobs[j] += 1
        self.current_time = min(self.machines_time)
        done = all(self.done_jobs[j] == len(self.jobs[j]) for j in range(self.num_jobs))
        reward = -1.0
        if done:
            reward += -max(self.machines_time)
        return self._get_obs(), reward, done, {"makespan": max(self.machines_time)}

# === STEP 4: Greedy Baseline ===
def evaluate_greedy(env):
    obs = env.reset()
    done = False
    while not done:
        options = []
        for j in range(env.num_jobs):
            if env.done_jobs[j] < len(env.jobs[j]):
                m_list, p_list = env.jobs[j][env.done_jobs[j]]
                for i, m in enumerate(m_list):
                    ct = max(env.current_time, env.machines_time[m]) + p_list[i]
                    options.append((ct, j))
        if not options:
            break
        _, best_job = min(options)
        obs, _, done, _ = env.step(best_job)
    return max(env.machines_time)

# === STEP 5: Process All Datasets ===
results = []
total_episodes = 10  # 🟢 FAST mode

print("\n--- 🧘 Training PPO on All Datasets ---\n")
for root, _, files_in_dir in os.walk(extract_path):
    for fname in sorted(files_in_dir):
        if not fname.endswith(".npy"):
            continue
        fpath = os.path.join(root, fname)
        print(f"\n📁 Dataset: {fname}")

        try:
            raw = np.load(fpath, allow_pickle=True)
            if raw.ndim != 4 or raw.shape[1] != 2:
                print("⚠️ Skipping: invalid shape")
                continue
            mm, pm = raw[0][0], raw[0][1]
            jobs = [([int(mm[j][o])], [float(pm[j][o])]) for j in range(mm.shape[0]) for o in range(mm.shape[1])]
            job_structure = [[([int(mm[j][o])], [float(pm[j][o])]) for o in range(mm.shape[1])] for j in range(mm.shape[0])]
            data = {'jobs': job_structure}
        except Exception as e:
            print(f"❌ Failed to load: {e}")
            continue

        greedy_env = FJSSPEnv(data)
        greedy_makespan = evaluate_greedy(greedy_env)
        print(f"⚙️  Initial Makespan (Greedy): {greedy_makespan}")

        env = DummyVecEnv([lambda: FJSSPEnv(data)])
        model = PPO("MlpPolicy", env, verbose=0, n_steps=128, batch_size=64)
        for ep in range(total_episodes):
            obs = env.reset()
            done, ep_reward, steps = False, 0, 0
            while not done and steps < 1000:
                action, _ = model.predict(obs, deterministic=False)
                obs, reward, done_vec, info = env.step(action)
                done = done_vec[0]
                ep_reward += reward[0]
                steps += 1
            mk = info[0].get("makespan", 0)
            print(f"🌀 Episode {ep+1:2d}/{total_episodes} | Makespan: {mk} | Reward: {ep_reward:.1f}")
            model.learn(total_timesteps=1000, reset_num_timesteps=False)

        eval_env = FJSSPEnv(data)
        obs = eval_env.reset()
        done, steps = False, 0
        t0 = time.time()
        while not done and steps < 1000:
            action, _ = model.predict(obs, deterministic=True)
            obs, _, done, info = eval_env.step(action)
            steps += 1
        t1 = time.time()
        ppo_makespan = info.get("makespan", 0)
        avg_eval_time = t1 - t0
        gap = 100.0 * (greedy_makespan - ppo_makespan) / greedy_makespan if greedy_makespan > 0 else 0
        print(f"✅ PPO Final Makespan: {ppo_makespan}")
        print(f"⏱ Avg Evaluation Time: {avg_eval_time:.4f} sec")
        print(f"📉 Gap vs Greedy: {gap:.2f}%")

        results.append({
            "dataset": fname,
            "greedy": greedy_makespan,
            "ppo": ppo_makespan,
            "time": avg_eval_time,
            "gap": gap
        })

# === STEP 6: Summary Table ===
print("\n\n=== 📊 Final Summary ===")
print(f"{'Dataset':25} {'Greedy':>8} {'PPO':>8} {'Gap (%)':>9} {'Time (s)':>10}")
print("-" * 60)
for r in results:
    print(f"{r['dataset'][:25]:25} {r['greedy']:8} {r['ppo']:8} {r['gap']:9.2f} {r['time']:10.4f}")

# === STEP 7: Export Summary to CSV ===
csv_path = "/content/fjsp_results_summary.csv"
with open(csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Dataset", "Greedy Makespan", "PPO Makespan", "Gap (%)", "Eval Time (s)"])
    for r in results:
        writer.writerow([r['dataset'], r['greedy'], r['ppo'], f"{r['gap']:.2f}", f"{r['time']:.4f}"])

print(f"\n📁 Results saved to: {csv_path}")
files.download(csv_path)

Saving SELECTED_24_DATASETS.zip to SELECTED_24_DATASETS (1).zip
✅ Extracted SELECTED_24_DATASETS (1).zip into /content/fjsp_dataset

--- 🧘 Training PPO on All Datasets ---


📁 Dataset: dmu20_15.npy
⚙️  Initial Makespan (Greedy): 73.0




🌀 Episode  1/10 | Makespan: 73.0 | Reward: -8973.0
🌀 Episode  2/10 | Makespan: 73.0 | Reward: -19273.0
🌀 Episode  3/10 | Makespan: 73.0 | Reward: -16473.0
🌀 Episode  4/10 | Makespan: 73.0 | Reward: -27073.0
🌀 Episode  5/10 | Makespan: 73.0 | Reward: -14473.0
🌀 Episode  6/10 | Makespan: 73.0 | Reward: -33573.0
🌀 Episode  7/10 | Makespan: 73.0 | Reward: -28273.0
🌀 Episode  8/10 | Makespan: 73.0 | Reward: -21473.0
🌀 Episode  9/10 | Makespan: 73.0 | Reward: -25373.0
🌀 Episode 10/10 | Makespan: 73.0 | Reward: -39773.0
✅ PPO Final Makespan: 28.0
⏱ Avg Evaluation Time: 0.7030 sec
📉 Gap vs Greedy: 61.64%

📁 Dataset: dmu20_20.npy
⚙️  Initial Makespan (Greedy): 76.0




🌀 Episode  1/10 | Makespan: 76.0 | Reward: -15676.0
🌀 Episode  2/10 | Makespan: 76.0 | Reward: -32076.0
🌀 Episode  3/10 | Makespan: 76.0 | Reward: -24276.0
🌀 Episode  4/10 | Makespan: 76.0 | Reward: -30476.0
🌀 Episode  5/10 | Makespan: 76.0 | Reward: -32776.0
🌀 Episode  6/10 | Makespan: 76.0 | Reward: -38576.0
🌀 Episode  7/10 | Makespan: 76.0 | Reward: -31876.0
🌀 Episode  8/10 | Makespan: 76.0 | Reward: -60796.0
🌀 Episode  9/10 | Makespan: 76.0 | Reward: -35576.0
🌀 Episode 10/10 | Makespan: 76.0 | Reward: -30376.0
✅ PPO Final Makespan: 22.0
⏱ Avg Evaluation Time: 0.5123 sec
📉 Gap vs Greedy: 71.05%

📁 Dataset: dmu30_15.npy
⚙️  Initial Makespan (Greedy): 74.0
🌀 Episode  1/10 | Makespan: 74.0 | Reward: -18924.0
🌀 Episode  2/10 | Makespan: 74.0 | Reward: -42524.0
🌀 Episode  3/10 | Makespan: 74.0 | Reward: -34824.0
🌀 Episode  4/10 | Makespan: 74.0 | Reward: -33124.0
🌀 Episode  5/10 | Makespan: 74.0 | Reward: -37724.0
🌀 Episode  6/10 | Makespan: 74.0 | Reward: -55648.0
🌀 Episode  7/10 | Make

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>