<a href="https://colab.research.google.com/github/ruth22soft/RL_model_compression/blob/main/Cartpole_model_compression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install gym torch numpy matplotlib




In [None]:
pip install gymnasium




In [None]:
import gymnasium as gym
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils.prune as prune
import random
import numpy as np
from collections import deque
import time
import sys
import os

# --- DQN Model ---
class DQN(nn.Module):
    """Two-hidden-layer MLP that maps an observation to one Q-value per action.

    Args:
        state_dim: Number of features in one observation vector.
        action_dim: Number of discrete actions (width of the output layer).
        hidden_dim: Width of both hidden layers. Defaults to 128, the value
            that was previously hard-coded, so existing calls are unaffected.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=128):
        super().__init__()
        # The attribute names fc1 / fc2 / out are part of the contract:
        # they determine the state_dict keys and may be addressed by name
        # from outside the class.
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        """Return raw Q-values; the last dimension has size ``action_dim``."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the output layer: Q-values are unbounded.
        return self.out(x)

# --- Replay Buffer ---
class ReplayBuffer:
    """Bounded FIFO store of (state, action, reward, next_state, done) tuples."""

    def __init__(self, capacity):
        # A deque with maxlen drops its oldest entry when a new one is
        # appended to a full buffer.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions and return them column-wise.

        States and next states come back as NumPy arrays; actions, rewards
        and done flags come back as plain tuples.
        """
        picked = random.sample(self.buffer, batch_size)
        # Transpose the list of transitions into five parallel columns.
        state_col, action_col, reward_col, next_state_col, done_col = zip(*picked)
        stacked_states = np.array(state_col)
        stacked_next_states = np.array(next_state_col)
        return stacked_states, action_col, reward_col, stacked_next_states, done_col

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

# --- Training ---
def train_dqn(env, episodes=300):
    """Train a DQN agent on ``env`` with epsilon-greedy exploration and replay.

    A single online network is trained (there is no separate target
    network). After every environment step, once the replay buffer holds
    more than one batch of transitions, a mini-batch is sampled and one Adam
    step is taken on the MSE between Q(s, a) and the one-step TD target.

    Args:
        env: Environment exposing ``observation_space.shape``,
            ``action_space.n`` / ``action_space.sample()``, ``reset()``
            returning ``(state, info)`` and ``step(action)`` returning
            ``(next_state, reward, terminated, truncated, info)``.
        episodes: Number of training episodes to run.

    Returns:
        Tuple ``(model, rewards_history)``: the trained network and the list
        of undiscounted episode returns, one entry per episode.
    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    model = DQN(state_dim, action_dim)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    buffer = ReplayBuffer(10000)
    gamma = 0.99
    batch_size = 64
    epsilon = 1.0
    epsilon_decay = 0.995
    epsilon_min = 0.01

    rewards_history = []

    for ep in range(episodes):
        state, _ = env.reset()
        done = False
        total_reward = 0

        while not done:
            # --- Epsilon-greedy ---
            if random.random() < epsilon:
                action = env.action_space.sample()
            else:
                with torch.no_grad():
                    # unsqueeze(0) adds the batch dimension the network expects.
                    obs = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
                    action = torch.argmax(model(obs)).item()

            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            # Store `terminated`, not `done`: a time-limit truncation ends the
            # episode loop but is not a terminal state, so the TD target must
            # still bootstrap from next_state in that case.
            buffer.push(state, action, reward, next_state, terminated)
            state = next_state
            total_reward += reward

            # --- Update DQN ---
            if len(buffer) > batch_size:
                states, actions, rewards, next_states, dones = buffer.sample(batch_size)
                states = torch.tensor(states, dtype=torch.float32)
                next_states = torch.tensor(next_states, dtype=torch.float32)
                # gather() requires int64 indices; make that explicit.
                actions = torch.tensor(actions, dtype=torch.int64)
                rewards = torch.tensor(rewards, dtype=torch.float32)
                dones = torch.tensor(dones, dtype=torch.float32)

                # squeeze(1) drops only the gathered column and never the
                # batch dimension.
                q_values = model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
                # The TD target is a fixed regression target: compute it
                # without autograd so the loss does not back-propagate
                # through the bootstrap term.
                with torch.no_grad():
                    next_q_values = model(next_states).max(1)[0]
                    target = rewards + gamma * next_q_values * (1 - dones)

                loss = criterion(q_values, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        rewards_history.append(total_reward)
        # Exploration rate decays once per episode, floored at epsilon_min.
        epsilon = max(epsilon * epsilon_decay, epsilon_min)

        if (ep+1) % 20 == 0:
            print(f"Episode {ep+1}, Reward: {total_reward}, Epsilon: {epsilon:.2f}")

    return model, rewards_history

# --- Apply Pruning ---
def apply_pruning(model, amount=0.3):
    """Apply L1 unstructured pruning to every ``nn.Linear`` layer, in place.

    Each linear layer's ``weight`` is pruned with ``prune.l1_unstructured``
    and the pruning re-parametrisation is then removed so the layer is left
    with an ordinary ``weight`` parameter.

    Args:
        model: Module whose ``nn.Linear`` sub-modules are pruned. It is
            modified in place.
        amount: Pruning amount forwarded to ``prune.l1_unstructured``.

    Returns:
        The same ``model`` object, so the call can be used in an expression.
    """
    for layer in model.modules():
        if isinstance(layer, nn.Linear):
            prune.l1_unstructured(layer, name="weight", amount=amount)
            prune.remove(layer, "weight")  # make pruning permanent
    return model

# --- Evaluate Model ---
def evaluate_model(env, model, episodes=50):
    """Roll out ``model`` greedily on ``env`` and report reward, latency and size.

    The model is moved to CPU and switched to eval mode (both changes persist
    on the caller's object). Each action is the argmax of the model output.

    Args:
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns
            ``(next_state, reward, terminated, truncated, info)``.
        model: Network mapping a batched float32 observation to action scores.
        episodes: Number of evaluation episodes.

    Returns:
        Tuple ``(avg_reward, avg_inference_time, model_size)``: mean episode
        return, mean seconds per action selection (tensor conversion +
        forward pass + argmax), and the size of the serialised state_dict
        in units of 1024 * 1024 bytes.
    """
    device = torch.device("cpu")
    model.to(device)
    model.eval()

    total_rewards = []
    inference_times = []

    for _ in range(episodes):
        state, _ = env.reset()
        done = False
        ep_reward = 0

        while not done:
            # perf_counter is a monotonic, high-resolution clock; time.time()
            # can be too coarse for sub-millisecond measurements.
            start = time.perf_counter()
            with torch.no_grad():
                # unsqueeze(0) adds the batch dimension.
                obs = torch.tensor(state, dtype=torch.float32).to(device).unsqueeze(0)
                action = torch.argmax(model(obs)).item()
            inference_times.append(time.perf_counter() - start)

            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            state = next_state
            ep_reward += reward

        total_rewards.append(ep_reward)

    avg_reward = np.mean(total_rewards)
    avg_inference_time = np.mean(inference_times)

    # Model size: serialise the state_dict to a scratch file and stat it.
    # The finally clause guarantees the scratch file is removed even if
    # saving or stat-ing raises.
    model_file = "temp_model.pth"
    try:
        torch.save(model.state_dict(), model_file)
        model_size = os.path.getsize(model_file) / (1024 * 1024)
    finally:
        if os.path.exists(model_file):
            os.remove(model_file)

    return avg_reward, avg_inference_time, model_size

# --- Main ---
# End-to-end CartPole experiment: train a baseline DQN, measure it, then
# prune and dynamically quantize it and measure again.
# NOTE: statement order matters here — the baseline is evaluated BEFORE
# apply_pruning mutates it in place.
env = gym.make("CartPole-v1")

# 1️⃣ Train baseline model
baseline_model, rewards = train_dqn(env)
baseline_avg_reward, baseline_inference_time, baseline_size_mb = evaluate_model(env, baseline_model)
# Inference time is returned in seconds; * 1000 converts it to milliseconds.
print(f"\n--- Baseline ---\nAverage Reward: {baseline_avg_reward:.2f}\nInference Time per Step: {baseline_inference_time*1000:.4f} ms\nModel Size: {baseline_size_mb:.4f} MB")

# 2️⃣ Apply pruning
# apply_pruning works in place: from this line on, baseline_model holds the
# pruned weights and the unpruned baseline is no longer available.
apply_pruning(baseline_model, amount=0.3)

# 3️⃣ Apply dynamic quantization
# Only nn.Linear layers are targeted, with int8 (qint8) weights.
quantized_model = torch.quantization.quantize_dynamic(
    baseline_model, {nn.Linear}, dtype=torch.qint8
)

# 4️⃣ Evaluate quantized + pruned model
quant_avg_reward, quant_inference_time, quant_size_mb = evaluate_model(env, quantized_model)
print(f"\n--- Pruned + Quantized ---\nAverage Reward: {quant_avg_reward:.2f}\nInference Time per Step: {quant_inference_time*1000:.4f} ms\nModel Size: {quant_size_mb:.4f} MB")

Episode 20, Reward: 17.0, Epsilon: 0.90
Episode 40, Reward: 56.0, Epsilon: 0.82
Episode 60, Reward: 45.0, Epsilon: 0.74
Episode 80, Reward: 20.0, Epsilon: 0.67
Episode 100, Reward: 78.0, Epsilon: 0.61
Episode 120, Reward: 63.0, Epsilon: 0.55
Episode 140, Reward: 84.0, Epsilon: 0.50
Episode 160, Reward: 47.0, Epsilon: 0.45
Episode 180, Reward: 251.0, Epsilon: 0.41
Episode 200, Reward: 81.0, Epsilon: 0.37
Episode 220, Reward: 81.0, Epsilon: 0.33
Episode 240, Reward: 91.0, Epsilon: 0.30
Episode 260, Reward: 129.0, Epsilon: 0.27
Episode 280, Reward: 70.0, Epsilon: 0.25
Episode 300, Reward: 310.0, Epsilon: 0.22

--- Baseline ---
Average Reward: 382.82
Inference Time per Step: 0.1290 ms
Model Size: 0.0693 MB


For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  quantized_model = torch.quantization.quantize_dynamic(
For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantizati


--- Pruned + Quantized ---
Average Reward: 18.96
Inference Time per Step: 0.2937 ms
Model Size: 0.0221 MB


In [None]:
# Install necessary dependencies for Box2D environments
!pip install swig
!pip install "gymnasium[box2d]"

import gymnasium as gym
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
from collections import deque
import time
import os

# --- DQN Model ---
class DQN(nn.Module):
    """Two-hidden-layer MLP that maps an observation to one Q-value per action.

    Args:
        state_dim: Number of features in one observation vector.
        action_dim: Number of discrete actions (width of the output layer).
        hidden_dim: Width of both hidden layers. Defaults to 256, the value
            that was previously hard-coded, so existing calls are unaffected.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=256):
        super().__init__()
        # The attribute names fc1 / fc2 / out determine the state_dict keys,
        # so they are kept unchanged.
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        """Return raw Q-values; the last dimension has size ``action_dim``."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the output layer: Q-values are unbounded.
        return self.out(x)

# --- Replay Buffer ---
class ReplayBuffer:
    """Fixed-capacity experience store; the oldest transitions are evicted first."""

    def __init__(self, capacity):
        # maxlen makes the deque discard from the left once it is full.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        """Record a single (state, action, reward, next_state, done) transition."""
        entry = (state, action, reward, next_state, done)
        self.buffer.append(entry)

    def sample(self, batch_size):
        """Return ``batch_size`` randomly chosen transitions, split by field.

        The two state fields are stacked into NumPy arrays; the action,
        reward and done fields are returned as tuples.
        """
        chosen = random.sample(self.buffer, batch_size)
        # zip(*rows) turns a list of 5-tuples into five per-field tuples.
        obs, acts, rews, next_obs, flags = zip(*chosen)
        return (
            np.array(obs),
            acts,
            rews,
            np.array(next_obs),
            flags,
        )

    def __len__(self):
        """Current number of stored transitions."""
        return len(self.buffer)

# --- Training Function ---
def train_dqn(env, episodes=300):
    """Train a DQN agent on ``env`` with epsilon-greedy exploration and replay.

    A single online network is trained (there is no separate target
    network). After every environment step, once the replay buffer holds
    more than one batch of transitions, a mini-batch is sampled and one Adam
    step is taken on the MSE between Q(s, a) and the one-step TD target.

    Args:
        env: Environment exposing ``observation_space.shape``,
            ``action_space.n`` / ``action_space.sample()``, ``reset()``
            returning ``(state, info)`` and ``step(action)`` returning
            ``(next_state, reward, terminated, truncated, info)``.
        episodes: Number of training episodes to run.

    Returns:
        Tuple ``(model, rewards_history)``: the trained network and the list
        of undiscounted episode returns, one entry per episode.
    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    model = DQN(state_dim, action_dim)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    buffer = ReplayBuffer(10000)
    gamma = 0.99
    batch_size = 64
    epsilon = 1.0
    epsilon_decay = 0.995
    epsilon_min = 0.01

    rewards_history = []

    for ep in range(episodes):
        state, _ = env.reset()
        done = False
        total_reward = 0

        while not done:
            # Epsilon-greedy action selection.
            if random.random() < epsilon:
                action = env.action_space.sample()
            else:
                with torch.no_grad():
                    # unsqueeze(0) adds an explicit batch dimension.
                    obs = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
                    action = torch.argmax(model(obs)).item()

            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            # Store `terminated`, not `done`: a truncated episode stops the
            # loop but is not a terminal state, so the TD target must still
            # bootstrap from next_state in that case.
            buffer.push(state, action, reward, next_state, terminated)
            state = next_state
            total_reward += reward

            if len(buffer) > batch_size:
                states, actions, rewards, next_states, dones = buffer.sample(batch_size)
                states = torch.tensor(states, dtype=torch.float32)
                next_states = torch.tensor(next_states, dtype=torch.float32)
                # gather() requires int64 indices; make that explicit.
                actions = torch.tensor(actions, dtype=torch.int64)
                rewards = torch.tensor(rewards, dtype=torch.float32)
                dones = torch.tensor(dones, dtype=torch.float32)

                # squeeze(1) drops only the gathered column and never the
                # batch dimension.
                q_values = model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
                # The TD target is a fixed regression target: compute it
                # without autograd so the loss does not back-propagate
                # through the bootstrap term.
                with torch.no_grad():
                    next_q_values = model(next_states).max(1)[0]
                    target = rewards + gamma * next_q_values * (1 - dones)

                loss = criterion(q_values, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        rewards_history.append(total_reward)
        # Exploration rate decays once per episode, floored at epsilon_min.
        epsilon = max(epsilon * epsilon_decay, epsilon_min)
        if (ep+1) % 20 == 0:
            print(f"Episode {ep+1}, Reward: {total_reward}, Epsilon: {epsilon:.2f}")

    return model, rewards_history

# --- Evaluation: model size, inference time, average reward ---
def evaluate_model(env, model, episodes=50):
    """Roll out ``model`` greedily on ``env`` and report reward, latency and size.

    The model is moved to CPU and switched to eval mode (both changes persist
    on the caller's object). Each action is the argmax of the model output.

    Args:
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns
            ``(next_state, reward, terminated, truncated, info)``.
        model: Network mapping a batched float32 observation to action scores.
        episodes: Number of evaluation episodes.

    Returns:
        Tuple ``(avg_reward, avg_inference_time, model_size)``: mean episode
        return, mean seconds per action selection (tensor conversion +
        forward pass + argmax), and the size of the serialised state_dict
        in units of 1024 * 1024 bytes.
    """
    device = torch.device("cpu")
    model.to(device)
    model.eval()

    total_rewards = []
    inference_times = []

    for ep in range(episodes):
        state, _ = env.reset()
        done = False
        ep_reward = 0

        while not done:
            # perf_counter is a monotonic, high-resolution clock; time.time()
            # can be too coarse for sub-millisecond measurements.
            start = time.perf_counter()
            with torch.no_grad():
                # unsqueeze(0) adds an explicit batch dimension.
                obs = torch.tensor(state, dtype=torch.float32).to(device).unsqueeze(0)
                action = torch.argmax(model(obs)).item()
            inference_times.append(time.perf_counter() - start)

            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            state = next_state
            ep_reward += reward

        total_rewards.append(ep_reward)

    avg_reward = np.mean(total_rewards)
    avg_inference_time = np.mean(inference_times)

    # Model size: serialise the state_dict to a scratch file and stat it.
    # The finally clause guarantees the scratch file is removed even if
    # saving or stat-ing raises.
    model_file = "temp_model.pth"
    try:
        torch.save(model.state_dict(), model_file)
        model_size = os.path.getsize(model_file) / (1024 * 1024)
    finally:
        if os.path.exists(model_file):
            os.remove(model_file)

    return avg_reward, avg_inference_time, model_size

# --- Main ---
# Baseline LunarLander experiment: train an uncompressed DQN and report its
# average reward, per-step inference latency and serialised size.
if __name__ == "__main__":
    env = gym.make("LunarLander-v3")
    model, rewards_history = train_dqn(env, episodes=300)

    # The same environment instance is reused for evaluation.
    avg_reward, avg_inference_time, model_size = evaluate_model(env, model, episodes=50)
    print(f"\n--- Baseline Evaluation ---")
    print(f"Average Reward: {avg_reward:.2f}")
    # Inference time is returned in seconds; * 1000 converts it to milliseconds.
    print(f"Average Inference Time per Step: {avg_inference_time*1000:.4f} ms")
    print(f"Model Size: {model_size:.4f} MB")

Collecting swig
  Using cached swig-4.4.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (3.5 kB)
Using cached swig-4.4.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.9 MB)
Installing collected packages: swig
Successfully installed swig-4.4.0
Collecting box2d-py==2.3.5 (from gymnasium[box2d])
  Using cached box2d-py-2.3.5.tar.gz (374 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: box2d-py
  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
  Created wheel for box2d-py: filename=box2d_py-2.3.5-cp312-cp312-linux_x86_64.whl size=2399003 sha256=59510676bd7f82661f818d983dddc36f8257ccec41185c697b72c9fcc4e6e1ee
  Stored in directory: /root/.cache/pip/wheels/2a/e9/60/774da0bcd07f7dc7761a8590fa2d065e4069568e78dcdc3318
Successfully built box2d-py
Installing collected packages: box2d-py
Successfully installed box2d-py-2.3.5


  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
  return datetime.utcnow().replace(tzinfo=utc)


Episode 20, Reward: -146.82355593685656, Epsilon: 0.90
Episode 40, Reward: -90.54508252688015, Epsilon: 0.82
Episode 60, Reward: -26.813587929539338, Epsilon: 0.74
Episode 80, Reward: -3.8993270585102664, Epsilon: 0.67
Episode 100, Reward: -53.76979022028717, Epsilon: 0.61
Episode 120, Reward: -90.24706358136815, Epsilon: 0.55
Episode 140, Reward: -20.02237741367246, Epsilon: 0.50
Episode 160, Reward: -16.89932916219827, Epsilon: 0.45
Episode 180, Reward: -58.62099605737181, Epsilon: 0.41
Episode 200, Reward: -127.75485764092473, Epsilon: 0.37
Episode 220, Reward: 12.53084152829546, Epsilon: 0.33
Episode 240, Reward: 33.27659252822602, Epsilon: 0.30
Episode 260, Reward: -39.94565030067594, Epsilon: 0.27
Episode 280, Reward: -115.63799953769578, Epsilon: 0.25
Episode 300, Reward: -69.78464827939963, Epsilon: 0.22

--- Baseline Evaluation ---
Average Reward: -52.75
Average Inference Time per Step: 0.1354 ms
Model Size: 0.2666 MB


In [None]:
import gymnasium as gym
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
from collections import deque
import time
import os
import torch.nn.utils.prune as prune
import copy

# --- DQN Model ---
class DQN(nn.Module):
    """Two-hidden-layer MLP that maps an observation to one Q-value per action.

    Args:
        state_dim: Number of features in one observation vector.
        action_dim: Number of discrete actions (width of the output layer).
        hidden_dim: Width of both hidden layers. Defaults to 256, the value
            that was previously hard-coded, so existing calls are unaffected.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=256):
        super().__init__()
        # The attribute names fc1 / fc2 / out determine the state_dict keys,
        # so they are kept unchanged.
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        """Return raw Q-values; the last dimension has size ``action_dim``."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the output layer: Q-values are unbounded.
        return self.out(x)

# --- Replay Buffer ---
class ReplayBuffer:
    """Sliding-window memory of the most recent environment transitions."""

    def __init__(self, capacity):
        # Once `capacity` entries are held, each append drops the oldest one.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        """Store one transition as a 5-tuple."""
        record = (state, action, reward, next_state, done)
        self.buffer.append(record)

    def sample(self, batch_size):
        """Pick ``batch_size`` transitions at random and regroup them by field.

        Returns the states and next states as NumPy arrays and the actions,
        rewards and done flags as tuples, in that positional order.
        """
        minibatch = random.sample(self.buffer, batch_size)
        # Unzip the rows into one tuple per field.
        fields = zip(*minibatch)
        cur_states, acts, rews, nxt_states, done_flags = fields
        cur_array = np.array(cur_states)
        nxt_array = np.array(nxt_states)
        return cur_array, acts, rews, nxt_array, done_flags

    def __len__(self):
        """How many transitions are stored right now."""
        return len(self.buffer)

# --- Training Function ---
def train_dqn(env, episodes=300):
    """Train a DQN agent on ``env`` with epsilon-greedy exploration and replay.

    A single online network is trained (there is no separate target
    network). After every environment step, once the replay buffer holds
    more than one batch of transitions, a mini-batch is sampled and one Adam
    step is taken on the MSE between Q(s, a) and the one-step TD target.

    Args:
        env: Environment exposing ``observation_space.shape``,
            ``action_space.n`` / ``action_space.sample()``, ``reset()``
            returning ``(state, info)`` and ``step(action)`` returning
            ``(next_state, reward, terminated, truncated, info)``.
        episodes: Number of training episodes to run.

    Returns:
        Tuple ``(model, rewards_history)``: the trained network and the list
        of undiscounted episode returns, one entry per episode.
    """
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    model = DQN(state_dim, action_dim)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    buffer = ReplayBuffer(10000)
    gamma = 0.99
    batch_size = 64
    epsilon = 1.0
    epsilon_decay = 0.995
    epsilon_min = 0.01

    rewards_history = []

    for ep in range(episodes):
        state, _ = env.reset()
        done = False
        total_reward = 0

        while not done:
            # Epsilon-greedy action selection.
            if random.random() < epsilon:
                action = env.action_space.sample()
            else:
                with torch.no_grad():
                    # unsqueeze(0) adds the batch dimension.
                    obs = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
                    action = torch.argmax(model(obs)).item()

            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            # Store `terminated`, not `done`: a truncated episode stops the
            # loop but is not a terminal state, so the TD target must still
            # bootstrap from next_state in that case.
            buffer.push(state, action, reward, next_state, terminated)
            state = next_state
            total_reward += reward

            if len(buffer) > batch_size:
                states, actions, rewards, next_states, dones = buffer.sample(batch_size)
                states = torch.tensor(states, dtype=torch.float32)
                next_states = torch.tensor(next_states, dtype=torch.float32)
                # gather() requires int64 indices; make that explicit.
                actions = torch.tensor(actions, dtype=torch.int64)
                rewards = torch.tensor(rewards, dtype=torch.float32)
                dones = torch.tensor(dones, dtype=torch.float32)

                # squeeze(1) drops only the gathered column and never the
                # batch dimension.
                q_values = model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
                # The TD target is a fixed regression target: compute it
                # without autograd so the loss does not back-propagate
                # through the bootstrap term.
                with torch.no_grad():
                    next_q_values = model(next_states).max(1)[0]
                    target = rewards + gamma * next_q_values * (1 - dones)

                loss = criterion(q_values, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        rewards_history.append(total_reward)
        # Exploration rate decays once per episode, floored at epsilon_min.
        epsilon = max(epsilon * epsilon_decay, epsilon_min)
        if (ep+1) % 20 == 0:
            print(f"Episode {ep+1}, Reward: {total_reward:.2f}, Epsilon: {epsilon:.2f}")

    return model, rewards_history

# --- Evaluation: model size, inference time, average reward, and accuracy ---
def evaluate_model(env, model, episodes=50, success_threshold=200):
    """Roll out ``model`` greedily and report reward, latency, size and success rate.

    The model is moved to CPU and switched to eval mode (both changes persist
    on the caller's object). Each action is the argmax of the model output.

    Args:
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns
            ``(next_state, reward, terminated, truncated, info)``.
        model: Network mapping a batched float32 observation to action scores.
        episodes: Number of evaluation episodes.
        success_threshold: An episode counts as successful when its total
            reward is greater than or equal to this value.

    Returns:
        Tuple ``(avg_reward, avg_inference_time, model_size, accuracy)``:
        mean episode return, mean seconds per action selection (tensor
        conversion + forward pass + argmax), size of the serialised
        state_dict in units of 1024 * 1024 bytes, and the percentage of
        successful episodes (0 when ``episodes`` is 0).
    """
    device = torch.device("cpu")
    model.to(device)
    model.eval()

    total_rewards = []
    inference_times = []
    successful_episodes = 0

    for ep in range(episodes):
        state, _ = env.reset()
        done = False
        ep_reward = 0

        while not done:
            # perf_counter is a monotonic, high-resolution clock; time.time()
            # can be too coarse for sub-millisecond measurements.
            start = time.perf_counter()
            with torch.no_grad():
                # unsqueeze(0) adds the batch dimension.
                obs = torch.tensor(state, dtype=torch.float32).to(device).unsqueeze(0)
                action = torch.argmax(model(obs)).item()
            inference_times.append(time.perf_counter() - start)

            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            state = next_state
            ep_reward += reward

        total_rewards.append(ep_reward)
        if ep_reward >= success_threshold:
            successful_episodes += 1

    avg_reward = np.mean(total_rewards)
    avg_inference_time = np.mean(inference_times)
    # "Accuracy" here is the share of episodes that reached the threshold.
    accuracy = (successful_episodes / episodes) * 100 if episodes > 0 else 0

    # Model size: serialise the state_dict to a scratch file and stat it.
    # The finally clause guarantees the scratch file is removed even if
    # saving or stat-ing raises.
    model_file = "temp_model.pth"
    try:
        torch.save(model.state_dict(), model_file)
        model_size = os.path.getsize(model_file) / (1024 * 1024)
    finally:
        if os.path.exists(model_file):
            os.remove(model_file)

    return avg_reward, avg_inference_time, model_size, accuracy

# --- Compression Techniques ---
def apply_pruning(model, amount=0.3):
    """
    Apply L1 unstructured pruning to the weight of every nn.Linear layer.

    The model is modified in place and also returned. After each layer is
    pruned, the pruning re-parametrisation is removed so the layer is left
    with an ordinary 'weight' parameter.
    """
    for layer in model.modules():
        if not isinstance(layer, nn.Linear):
            continue
        prune.l1_unstructured(layer, name='weight', amount=amount)
        # Make pruning permanent.
        prune.remove(layer, 'weight')
    return model

def apply_quantization(model):
    """
    Return a dynamically quantized version of the model (qint8 nn.Linear layers).

    The input model is switched to eval mode first; that change persists on
    the caller's object.
    """
    # Quantization is applied to a model in inference mode.
    model.eval()
    layer_types = {nn.Linear}
    return torch.quantization.quantize_dynamic(model, layer_types, dtype=torch.qint8)

# --- Main ---
# Compression comparison on LunarLander: one base model is trained, then
# three variants (pruned, quantized, pruned + quantized) are each derived
# from a fresh deep copy of it and evaluated with the same settings.
if __name__ == "__main__":
    env = gym.make("LunarLander-v3")

    print("\n--- Training Base Model ---")
    base_model, rewards_history = train_dqn(env, episodes=300)

    print("\n--- Evaluating Base Model ---")
    # evaluate_model is called with its default success_threshold (200).
    base_reward, base_time, base_size, base_accuracy = evaluate_model(env, base_model, episodes=50)
    # Inference times are returned in seconds; * 1000 converts to milliseconds.
    print(f"Base Model -> Reward: {base_reward:.2f}, Accuracy: {base_accuracy:.2f}%, Inference: {base_time*1000:.4f} ms, Size: {base_size:.4f} MB")

    # --- Apply Pruning ---
    print("\n--- Applying Pruning (30%) ---")
    # apply_pruning mutates its argument, so work on a deep copy to keep
    # base_model intact for the variants below.
    pruning_model = copy.deepcopy(base_model)
    pruned_model = apply_pruning(pruning_model, amount=0.3)
    pruned_reward, pruned_time, pruned_size, pruned_accuracy = evaluate_model(env, pruned_model, episodes=50)
    print(f"Pruned Model -> Reward: {pruned_reward:.2f}, Accuracy: {pruned_accuracy:.2f}%, Inference: {pruned_time*1000:.4f} ms, Size: {pruned_size:.4f} MB")

    # --- Apply Quantization ---
    print("\n--- Applying Quantization ---")
    # Quantize a separate deep copy of the unpruned base model.
    quantization_model = copy.deepcopy(base_model)
    quantized_model = apply_quantization(quantization_model)
    quant_reward, quant_time, quant_size, quant_accuracy = evaluate_model(env, quantized_model, episodes=50)
    print(f"Quantized Model -> Reward: {quant_reward:.2f}, Accuracy: {quant_accuracy:.2f}%, Inference: {quant_time*1000:.4f} ms, Size: {quant_size:.4f} MB")

    # --- Apply Pruning + Quantization ---
    print("\n--- Applying Pruning + Quantization ---")
    # Order matters: deep-copy the base model, prune the copy, then quantize
    # the pruned copy.
    pruned_quant_base = copy.deepcopy(base_model)
    pruned_then_quant_model = apply_pruning(pruned_quant_base, amount=0.3)
    pruned_then_quant_model = apply_quantization(pruned_then_quant_model)
    pq_reward, pq_time, pq_size, pq_accuracy = evaluate_model(env, pruned_then_quant_model, episodes=50)
    print(f"Pruned+Quantized Model -> Reward: {pq_reward:.2f}, Accuracy: {pq_accuracy:.2f}%, Inference: {pq_time*1000:.4f} ms, Size: {pq_size:.4f} MB")


--- Training Base Model ---
Episode 20, Reward: -121.42, Epsilon: 0.90
Episode 40, Reward: -175.49, Epsilon: 0.82
Episode 60, Reward: -98.90, Epsilon: 0.74
Episode 80, Reward: -80.10, Epsilon: 0.67
Episode 100, Reward: -96.96, Epsilon: 0.61
Episode 120, Reward: -55.43, Epsilon: 0.55
Episode 140, Reward: -45.16, Epsilon: 0.50
Episode 160, Reward: -72.01, Epsilon: 0.45
Episode 180, Reward: -42.45, Epsilon: 0.41
Episode 200, Reward: -25.54, Epsilon: 0.37
Episode 220, Reward: 33.45, Epsilon: 0.33
Episode 240, Reward: 1.91, Epsilon: 0.30
Episode 260, Reward: 170.40, Epsilon: 0.27
Episode 280, Reward: -7.42, Epsilon: 0.25
Episode 300, Reward: -7.36, Epsilon: 0.22

--- Evaluating Base Model ---
Base Model -> Reward: 100.33, Accuracy: 42.00%, Inference: 0.1491 ms, Size: 0.2666 MB

--- Applying Pruning (30%) ---
Pruned Model -> Reward: 67.94, Accuracy: 42.00%, Inference: 0.1287 ms, Size: 0.2666 MB

--- Applying Quantization ---


For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  model_quantized = torch.quantization.quantize_dynamic(
For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantizati

Quantized Model -> Reward: 109.70, Accuracy: 50.00%, Inference: 0.3304 ms, Size: 0.0721 MB

--- Applying Pruning + Quantization ---
Pruned+Quantized Model -> Reward: 25.41, Accuracy: 28.00%, Inference: 0.3108 ms, Size: 0.0721 MB
