In [1]:
import os, random, math, time, warnings
warnings.filterwarnings("ignore")
import numpy as np
import torch, torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import transforms
import gymnasium as gym
from gymnasium import spaces
import pennylane as qml
from pennylane import numpy as pnp
from stable_baselines3 import PPO
from medmnist import ChestMNIST

In [2]:
# One seed shared by every random number generator the notebook relies on
# (Python's `random`, NumPy's legacy global RNG and PyTorch).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [3]:
# Data pipeline settings.
NUM_CLASSES = 3   # number of classes kept for classification
IMG_SIZE = 32     # images are resized to IMG_SIZE x IMG_SIZE pixels
BATCH_SZ = 64     # samples per mini-batch in the data loaders

In [4]:
# Preprocessing applied to every image: resize to a square of side IMG_SIZE,
# then convert to a tensor.
transform = transforms.Compose(
    transforms=[
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
    ]
)

In [5]:
# Build the train and test splits (in that order) with identical options:
# download if missing, load as RGB, and apply the shared preprocessing.
train, test = (
    ChestMNIST(split=split_name, download=True, as_rgb=True, transform=transform)
    for split_name in ("train", "test")
)

In [6]:
# Only 3 classes are classified for now.
# The labels are read straight from each dataset's `labels` array instead of
# enumerating the dataset itself: indexing the dataset would load, resize and
# tensor-convert every image only to inspect its label.
# NOTE(review): ChestMNIST is documented as a 14-way multi-label dataset with
# 0/1 entries; if so, column 0 is always < NUM_CLASSES and this filter keeps
# every sample — confirm which label the 3-class task is meant to use.
train_idx = [i for i, y in enumerate(train.labels) if y[0] < NUM_CLASSES]
test_idx  = [i for i, y in enumerate(test.labels)  if y[0] < NUM_CLASSES]

In [7]:
# Mini-batch loaders over the filtered subsets; only the training data is shuffled.
train_loader = DataLoader(
    dataset=Subset(dataset=train, indices=train_idx),
    batch_size=BATCH_SZ,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=Subset(dataset=test, indices=test_idx),
    batch_size=BATCH_SZ,
    shuffle=False,
)

In [8]:
# ---------- 3. RL / PQC hyper-parameters ----------
N_QUBITS = 4
# Maximum number of gates the agent may add to the circuit.
MAX_DEPTH = 8
# Gates available to the agent.
ACTION_SET = ['rx', 'ry', 'rz', 'cnot']
# One discrete action per (gate, wire) combination.
N_ACTIONS = N_QUBITS * len(ACTION_SET)

# Simulator device on which the generated circuits are executed.
dev = qml.device("default.qubit", wires=N_QUBITS)

In [None]:
class CircuitBuilderEnv(gym.Env):
    """
    Gymnasium environment in which an agent assembles a circuit gate by gate.

    State: float32 vector of size MAX_DEPTH*2 holding one
        [gate_id_norm, wire_norm] pair per circuit slot. Filled slots use
        values in (0, 1]; an empty slot is [0, 0].
    Action: int 0..N_ACTIONS-1 -> gate_id, wire_id
    Reward: (0..1) on a mini-batch using the hybrid model

    NOTE(review): `step` (action decoding and reward) and `_build_model`
    are not defined in this class, although code elsewhere in the notebook
    relies on them. Until they are implemented the environment cannot be
    trained with PPO; the Action/Reward lines above describe the intended
    design, not existing behaviour.
    """
    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(N_ACTIONS)
        self.observation_space = spaces.Box(low=0, high=1, shape=(MAX_DEPTH*2,), dtype=np.float32)
        # Start from an empty circuit so the helpers below work even before
        # the first reset() call.
        self.circuit = []
        self.val_iter = iter(train_loader)

    def reset(self, *, seed=None, options=None):
        """Clear the circuit and return ``(observation, info)`` (Gymnasium API)."""
        super().reset(seed=seed)
        self.circuit = []
        return self._encode_state(), {}

    #--- building circuit --
    def _add_gate(self, gate, wire):
        """Append ``(gate, wire)`` to the circuit.

        Raises:
            ValueError: if the circuit already holds MAX_DEPTH gates; one more
                would not fit in the fixed-size state vector or weight table.
        """
        if len(self.circuit) >= MAX_DEPTH:
            raise ValueError(f"circuit already holds MAX_DEPTH={MAX_DEPTH} gates")
        self.circuit.append((gate, wire))

    def _encode_state(self):
        """Return the current circuit as a float32 vector of length MAX_DEPTH*2.

        Slot ``i`` occupies positions ``2*i`` (gate) and ``2*i + 1`` (wire).
        Ids are shifted by one before normalising so that the first gate
        ('rx') and wire 0 do not encode to 0, which is reserved for empty
        slots.
        """
        vec = np.zeros((MAX_DEPTH*2,), dtype=np.float32)
        for i, (g, w) in enumerate(self.circuit):
            g_id = ACTION_SET.index(g)
            vec[i*2]   = (g_id + 1) / len(ACTION_SET)
            vec[i*2+1] = (w + 1) / N_QUBITS
        return vec

    #--QNode template (recompile each time reset) --
    def _build_qnode(self):
        """Return a torch-interface QNode that runs the current circuit on ``dev``.

        The QNode takes ``inputs`` (one rotation angle per qubit on the last
        axis) and ``weights`` (MAX_DEPTH * N_QUBITS trainable angles), and
        returns the PauliZ expectation value of every qubit.
        """
        def pqc(inputs, weights):
            # One trainable angle per (circuit slot, wire) pair.
            q_weights = weights.reshape(MAX_DEPTH, N_QUBITS)
            for q in range(N_QUBITS):
                # Sample embedding. Indexing the last axis handles a single
                # 1-D sample and also tolerates a leading batch dimension.
                qml.RY(inputs[..., q], wires=q)

            # gates generated by agent
            for idx, (g, w) in enumerate(self.circuit):
                if g == 'rx':
                    qml.RX(q_weights[idx, w], wires=w)
                elif g == 'ry':
                    qml.RY(q_weights[idx, w], wires=w)
                elif g == 'rz':
                    qml.RZ(q_weights[idx, w], wires=w)
                elif g == 'cnot':
                    # Target is the next wire, wrapping around to wire 0.
                    qml.CNOT(wires=[w, (w+1)%N_QUBITS])
            return [qml.expval(qml.PauliZ(w)) for w in range(N_QUBITS)]
        return qml.QNode(pqc, dev, interface="torch")
   

In [None]:
#-- Train RL agent --
def train_agent():
    """Train a PPO policy on a fresh ``CircuitBuilderEnv`` and save it.

    The agent uses the "MlpPolicy" network, learns for 10000 timesteps and is
    written to disk under the name "rl_agent".

    Returns:
        The trained PPO model.
    """
    ppo_settings = dict(
        learning_rate=3e-4,
        n_steps=512,
        batch_size=64,
        gamma=0.95,
        verbose=1,
        seed=SEED,
    )
    model_rl = PPO("MlpPolicy", CircuitBuilderEnv(), **ppo_settings)

    print("Training RL agent...")
    model_rl.learn(total_timesteps=10000)
    model_rl.save("rl_agent")
    print("RL agent trained!")
    return model_rl

#-- Train the final model on the circuit chosen by the RL agent --
def train_final_agent():
    """Train the RL agent, rebuild its circuit and fit the final classifier.

    Steps:
      1. ``train_agent()`` returns the trained PPO model (not an environment).
      2. The policy is replayed greedily on a fresh ``CircuitBuilderEnv`` for
         at most MAX_DEPTH steps, so that the environment holds the circuit
         the agent selects.
      3. The model returned by that environment's ``_build_model()`` is
         trained with cross-entropy for EPOCHS_SUP epochs, and test accuracy
         is printed after each epoch.

    NOTE(review): this relies on ``CircuitBuilderEnv`` providing ``reset``,
    ``step`` and ``_build_model``; confirm they are implemented.

    Returns:
        The trained final model.
    """
    def _class_index(labels):
        # Use the first label column as the class index, matching the `y[0]`
        # criterion used when the dataset indices were selected. Unlike
        # `squeeze()`, this keeps the batch dimension for a batch of one and
        # works for label tensors with more than one column.
        return labels.reshape(labels.size(0), -1)[:, 0].long()

    model_rl = train_agent()

    # Roll the learned policy out deterministically to materialise its circuit.
    best_env = CircuitBuilderEnv()
    obs, _info = best_env.reset(seed=SEED)
    for _step in range(MAX_DEPTH):
        action, _state = model_rl.predict(obs, deterministic=True)
        obs, _reward, terminated, truncated, _info = best_env.step(int(action))
        if terminated or truncated:
            break

    final_model = best_env._build_model().to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(final_model.parameters(), lr=1e-3)

    EPOCHS_SUP = 3 #upgrade to production
    for epoch in range(EPOCHS_SUP):
        final_model.train()
        for image, labels in train_loader:
            image = image.to(DEVICE)
            labels = _class_index(labels).to(DEVICE)

            optimizer.zero_grad()
            logits = final_model(image)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
        #--Sample evaluation--
        final_model.eval()
        correct = total = 0
        with torch.no_grad():
            for image, labels in test_loader:
                image = image.to(DEVICE)
                labels = _class_index(labels).to(DEVICE)
                logits = final_model(image)
                preds = logits.argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        # Guard against an empty test loader instead of dividing by zero.
        acc = correct / total if total else 0.0
        print(f"Epoch {epoch+1}/{EPOCHS_SUP}, Test Accuracy: {acc:.4f}")
    print("Final model trained!")
    return final_model


if __name__ == "__main__":
    #-- Train RL agent, then the final model --
    # train_final_agent() calls train_agent() itself, so calling train_agent()
    # here as well would run the 10000-step PPO training twice.
    train_final_agent()
