In [1]:
import os, random, math, time, warnings
warnings.filterwarnings("ignore")
import numpy as np
import torch, torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import transforms
import gymnasium as gym
from gymnasium import spaces
import pennylane as qml
from pennylane import numpy as pnp
from stable_baselines3 import PPO
from medmnist import ChestMNIST

In [2]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SEED   = 42
torch.manual_seed(SEED)  
np.random.seed(SEED)
random.seed(SEED)

In [3]:
BATCH_SZ = 64
IMG_SIZE  = 32   
NUM_CLASSES = 3

In [4]:
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])

In [5]:
train = ChestMNIST(split="train", download=True, as_rgb=True, transform=transform)
test  = ChestMNIST(split="test", download=True, as_rgb=True, transform=transform)

In [6]:
#Apenas 3 classes classificadas nesse momento
train_idx = [i for i,(x,y) in enumerate(train) if y[0] < NUM_CLASSES]
test_idx  = [i for i,(x,y) in enumerate(test)  if y[0] < NUM_CLASSES]

In [7]:
train_loader = DataLoader(Subset(train, train_idx), batch_size=BATCH_SZ, shuffle=True)
test_loader  = DataLoader(Subset(test,  test_idx),  batch_size=BATCH_SZ, shuffle=False)

In [8]:
# ---------- 3. Hiper‑parâmetros de RL / PQC ----------
N_QUBITS   = 4
MAX_DEPTH  = 8          # nº máximo de gates que o agente pode adicionar
ACTION_SET = (['rx','ry','rz'] + ['cnot'])   # gates disponíveis
N_ACTIONS  = len(ACTION_SET) * N_QUBITS

dev = qml.device("default.qubit", wires=N_QUBITS)

In [None]:
class CircuitBuilderEnv(gym.Env):
    """
    State: Size Vector MAX_DEPTH*2
        [gate_id_norm, wire_norm] each position(0 if None)
    Action: int 0..N_ACTIONS-1 -> gate_id, wire_id
    Reward: (0..1) in mini-batch using hibrid model
    """
    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(N_ACTIONS)
        self.observation_space = spaces.Box(low=0, high=1, shape=(MAX_DEPTH*2,), dtype=np.float32)
        #self.reset()
        self.val_iter = iter(train_loader)

    #--- building circuit --
    def _add_gate(self, gate, wire):
        self.circuit.append((gate, wire))
    
    def _encode_state(self):
        vec = np.zeros((MAX_DEPTH*2,), dtype=np.float32)
        print(f"vec _encode_state {vec}")
        for i, (g,w) in enumerate(self.circuit):
            g_id = ACTION_SET.index(g)
            vec[i*2]   = g_id / len(ACTION_SET)
            vec[i*2+1] = w / N_QUBITS
        print(f"vec _encode_state end {vec}")
        return vec
    
    #--QNode template (recompile each time reset) --
    def _build_qnode(self):
        def pqc(inputs, weights):
            import torch
            weights = torch.atleast_1d(weights) 
            for q in range(N_QUBITS):
                qml.RY(inputs[q], wires=q) # sample embedding

            # gates generated by agent
            for idx, (g,w) in enumerate(self.circuit):
                if g == 'rx':
                    qml.RX(weights[idx], wires=w)
                elif g == 'ry':
                    qml.RY(weights[idx], wires=w)
                elif g == 'rz':
                    qml.RZ(weights[idx], wires=w)
                elif g == 'cnot':
                    qml.CNOT(wires=[w, (w+1)%N_QUBITS])
            print(f"circuit: {self.circuit}")
            return [qml.expval(qml.PauliZ(w)) for w in range(N_QUBITS)]
        return qml.QNode(pqc, dev, interface="torch")

    #--CNN-QNN hibrid (recreated each time change circuit) --
    def _build_model(self):
        qnode = self._build_qnode()
        print(f"qnode: {qnode}")
        weights_shapes = {"weights":len(self.circuit)}
        q_layer = qml.qnn.TorchLayer(qnode,weight_shapes=weights_shapes).to(DEVICE)
        class HybridNet(nn.Module):
            def __init__(self, q_layer):
                super().__init__()
                self.conv = nn.Sequential(
                    nn.Conv2d(3,8,kernel_size=3,stride=1,padding=1),
                    nn.ReLU(),
                    nn.Conv2d(8,16,kernel_size=3,stride=1,padding=1),
                    nn.ReLU(),
                    nn.MaxPool2d(2),
                )
                flat = 16 * (IMG_SIZE//4) * (IMG_SIZE//4)
                self.fc1 = nn.Linear(flat, N_QUBITS)
                #repeat
                self.qlay = q_layer
                self._initialize_fc1()
                self.fc2 = nn.Linear(N_QUBITS, NUM_CLASSES)

            def _initialize_fc1(self):
                with torch.no_grad():
                    dummy_input = torch.zeros(1, 3, IMG_SIZE, IMG_SIZE).to(DEVICE)
                    x = self.conv(dummy_input)
                    flat_size = x.view(1, -1).shape[1]
                self.fc1 = nn.Linear(flat_size, N_QUBITS)
            
            def forward(self, x):
                x = self.conv(x)
                x = x.view(x.size(0), -1)
                x = torch.tanh(self.fc1(x))
                print(f"Before quantum layer: {x.shape}")
                x = self.qlay(x)
                print(f"After quantum layer: {x.shape}")
                return self.fc2(x)
        return HybridNet(q_layer)
    
    def seed(self, seed=None):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]
    #-- GYM api --
    def reset(self, *, seed: int | None = None, options: dict | None = None):
        super().reset(seed=seed)                 # registra a seed internamente
        self.circuit, self.steps = [], 0
        self.model = self._build_model()
        obs = self._encode_state().astype(np.float32)
        print(f"reset Obs: {obs.shape}")
        return obs, {}
    
    def step(self, action):
        print(f"step: {action}")
        gate_id = action // N_QUBITS
        wire_id = action % N_QUBITS
        gate = ACTION_SET[gate_id]
        if self.steps < MAX_DEPTH:
            self._add_gate(gate, wire_id)
            print(f"add gate: {gate} wire: {wire_id}")
            self.steps += 1
            print(f"steps: {self.steps}")
            self.model = self._build_model()
            print(f"Model: {self.model}")
        
        reward = self._quick_eval()
        done = (self.steps == MAX_DEPTH)

        #return self._encode_state(), reward, done, {}
        obs = self._encode_state().astype(np.float32)
        print(f"step Obs: {obs.shape}")
        terminated = done
        truncated  = False
        return obs, reward, terminated, truncated, {}
    #-- quick eval (mini batchs)--
    @torch.no_grad()
    def _quick_eval(self):
        try:
            image, labels = next(self.val_iter)
        except StopIteration:
            self.val_iter = iter(train_loader)
            image, labels = next(self.val_iter)

        image = image.to(DEVICE)
        labels = labels.squeeze().long().to(DEVICE)
        self.model.to(DEVICE).eval()
        try:
            print(f"Image shape: {image.shape}")
            logits = self.model(image)
            print(f"Model output logits shape: {logits.shape}")
        except Exception as e:
            print("Erro dentro de model(image):", e)
            raise
        preds = logits.argmax(dim=1)
        print(f"Preds shape: {preds.shape}")
        acc = (preds == labels).float().mean().item()
        print(f"Accuracy: {acc}")
        return acc


In [106]:
env = CircuitBuilderEnv()

In [107]:
model_rl = PPO("MlpPolicy", env, 
                learning_rate=3e-4,n_steps=512,
                batch_size=64,gamma=0.95,
                verbose=1,seed=SEED)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [108]:
model_rl.learn(total_timesteps=10000)

qnode: <QNode: device='<default.qubit device (wires=4) at 0x1f35d467c90>', interface='torch', diff_method='best'>
vec _encode_state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
vec _encode_state end [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
reset Obs: (16,)
step: 13
add gate: cnot wire: 1
steps: 1
qnode: <QNode: device='<default.qubit device (wires=4) at 0x1f35d467c90>', interface='torch', diff_method='best'>
Model: HybridNet(
  (conv): Sequential(
    (0): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=4096, out_features=4, bias=True)
  (qlay): <Quantum Torch Layer: func=pqc>
  (fc2): Linear(in_features=4, out_features=3, bias=True)
)
Image shape: torch.Size([64, 3, 32, 32])
Before quantum layer: torch.Size([64, 4])
circuit: [('cnot', np

RuntimeError: shape '[64, -1]' is invalid for input of size 4

In [None]:
#-- Train RL agent --
def train_agent():
    env = CircuitBuilderEnv()

    model_rl = PPO("MlpPolicy", env, 
                    learning_rate=3e-4,n_steps=512,
                    batch_size=64,gamma=0.95,
                    verbose=1,seed=SEED)
    
    print("Training RL agent...")
    model_rl.learn(total_timesteps=10000)
    model_rl.save("rl_agent")
    print("RL agent trained!")
    return model_rl

#-- Test RL agent --
def train_final_agent():
    best_env = train_agent()
    best_circuit = best_env.circuit

    final_model = best_env._build_model().to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(final_model.parameters(), lr=1e-3)

    EPOCHS_SUP = 3 #upgrade to production
    for epoch in range(EPOCHS_SUP):
        final_model.train()
        for image, labels in train_loader:
            image = image.to(DEVICE)
            labels = labels.squeeze().long().to(DEVICE)

            optimizer.zero_grad()
            logits = final_model(image)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
        #--Sample evaluation--
        final_model.eval()
        correct = total = 0
        with torch.no_grad():
            for image, labels in test_loader:
                image = image.to(DEVICE)
                labels = labels.squeeze().long().to(DEVICE)
                logits = final_model(image)
                preds = logits.argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        acc = correct / total
        print(f"Epoch {epoch+1}/{EPOCHS_SUP}, Test Accuracy: {acc:.4f}")
    print("Final model trained!")


if __name__ == "__main__":
    #-- Train RL agent --
    train_agent()

    #-- Train final model --
    train_final_agent()


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Training RL agent...
reset Obs: (16,)


AttributeError: 'numpy.int64' object has no attribute 'steps'