In [1]:
import os
os.environ["TF_USE_LEGACY_KERAS"] = "1"

In [2]:
from qiskit import QuantumCircuit
from qiskit.visualization import plot_histogram
from qiskit_machine_learning.algorithms import VQC
from sklearn.preprocessing import LabelEncoder
from qiskit.circuit.library import PauliFeatureMap, ZFeatureMap, ZZFeatureMap
from qiskit_machine_learning.circuit.library import RawFeatureVector


import numpy as np
import gym

# Carica il dataset Iris
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Caricamento dei dati
data = load_iris()
X = data['data']
y = data['target']

# Preprocessing
scaler = StandardScaler()
X = scaler.fit_transform(X)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creazione di un circuito iniziale

In [3]:
num_features = X.shape[1]

feature_map = ZZFeatureMap(feature_dimension=num_features, reps=2, entanglement="linear")
feature_map.draw(output='mpl')
feature_map.decompose().draw(output='mpl')
print("Feature map dimension:", feature_map.num_parameters)
print(feature_map)


Feature map dimension: 4
     ┌────────────────────────────────────┐
q_0: ┤0                                   ├
     │                                    │
q_1: ┤1                                   ├
     │  ZZFeatureMap(x[0],x[1],x[2],x[3]) │
q_2: ┤2                                   ├
     │                                    │
q_3: ┤3                                   ├
     └────────────────────────────────────┘


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

class QNetwork(nn.Module):
    def __init__(self, input_shape, num_actions):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(input_shape, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, num_actions)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Creazione del modello
input_shape = 4  # Numero di features dell'input (es. dimensioni dell'osservazione)
num_actions = 2  # Numero di azioni possibili

model = QNetwork(input_shape, num_actions)

# Ottimizzatore e perdita
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()


In [5]:
class DQNAgent:
    def __init__(self, model, optimizer, loss_fn, num_actions, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01, gamma=0.99):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.num_actions = num_actions
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.gamma = gamma
        self.memory = []
        self.batch_size = 32
        self.max_memory_size = 1000
    
    def remember(self, state, action, reward, next_state, done):
        if len(self.memory) > self.max_memory_size:
            self.memory.pop(0)
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.num_actions)
        state = torch.FloatTensor(state).unsqueeze(0)
        q_values = self.model(state)
        return torch.argmax(q_values).item()
    
    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        
        batch = np.random.choice(len(self.memory), self.batch_size, replace=False)
        for i in batch:
            state, action, reward, next_state, done = self.memory[i]
            
            state = torch.FloatTensor(state).unsqueeze(0)
            next_state = torch.FloatTensor(next_state).unsqueeze(0)
            target = reward
            
            if not done:
                target = reward + self.gamma * torch.max(self.model(next_state)).item()
            
            target_f = self.model(state)
            target_f[0][action] = target
            
            self.optimizer.zero_grad()
            output = self.model(state)
            loss = self.loss_fn(output, target_f)
            loss.backward()
            self.optimizer.step()
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [6]:
print(X_train[0])
print(y_train[0])

[-1.50652052  1.24920112 -1.56757623 -1.3154443 ]
0


In [7]:

import gym
import gym_quantcircuit
import numpy as np

env = gym.make('quantcircuit-v0')
agent = DQNAgent(model, optimizer, loss_fn, num_actions)

episodes = 100
num_qubits = 4
test_goal_state = [0j] * (2**num_qubits - 1) + [1+0j]
env.var_init(num_qubits,
             unitary=False,
             gate_group='pauli',
             connectivity='fully_connected',
             X_train=X_train,
             Y_train=y_train,
             X_test=X_test,
             Y_test=y_test,
             feature_map=feature_map,
             goal_state=test_goal_state)

for e in range(episodes):
    print(f"Episode {e+1}/{episodes}")
    state = env.reset()
    done = False
    total_reward = 0
    env.render()
    
    while not done:
        action = env.sample()
        env.gate_list[action]
        print(env.gate_list[action])
        next_state, reward, done, _ = env.step(action)
        env.render()
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        
        if done:
            print(f"Episode {e+1}/{episodes} - Reward: {total_reward}")
            break
    
    agent.replay()

Numero di qubit per la porta id : 1
Numero di qubit per la porta h : 1
Numero di qubit per la porta x : 1
Numero di qubit per la porta z : 1
Numero di qubit per la porta cx : 2
Episode 1/100
((0, 2), <bound method QuantumCircuit.cx of <qiskit.circuit.quantumcircuit.QuantumCircuit object at 0x103db61d0>>)
Numero di qubit per la porta id : 1
Numero di qubit per la porta h : 1
Numero di qubit per la porta x : 1
Numero di qubit per la porta z : 1
Numero di qubit per la porta cx : 2
La reward è:0
((1,), <bound method QuantumCircuit.z of <qiskit.circuit.quantumcircuit.QuantumCircuit object at 0x112793df0>>)
La reward è:0
((2,), <bound method QuantumCircuit.id of <qiskit.circuit.quantumcircuit.QuantumCircuit object at 0x112793df0>>)
La reward è:0
((1, 3), <bound method QuantumCircuit.cx of <qiskit.circuit.quantumcircuit.QuantumCircuit object at 0x112793df0>>)
La reward è:0
((3, 0), <bound method QuantumCircuit.cx of <qiskit.circuit.quantumcircuit.QuantumCircuit object at 0x112793df0>>)
La rew

KeyboardInterrupt: 