## BABY agent-like but not quite

In [1]:
def multiply_squares(a: int, b: int) -> dict:
    """Multiply the squares of ``a`` and ``b`` and classify the product's parity.

    Args:
        a: First operand.
        b: Second operand.

    Returns:
        A dict with two keys: ``"result"`` holds ``a**2 * b**2`` and
        ``"parity"`` holds the string ``"even"`` or ``"odd"``.
    """
    a_squared = a**2
    b_squared = b**2
    product = a_squared * b_squared

    if product % 2 == 0:
        label = "even"
    else:
        label = "odd"

    return {"result": product, "parity": label}

In [2]:
class MathAgent:
    """Tiny rule-based "agent" that hands two parsed integers to a tool.

    The tool is any callable accepting ``(a, b)`` and returning a mapping
    that contains the keys ``"result"`` and ``"parity"``.
    """

    def __init__(self, tool_function):
        # The callable that does the actual computation.
        self.tool = tool_function

    def chat(self, message: str) -> str:
        """Reply to ``message`` using the first two integers found in it.

        Integers are located with a plain regex scan (optional leading minus
        followed by digits), e.g. ``'compute for a=3 b=4'`` yields 3 and 4.
        If fewer than two integers are present, a prompt asking for them is
        returned instead of calling the tool.
        """
        import re

        # Every run of digits (with an optional '-') counts as an integer.
        integers = [int(token) for token in re.findall(r"-?\d+", message)]
        if len(integers) < 2:
            return "Please provide two integers a and b."

        # Only the first two numbers are used; any others are ignored.
        a, b = integers[:2]

        # Delegate the computation to the injected tool.
        report = self.tool(a, b)
        result = report["result"]
        parity = report["parity"]

        header = f"Given a = {a} and b = {b}, I computed:\n\n"
        bullets = "".join(
            [
                f"• a² = {a**2}\n",
                f"• b² = {b**2}\n",
                f"• a²·b² = {result}\n\n",
            ]
        )
        verdict = f"This number is **{parity}**."
        return header + bullets + verdict

In [3]:
# Wrap the multiply_squares tool in a MathAgent and print its reply to a free-form request.
agent = MathAgent(multiply_squares)
print(agent.chat("hey, can you compute this for a=3 and b=5?"))


Given a = 3 and b = 5, I computed:

• a² = 9
• b² = 25
• a²·b² = 225

This number is **odd**.


In [4]:
# Without print(), the cell echoes the repr of the returned string, so newlines show up as \n.
agent.chat("a=3 and b=5?")

'Given a = 3 and b = 5, I computed:\n\n• a² = 9\n• b² = 25\n• a²·b² = 225\n\nThis number is **odd**.'

## More serious agent with RL

In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


## Define the function and train

In [6]:
def evolve_two_level(delta, omega, T=1.0):
    """Transition probability |0> -> |1> of a driven two-level system.

    The Hamiltonian is built as ``H = (delta/2) * sigma_z + omega * sigma_x``
    and the state ``|0> = (1, 0)`` is propagated with ``U = exp(-i H T)``.
    The exponential is evaluated through the eigen-decomposition of the
    Hermitian matrix ``H``.

    Args:
        delta: Coefficient multiplying ``sigma_z / 2``.
        omega: Coefficient multiplying ``sigma_x``.
        T: Evolution time used in ``exp(-i H T)``; defaults to 1.0.

    Returns:
        ``|<1|U|0>|^2`` as a Python ``float``.
    """
    # Pauli matrices
    pauli_x = np.array([[0, 1], [1, 0]], dtype=complex)
    pauli_z = np.array([[1, 0], [0, -1]], dtype=complex)

    hamiltonian = (delta/2) * pauli_z + omega * pauli_x

    # H is Hermitian, so U = V diag(exp(-i E T)) V^dagger.
    energies, modes = np.linalg.eigh(hamiltonian)
    phase_matrix = np.diag(np.exp(-1j * energies * T))
    propagator = modes @ phase_matrix @ modes.conj().T

    initial_state = np.array([1,0], dtype=complex)
    final_state = propagator @ initial_state

    # Population of |1> is the squared modulus of the second amplitude.
    excited_amplitude = final_state[1]
    return float(np.abs(excited_amplitude)**2)


In [7]:
class QuantumEnv:
    """One-step environment whose actions are (delta, omega) pairs on a grid.

    The reward for an action is the value returned by
    ``evolve_two_level(delta, omega)``. The observation is a constant dummy
    array because there is nothing to observe.
    """

    def __init__(self):
        # Discrete action space: 11 delta values in [-5, 5], 6 omega values in [0, 5].
        self.deltas = np.linspace(-5, 5, 11)
        self.omegas = np.linspace(0, 5, 6)

        # Enumerate every (delta, omega) pair, delta varying slowest.
        self.actions = []
        for detuning in self.deltas:
            for drive in self.omegas:
                self.actions.append((detuning, drive))
        self.n_actions = len(self.actions)

    def reset(self):
        """Return the constant dummy observation ``array([0.0])``."""
        dummy_state = np.array([0.0])
        return dummy_state

    def step(self, action_idx):
        """Evaluate the action at ``action_idx``.

        Returns:
            ``(observation, reward, done, info)`` where the observation is
            the dummy state, ``done`` is always ``True`` (one-step episode)
            and ``info`` is an empty dict.
        """
        chosen = self.actions[action_idx]
        reward = evolve_two_level(*chosen)
        episode_over = True  # one-step episode
        return np.array([0.0]), reward, episode_over, {}


In [8]:
class DQN(nn.Module):
    """Small fully connected Q-network: 1 input -> 64 hidden (ReLU) -> n_actions outputs."""

    def __init__(self, n_actions):
        super().__init__()
        hidden_width = 64
        # Layers are created in forward order and stored under ``fc`` so the
        # state_dict keys stay ``fc.0.*`` and ``fc.2.*``.
        layers = [
            nn.Linear(1, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, n_actions),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return one Q-value per action for the input ``x``."""
        q_values = self.fc(x)
        return q_values


In [9]:
# Fix the random streams so that Restart & Run All reproduces the same network
# initialisation and the same exploration sequence. np.random drives the
# epsilon-greedy choices below; torch drives the weight initialisation.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

env = QuantumEnv()
n_actions = env.n_actions

policy_net = DQN(n_actions)
# target_net is only ever a copy of policy_net in this cell: the one-step TD
# target used below is the raw reward and never queries the target network.
target_net = DQN(n_actions)
target_net.load_state_dict(policy_net.state_dict())

optimizer = optim.Adam(policy_net.parameters(), lr=1e-3)

gamma = 0.99       # discount factor; not used below because every episode ends after one step
eps = 1.0          # initial exploration rate
eps_decay = 0.995  # multiplicative decay applied after every episode
eps_min = 0.05     # exploration floor
n_episodes = 2000

def select_action(state, eps):
    """Epsilon-greedy action choice.

    With probability ``eps`` a uniformly random action index is returned;
    otherwise the index of the largest Q-value predicted by ``policy_net``
    for ``state``.
    """
    if np.random.rand() < eps:
        return np.random.randint(n_actions)
    with torch.no_grad():
        q = policy_net(torch.tensor(state, dtype=torch.float32))
        return q.argmax().item()

# Training
for episode in range(n_episodes):
    state = env.reset()
    action = select_action(state, eps)
    next_state, reward, done, _ = env.step(action)

    # Q-value the network currently assigns to the action that was taken
    q_values = policy_net(torch.tensor(state, dtype=torch.float32))
    q_value = q_values[action]

    # TD target: done=True after one step, so there is no bootstrap term and
    # the target is just the reward (a plain float, nothing to detach).
    target_q = reward

    loss = (q_value - target_q)**2

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Decay epsilon, but never below the exploration floor
    eps = max(eps * eps_decay, eps_min)

# Sync target network
target_net.load_state_dict(policy_net.state_dict())


<All keys matched successfully>

In [10]:
def agent_solve():
    """Report the greedy action of the trained ``policy_net``.

    Feeds the dummy state ``[0.0]`` to the global ``policy_net``, picks the
    action with the largest Q-value from ``env.actions``, re-evaluates its
    transition probability with ``evolve_two_level`` and prints all three
    numbers.

    Returns:
        The tuple ``(delta, omega, prob)`` for the selected action.
    """
    dummy_state = torch.tensor(np.array([0.0]), dtype=torch.float32)
    with torch.no_grad():
        best_action = policy_net(dummy_state).argmax().item()

    delta, omega = env.actions[best_action]
    prob = evolve_two_level(delta, omega)

    print(
        f"Best Δ = {delta:.3f}",
        f"Best Ω = {omega:.3f}",
        f"Transition probability P = {prob:.4f}",
        sep="\n",
    )
    return delta, omega, prob


In [11]:
# Ask the trained policy for its greedy action and keep the resulting parameters.
delta_opt, omega_opt, prob_opt = agent_solve()

Best Δ = 0.000
Best Ω = 5.000
Transition probability P = 0.9195


In [18]:
class QuantumChatAgent:
    """Keyword-driven chat front-end around a trained Q-network.

    At construction the agent queries ``policy_net`` once for its greedy
    action and caches the corresponding (delta, omega) pair and transition
    probability. ``chat`` then answers three kinds of requests, selected by
    keywords in the message: best parameters, probability for given
    parameters, and comparison against the cached optimum.
    """

    def __init__(self, policy_net, env):
        self.policy_net = policy_net
        self.env = env

        # Compute optimal action once at initialization
        self.state = np.array([0.0])
        with torch.no_grad():
            qvals = self.policy_net(torch.tensor(self.state, dtype=torch.float32))
        self.best_action = qvals.argmax().item()
        self.best_delta, self.best_omega = env.actions[self.best_action]
        self.best_prob = evolve_two_level(self.best_delta, self.best_omega)

    # -------------------------------
    # Helpers
    # -------------------------------
    @staticmethod
    def _extract_numbers(text):
        """Return every number found in ``text`` as a float, in order.

        The optional sign is grouped with BOTH alternatives, so negative
        integers such as ``-2`` keep their sign (previously the sign only
        attached to decimals like ``-2.5``).
        """
        import re

        return [float(tok) for tok in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", text)]

    def _parse_delta_omega(self, text):
        """Return ``(delta, omega)`` from the first two numbers, or ``None``."""
        numbers = self._extract_numbers(text)
        if len(numbers) < 2:
            return None
        return numbers[0], numbers[1]

    # -------------------------------
    # 1. Compute probability
    # -------------------------------
    def prob(self, delta, omega):
        """Transition probability for the given parameters."""
        return evolve_two_level(delta, omega)

    # -------------------------------
    # 2. Compare with optimal
    # -------------------------------
    def compare(self, delta, omega):
        """Describe how ``(delta, omega)`` fares against the cached optimum.

        Returns one of three sentences: strictly higher than the optimum,
        within ``atol=1e-3`` of it, or lower.
        """
        p = self.prob(delta, omega)
        if p > self.best_prob:
            return f"Surprisingly, Δ={delta}, Ω={omega} gives higher probability ({p:.4f}) than my learned optimum ({self.best_prob:.4f})."
        elif np.isclose(p, self.best_prob, atol=1e-3):
            return f"Δ={delta}, Ω={omega} gives probability {p:.4f}, which is basically as good as the optimal I learned ({self.best_prob:.4f})."
        else:
            return f"Δ={delta}, Ω={omega} gives probability {p:.4f}, which is lower than my optimal value {self.best_prob:.4f}."

    # -------------------------------
    # 3. General chat interface
    # -------------------------------
    def chat(self, message):
        """Answer ``message`` based on the first matching keyword.

        Keywords are checked in this order on the lower-cased message:
        ``"best"``, then ``"prob"``, then ``"compare"``/``"better"``. For the
        last two, the first two numbers in the message are taken as delta
        and omega. Anything else yields a help text.
        """
        msg = message.lower()

        # ask for best parameters
        if "best" in msg:
            return (f"My optimal values are:\n"
                    f"Δ = {self.best_delta:.3f}, Ω = {self.best_omega:.3f}\n"
                    f"with transition probability {self.best_prob:.4f}.")

        # ask for probability ("probability" also contains "prob")
        if "prob" in msg:
            params = self._parse_delta_omega(msg)
            if params is None:
                return "To compute a probability, please give me Δ and Ω."
            delta, omega = params
            p = self.prob(delta, omega)
            return f"For Δ={delta} and Ω={omega}, the transition probability is {p:.4f}."

        # ask for comparison
        if "compare" in msg or "better" in msg:
            params = self._parse_delta_omega(msg)
            if params is None:
                return "Please give Δ and Ω so I can compare them to the optimal values."
            delta, omega = params
            return self.compare(delta, omega)

        return "I can compute probabilities, compare to optimal, or tell you my best learned control values. Ask me something like: 'What is the probability for Δ=2, Ω=1?' or 'What are your best parameters?'."


In [19]:
# Build the chat front-end on top of the trained policy network and the environment.
chat_agent = QuantumChatAgent(policy_net, env)


In [20]:
# The keyword "best" makes the agent report the optimum it cached at construction.
print(chat_agent.chat("What are your best parameters?"))


My optimal values are:
Δ = 0.000, Ω = 5.000
with transition probability 0.9195.


In [21]:
# "prob" in the message triggers a probability computation from the first two numbers found.
print(chat_agent.chat("Give me the probability for delta=2 and omega=1.5"))


For Δ=2.0 and Ω=1.5, the transition probability is 0.6557.


In [24]:
# "compare" triggers a comparison of the given (Δ, Ω) against the cached optimum.
print(chat_agent.chat("Compare delta=0 omega 5.0"))


Δ=0.0, Ω=5.0 gives probability 0.9195, which is basically as good as the optimal I learned (0.9195).


In [23]:
# No recognised keyword: the agent falls back to its help message.
print(chat_agent.chat("hello there"))


I can compute probabilities, compare to optimal, or tell you my best learned control values. Ask me something like: 'What is the probability for Δ=2, Ω=1?' or 'What are your best parameters?'.
