In [1]:
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn.init import xavier_uniform_
from hrr import *
from argparse import Namespace
from experiments.deep_heisenberg.setup_dataloader import setup_dataloader
# Dataset configuration handed to the project's dataloader factory.
cfg = Namespace(ngates=8000, npaulis=128)
dataloader = setup_dataloader(cfg)

# Keep only the first batch in `circuit`. The original note gives its shape as
# (8000, 128); later cells call circuit.permute(1, 0, 2), so it is used as a
# 3-D tensor.
for circuit in dataloader:
    break

def transpose_for_scores(x: torch.Tensor, nh, hhs) -> torch.Tensor:
    """Split the last axis of ``x`` into heads and move the head axis to position 1.

    Args:
        x: 3-D tensor whose last axis has ``nh * (hhs // nh)`` entries.
        nh: number of heads.
        hhs: total attention width; each head receives ``hhs // nh`` features.

    Returns:
        A view of ``x`` with shape ``(x.shape[0], nh, x.shape[1], hhs // nh)``.
    """
    per_head = hhs // nh
    leading = x.shape[:-1]
    # (d0, d1, nh * per_head) -> (d0, d1, nh, per_head)
    split = x.view(*leading, nh, per_head)
    # Swap the head axis with the axis in front of it: (d0, nh, d1, per_head).
    return split.permute(0, 2, 1, 3)

In [2]:
# # symmetric encoder
# qubits = torch.tensor([0,1,2,2]) # YZ the same in symmetric encoder
# all_paulis = F.one_hot(torch.cartesian_prod(qubits,qubits,qubits),num_classes=3)
# all_paulis = torch.cat([all_paulis, -all_paulis])
# all_paulis.view(all_paulis.shape[0],-1);

In [101]:
# class PositionalEncoding(nn.Module):

#     def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
#         super().__init__()
#         self.dropout = nn.Dropout(p=dropout)

#         position = torch.arange(max_len).unsqueeze(1)
#         div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
#         pe = torch.zeros(max_len, 1, d_model)
#         pe[:, 0, 0::2] = torch.sin(position * div_term)
#         pe[:, 0, 1::2] = torch.cos(position * div_term)
#         self.register_buffer('pe', pe)

#     def forward(self, x):
#         """
#         Arguments:
#             x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
#         """
#         x = x + self.pe[:x.size(0)]
#         return self.dropout(x)

# import math
# max_len = 8000
# d_model = 8

# position = torch.arange(max_len).unsqueeze(1)
# div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
# pe = torch.zeros(max_len, 1, d_model)
# pe[:, 0, 0::2] = torch.sin(position * div_term)
# pe[:, 0, 1::2] = torch.cos(position * div_term)


In [83]:
from mygqe.network import PauliEnergy

# Smoke test: build a PauliEnergy model and inspect the output shape for one batch.
# NOTE(review): the positional arguments presumably mean (hidden_size,
# attention_hidden_size, num_heads, max_length, batch_size) - the same five values
# are unpacked as `hs, hhs, nh, ml, bs` in another cell of this notebook; confirm
# against PauliEnergy's signature.
model = PauliEnergy(9, 8, 4, 8000, 128)
# Swap the first two axes of `circuit` for the forward pass, then keep index 0
# along the second axis of the model output.
model(circuit.permute(1,0,2))[:,0,:].shape

torch.Size([128, 9])

tensor([-1.0000e+09])

In [14]:
# Scratch walk-through of one HRR self-attention pass on the batch in `circuit`.
# hidden_size, hidden_hidden_size, num_heads, max_length, batch_size
# hidden_size == num_gate_features, hidden_hidden_size == attention_hidden_size
# number of paulis == bs
hs, hhs, nh, ml, bs = 9, 8, 4, 8000, 128
gate_embeddings = nn.Embedding(bs, hs) # original embedding is one hot
positional_bias = nn.Parameter(torch.empty(1, ml, hs))
xavier_uniform_(positional_bias)
# pauli_embs = gate_embeddings(circuit)
# Add the bias out of place. `Tensor.float()` hands back the very same tensor when
# the dtype is already float32, and `permute` is only a view, so an in-place `+=`
# here could write the bias into `circuit` itself and accumulate it every time this
# cell is re-run.
pauli_embs = circuit.permute(1,0,2).float() + positional_bias
# In-place transpose of the freshly created sum: afterwards `x` and `pauli_embs`
# are the same tensor.
# NOTE(review): this undoes the permute above, giving (ml, bs, hs) = (8000, 128, 9)
# according to the recorded output, so axis -2 in the reductions below is the
# 128-long axis while the positional bias indexes the 8000-long one - confirm that
# this is intended.
x = pauli_embs.transpose_(0, 1)

# Separate linear projections from the gate features to the attention width.
query = nn.Linear(hs, hhs)
key = nn.Linear(hs, hhs)
value = nn.Linear(hs, hhs)

# k, q, v = torch.randn(bs, nh, ml, hs//nh), torch.randn(bs, nh, ml, hs//nh), torch.randn(bs, nh, ml, hs//nh)
# Split each projection into nh heads of width hhs // nh.
q = transpose_for_scores(query(x), nh, hhs)
k = transpose_for_scores(key(x), nh, hhs)
v = transpose_for_scores(value(x), nh, hhs)

# Combine keys with values via `binding` and sum the result over axis -2.
bind = binding(k, v, dim=-1).sum(dim=-2, keepdims=True)  # (B, h, 1, H')
# Query that sum via `unbinding`, then compare what comes back with the actual values.
vp = unbinding(bind, q, dim=-1)  # (B, h, T, H')
scale = cosine_similarity(v, vp, dim=-1, keepdim=True)  # (B, h, T, 1)

# scale = scale + (1. - mask) * (-1e9)
# Normalise the similarities over axis -2 and use them to re-weight the values.
weight = torch.softmax(scale, dim=-2)
weighted_value = weight * v

# weighted_value = merge(weighted_value)
# Move the head axis back next to the feature axis and fuse the two into width hhs.
context_layer = weighted_value.permute(0, 2, 1, 3).contiguous()
new_context_layer_shape = context_layer.size()[:-2] + (hhs,)
context_layer = context_layer.view(new_context_layer_shape)
# Project back to the gate-feature width with a freshly initialised linear layer.
out = nn.Linear(hhs, hs)(context_layer)
out.shape

torch.Size([8000, 128, 9])

In [76]:

# Spell out the Pauli label for each integer code 0..3 (0=I, 1=X, 2=Y, 3=Z).
''.join("IXYZ"[code] for code in torch.arange(4).tolist())

'IXYZ'

### Grads

In [6]:
# Categorical distribution over three outcomes. The weights sum to 1.3 rather
# than 1; per the PyTorch docs, `probs` is normalised along the last axis.
m = torch.distributions.Categorical(probs=torch.tensor([0.9, 0.1, 0.3]))

tensor(0)

In [None]:
# Scratch cell: builds a 3-element integer tensor; the value is not assigned to any name.
torch.tensor([0,1,1])

In [58]:
# Draw one sample from the Categorical `m`. No seed is set in the visible cells,
# so the value can differ between runs.
m.sample()


tensor(0)

In [None]:
def my_grads(self, sampler: Sampler):
    """Average the ancilla measurements of a forward and an inverse circuit.

    Both measurements use ``self.shot`` as ``ntotal`` and share one randomly
    drawn seed.

    Args:
        sampler: object providing ``sample_indices(n)`` and ``get(index)``.

    Returns:
        ``(values, indices)``: ``values`` is the element-wise mean of the two
        measurement arrays; ``indices`` lists the indices drawn for the forward
        circuit only.

    NOTE(review): the inverse circuit draws its own index instead of reusing the
    one recorded for the forward circuit - confirm that this is intended.
    NOTE(review): relies on ``random``, ``sys``, ``np`` and ``Sampler`` being in
    scope; none of them is imported explicitly in the visible cells.
    """
    drawn = []
    seed = random.randint(0, sys.maxsize)

    def _draw_index():
        # One fresh index per call.
        return sampler.sample_indices(1)[0]

    def get_operator():
        # Forward path: remember which index was used.
        index = _draw_index()
        drawn.append(index)
        return sampler.get(index)

    def get_operator_inv():
        # Inverse path: the index is not recorded.
        return sampler.get(_draw_index())

    measure = self.ancilla_mes_method.get_values
    forward = np.array(measure(self.my_get_prepare(sampler, get_operator, False), ntotal=self.shot, seed=seed))
    backward = np.array(measure(self.my_get_prepare(sampler, get_operator_inv, True), ntotal=self.shot, seed=seed))

    return (forward + backward) / 2, drawn

In [None]:
def my_get_prepare(self, sampler, get_operator, inverse):
    """Return a zero-argument factory that builds one measurement circuit.

    Nothing is built until the returned callable is invoked; every invocation
    calls ``get_operator`` once.

    Args:
        sampler: accepted but not used by this function.
        get_operator: zero-argument callable yielding the operator to add.
        inverse: flag forwarded unchanged to ``self._add_swift_operator``.

    Returns:
        A callable that creates and returns the prepared circuit.
    """
    def prepare():
        # One wire more than self.nqubit (presumably the ancilla - confirm).
        blank = init_circuit(self.nqubit + 1, tool=self.tool)
        circuit = self.initializer.initialize(blank, targets=self._targets)
        circuit.h(self._ancilla)

        drawn_operator = get_operator()
        self._add_swift_operator(circuit, drawn_operator, inverse)
        return circuit

    return prepare

#### Deprecated

In [3]:
import torch
from torch import nn
from torch.nn.init import xavier_uniform_
# Deprecated setup for the plain multi-head attention experiment.
npaulis, ngates = 128, 8000
nfeatures, nheads = 12, 4

# Every one of the ngates rows holds the indices 0 .. npaulis - 1.
paulis = torch.arange(npaulis).repeat(ngates, 1)
gate_embeddings = nn.Embedding(num_embeddings=npaulis, embedding_dim=nfeatures) # original embedding is one hot
# One learnable bias vector per gate position; the middle axis has size 1.
positional_bias = nn.Parameter(torch.empty(ngates, 1, nfeatures))
xavier_uniform_(positional_bias)
attn = nn.MultiheadAttention(nfeatures, nheads, batch_first=True)

In [109]:
# Nine hand-written integer indices, all below 128.
# NOTE(review): presumably Pauli indices (npaulis = 128 in this notebook) - confirm.
# `idxx` is not read by any visible cell.
idxx = torch.tensor([ 73, 125, 125, 105, 121, 105,  19, 109,  77])


tensor([ 73, 125, 125, 105, 121, 105,  19, 109,  77])

In [7]:
from hrr import *
# Deprecated scratch: the HRR attention arithmetic on random per-head tensors.
# `hhs` is assigned here but not read in this cell.
hs, hhs, nh, ml, bs = 12, 24, 4, 8000, 128

# Random stand-ins for keys, queries and values, each (bs, nh, ml, hs // nh).
k, q, v = (torch.randn(bs, nh, ml, hs // nh) for _ in range(3))

# Combine keys with values via `binding` and sum the result over axis -2.
bind = binding(k, v, dim=-1).sum(dim=-2, keepdims=True)  # (B, h, 1, H')

# Query that sum via `unbinding`, then compare what comes back with the actual values.
vp = unbinding(bind, q, dim=-1)  # (B, h, T, H')
scale = cosine_similarity(v, vp, dim=-1, keepdim=True)  # (B, h, T, 1)

# scale = scale + (1. - mask) * (-1e9)
# Normalise the similarities over axis -2 and re-weight the values with them.
weight = torch.softmax(scale, dim=-2)
weighted_value = v * weight

# weighted_value = merge(weighted_value)
# Put the head axis next to the feature axis and fuse the two into width hs.
context_layer = weighted_value.transpose(1, 2).contiguous()
new_context_layer_shape = context_layer.size()[:-2] + (hs,)
context_layer = context_layer.view(new_context_layer_shape)
out = nn.Linear(hs, hs)(context_layer)
out.shape

torch.Size([128, 8000, 12])

In [3]:
# Deprecated: plain multi-head self-attention over the embedded gates.
# Uses `paulis`, `gate_embeddings`, `positional_bias` and `attn` as defined in the
# setup cell of this section (npaulis = 128, ngates = 8000, nheads = 4).
# The recorded run failed while trying to allocate 131072000000 bytes, which
# equals 128 * 4 * 8000 * 8000 * 4.
# NOTE(review): presumably the float32 attention-score tensor
# (npaulis x nheads x ngates x ngates) - confirm.
pauli_embs = gate_embeddings(paulis)
# The bias has a size-1 middle axis, so it is broadcast across that axis.
pauli_embs += positional_bias
# transpose_ swaps axes 0 and 1 in place: `x` and `pauli_embs` are the same tensor here.
x = pauli_embs.transpose_(0, 1)
# Self-attention (query = key = value = x); [0] keeps the first item returned by
# `attn`, whose first two axes are then swapped back.
pauli_embs = attn(x, x, x)[0].transpose_(0, 1)
pauli_embs.shape

RuntimeError: [enforce fail at alloc_cpu.cpp:75] err == 0. DefaultCPUAllocator: can't allocate memory: you tried to allocate 131072000000 bytes. Error code 12 (Cannot allocate memory)

In [None]:

# One-hot encode every character of every Pauli string into three slots:
# 'I' -> slot 0, 'X' -> slot 1, any other character -> slot 2.
# `paulis` must be an iterable of objects exposing `.p_string` here.
results = [
    [[int(c == 'I'), int(c == 'X'), int(c not in ('I', 'X'))] for c in pauli.p_string]
    for pauli in paulis
]
torch.tensor(results, dtype=torch.float32)

In [1]:
from transformers import BertConfig

ModuleNotFoundError: No module named 'transformers'