In [208]:
using Yao, FLOYao

using LinearAlgebra
using Random
using SparseArrays
using Yao
using BenchmarkTools
using Printf

nq = 50
layers = 2
g = chain(nq)
for _ in 1:layers
    for i in 1:nq-1
        push!(g, rot(kron(nq, i => X, i+1 => X), 0.)) #Implemented using qiskit's definition (https://qiskit.org/documentation/stubs/qiskit.circuit.library.RXXGate.html)
    end
    for i in 1:nq-1
        push!(g, rot(kron(nq, i => X, i+1 => Y), 0.))
    end
    # for i in 1:N-1
    #     push!(g, rot(kron(N, i => Y, i+1 => Y), 0.))
    # end
    # Not needed YY = XX and IZ and ZI
    for i in 1:nq
        push!(g, put(nq, i => Rz(0.)))
    end
end

⊗ = kron

function covariance_matrix(reg::MajoranaReg)
    nq = nqubits(reg)
    G = I(nq) ⊗ [0 1; -1 0]
    return reg.state * G * reg.state'
end

function majoranaindices2kron(nq, i, j) #Returns γ_iγ_j, ASSUME i<j (ok i actually have no clue what's going on due to conflicts in definition of γ_i (FLOYao docs vs Jozsa Miyake paper vs Bill's paper))
    p = []
    c = (i % 2 == j % 2) ? 1 : -1 #deal with this c later..., once i figure out how to scale kronblocks so that they stay as kronblock objects...
    #it's fine, this outputs an Add object now so i can pass it through expect'
    a = min(i, j)
    b = max(i, j)
    first = (a+1) ÷ 2 
    last = (b+1) ÷ 2 
    if first == last #This means i=j-1 and j is even
        c = 1
        push!(p, first => Z)
    else
        if i % 2 == 0
            push!(p, first => X)
            c *= 1
        else
            push!(p, first => Y)
            c *= -1
        end
        for k in first+1:last-1
            push!(p, k => Z)
            c *= -1
        end
        if j % 2 == 0
            push!(p, last => Y)
        else
            push!(p, last => X)
        end
    end
    return c*kron(nq, p...)
end

function majorana_commutator(nq, i, j) #Uses anti-commutation of Majorana operators
    return Add(majoranaindices2kron(nq, i, j))
end

function print_matrix(A)
    for i in 1:size(A, 1)
        for j in 1:size(A, 2)
            @printf("%6.2f ", A[i, j])  # Prints each element with 4 decimal places, padded to 6 characters
        end
        println()  # Move to the next line after each row
    end
end

function print_vec_matrix(A)
    for i in 1:size(A, 1)
        for j in 1:size(A, 2)
            for element in A[i, j]
                @printf("%.2f ", element)
            end
            println()
        end
        println()
    end
end

nparams = nparameters(g)
dim = 2*nq
sampled = Dict{BitStr, Vector}() #Keeps track of which x has been sampled before, so it doesn't recompute everything  (NOT REALLY SURE IF THIS WILL WORK WELL...)

function update!(reg::MajoranaReg, theta, b, temp_m, temp_grad_m, cur_m, cur_grad_m, probabilities, grad_probabilities) #Evolves all matrices and probabilities and gradients by nq steps, in-place
    nq = nqubits(reg)
    nparams = nparameters(g)
    dim = 2*nq
    t_tot = 0
    for i in 1:nq
        t = time()
        if i > 1
            cur_m = deepcopy(temp_m)
            cur_grad_m = deepcopy(temp_grad_m)
            cur_prob = deepcopy(probabilities[i-1])
            cur_grad_prob = deepcopy(grad_probabilities[i-1, :])
            ni = b[i-1]
            for p in 1:dim
                for q in p+1:dim
                    temp_grad_m[p,q] .-= (-1)^ni * ((-cur_grad_prob * cur_m[2*(i-1)-1,p] * cur_m[2*(i-1),q]) .+ (cur_prob * (cur_grad_m[2*(i-1)-1,p]*cur_m[2*(i-1),q] .+ cur_m[2*(i-1)-1,p] * cur_grad_m[2*(i-1),q]))) / (2*cur_prob^2)
                    temp_grad_m[p,q] .+= (-1)^ni * ((-cur_grad_prob * cur_m[2*(i-1)-1,q] * cur_m[2*(i-1),p]) .+ (cur_prob * (cur_grad_m[2*(i-1)-1,q]*cur_m[2*(i-1),p] .+ cur_m[2*(i-1)-1,q] * cur_grad_m[2*(i-1),p]))) / (2*cur_prob^2)
                    temp_grad_m[q,p] = -temp_grad_m[p,q] #this is correct but the way i'm defining temp[j,k] is not correct...
                    # println("$p $q")
                    # println(temp_grad_m[p,q])
                end
            end
            for p in 1:dim
                temp_grad_m[p,p] = zeros(nparams) #i think this is correct but i don't really need to use it?? actually i don't know anymore, because shouldn't the main diagonal of a covariance matrix not be 0s????? maybe change it to ones for testing
            end
            for p in 1:dim
                for q in p+1:dim
                    temp_m[p,q] -= (-1)^ni * (cur_m[2*(i-1)-1,p] * cur_m[2*(i-1),q]) / (2*cur_prob)
                    temp_m[p,q] += (-1)^ni * (cur_m[2*(i-1)-1,q] * cur_m[2*(i-1),p]) / (2*cur_prob)
                    temp_m[q,p] = -temp_m[p,q]
                end
            end
            for p in 1:dim
                temp_m[p,p] = 0.0 #i think this is correct but i don't really need to use it?? actually i don't know anymore, because shouldn't the main diagonal of a covariance matrix not be 0s????? maybe change it to ones for testing
            end
            ni = b[i]
            probabilities[i] = (1+(-1)^ni * temp_m[2*i-1, 2*i]) / 2
            grad_probabilities[i, :] = (-1)^ni * temp_grad_m[2*i-1, 2*i] / 2
        else
            dispatch!(g, theta)
            temp_m = covariance_matrix(apply(reg, g))
            println(temp_m)
            ni = b[i]
            probabilities[i] = (1+(-1)^ni * temp_m[2*i-1, 2*i]) / 2
            for p in 1:dim
                for q in p+1:dim
                    ham = majorana_commutator(nq, p, q)
                    temp_grad_m[p,q] = expect'(ham, reg => g)[2]
                    temp_grad_m[q,p] = -temp_grad_m[p,q] #this is correct but the way i'm defining temp[j,k] is not correct...
                end
            end
            for p in 1:dim
                temp_grad_m[p,p] = zeros(nparams) #i think this is correct but i don't really need to use it?? actually i don't know anymore, because shouldn't the main diagonal of a covariance matrix not be 0s????? maybe change it to ones for testing
            end
            grad_probabilities[i, :] = (-1)^ni * temp_grad_m[2*i-1, 2*i] / 2
        end
        diff = time() - t
        t_tot += diff
        println("iteration $i: $diff")
    end
    println("total time: $t_tot")
end 

function update_opt!(reg::MajoranaReg, theta, b, temp_m, temp_grad_m, probabilities, grad_probabilities) #Evolves all matrices and probabilities and gradients by nq steps, in-place and optimally
    nq = nqubits(reg)
    nparams = nparameters(g)
    dim = 2*nq
    t_tot = 0
    for i in 1:nq
        t = time()
        if i > 1
            ni = b[i-1]
            cur_prob = probabilities[i-1]
            cur_grad_prob = grad_probabilities[:, i-1]
            cur_prefactor = (-1)^ni / (2*cur_prob)
            cur_grad_prefactor = (-1)^ni / (2*cur_prob^2)
            for p in 2*(i-1)+1:dim
                for q in p+1:dim
                    # if p == (dim - 1) && q == dim
                    #     tt = time()
                    # end
                    temp_grad_m[:,p,q] .-= cur_grad_prefactor * ((-cur_grad_prob * temp_m[2*(i-1)-1,p] * temp_m[2*(i-1),q]) .+ (cur_prob * (temp_grad_m[:,2*(i-1)-1,p]*temp_m[2*(i-1),q] .+ temp_m[2*(i-1)-1,p] * temp_grad_m[:,2*(i-1),q])))
                    temp_grad_m[:,p,q] .+= cur_grad_prefactor * ((-cur_grad_prob * temp_m[2*(i-1)-1,q] * temp_m[2*(i-1),p]) .+ (cur_prob * (temp_grad_m[:,2*(i-1)-1,q]*temp_m[2*(i-1),p] .+ temp_m[2*(i-1)-1,q] * temp_grad_m[:,2*(i-1),p])))
                    # if p == (dim - 1) && q == dim
                    #     diff = (time() - tt)*10^6
                    #     println("time taken (microsec): ", diff)
                    # end
                end
            end
            for p in 2*(i-1)+1:dim
                for q in p+1:dim
                    # if p == (dim - 1) && q == dim
                    #     tt = time()
                    # end
                    temp_m[p,q] -= cur_prefactor * (temp_m[2*(i-1)-1,p] * temp_m[2*(i-1),q])
                    temp_m[p,q] += cur_prefactor * (temp_m[2*(i-1)-1,q] * temp_m[2*(i-1),p])
                    # if p == (dim - 1) && q == dim
                    #     diff = (time() - tt)*10^6
                    #     println("time taken (microsec): ", diff)
                    # end
                end
            end
            ni = b[i]
            probabilities[i] = (1+(-1)^ni * temp_m[2*i-1, 2*i]) / 2
            grad_probabilities[:, i] = (-1)^ni * temp_grad_m[:,2*i-1, 2*i] / 2
        else
            dispatch!(g, theta)
            temp_m = covariance_matrix(apply(reg, g)) #covariance_matrix(reg::MajoranaReg), reg gets changed here
            ni = b[i]
            probabilities[i] = (1+(-1)^ni * temp_m[2*i-1, 2*i]) / 2
            for p in 1:dim
                for q in p+1:dim
                    ham = majorana_commutator(nq, p, q)
                    temp_grad_m[:,p,q] = expect'(ham, reg => g)[2]
                end
            end
            grad_probabilities[:, i] = (-1)^ni * temp_grad_m[:,2*i-1, 2*i] / 2
        end
        diff = time() - t
        t_tot += diff
        println("iteration $i: $diff")
    end
    println("total time: $t_tot")
end

function log_grad(reg::MajoranaReg, theta, b, temp_m, temp_grad_m, cur_m, cur_grad_m, probabilities, grad_probabilities) #Returns ∇_θlog(p_theta(X)), evaluated at theta (parameters of circuit) and b (measurement result); reg is the initial register (e.g. FLOYao.zero_state(nq))
    nq = nqubits(reg)
    update!(reg, theta, b, temp_m, temp_grad_m, cur_m, cur_grad_m, probabilities, grad_probabilities)
    s = zeros(length(theta))
    for i in 1:nq
        s += grad_probabilities[i, :] / probabilities[i]
    end
    # print_matrix(grad_probabilities)
    # println(probabilities)
    # println(prod(probabilities), " should match first number")
    basic_prob = probabilities
    return basic_prob, s
end

function log_grad_opt(reg::MajoranaReg, theta, b, temp_m, temp_grad_m, probabilities, grad_probabilities)
    nq = nqubits(reg)
    update_opt!(reg, theta, b, temp_m, temp_grad_m, probabilities, grad_probabilities)
    s = zeros(length(theta))
    for i in 1:nq
        s += grad_probabilities[:, i] / probabilities[i]
    end
    # print_matrix(grad_probabilities)
    # println(probabilities)
    # println(prod(probabilities), " should match first number")
    optimized_prob = probabilities
    return optimized_prob, s
end

iters = 1
p = rand(nparameters(g)).*2π
println("number of parameters: ", nparameters(g))
println(p)
dispatch!(g, p)
reg = apply(FLOYao.zero_state(nq), g)
results = measure(reg, nshots = iters)
bitstr = results[1]
println("measured outcome: $bitstr")
println("probability of measuring the above outcome: ", FLOYao.bitstring_probability(reg, bitstr))

# T = Float64
# println("type: $T")
# temp_m = Matrix{T}(undef, dim, dim)
# temp_grad_m = Matrix{Vector{T}}(undef, dim, dim)
# cur_m = Matrix{T}(undef, dim, dim)
# cur_grad_m = Matrix{Vector{T}}(undef, dim, dim)
# probabilities = Vector{T}(undef, nq)
# grad_probabilities = Matrix{T}(undef, nq, nparams)
# basic_prob, basic = log_grad(FLOYao.zero_state(nq), p, bitstr, temp_m, temp_grad_m, cur_m, cur_grad_m, probabilities, grad_probabilities)
# println(basic_prob)
# println("outputted probability, should match earlier number: ", prod(basic_prob))
# println(basic)

number of parameters: 296
[5.657658219949805, 3.59024517995719, 3.326890309822059, 4.214284270747161, 1.0146574823834753, 5.316734226231825, 1.045653435025085, 3.265510875668597, 3.86617772959334, 5.336673772501842, 3.904790039606535, 4.100733213278983, 3.1431366162221748, 2.2379882701023863, 5.203002687781611, 5.330072294492369, 3.641822864710408, 5.697056619909777, 5.498907760754488, 0.918503498902771, 2.568577438213106, 1.5702774097524614, 4.534787278901124, 4.477196954708929, 0.051066926851599505, 0.5983756126508817, 0.5168015276315606, 5.568587818963678, 3.4042779828852576, 0.25338553560428595, 0.5317203121147475, 2.138169916085101, 4.705737525712256, 3.711178539556723, 5.765969442426773, 1.5476687761457164, 1.0361497166365592, 5.827479458523875, 3.340415997706099, 2.993109728525077, 0.21702337341981867, 3.590117452907084, 5.8196274846850375, 2.798051043248435, 5.506453470337522, 3.713593699826292, 1.7637471181730309, 3.995155498390892, 2.588808601339725, 3.063193539692524, 1.3899

In [209]:
T = Float64
println("type: $T")
temp_m = Matrix{T}(undef, dim, dim)
temp_grad_m = Array{T}(undef, nparams, dim, dim)
probabilities = Vector{T}(undef, nq)
grad_probabilities = Matrix{T}(undef, nparams, nq)
optimized_prob, optimized = log_grad_opt(FLOYao.zero_state(nq), p, bitstr, temp_m, temp_grad_m, probabilities, grad_probabilities)
# println("probabilities equal? ", basic_prob == optimized_prob)
# println("grad(log p) equal? ", basic == optimized)
# println(optimized)

type: Float64
iteration 1: 7.768864870071411
iteration 2: 1.4596729278564453
iteration 3: 1.4209260940551758
iteration 4: 1.2885870933532715
iteration 5: 1.2265770435333252
iteration 6: 1.2004070281982422
iteration 7: 1.137178897857666
iteration 8: 1.0821170806884766
iteration 9: 1.05629301071167
iteration 10: 0.9977519512176514
iteration 11: 0.9492061138153076
iteration 12: 0.9082608222961426
iteration 13: 0.8620531558990479
iteration 14: 0.820720911026001
iteration 15: 0.7886989116668701
iteration 16: 0.7472269535064697
iteration 17: 0.7006070613861084
iteration 18: 0.6711230278015137
iteration 19: 0.7278430461883545
iteration 20: 0.6047759056091309
iteration 21: 0.5734779834747314
iteration 22: 0.5272960662841797
iteration 23: 0.5252280235290527
iteration 24: 0.6996059417724609
iteration 25: 0.5586600303649902
iteration 26: 0.4884340763092041
iteration 27: 0.3773930072784424
iteration 28: 0.33634090423583984
iteration 29: 0.3186509609222412
iteration 30: 0.30410218238830566
iteratio

([0.767226418569649, 0.7329191440384488, 0.5500320579277177, 0.6217798475272601, 0.5839837171880942, 0.8270816580719477, 0.5773189193216641, 0.5491392201279944, 0.6272856184357909, 0.4948338816776822  …  0.8746302889783905, 0.7317252508383181, 0.7137262782320282, 0.5809806021377999, 0.9224697600784842, 0.7274348239570277, 0.8106721955847629, 0.4267485344025068, 0.37550910010118227, 0.9999999999999997], [-0.18686248058777824, 0.4885839053936731, 0.16289875069237836, -0.2330871358812557, -0.362504650047298, 0.43098256145599734, -0.18894754712490203, 0.9796548370929408, -0.8082698772450292, -0.22930040240303748  …  -1.3204808803210311e-17, -2.6559185413032414e-17, -1.0161613813335126e-16, 1.2752214485036959e-17, 1.0723242676661534e-16, -2.8391292733070414e-17, 1.6198913058943322e-17, 7.75799119123028e-17, -1.272215208407092e-16, 8.105730729448899e-17])

In [210]:
# println(cos(first(p)))
# println(expect(majoranaindices2kron(nq,1,2), reg))
# println(-sin(first(p)))
# function grad(reg)
#     return expect'(majorana_commutator(nq,1,2), reg => g)[2]   
# end
# println(grad(FLOYao.zero_state(nq)))
# expect'(majorana_commutator(nq,1,2), FLOYao.zero_state(nq) => g)[2]

In [211]:
function prob(theta, x)
    circuit = dispatch(g, theta)
    r = apply(FLOYao.zero_state(nq), circuit)
    return FLOYao.bitstring_probability(r, x)
end

function log_prob(theta, x)
    return log(prob(theta, x))
end

eps_default = 1e-8
function fe_grad_prob(theta, x, eps = eps_default)
    temp_params = copy(theta)
    fe_grad = Vector{Float64}(undef, length(theta))
    for i in 1:nparameters(g)
        temp_params[i] += eps
        plus = log(prob(temp_params, x))
        temp_params[i] -= 2*eps
        minus = log(prob(temp_params, x))
        fe_grad[i] = (plus - minus) / (2*eps)
        temp_params[i] += eps #Resetting temp_params[i] back to original value
    end
    fe_grad
end

using LinearAlgebra

println(abs(prod(basic_prob) - prob(p, bitstr)))
println(basic)
fe = fe_grad_prob(p, bitstr)
println(fe)
norm(basic - fe)

0.998796472555007506495806560655905857128428432869349275658902645024907498963254
[0.03471278825840867]
[-0.18686244854077344, 0.48858395509708374, 0.1628987403807895, -0.2330871558415907, -0.3625046708439983, 0.4309824602761384, -0.1889475484290018, 0.9796547883415904, -0.8082698263052234, -0.2293003918485089, -1.1864177030962302, -0.5895013678498644, 0.7525101190862308, 0.7901224195274619, 0.8420322462345847, -1.2091156546189565, -0.9471921606865947, -0.007035418170567521, -0.8674145596683422, -0.28131648450854196, 0.340368640888316, 0.8206172015475365, -0.07677359614586383, 0.5493159864747214, 0.26948016769467004, -0.2661958711182757, 0.10725828772371772, 0.8359700354411896, -0.14907905968936272, 0.676602728831409, -0.23386456760566965, 0.061342199296411874, 0.8466165668243987, -0.27156945903666674, 1.735449596488313, 2.713029333271859, -4.3368907337767375, -0.1881931547021224, -0.9293635974574043, -1.6156723566992326, -0.583126661189669, 0.4451872430587527, 0.5409739263996157, -0.42

DimensionMismatch: DimensionMismatch: dimensions must match: a has dims (Base.OneTo(1),), b has dims (Base.OneTo(296),), mismatch at 1

In [212]:
function analytical_prob(theta, x)
    angle = first(theta) / 2
    ret = cos(angle)^2
    return x[1] == 0 ? ret : 1-ret
end

function analytical_grad(theta, x)
    angle = first(theta) / 2
    ret = sin(angle) * cos(angle)
    return x[1] == 0 ? -ret : ret
end

function analytical_log_grad(theta, x)
    return analytical_grad(theta, x) / analytical_prob(theta, x)
end

println("analytical probability: ", analytical_prob(p, bitstr))
analytical = analytical_log_grad(p, bitstr)
println("match with finite difference? ", first(fe) ≈ analytical)
println("match with algorithm output? ", first(optimized) ≈ analytical)
analytical

analytical probability: 0.905327304783741
match with finite difference? false
match with algorithm output? false


0.3233773226847024