Rough sketch of a proposal

In [1]:
using TensorKit, TensorOperations, BenchmarkTools, Strided

In [2]:
# Restrict both BLAS and Strided to a single thread before benchmarking.
import LinearAlgebra
LinearAlgebra.BLAS.set_num_threads(1)
Strided.set_num_threads(1)

1

In [3]:
# Report how many Julia threads this session was started with.
using Base.Threads
nthreads()

1

In [4]:
# Rather than parsing the generated @tensor code a second time, I simply run the tensor contraction code once and record which contractions/additions/... are performed.
# For this I use a "SymbolicTensorMap" that only keeps track of its structure and type.

# Data-free stand-in for a tensor: the first type parameter carries whatever was
# passed as `a` to the constructor (the tensor type in this notebook), and the
# `structure` field stores `b` (the tensor's space in this notebook).
struct SymbolicTensorMap{A,B}
    structure::B

    # `a` is lifted into the type domain; only `b` is stored as a value.
    function SymbolicTensorMap(a, b)
        return new{a,typeof(b)}(b)
    end
end

# Recover the first type parameter of a `SymbolicTensorMap`.
function ttype(::SymbolicTensorMap{A}) where {A}
    return A
end

# The scalar type of a symbolic tensor is the scalar type of the tensor type it stands for.
function TensorOperations.scalartype(a::SymbolicTensorMap)
    return TensorOperations.scalartype(ttype(a))
end

In [5]:
# Precomputed allocation recipe for a `TensorMap` with codomain `codom` and
# domain `dom`.
#
# Constructing a `fast_init(codom, dom, stortype)` computes the block structure
# (row/column ranges per fusion tree and the block sizes) once. Calling the
# resulting object with no arguments allocates a new `TensorMap` whose storage is
# created with `undef`, i.e. its contents are NOT initialised.
#
# Fields:
#   codom, dom - the product spaces of the tensor
#   rowr, colr - per-sector fusion-tree => range dictionaries
#                (left undefined when the sector type is `Trivial`)
#   dims       - `(sector, nrows, ncols)` for every block
#                (left undefined when the sector type is `Trivial`)
struct fast_init{S, N₁, N₂, I, A, F₁, F₂}
    codom::ProductSpace{S,N₁}
    dom::ProductSpace{S,N₂}
    rowr::TensorKit.SectorDict{I,TensorKit.FusionTreeDict{F₁,UnitRange{Int}}}
    colr::TensorKit.SectorDict{I,TensorKit.FusionTreeDict{F₂,UnitRange{Int}}}
    dims::Vector{Tuple{I,Int,Int}}
    function fast_init(codom::ProductSpace{S,N₁},
        dom::ProductSpace{S,N₂},stortype) where {S<:IndexSpace,N₁,N₂}

        I = sectortype(S)
        if I === Trivial
            # No symmetry: only the spaces are needed; the remaining fields stay undefined.
            return new{S, N₁, N₂, I, stortype,Nothing,Nothing}(codom,dom)
        end

        blocksectoriterator = blocksectors(codom ← dom)
        rowr, rowdims = TensorKit._buildblockstructure(codom, blocksectoriterator)
        colr, coldims = TensorKit._buildblockstructure(dom, blocksectoriterator)

        F₁ = TensorKit.fusiontreetype(I, N₁)
        F₂ = TensorKit.fusiontreetype(I, N₂)

        A = TensorKit.SectorDict{I,stortype}

        blockdims = Tuple{I,Int,Int}[(c, rowdims[c], coldims[c]) for c in blocksectoriterator]
        return new{S, N₁, N₂, I, A, F₁, F₂}(codom, dom, rowr, colr, blockdims)
    end

    # Allocation for the `Trivial` sector type: a single uninitialised array.
    function (d::fast_init{S, N₁, N₂, I, A, Nothing, Nothing})() where {S, N₁, N₂, I, A}
        data = A(undef,(dim(d.codom), dim(d.dom)))
        return TensorMap{S,N₁,N₂,Trivial,A,Nothing,Nothing}(data, d.codom, d.dom)
    end

    # Allocation for symmetric tensors: one uninitialised block per sector, reusing
    # the precomputed row/column range dictionaries.
    function (d::fast_init{S, N₁, N₂, I, TensorKit.SectorDict{I,A}, F₁, F₂})() where {S, N₁, N₂, I, A, F₁, F₂}
        data::TensorKit.SectorDict{I,A} = TensorKit.SectorDict(c => A(undef,(rd,rc)) for (c,rd,rc) in d.dims)
        return TensorMap{S,N₁,N₂,I,TensorKit.SectorDict{I,A} ,F₁,F₂}(data, d.codom, d.dom, d.rowr, d.colr)
    end

end

In [6]:
# SU(2)-symmetric test spaces.
virtspace = Rep[SU₂](i => 20 for i in 0:10);
ospace = Rep[SU₂](0 => 5,1 => 2);
pspace = Rep[SU₂](1 => 1);
# Build the allocation recipe once, then time only the allocation itself.
insym = fast_init(virtspace*pspace,virtspace*pspace*pspace,Matrix{ComplexF64})
@benchmark insym()

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m 2.400 μs[22m[39m … [35m 1.808 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% …  0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m 2.760 μs              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m11.841 μs[22m[39m ± [32m49.747 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m63.22% ± 15.50%

  [34m█[39m[39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m▁[39m [39m [39m [39m▁
  [34m█[39m[39m▇[32m▄[39m[39m▄

In [7]:
# Baseline: the regular `TensorMap(undef, ...)` constructor for the same spaces.
@benchmark TensorMap(undef,ComplexF64,virtspace*pspace,virtspace*pspace*pspace)

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m38.490 μs[22m[39m … [35m  4.607 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 91.76%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m47.819 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m93.349 μs[22m[39m ± [32m228.909 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m38.33% ± 16.19%

  [34m█[39m[39m▆[32m▃[39m[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁
  [34m█[39m[39m█[32m█[

In [8]:
# I go through the generated @tensor code once and generate two sets of code: one that takes SymbolicTensorMaps and is run in the constructor of the struct, and one that is run when the struct is actually applied and executes the contraction.

# Apply `split_execution` to every argument of `ex` and reassemble the results.
# Returns two expressions with the same head as `ex` (built from the first and
# second component of each split argument) plus the concatenation of all third
# components.
function subsplit(ex)
    pieces = split_execution.(ex.args)

    first_expr = Expr(ex.head, (piece[1] for piece in pieces)...)
    second_expr = Expr(ex.head, (piece[2] for piece in pieces)...)

    collected = []
    foreach(piece -> append!(collected, piece[3]), pieces)

    return (first_expr, second_expr, collected)
end

# Split a parsed `@tensor` expression into two variants.
#
# Returns `(setup, run, mediators)`:
#   setup     - the expression with every recognised TensorOperations call
#               replaced by its `create_mediated_*` counterpart; that call's result
#               is destructured into the original left-hand side and a fresh
#               mediator variable
#   run       - the expression with the same calls replaced by their `mediated_*`
#               counterpart, which receives the mediator as first argument
#   mediators - the generated mediator variable names
#
# Assignments and `:block` expressions are traversed recursively; any other
# expression is returned unchanged in both variants.
function split_execution(ex::Expr)
    # Built inside the function because the mapped functions are defined further
    # down in the file.
    splitmap = Dict(GlobalRef(TensorOperations,:tensorcontract!) => (create_mediated_tensorcontract!,mediated_tensorcontract!),
                    GlobalRef(TensorOperations,:tensoralloc_contract) => (create_mediated_tensoralloc_contract,mediated_tensoralloc_contract),
                    GlobalRef(TensorOperations,:tensoradd!) => (create_mediated_tensoradd!,mediated_tensoradd!),
                    GlobalRef(TensorOperations,:tensoralloc_add) => (create_mediated_tensoralloc_add,mediated_tensoralloc_add),
                    GlobalRef(TensorOperations,:tensortrace!) => (create_mediated_tensortrace!,mediated_tensortrace!),)

    if ex.head == :(=) && length(ex.args) == 2
        lhs, rhs = ex.args
        if rhs isa Expr && rhs.head == :call && haskey(splitmap, rhs.args[1])
            (mapped_1,mapped_2) = splitmap[rhs.args[1]]
            callargs = rhs.args[2:end]
            nvar = gensym()
            a = quote
                ($(lhs),$(nvar)) = $(mapped_1)($(callargs...))
            end
            b = quote
                $(lhs) = $(mapped_2)($(nvar),$(callargs...))
            end
            return (a,b,[nvar])
        end

        return subsplit(ex)
    elseif ex.head == :block
        return subsplit(ex)
    else
        # Anything else is passed through untouched; the head is printed so that
        # unhandled constructs are visible while experimenting.
        @show ex.head, ex.args
        return (ex,ex,[])
    end
end

# Leaves of the expression tree (symbols, literals, `QuoteNode`s, `GlobalRef`s,
# `LineNumberNode`s, ...) are identical in both variants and add no mediators.
split_execution(ex) = (ex,ex,[])

split_execution (generic function with 2 methods)

In [9]:
# Precompute everything needed to permute a tensor with space `sp_src` into a
# tensor with space `sp_dst` using the index permutation `(p1, p2)`.
#
# Returns `(table, p1, p2)`. Every entry of `table` is a tuple
#   (α, i_src, r_src, c_src, d_src, i_dst, r_dst, c_dst, d_dst)
# where `α` (converted to `elt`) is the coefficient returned by `permute` on the
# fusion-tree pair, `i_*` is the position of the coupled sector in the respective
# block structure, `r_*`/`c_*` are the row/column ranges of the fusion trees inside
# that block, and `d_*` are the dimensions of the uncoupled sectors.
#
# Throws a `SectorMismatch` if a permuted fusion tree couples to a sector that is
# not a block sector of `sp_dst`.
function generate_permute_table(elt,sp_src,sp_dst, p1::IndexTuple{N₁},p2::IndexTuple{N₂}) where {N₁,N₂}
    # Only the range dictionaries are needed; the block dimensions are discarded.
    blocksectoriterator_src = blocksectors(sp_src)
    rowr_src, _ = TensorKit._buildblockstructure(codomain(sp_src), blocksectoriterator_src)
    colr_src, _ = TensorKit._buildblockstructure(domain(sp_src), blocksectoriterator_src)

    blocksectoriterator_dst = blocksectors(sp_dst)
    rowr_dst, _ = TensorKit._buildblockstructure(codomain(sp_dst), blocksectoriterator_dst)
    colr_dst, _ = TensorKit._buildblockstructure(domain(sp_dst), blocksectoriterator_dst)

    N = length(p1)+length(p2)
    table = Tuple{elt,Int,UnitRange{Int},UnitRange{Int},NTuple{N,Int},Int,UnitRange{Int},UnitRange{Int},NTuple{N,Int}}[]
    for (i_src,(s_src,f1_list_src)) in enumerate(rowr_src)
        f2_list_src = colr_src[s_src]

        for (f1_src,r_src) in f1_list_src, (f2_src,c_src) in f2_list_src
            d_src = (dims(codomain(sp_src), f1_src.uncoupled)..., dims(domain(sp_src), f2_src.uncoupled)...)
            for ((f1_dst,f2_dst),α) in permute(f1_src, f2_src, p1, p2)
                d_dst = (dims(codomain(sp_dst), f1_dst.uncoupled)..., dims(domain(sp_dst), f2_dst.uncoupled)...)

                s_dst = f1_dst.coupled

                # `searchsortedfirst` returns an insertion point, so verify that the
                # sector is really present before using the index.
                i_dst = searchsortedfirst(rowr_dst.keys,s_dst)
                if i_dst > length(rowr_dst.keys) || rowr_dst.keys[i_dst] != s_dst
                    throw(SectorMismatch("sector $(s_dst) is not a block sector of the destination space"))
                end

                r_dst = rowr_dst.values[i_dst][f1_dst]
                c_dst = colr_dst.values[i_dst][f2_dst]

                push!(table,(α,i_src,r_src,c_src,d_src,i_dst,r_dst,c_dst,d_dst))
            end
        end
    end

    return (table,p1,p2)
end

# Compute `t_dst = beta * t_dst + permuted(t_src)` using a table produced by
# `generate_permute_table` (`bulk` is the `(table, p1, p2)` tuple it returns).
# Returns `t_dst`.
#
# `t_dst` may have been allocated with `undef`. Scaling such memory by a numeric
# zero would keep NaN/Inf entries (`NaN * 0 == NaN`), so a numeric zero `beta` is
# replaced by `false`, which acts as a strong zero (`NaN * false == 0.0`). Any
# other `beta` is passed to `rmul!` unchanged.
function execute_permute_table!(t_dst,t_src,bulk,beta=false)
    (table,p1,p2) = bulk

    scale = (beta isa Union{Integer,AbstractFloat,Complex} && iszero(beta)) ? false : beta
    rmul!(t_dst,scale)

    perm = (p1...,p2...)
    @inbounds for (α,s_src,r_src,c_src,d_src,s_dst,r_dst,c_dst,d_dst) in table
        # View the fusion-tree sub-block as an N-dimensional array on both sides.
        view_dst = sreshape(StridedView(t_dst.data.values[s_dst])[r_dst,c_dst],d_dst)
        view_src = sreshape(StridedView(t_src.data.values[s_src])[r_src,c_src],d_src)

        axpy!(α,permutedims(view_src,perm), view_dst)
    end

    return t_dst
end

execute_permute_table! (generic function with 2 methods)

In [10]:
# Symbolic counterpart of `tensorcontract!`: from the spaces of `A`, `B` and `C`
# alone, precompute the allocation recipes and permutation tables needed to
# contract real tensors of that shape later on.
#
# Returns `(C, mediator)` with
#   mediator = (fast_init_A, tbl_A, fast_init_B, tbl_B, fast_init_C′, tbl_C′).
#
# NOTE(review): the commented-out reference code this was adapted from also
# applied twists for fermionic braiding and reused `C` directly when it shared the
# permuted layout; neither is implemented here.
function create_mediated_tensorcontract!(C::SymbolicTensorMap, pC, A::SymbolicTensorMap, pA, conjA, B::SymbolicTensorMap, pB, conjB, α=1, β=0 , backend=nothing)
    S = spacetype(A.structure)
    BraidingStyle(sectortype(S)) isa SymmetricBraiding ||
        throw(SectorMismatch("only tensors with symmetric braiding rules can be contracted; try `@planar` instead"))

    # Plan the permutation of one operand: returns the permuted space, the
    # allocation recipe for it and the permutation table leading to it.
    function plan_operand(T, pT, conjT)
        src_space = if conjT == :N
            T.structure
        else
            conj(codomain(T.structure)) ← conj(domain(T.structure))
        end
        new_cod = ProductSpace{S,length(pT[1])}(map(n -> src_space[n], pT[1]))
        new_dom = ProductSpace{S,length(pT[2])}(map(n -> dual(src_space[n]), pT[2]))
        dst_space = new_cod ← new_dom
        allocator = fast_init(codomain(dst_space), domain(dst_space), storagetype(ttype(T)))
        table = generate_permute_table(scalartype(ttype(T)), src_space, dst_space, pT[1], pT[2])
        return (dst_space, allocator, table)
    end

    (sp_dst_A, fast_init_A, tbl_A) = plan_operand(A, pA, conjA)
    (sp_dst_B, fast_init_B, tbl_B) = plan_operand(B, pB, conjB)

    # The raw product A′ * B′ lives in codomain(A′) ← domain(B′); it is permuted
    # into the layout of `C` afterwards.
    product_space = codomain(sp_dst_A) ← domain(sp_dst_B)
    fast_init_C′ = fast_init(codomain(sp_dst_A), domain(sp_dst_B), storagetype(ttype(C)))
    tbl_C′ = generate_permute_table(scalartype(ttype(C)), product_space, C.structure, pC[1], pC[2])

    return (C, (fast_init_A, tbl_A, fast_init_B, tbl_B, fast_init_C′, tbl_C′))
end

# Execute a contraction with the plan (`mediator`) built by
# `create_mediated_tensorcontract!`: permute `A` and `B` where needed, multiply
# them, and accumulate the permuted product into `C` (scaled by `β`). Returns `C`.
#
# NOTE(review): `conjA`, `conjB` and `pC` are not consulted in this function; the
# data of `A` and `B` is used as given — confirm this is intended for conjugated
# operands.
function mediated_tensorcontract!(mediator,C, pC, A, pA, conjA, B, pB, conjB, α=1, β=0 , backend=nothing)
    (alloc_A, table_A, alloc_B, table_B, alloc_prod, table_C) = mediator

    # True when `p` is the identity permutation AND keeps the codomain/domain
    # split of `T`, in which case `T` can be used without copying.
    function keeps_layout(T, p)
        flat = (p[1]..., p[2]...)
        return flat == ntuple(identity, length(flat)) &&
               length(p[1]) == length(codomain(T)) &&
               length(p[2]) == length(domain(T))
    end

    Ap = keeps_layout(A, pA) ? A : execute_permute_table!(alloc_A(), A, table_A)
    Bp = keeps_layout(B, pB) ? B : execute_permute_table!(alloc_B(), B, table_B)

    product = mul!(alloc_prod(), Ap, Bp, α)
    execute_permute_table!(C, product, table_C, β)

    return C
end

# Symbolic counterpart of `tensoralloc_contract`: work out the space of the
# contraction result from the open indices of `A` (`pA[1]`) and `B` (`pB[2]`).
# Returns `(C, mediator)` where `C` is a `SymbolicTensorMap` for the result and
# `mediator` is the `fast_init` recipe that allocates it.
function create_mediated_tensoralloc_contract(TC, pC::Index2Tuple{N₁,N₂}, A::SymbolicTensorMap, pA, conjA, B::SymbolicTensorMap, pB, conjB, istemp=false, backend::TensorOperations.Backend...)  where {N₁,N₂}
    open_A = map(i -> TensorOperations.flag2op(conjA)(A.structure[i]), pA[1])
    open_B = map(i -> TensorOperations.flag2op(conjB)(B.structure[i]), pB[2])
    open_spaces = (open_A..., open_B...)

    S = spacetype(ttype(A))
    cod = ProductSpace{S,N₁}(map(i -> open_spaces[i], pC[1]))
    dom = ProductSpace{S,N₂}(map(i -> dual(open_spaces[i]), pC[2]))
    stortype = TensorKit.similarstoragetype(ttype(A),TC)

    symbolic_C = SymbolicTensorMap(tensormaptype(S,N₁, N₂, stortype),dom → cod)
    return (symbolic_C, fast_init(cod,dom,stortype))
end

# Runtime counterpart of `tensoralloc_contract`: every argument except the
# mediator is ignored; the result is whatever calling `mediator` returns.
function mediated_tensoralloc_contract(mediator,TC, pC::Index2Tuple{N₁,N₂}, A, pA, conjA, B, pB, conjB, istemp=false, backend::TensorOperations.Backend...)  where {N₁,N₂}
    return mediator()
end

mediated_tensoralloc_contract (generic function with 2 methods)

In [11]:
# Symbolic counterpart of `tensoradd!`: nothing is precomputed, so `C` is passed
# through and the mediator is the value `nothing` (not the type `Nothing`).
function create_mediated_tensoradd!(C, pC, A, conjA, α=1, β=1 , backend=nothing)
    return (C, nothing)
end

# Runtime counterpart of `tensoradd!`: the mediator is ignored and the remaining
# arguments are forwarded unchanged to `TensorOperations.tensoradd!`.
mediated_tensoradd!(mediator, args...) = TensorOperations.tensoradd!(args...)

# Symbolic counterpart of `tensoralloc_add`: the result has the spaces of `A`
# selected by `pC[1]` as codomain and the duals of those selected by `pC[2]` as
# domain. Returns `(C, mediator)` where `C` is a `SymbolicTensorMap` for the
# result and `mediator` is the `fast_init` recipe that allocates it.
function create_mediated_tensoralloc_add(TC, pC::Index2Tuple{N₁,N₂}, A::SymbolicTensorMap, conjA, istemp=false, backend::TensorOperations.Backend...)  where {N₁,N₂}
    # `pC` indexes into `A` directly. Collecting the selected spaces and then
    # indexing that collection with `pC` again would apply the permutation twice.
    cod_spaces = map(p -> TensorOperations.flag2op(conjA)(A.structure[p]), pC[1])
    dom_spaces = map(p -> dual(TensorOperations.flag2op(conjA)(A.structure[p])), pC[2])

    S = spacetype(ttype(A))
    cod = ProductSpace{S,N₁}(cod_spaces)
    dom = ProductSpace{S,N₂}(dom_spaces)
    stortype = TensorKit.similarstoragetype(ttype(A),TC)
    C = SymbolicTensorMap(tensormaptype(S,N₁, N₂, stortype),dom → cod)

    return (C,fast_init(cod,dom,stortype))
end

# Runtime counterpart of `tensoralloc_add`: every argument except the mediator is
# ignored; the result is whatever calling `mediator` returns.
function mediated_tensoralloc_add(mediator,TC, pC::Index2Tuple{N₁,N₂}, A, conjA, istemp=false, backend::TensorOperations.Backend...)  where {N₁,N₂}
    return mediator()
end

mediated_tensoralloc_add (generic function with 2 methods)

In [12]:
# Symbolic counterpart of `tensortrace!`: nothing is precomputed, so `C` is passed
# through and the mediator is the value `nothing` (not the type `Nothing`).
function create_mediated_tensortrace!(C, pC, A, pA, conjA, α=1, β=0 , backend=nothing)
    return (C, nothing)
end

# Runtime counterpart of `tensortrace!`: the mediator is ignored and the remaining
# arguments are forwarded unchanged to `TensorOperations.tensortrace!`.
mediated_tensortrace!(mediator, args...) = TensorOperations.tensortrace!(args...)

mediated_tensortrace! (generic function with 1 method)

In [13]:
# `@tightloop_tensor name [kwargs...] tensorexpr`
#
# Define a struct called `name` that precomputes everything `split_execution` can
# extract from the `@tensor`-style expression `tensorexpr`.
#
# The generated struct has one field per mediator returned by `split_execution`:
#   * `name(; tensor = (tensortype, space), ...)` wraps every keyword in a
#     `SymbolicTensorMap`, runs the "setup" variant of the parsed code and stores
#     the resulting mediators;
#   * calling an instance with the actual tensors as keywords runs the "run"
#     variant of the parsed code using the stored mediators.
# Leading arguments before the tensor expression are handled like the keyword
# arguments of `@tensor`.
macro tightloop_tensor(name,args::Vararg{Expr})
    isempty(args) && throw(ArgumentError("No tensor expression passed to `@tightloop_tensor`"))

    tensorexpr = args[end]
    if length(args) == 1
        parser = TensorOperations.defaultparser
    else
        # These helpers are used unqualified in the original but qualified like
        # `defaultparser` here, so the keyword branch resolves them.
        kwargs = TensorOperations.parse_tensor_kwargs(args[1:(end - 1)])
        parser = TensorOperations.tensorparser(tensorexpr, kwargs...)
    end

    parsed = parser(tensorexpr)

    (a,b,c) = split_execution(parsed)

    # One type parameter and one field per mediator.
    c_types = [gensym() for _ in c]
    declaration = Expr(:block, (Expr(:(::), c_v, c_t) for (c_v,c_t) in zip(c,c_types))...)

    input_symbols =  TensorOperations.getinputtensorobjects(tensorexpr)
    output_symbols =  TensorOperations.getoutputtensorobjects(tensorexpr)

    # Every tensor becomes a keyword argument defaulting to `nothing`.
    arg_symbols = [input_symbols...,output_symbols...]
    kwarg_expr = Expr(:parameters,[Expr(:kw,s,nothing) for s in arg_symbols]...)
    # In the constructor each keyword `s` is turned into `SymbolicTensorMap(s[1], s[2])`.
    abstract_eval_call = Expr(:parameters,[Expr(:kw,s,Expr(:call,:SymbolicTensorMap,Expr(:call,:getindex,s,1),Expr(:call,:getindex,s,2))) for s in arg_symbols]...)

    # When an instance is called, the mediators are first unpacked into locals
    # with the names the "run" code expects.
    instantiated_struct_name = gensym()
    access_inner_fields = Expr(:block, (Expr(:(=), c_v, Expr(:., instantiated_struct_name, QuoteNode(c_v))) for c_v in c)...)

    return esc(quote
        struct $(name){$(c_types...)}
            $(declaration)
            function $(name)($(kwarg_expr))
                tup = abstract_eval($(abstract_eval_call))
                new{typeof.(tup)...}(tup...)
            end

            function abstract_eval($(kwarg_expr))
                $(a)
                return tuple($(c...))
            end
            function ($(instantiated_struct_name)::$name)($(kwarg_expr))
                $(access_inner_fields)
                $(b)
            end
        end
    end)
end

@tightloop_tensor (macro with 1 method)

In [14]:
# Generate the struct `ac_eff` for this four-tensor contraction.
@tightloop_tensor ac_eff y[-1 -2;-3] := le[-1 2;1]*O[2 -2;3 4]*x[1 3;5]*re[5 4;-3]

(ex.head, ex.args) = (:call, Any[:(TensorOperations.promote_contract), :(TensorOperations.scalartype(le)), :(TensorOperations.scalartype(x))])
(ex.head, ex.args) = 

(:call, Any[:(TensorOperations.promote_contract), :(TensorOperations.scalartype(var"####y_A#298_A#299")), :(TensorOperations.scalartype(O))])
(ex.head, ex.args) = (:call, Any[:(TensorOperations.tensorfree!), Symbol("####y_A#298_A#299")])
(ex.head, ex.args) = (:call, Any[:(TensorOperations.promote_contract), :(TensorOperations.scalartype(var"##y_A#298")), :(TensorOperations.scalartype(re))])
(ex.head, ex.args) = (:call, Any[:(TensorOperations.tensorfree!), Symbol("##y_A#298")])


In [15]:
# SU(2)-symmetric test case: spaces, random input tensors and the precomputed plan.
virtspace = Rep[SU₂](i => 20 for i in 0:10);
ospace = Rep[SU₂](0 => 5,1 => 2);
pspace = Rep[SU₂](1 => 1);

t_le = TensorMap(rand,ComplexF64,virtspace*ospace',virtspace);
t_re = TensorMap(rand,ComplexF64,virtspace*ospace,virtspace);
t_ac = TensorMap(rand,ComplexF64,virtspace*pspace,virtspace);
t_o = TensorMap(rand,ComplexF64,ospace*pspace,pspace*ospace);

# The constructor only needs each tensor's type and space, not its data.
factory = ac_eff(le = (typeof(t_le),space(t_le)),re = (typeof(t_re),space(t_re)),O = (typeof(t_o),space(t_o)),x = (typeof(t_ac),space(t_ac)));

ac_eff{fast_init{GradedSpace{SU2Irrep, TensorKit.SortedVectorDict{SU2Irrep, Int64}}, 2, 2, SU2Irrep, TensorKit.SortedVectorDict{SU2Irrep, Matrix{ComplexF64}}, FusionTree{SU2Irrep, 2, 0, 1, Nothing}, FusionTree{SU2Irrep, 2, 0, 1, Nothing}}, Tuple{fast_init{GradedSpace{SU2Irrep, TensorKit.SortedVectorDict{SU2Irrep, Int64}}, 2, 1, SU2Irrep, TensorKit.SortedVectorDict{SU2Irrep, Matrix{ComplexF64}}, FusionTree{SU2Irrep, 2, 0, 1, Nothing}, FusionTree{SU2Irrep, 1, 0, 0, Nothing}}, Tuple{Vector{Tuple{ComplexF64, Int64, UnitRange{Int64}, UnitRange{Int64}, Tuple{Int64, Int64, Int64}, Int64, UnitRange{Int64}, UnitRange{Int64}, Tuple{Int64, Int64, Int64}}}, Tuple{Int64, Int64}, Tuple{Int64}}, fast_init{GradedSpace{SU2Irrep, TensorKit.SortedVectorDict{SU2Irrep, Int64}}, 1, 2, SU2Irrep, TensorKit.SortedVectorDict{SU2Irrep, Matrix{ComplexF64}}, FusionTree{SU2Irrep, 1, 0, 0, Nothing}, FusionTree{SU2Irrep, 2, 0, 1, Nothing}}, Tuple{Vector{Tuple{ComplexF64, Int64, UnitRange{Int64}, UnitRange{Int64}, Tup

In [16]:
# Time the precomputed contraction on the actual tensors.
@benchmark $factory(le = $t_le, re=$t_re, x = $t_ac, O = $t_o)

BenchmarkTools.Trial: 1268 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m2.750 ms[22m[39m … [35m9.057 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 32.62%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m3.278 ms             [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m3.916 ms[22m[39m ± [32m1.349 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m14.96% ± 18.05%

  [39m [39m [39m▂[39m▃[39m▆[39m█[39m▃[34m▁[39m[39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▄[39m▅[39m█[39m█[39m█[39m█[39m█[34m█[39m

In [17]:
# Reference implementation: the same contraction as `ac_eff`, evaluated with the
# regular `@tensor` macro on every call.
function slowcontract(; le=nothing, re=nothing, x=nothing, O=nothing)
    @tensor y[-1 -2;-3] := le[-1 2;1]*O[2 -2;3 4]*x[1 3;5]*re[5 4;-3]
    return y
end
# Time the plain `@tensor` reference on the same tensors.
@benchmark slowcontract(le = $t_le, re=$t_re, x = $t_ac, O = $t_o)

BenchmarkTools.Trial: 997 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m3.654 ms[22m[39m … [35m10.071 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 41.74%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m4.234 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m4.995 ms[22m[39m ± [32m 1.572 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m14.85% ± 18.09%

  [39m [39m [39m [39m█[39m█[39m▆[34m▅[39m[39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▂[39m▄[39m▇[39m█[39m█[39m█[34m█[3

In [18]:
# Norm of the difference between the reference result and the precomputed plan's result.
norm(slowcontract(le = t_le, re=t_re, x = t_ac, O = t_o)- factory(le = t_le, re=t_re, x = t_ac, O = t_o)) # bit worrying

2.419564129611806e-10

In [35]:
# Same experiment with an SU(2)×U(1)×ℤ₂ product symmetry.
virtspace = Rep[SU₂×U₁×ℤ₂]((i,j,b) => 10 for i in 0:5,j in -3:3,b in 0:1);
ospace = Rep[SU₂×U₁×ℤ₂]((i,j,b) => 2 for i in 0:1,j in -1:1,b in 0:1);
pspace = Rep[SU₂×U₁×ℤ₂]((i,j,b) => 1 for i in 1:1,j in 0:0,b in 0:0);

t_le = TensorMap(rand,ComplexF64,virtspace*ospace',virtspace);
t_re = TensorMap(rand,ComplexF64,virtspace*ospace,virtspace);
t_ac = TensorMap(rand,ComplexF64,virtspace*pspace,virtspace);
t_o = TensorMap(rand,ComplexF64,ospace*pspace,pspace*ospace);

# Rebuild the plan for the new spaces.
factory = ac_eff(le = (typeof(t_le),space(t_le)),re = (typeof(t_re),space(t_re)),O = (typeof(t_o),space(t_o)),x = (typeof(t_ac),space(t_ac)))

ac_eff{fast_init{GradedSpace{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, TensorKit.SortedVectorDict{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, Int64}}, 2, 2, TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, TensorKit.SortedVectorDict{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, Matrix{ComplexF64}}, FusionTree{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, 2, 0, 1, Nothing}, FusionTree{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, 2, 0, 1, Nothing}}, Tuple{fast_init{GradedSpace{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, TensorKit.SortedVectorDict{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, Int64}}, 2, 1, TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, TensorKit.SortedVectorDict{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, Matrix{ComplexF64}}, FusionTree{TensorKit.ProductSector{Tuple{SU2Irrep, U1Irrep, Z2Irrep}}, 2, 0, 1, Nothing}, FusionTree{T

In [36]:
# Time the precomputed contraction for the product-symmetry tensors.
@benchmark $factory(le = $t_le, re=$t_re, x = $t_ac, O = $t_o)

BenchmarkTools.Trial: 135 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m30.659 ms[22m[39m … [35m46.420 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 14.42%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m37.223 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m13.73%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m37.262 ms[22m[39m ± [32m 3.457 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m10.21% ±  6.20%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m▁[39m▁[39m█[34m▇[39m[39m▄[39m [39m▁[39m [39m▂[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▅[39m▃[39m█[39m▆[39m▅[39m▃[39m

In [37]:
# Time the plain `@tensor` reference for the product-symmetry tensors.
@benchmark slowcontract(le = $t_le, re=$t_re, x = $t_ac, O = $t_o)

BenchmarkTools.Trial: 34 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m144.670 ms[22m[39m … [35m174.785 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 4.19%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m150.053 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m4.04%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m151.278 ms[22m[39m ± [32m  6.092 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m3.95% ± 0.72%

  [39m [39m [39m [39m [39m [39m▄[39m█[39m▄[39m▁[39m [39m▁[34m▄[39m[39m [32m▄[39m[39m [39m▄[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▆[39m▁[39m▁[39m▁

In [38]:
# Norm of the difference between the reference result and the precomputed plan's result.
norm(slowcontract(le = t_le, re=t_re, x = t_ac, O = t_o)- factory(le = t_le, re=t_re, x = t_ac, O = t_o)) # bit worrying

4.5690187993546645e-11