In [2]:
using LinearAlgebra, Combinatorics, StatsBase
using StaticArrays, Base.Threads, Random

In [18]:
using LinearAlgebra, Combinatorics, StatsBase
using StaticArrays, Base.Threads, Random
# 1. Code has many allocations -- potentially  bottleneck
#    5x speedup by using StaticArrays for small sizes

function __lis(::Val{k}) where k
    A = @SArray randn(k,k)
    B = @SArray randn(k,k)
    U, = qr(A + im* B) # random jxj unitary matrix with Haar measure
  # vec = map(_ -> exp(2*π*im*rand()), @SArray zeros(k))
       vec =  2*π*im* @SArray rand(k)
       U *= Diagonal(vec)
      #   U *= [exp(2*π*im*rand()) for i=1:n]
    
    v = (abs(tr(U)))^(2k)
end

function __lis(k::Int)
    U, = qr(randn(k,k)+im*randn(k,k)) # random jxj unitary matrix with Haar measure
    U *= Diagonal([exp(2*π*im*rand()) for i = 1:k])
    v = (abs(tr(U)))^(2k)
end
function lis(k)
    return false # if k <= 8 # bumping this up will increase compile time
        __lis(Val(k))
    else
        __lis(k)
    end
end
# 2. Two levels of parallelism
#   a. `1:k` 
#     - Easy to exploit
#     - no slow down or races from just adding @threads
#     - at most `k` speedup
#     - work is not balanced
#     - use `@spawn`
#   b. `1:t`
#     - harder to exploit -- reductions
function lis_mc(k,t=100_000)
    # monte carlo simulation
    # k is the permutation size
    c = zeros(k)
    @sync for j in 1:k
        Threads.@spawn begin 
            v = 0.0
            for _ in 1:t
                v += lis($j)
            end
            c[j] = v/t
        end
    end
    return c
end

LoadError: syntax: "function" at In[18]:23 expected "end", got "else"

In [3]:
function patiencesort(p)
    # p : Permutation
    # Returns length of longest increasing subsequence
    pile_tops = Int[]
    for α ∈ p       
 
      
        whichpile = searchsortedfirst( pile_tops, α, lt = <) # first pile where α is smaller
     
        
        if whichpile ≤ length(pile_tops)
            pile_tops[whichpile] = α   # put α on top of a pile  or ..
        else
            push!(pile_tops, α)        # create a new pile
        end
        
        
    end
    return length(pile_tops)
end

patiencesort (generic function with 1 method)

In [4]:
using LinearAlgebra, Combinatorics, StatsBase
using StaticArrays, Base.Threads
# 1. Code has many allocations -- potentially  bottleneck
#    5x speedup by using StaticArrays for small sizes
function __lis(::Val{j},k) where j
    A = @SArray randn(j, j)
    B = @SArray randn(j, j)
    U, = qr(A+im * B) # random jxj unitary matrix with Haar measure
    vec = map(_ -> exp(2*π*im*rand()), @SArray zeros(j))
    U *= Diagonal(vec)
    v = (abs(tr(U)))^(2k)
end
for k in 1:8
    precompile(__lis, (Val(k),))
end
function __lis(j::Int, k)
    U, = qr(randn(j,j)+im*randn(j,j)) # random jxj unitary matrix with Haar measure
    U *= Diagonal([exp(2*π*im*rand()) for i = 1:j])
    v = (abs(tr(U)))^(2k)
end
function lis(j, k)
    return if k <= 8 # bumping this up will increase compile time
        __lis(Val(j), k)
    else
        __lis(j, k)
    end
end
# 2. Two levels of parallelism
#   a. `1:k` 
#     - Easy to exploit
#     - no slow down or races from just adding @threads
#     - at most `k` speedup
#     - work is not balanced
#     - use `@spawn`
#   b. `1:t`
#     - harder to exploit -- reductions
#     - Can't use `@threads`
function lis_mc(k,t=100_000)
    # monte carlo simulation
    # k is the permutation size
    c = zeros(k)
    @sync for j in 1:k
        Threads.@spawn begin
            v = 0
            for _ in 1:t
                v += lis($j, k)
            end
            c[j] = sum(v)/t
        end
    end
    return c
end

lis_mc (generic function with 2 methods)

In [5]:
function lis_theory(k)
    # exactly counts permutations with lis ≤ j for j=1,2,...,k
    lis = [patiencesort(π) for π ∈ permutations(1:k)]
    [sum(lis.≤j) for j=1:k]
end

lis_theory (generic function with 1 method)

In [6]:
p = randperm(9)

9-element Array{Int64,1}:
 7
 6
 5
 1
 4
 3
 9
 8
 2

In [13]:
@time lis_mc(8 ,1000_000) # 4 threads

  6.584380 seconds (16.00 M allocations: 244.148 MiB, 0.26% gc time)


8-element Array{Float64,1}:
     1.0
  1425.788008182997
 16302.895494579834
 32487.781872141215
 40811.87007708943
 37642.63162654067
 39456.89771353588
 41072.227310259885

In [11]:
lis_theory(8)

8-element Array{Int64,1}:
     1
  1430
 15767
 33324
 39429
 40270
 40319
 40320

In [8]:
[lis_mc(4,1_000_000) lis_theory(4)]

4×2 Array{Float64,2}:
  1.0      1.0
 14.0198  14.0
 23.0154  23.0
 23.9907  24.0