In [1]:
using DataFrames, BenchmarkTools, Random, Statistics

In [2]:
# Report how many threads this Julia session has available.
Threads.nthreads()

2

In [3]:
# Seed the global RNG so the synthetic data set below is reproducible.
Random.seed!(0);
# Number of rows in the synthetic data set.
N = 100_000;
# `rowtype` is the grouping key: an integer drawn from 1:12 for every row.
const data = DataFrame(rowtype=rand(1:12, N));
# `x1` is standard-normal noise scaled element-wise by the row's `rowtype`.
data.x1 = data.rowtype .* randn(N);

In [4]:
# Preview the first five rows of the data set.
first(data, 5)

Unnamed: 0_level_0,rowtype,x1
Unnamed: 0_level_1,Int64,Float64
1,1,-0.298115
2,3,-2.71766
3,10,-28.0064
4,6,4.39991
5,1,0.809952


In [5]:
"""
    stats(df)

Summarise one group by averaging the medians of 100 random resamples of `df.x1`.

Each resample draws as many values from `df.x1` (with replacement) as the column
holds. A fresh `MersenneTwister` is created on every call, so concurrent calls
do not share RNG state and results differ from call to call.

# Arguments
- `df`: a table-like object exposing `rowtype` and `x1` column properties,
  e.g. a `DataFrame` or a single group of a `GroupedDataFrame`. Must be non-empty.

# Returns
A `NamedTuple` with fields
- `rowtype`: the first value of `df.rowtype`,
- `n`: the number of rows,
- `tid`: the id of the thread that executed the call,
- `median`: the mean of the 100 resample medians.
"""
function stats(df)
    rng = MersenneTwister()
    # Hoist the loop invariants: the column and its length (== number of rows).
    x = df.x1
    n = length(x)
    nresamples = 100
    # Folding with `sum` keeps the accumulator's type fixed from the first element,
    # unlike starting from the integer literal `0` and adding floats to it.
    total = sum(median(rand(rng, x, n)) for _ in 1:nresamples)
    return (rowtype=first(df.rowtype), n=n,
            tid=Threads.threadid(), median=total / nresamples)
end


stats (generic function with 1 method)

In [6]:
# First call: the reported time also includes JIT compilation, so it is not
# representative of steady-state performance.
@time by(data, :rowtype, stats)

  3.028538 seconds (10.75 M allocations: 684.714 MiB, 8.52% gc time)


Unnamed: 0_level_0,rowtype,rowtype_1,n,tid,median
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Float64
1,1,1,8384,1,-0.0187663
2,3,3,8316,1,-0.0252967
3,10,10,8425,1,0.0436726
4,6,6,8340,1,-0.000717997
5,11,11,8332,1,-0.0429839
6,4,4,8202,1,-0.0351851
7,9,9,8360,1,-0.0274824
8,8,8,8125,1,-0.200808
9,12,12,8456,1,0.103124
10,5,5,8417,1,-0.0601838


In [7]:
# Second call: everything is already compiled, so this is the single-threaded
# baseline timing. The trailing `;` suppresses display of the result.
@time by(data,:rowtype, stats);

  0.476513 seconds (13.85 k allocations: 157.342 MiB, 1.92% gc time)


In [8]:
"""
    threaded_by(df::DataFrame, groupcol::Symbol, f::Function)

Apply `f` to every group of `df` (grouped by `groupcol`), spreading the groups
across the available threads with `Threads.@threads`.

# Arguments
- `df`: the data frame to split.
- `groupcol`: the column to group by.
- `f`: called once per group with that group's sub-data-frame. It must return a
  `NamedTuple` that has a field named `groupcol`, and it is invoked from several
  threads at once, so it must not rely on unsynchronized shared state.

# Returns
A `DataFrame` with one row per group and two columns: one named after
`groupcol`, holding that field of each result, and `x1`, holding the full
`NamedTuple` returned by `f`.
"""
function threaded_by(df::DataFrame, groupcol::Symbol, f::Function)
    groups = groupby(df, groupcol)
    ngroups = length(groups)
    if ngroups > 0
        # Warm-up call on a tiny slice so that `f` is compiled before the threaded
        # loop starts. The slice is clamped so a single-row first group works too.
        head = groups[1]
        f(view(head, 1:min(2, nrow(head)), :))
    end
    res = Vector{NamedTuple}(undef, ngroups)
    # Each iteration writes only its own slot of `res`, so no locking is needed.
    Threads.@threads for g in 1:ngroups
        res[g] = f(groups[g])
    end
    # Name the key column after `groupcol` instead of hard-coding `rowtype`;
    # `makeunique` keeps the call valid even if `groupcol` is itself `:x1`.
    return DataFrame(groupcol => getfield.(res, groupcol), :x1 => res;
                     makeunique=true)
end

threaded_by (generic function with 1 method)

In [9]:
# First call: the reported time also includes JIT compilation of `threaded_by`.
@time threaded_by(data, :rowtype, stats)

  0.448821 seconds (555.41 k allocations: 184.903 MiB, 3.74% gc time)


Unnamed: 0_level_0,rowtype,x1
Unnamed: 0_level_1,Int64,NamedTup…
1,1,"(rowtype = 1, n = 8384, tid = 1, median = -0.0183003)"
2,3,"(rowtype = 3, n = 8316, tid = 1, median = -0.0223301)"
3,10,"(rowtype = 10, n = 8425, tid = 1, median = 0.0532021)"
4,6,"(rowtype = 6, n = 8340, tid = 1, median = -0.00511231)"
5,11,"(rowtype = 11, n = 8332, tid = 1, median = -0.0316445)"
6,4,"(rowtype = 4, n = 8202, tid = 1, median = -0.0410587)"
7,9,"(rowtype = 9, n = 8360, tid = 2, median = -0.0264698)"
8,8,"(rowtype = 8, n = 8125, tid = 2, median = -0.173995)"
9,12,"(rowtype = 12, n = 8456, tid = 2, median = 0.0818977)"
10,5,"(rowtype = 5, n = 8417, tid = 2, median = -0.0600339)"


In [10]:
# Second call: already compiled, so this is the multi-threaded timing to compare
# against the second `by` call. The trailing `;` suppresses display of the result.
@time threaded_by(data, :rowtype, stats);

  0.282378 seconds (14.50 k allocations: 157.387 MiB, 3.53% gc time)


In [11]:
# Report how many threads this Julia session has available.
Threads.nthreads()

2

In [12]:
# Deliberately unsynchronized counter: every iteration reads and rewrites the
# global `total` without any lock, so increments made by concurrently running
# threads can overwrite each other. The recorded result (503925 instead of
# 1_000_000) shows the lost updates.
total = 0;
Threads.@threads for i in 1:1_000_000
    global total = total + 1
end
total

503925

In [13]:
# Counter whose read-modify-write is serialized by a spin lock, so no increment
# is lost even though the loop body runs on several threads.
total2 = 0;
s = Threads.SpinLock()
Threads.@threads for i in 1:1_000_000
    Threads.lock(s)
    try
        global total2 = total2 + 1
    finally
        # Always release the lock, even if the update throws; otherwise every
        # other thread would spin on it forever.
        Threads.unlock(s)
    end
end
total2

1000000