#### 一个julia并行计算的测试
在启动 Julia 之前，先在当前 shell 环境中执行 `export JULIA_NUM_THREADS=4`，以设置 Julia 可以使用的线程数

In [1]:
using SharedArrays, Distributed

In [8]:
# Report how many threads this Julia session has available.
Threads.nthreads()

4

In [8]:
# List the ids of all processes currently in the cluster (id 1 is the master process).
procs()

15-element Array{Int64,1}:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15

In [7]:
# Start two additional worker processes; the returned vector holds their ids.
addprocs(2)

2-element Array{Int64,1}:
 14
 15

In [10]:
# 1×10 array backed by shared memory, so writes made by worker processes are
# visible on the master process.
a = SharedArray{Float64}(1, 10)
# `@distributed` without a reducer returns immediately; `@sync` blocks until every
# worker has finished, so `a` is fully written before any later cell reads it.
@sync @distributed for i in eachindex(a)
    # Printed on the worker that runs this iteration.
    println(Threads.threadid())
    a[i] = i
end

Task (runnable) @0x00007f2947e6a980

      From worker 3:	1
      From worker 3:	1
      From worker 3:	1
      From worker 3:	1
      From worker 3:	1
      From worker 2:	1
      From worker 2:	1
      From worker 2:	1
      From worker 2:	1
      From worker 2:	1


In [11]:
a

1×10 SharedArray{Float64,2}:
 1.0  2.0  3.0  4.0  5.0  6.0  7.0  8.0  9.0  10.0

In [6]:
# Scalar settings and a target vector; they are not used by the sweep in this cell.
m, l0, u0 = 20, 0.8, [0.9, 1.0, 0.0]
target = [1.0, 0.8, 0.2]
# Parameter grids for the four-dimensional sweep.
malpha = -pi/5:pi/10:pi/5
mbeta = -pi/5:pi/10:pi/5
k1 = 1:5:10
k2 = 1:3:10
# A SharedArray (instead of a plain `zeros(...)` array) is required so that values
# written on worker processes end up in the array seen by the master process.
res = SharedArray{Float64}(length(malpha), length(mbeta), length(k1), length(k2))
# Distribute the outermost loop once instead of launching a separate `@distributed`
# job for every innermost 4-iteration loop, and `@sync` so the cell only returns
# after every entry of `res` has been written.
@sync @distributed for i_alpha in eachindex(malpha)
    for i_beta in eachindex(mbeta), i_k1 in eachindex(k1), i_k2 in eachindex(k2)
        ctrl = [malpha[i_alpha], mbeta[i_beta], k1[i_k1], k2[i_k2]]
        res[i_alpha, i_beta, i_k1, i_k2] = sum(ctrl)
    end
end

### 看起来DE相关的东西没办法用distributed
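说明：下面的 `parameterized_lorenz` 和 `acc_p` 只在主进程上定义，worker 进程并不知道这些函数，而且循环也没有用 `@sync` 等待，所以运行后看到的 `res` 全是 0（原因和解决办法见后文）。<br />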


In [8]:
using DifferentialEquations, ParameterizedFunctions
using SharedArrays, Distributed

In [None]:
# Start two worker processes for the distributed loop below.
addprocs(2)

In [16]:
# NOTE: both functions below are defined without `@everywhere`, i.e. only on the
# process that evaluates this cell.

# In-place right-hand side of the Lorenz system: fills `du` from the state
# `u = (x, y, z)` and the parameters `p = (σ, ρ, β)`. `t` is not used.
function parameterized_lorenz(du,u,p,t)
  x,y,z = u
  σ,ρ,β = p
  du[1] = dx = σ*(y-x)
  du[2] = dy = x*(ρ-z) - y
  du[3] = dz = x*y - β*z
end
# Build an ODEProblem for `parameterized_lorenz` with a fixed initial state on the
# time span (0.0, 1.0), solve it, and return `sum(sol.u[:, 1])`.
# NOTE(review): presumably `sol.u` holds the saved states, making the result their
# element-wise sum — confirm against the solver's documentation.
function acc_p(p)
    u0 = [1.0;0.0;0.0]
    tspan = (0.0,1.0)
    prob = ODEProblem(parameterized_lorenz,u0,tspan,p)
    sol = solve(prob)
    return sum(sol.u[:, 1])
end

# Example parameter vector: p = [10.0,28.0,8/3]
# Sweep the first parameter over `arr`, storing one value per entry in a shared array.
arr = 8.0:0.1:10
res = SharedArray{Float64}(1, length(arr))
# Launched without `@sync`: the loop returns a Task right away and this cell does
# not wait for the workers to finish.
@distributed for i in 1:length(arr)
    p = [arr[i], 28.0, 8/3]
    # Keep only the first component of the value returned by `acc_p`.
    res[i] = acc_p(p)[1]
end

Task (runnable) @0x00007f5fcfbb5870

In [17]:
res

1×21 SharedArray{Float64,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0

### github issue给出的解答
实际上只是因为运行在其他 worker 上的程序是相互独立的，它们不会知道我们在原来的（主）进程里定义了什么，所以必须在需要的代码前面加上`@everywhere`，表示这部分代码也要在其他的 worker 上执行。这样在其他 worker 上加载的代码会更多一点，但不影响结果！

In [22]:
using Distributed
# Add two workers when the cluster currently has fewer than three processes.
if nprocs() < 3
    addprocs(2)
end
# Load the packages on every process so that code running on workers can use them.
@everywhere using DifferentialEquations, SharedArrays, ParameterizedFunctions

In [23]:
procs()

5-element Array{Int64,1}:
 1
 2
 3
 4
 5

In [44]:
# In-place right-hand side of the Lorenz system, defined on every process.
# Fills `du` from the state `u = (x, y, z)` and parameters `p = (σ, ρ, β)`;
# `t` is not used. The value stored in `du[3]` is also the return value.
@everywhere function parameterized_lorenz(du, u, p, t)
    x, y, z = u
    σ, ρ, β = p
    du[1] = σ * (y - x)
    du[2] = x * (ρ - z) - y
    zdot = x * y - β * z
    du[3] = zdot
    return zdot
end

# Solve the Lorenz problem for parameter vector `p` from a fixed initial state on
# the time span (0.0, 1.0) and return `sum(solution.u[:, 1])`. Defined on every
# process so that workers can call it.
@everywhere function acc_p(p)
    initial_state = [1.0; 0.0; 0.0]
    time_window = (0.0, 1.0)
    problem = ODEProblem(parameterized_lorenz, initial_state, time_window, p)
    solution = solve(problem)
    return sum(solution.u[:, 1])
end

# Example parameter vector: p = [10.0, 28.0, 8/3]
# Fine-grained sweep of the first parameter, with a shared array for the results.
arr = 8.0:0.0001:10
res = SharedArray{Float64}((1, length(arr)))


1×20001 SharedArray{Float64,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0

In [52]:
# Parallel sweep: `@sync` waits for all workers so that `@time` covers the whole run.
@time @sync @distributed for idx in eachindex(arr)
    # First parameter comes from the sweep, the second is fixed, the third is
    # randomly perturbed.
    params = [arr[idx], 28.0, 8/3 + rand()]
    res[idx] = first(acc_p(params))
end

  1.143065 seconds (181.03 k allocations: 9.019 MiB)


Task (done) @0x00007f5fcf9f5ae0

In [51]:
# Serial baseline: the same sweep executed on the current process only.
@time for (i, sweep_value) in enumerate(arr)
    p = [sweep_value, 28.0, 8/3 + rand()]
    res[i] = first(acc_p(p))
end

  2.139751 seconds (14.25 M allocations: 1.231 GiB, 11.10% gc time)


#### 总结
在本例子中，当计算量不太大（循环次数小于10000）的时候，分布式计算花的时间好像比单进程计算还多，所以分布式还是更适合用在大规模的计算上

### 文件操作测试

In [11]:
 f= open("hello.txt", "a")  # open in append mode; the stream stays open until close(f)

IOStream(<file hello.txt>)

In [15]:
write(f, "hello, world\n")  # returns the number of bytes written

13

In [16]:
close(f)  # close the stream so the buffered data is flushed to hello.txt