#
# Parallel thread splitter
#
splitter(first, nbatches, n) = first:nbatches:n
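# A minimal illustration (hypothetical REPL session): with 3 batches over 10
# indices, batch 2 visits the interleaved indices 2, 5, and 8:
#
#     julia> collect(splitter(2, 3, 10))
#     3-element Vector{Int64}:
#      2
#      5
#      8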
"""
reduce(output, output_threaded)
Most common reduction function, which sums the elements of the output.
Here, `output_threaded` is a vector containing `nbatches(cl)` copies of
the `output` variable (a scalar or an array). Custom reduction functions
must replace this one if the reduction operation is not a simple sum.
The `output_threaded` array is, by default, created automatically by copying
the given `output` variable `nbatches(cl)` times.
## Examples
Scalar reduction:
```julia-repl
julia> output = 0.0; output_threaded = [ 1.0, 2.0 ];
julia> CellListMap.reduce(output, output_threaded)
3.0
```
Array reduction:
```julia-repl
julia> output = [0,0]; output_threaded = [ [1,1], [2,2] ];
julia> CellListMap.reduce(output, output_threaded)
2-element Vector{Int64}:
3
3
julia> output
2-element Vector{Int64}:
3
3
```
"""
reduce(output::T, output_threaded::Vector{T}) where {T} = sum(output_threaded)
function reduce(output::T, output_threaded::Vector{T}) where {T<:AbstractArray}
for ibatch in eachindex(output_threaded)
@. output += output_threaded[ibatch]
end
return output
end
function reduce(output, output_threaded)
T = typeof(output)
error("""
MethodError: no method matching reduce(::$(typeof(output)),::$(typeof(output_threaded)))
Please provide a method that appropriately reduces a `Vector{$T}`, with
the signature:
```
CellListMap.reduce(output::$T, output_threaded::Vector{$T})
```
The reduction function **must** return the `output` variable, even
if it is mutable.
See: https://m3g.github.io/CellListMap.jl/stable/parallelization/#Custom-reduction-functions
""")
end
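# A hedged sketch of a custom reduction (hypothetical user code, not part of this
# file): if `output` is a tuple carrying an energy and a force array, the sum-based
# methods above do not apply, and a method like the following could be defined
# (the tuple layout is an assumption for illustration only):
#
#     function CellListMap.reduce(
#         output::Tuple{Float64,Vector{Float64}},
#         output_threaded::Vector{Tuple{Float64,Vector{Float64}}},
#     )
#         energy = sum(t -> t[1], output_threaded)
#         forces = sum(t -> t[2], output_threaded)
#         return (energy, forces)
#     end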
"""
partition!(by, x::AbstractVector)
$(INTERNAL)
# Extended help
Reorders the `x` vector, moving the elements that satisfy `by(el)` to the
first positions. Returns the number of elements that satisfy the condition.
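## Example
```julia-repl
julia> x = [1, 4, 2, 5, 3];
julia> n = CellListMap.partition!(iseven, x)
2
julia> x[1:n]
2-element Vector{Int64}:
 4
 2
```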
"""
function partition!(by, x::AbstractVector)
iswap = 1
@inbounds for i in eachindex(x)
if by(x[i])
if iswap != i
x[iswap], x[i] = x[i], x[iswap]
end
iswap += 1
end
end
return iswap - 1
end
#
# Auxiliary functions to control the display of the progress meter
#
_next!(::Nothing) = nothing
_next!(p) = ProgressMeter.next!(p)
#
# Serial version for self-pairwise computations
#
function map_pairwise_serial!(
f::F, output, box::Box, cl::CellList{N,T};
show_progress::Bool=false
) where {F,N,T}
@unpack n_cells_with_real_particles = cl
p = show_progress ? Progress(n_cells_with_real_particles, dt=1) : nothing
ibatch = 1
for i in 1:n_cells_with_real_particles
cellᵢ = cl.cells[cl.cell_indices_real[i]]
output = inner_loop!(f, box, cellᵢ, cl, output, ibatch)
_next!(p)
end
return output
end
#
# Parallel version for self-pairwise computations
#
function batch(f::F, ibatch, nbatches, n_cells_with_real_particles, output_threaded, box, cl, p) where {F}
for i in splitter(ibatch, nbatches, n_cells_with_real_particles)
cellᵢ = cl.cells[cl.cell_indices_real[i]]
output_threaded[ibatch] = inner_loop!(f, box, cellᵢ, cl, output_threaded[ibatch], ibatch)
_next!(p)
end
end
# Note: the reason why, in the next loop, @spawn is followed by interpolated variables
# is to avoid allocations caused by the capture of variables by the closures created
# by the macro. This may not be needed in the future, if the corresponding issue is solved.
# See: https://discourse.julialang.org/t/type-instability-because-of-threads-boxing-variables/78395
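# A minimal illustration of the capture issue (hypothetical, not from this file):
#
#     @spawn f(x)   # the closure captures the variable `x`; it may be boxed, allocating
#     @spawn f($x)  # `$` interpolates the current value of `x`, avoiding the boxing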
function map_pairwise_parallel!(
f::F1, output, box::Box, cl::CellList{N,T};
output_threaded=nothing,
reduce::F2=reduce,
show_progress::Bool=false
) where {F1,F2,N,T}
nbatches = cl.nbatches.map_computation
if isnothing(output_threaded)
output_threaded = [deepcopy(output) for i in 1:nbatches]
end
@unpack n_cells_with_real_particles = cl
p = show_progress ? Progress(n_cells_with_real_particles, dt=1) : nothing
@sync for ibatch in 1:nbatches
@spawn batch($f, $ibatch, $nbatches, $n_cells_with_real_particles, $output_threaded, $box, $cl, $p)
end
return reduce(output, output_threaded)
end
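# A hedged usage sketch (hypothetical user code; `bin` is an assumed helper mapping
# a squared distance to a histogram bin): accumulating a histogram in parallel with
# a preallocated `output_threaded` and the default sum reduction:
#
#     hist = zeros(Int, 100)
#     hist_threaded = [copy(hist) for _ in 1:CellListMap.nbatches(cl)]
#     map_pairwise!(
#         (x, y, i, j, d2, h) -> (h[bin(d2)] += 1; h),
#         hist, box, cl;
#         output_threaded = hist_threaded,
#     )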
#
# Serial version for cross-interaction computations
#
function map_pairwise_serial!(
f::F, output, box::Box,
cl::CellListPair{N,T};
show_progress::Bool=false
) where {F,N,T}
p = show_progress ? Progress(length(cl.ref), dt=1) : nothing
for i in eachindex(cl.ref)
output = inner_loop!(f, output, i, box, cl)
_next!(p)
end
return output
end
#
# Parallel version for cross-interaction computations
#
function batch(f::F, ibatch, nbatches, output_threaded, box, cl, p) where {F}
for i in splitter(ibatch, nbatches, length(cl.ref))
output_threaded[ibatch] = inner_loop!(f, output_threaded[ibatch], i, box, cl)
_next!(p)
end
end
function map_pairwise_parallel!(
f::F1, output, box::Box,
cl::CellListPair{N,T};
output_threaded=nothing,
reduce::F2=reduce,
show_progress::Bool=false
) where {F1,F2,N,T}
nbatches = cl.target.nbatches.map_computation
if isnothing(output_threaded)
output_threaded = [deepcopy(output) for i in 1:nbatches]
end
p = show_progress ? Progress(length(cl.ref), dt=1) : nothing
@sync for ibatch in 1:nbatches
@spawn batch($f, $ibatch, $nbatches, $output_threaded, $box, $cl, $p)
end
return reduce(output, output_threaded)
end
#
# The inner loop for Orthorhombic cells is faster because we can guarantee that
# there are no repeated computations even when running over only half of the neighboring cells.
#
inner_loop!(f::F, box::Box{<:OrthorhombicCellType}, cellᵢ, cl::CellList, output, ibatch) where {F<:Function} =
inner_loop!(f, neighbor_cells_forward, box, cellᵢ, cl, output, ibatch)
inner_loop!(f::F, box::Box{<:TriclinicCell}, cellᵢ, cl::CellList, output, ibatch) where {F<:Function} =
inner_loop!(f, neighbor_cells, box, cellᵢ, cl, output, ibatch)
function inner_loop!(
f::F,
_neighbor_cells::NC, # depends on cell type
box,
cellᵢ,
cl::CellList{N,T},
output,
ibatch
) where {F<:Function,NC<:Function,N,T}
@unpack cutoff_sqr, inv_rotation, nc = box
output = _current_cell_interactions!(box, f, cellᵢ, output)
for jcell in _neighbor_cells(box)
jc_linear = cell_linear_index(nc, cellᵢ.cartesian_index + jcell)
if cl.cell_indices[jc_linear] != 0
cellⱼ = cl.cells[cl.cell_indices[jc_linear]]
output = _vicinal_cell_interactions!(f, box, cellᵢ, cellⱼ, cl, output, ibatch)
end
end
return output
end
function _current_cell_interactions!(box::Box{<:OrthorhombicCellType}, f::F, cell, output) where {F<:Function}
@unpack cutoff_sqr, inv_rotation = box
# loop over unique pairs of particles of this cell; in Orthorhombic cells all particles are real
for i in 1:cell.n_particles-1
@inbounds pᵢ = cell.particles[i]
xpᵢ = pᵢ.coordinates
for j in i+1:cell.n_particles
@inbounds pⱼ = cell.particles[j]
xpⱼ = pⱼ.coordinates
d2 = norm_sqr(xpᵢ - xpⱼ)
if d2 <= cutoff_sqr
output = f(inv_rotation * xpᵢ, inv_rotation * xpⱼ, pᵢ.index, pⱼ.index, d2, output)
end
end
end
return output
end
function _current_cell_interactions!(box::Box{TriclinicCell}, f::F, cell, output) where {F<:Function}
@unpack cutoff_sqr, inv_rotation = box
# loop over all pairs; skip if pᵢ is not real, and count each pair only once via pᵢ.index < pⱼ.index
for i in 1:cell.n_particles
@inbounds pᵢ = cell.particles[i]
xpᵢ = pᵢ.coordinates
pᵢ.real || continue
for j in 1:cell.n_particles
@inbounds pⱼ = cell.particles[j]
(pᵢ.index >= pⱼ.index) && continue
xpⱼ = pⱼ.coordinates
d2 = norm_sqr(xpᵢ - xpⱼ)
if d2 <= cutoff_sqr
output = f(inv_rotation * xpᵢ, inv_rotation * xpⱼ, pᵢ.index, pⱼ.index, d2, output)
end
end
end
return output
end
#
# Compute interactions between vicinal cells
#
function _vicinal_cell_interactions!(f::F, box::Box, cellᵢ, cellⱼ, cl::CellList{N,T}, output, ibatch) where {F<:Function,N,T}
# project particles onto the vector connecting the cell centers
Δc = cellⱼ.center - cellᵢ.center
Δc_norm = norm(Δc)
Δc = Δc / Δc_norm
pp = project_particles!(cl.projected_particles[ibatch], cellⱼ, cellᵢ, Δc, Δc_norm, box)
if length(pp) > 0
output = _vinicial_cells!(f, box, cellᵢ, pp, Δc, output)
end
return output
end
#
# The criterion for skipping computations differs between Orthorhombic and Triclinic boxes
#
function _vinicial_cells!(f::F, box::Box{<:OrthorhombicCellType}, cellᵢ, pp, Δc, output) where {F<:Function}
@unpack cutoff, cutoff_sqr, inv_rotation = box
# Loop over particles of cell icell
for i in 1:cellᵢ.n_particles
@inbounds pᵢ = cellᵢ.particles[i]
# project the particle onto the vector connecting the cell centers
xpᵢ = pᵢ.coordinates
xproj = dot(xpᵢ - cellᵢ.center, Δc)
# Partition pp array according to the current projections
n = partition!(el -> abs(el.xproj - xproj) <= cutoff, pp)
# Compute the interactions
for j in 1:n
@inbounds pⱼ = pp[j]
xpⱼ = pⱼ.coordinates
d2 = norm_sqr(xpᵢ - xpⱼ)
if d2 <= cutoff_sqr
output = f(inv_rotation * xpᵢ, inv_rotation * xpⱼ, pᵢ.index, pⱼ.index, d2, output)
end
end
end
return output
end
function _vinicial_cells!(f::F, box::Box{<:TriclinicCell}, cellᵢ, pp, Δc, output) where {F<:Function}
@unpack cutoff, cutoff_sqr, inv_rotation = box
# Loop over particles of cell icell
for i in 1:cellᵢ.n_particles
@inbounds pᵢ = cellᵢ.particles[i]
# project the particle onto the vector connecting the cell centers
xpᵢ = pᵢ.coordinates
xproj = dot(xpᵢ - cellᵢ.center, Δc)
# Partition pp array according to the current projections
n = partition!(el -> abs(el.xproj - xproj) <= cutoff, pp)
# Compute the interactions
pᵢ.real || continue
for j in 1:n
@inbounds pⱼ = pp[j]
pᵢ.index >= pⱼ.index && continue
xpⱼ = pⱼ.coordinates
d2 = norm_sqr(xpᵢ - xpⱼ)
if d2 <= cutoff_sqr
output = f(inv_rotation * xpᵢ, inv_rotation * xpⱼ, pᵢ.index, pⱼ.index, d2, output)
end
end
end
return output
end
"""
project_particles!(projected_particles,cellⱼ,cellᵢ,Δc,Δc_norm,box)
$(INTERNAL)
# Extended help
Projects all particles of cell `cellⱼ` onto the unit vector `Δc` oriented along the
direction connecting the centers of `cellⱼ` and `cellᵢ`. Modifies `projected_particles`,
and returns a view of `projected_particles` containing only the particles whose
projection along that direction allows them to be within the cutoff distance of
some point of the other cell.
"""
function project_particles!(
projected_particles, cellⱼ, cellᵢ,
Δc, Δc_norm, box::Box{UnitCellType,N}
) where {UnitCellType,N}
if box.lcell == 1
margin = box.cutoff + Δc_norm / 2 # half of the distance between centers
else
margin = box.cutoff * (1 + sqrt(N) / 2) # half of the diagonal of the cutoff-box
end
iproj = 0
@inbounds for j in 1:cellⱼ.n_particles
pⱼ = cellⱼ.particles[j]
xproj = dot(pⱼ.coordinates - cellᵢ.center, Δc)
if abs(xproj) <= margin
iproj += 1
projected_particles[iproj] = ProjectedParticle(pⱼ.index, xproj, pⱼ.coordinates)
end
end
pp = @view(projected_particles[1:iproj])
return pp
end
#
# Inner loop of cross-interaction computations. If the two sets
# are large, this might(?) be improved by computing two separate
# cell lists and using projection and partitioning.
#
function inner_loop!(
f, output, i, box,
cl::CellListPair{N,T}
) where {N,T}
@unpack nc, cutoff_sqr, inv_rotation, rotation = box
xpᵢ = box.rotation * wrap_to_first(cl.ref[i], box.input_unit_cell.matrix)
ic = particle_cell(xpᵢ, box)
for neighbor_cell in current_and_neighbor_cells(box)
jc_cartesian = neighbor_cell + ic
jc_linear = cell_linear_index(nc, jc_cartesian)
# If cellⱼ is empty, cycle
if cl.target.cell_indices[jc_linear] == 0
continue
end
cellⱼ = cl.target.cells[cl.target.cell_indices[jc_linear]]
# loop over particles of cellⱼ
for j in 1:cellⱼ.n_particles
@inbounds pⱼ = cellⱼ.particles[j]
xpⱼ = pⱼ.coordinates
d2 = norm_sqr(xpᵢ - xpⱼ)
if d2 <= cutoff_sqr
output = f(inv_rotation * xpᵢ, inv_rotation * xpⱼ, i, pⱼ.index, d2, output)
end
end
end
return output
end