Merged

273 commits
4908653
collect-ordered m2 is implemented with orchestrator
orxfun Sep 10, 2025
9b0f71c
m computations use orchestrator
orxfun Sep 10, 2025
e0b3ede
par computations fixed to accept orchestrator
orxfun Sep 10, 2025
b1e2065
map computations fixed to use orchestrator
orxfun Sep 10, 2025
4c27f90
collect into methods are fixed for orchestrator for the m variant
orxfun Sep 10, 2025
4c3467e
par and map collections use orchestrator
orxfun Sep 10, 2025
d484929
tests fixed
orxfun Sep 10, 2025
db96d00
checkpoint: orch compiles
orxfun Sep 10, 2025
d49579d
m reduce is simplified
orxfun Sep 10, 2025
0428a74
M next and next_any simplified
orxfun Sep 10, 2025
2deae40
clean up
orxfun Sep 10, 2025
2022194
collect into takes par-map directly
orxfun Sep 10, 2025
e3a6857
m computation flattened and simplified
orxfun Sep 10, 2025
f64614c
collect map tests fixed
orxfun Sep 10, 2025
b19fb1e
m computation made redundant
orxfun Sep 10, 2025
e5ffff9
m tests moved to computational variants
orxfun Sep 10, 2025
09457f7
m computation clean up
orxfun Sep 10, 2025
f30f90d
collect arbitrary receives ParXap
orxfun Sep 10, 2025
72b1b2d
xap compute methods expect ParXap with orchestrator
orxfun Sep 10, 2025
fff239f
ParXap computations flattened and simplified
orxfun Sep 10, 2025
8f81f45
clean up
orxfun Sep 10, 2025
12a0983
par-xap tests are fixed for orchestrator
orxfun Sep 10, 2025
627f38d
xap uses direct reduce
orxfun Sep 10, 2025
3b681fb
xap-result first flattened and simplified
orxfun Sep 10, 2025
343d61d
seq_try_collect_into is implemented for par iter result
orxfun Sep 10, 2025
21888b7
par_collect_into for par-xap
orxfun Sep 10, 2025
b26d5cd
rename
orxfun Sep 11, 2025
e638b3f
clean up
orxfun Sep 11, 2025
eabd40f
pub fallible modules
orxfun Sep 11, 2025
d0138f0
x_try_collect_into_2 is defined
orxfun Sep 11, 2025
26306c9
x_try_collect_into_2
orxfun Sep 11, 2025
9799c5b
collect into is fixed to use orchestrator
orxfun Sep 11, 2025
eca8d9b
orchestrator checkpoint - passing
orxfun Sep 11, 2025
130ecf4
clean up
orxfun Sep 11, 2025
147546f
clean up
orxfun Sep 11, 2025
3b81cc5
clean
orxfun Sep 11, 2025
e0c8f92
x_try_collect_into2 is defined to simplifie x_try_collect_into
orxfun Sep 11, 2025
4c934ca
collect into implementations simplified
orxfun Sep 11, 2025
bf76a84
clean up
orxfun Sep 11, 2025
75e7ced
simplify collect into code
orxfun Sep 11, 2025
2c6b173
clean up
orxfun Sep 11, 2025
9e90f70
clean up and missing docs
orxfun Sep 11, 2025
3220618
flatten collect arbitrary
orxfun Sep 11, 2025
6b6c440
flatten collect arbitrary m
orxfun Sep 11, 2025
3cd84f1
flatten collect ordered
orxfun Sep 11, 2025
8989a18
further flatten computations
orxfun Sep 11, 2025
86e1119
module reorganization
orxfun Sep 11, 2025
6f7ab8c
minor
orxfun Sep 11, 2025
410840e
flatten x collection for infallible
orxfun Sep 11, 2025
cf8fa4c
flatten collect into computations
orxfun Sep 11, 2025
611fb96
clean up
orxfun Sep 11, 2025
e1d7529
alias parallel_runner_compute as prc
orxfun Sep 11, 2025
f074e2c
init scope and handle
orxfun Sep 11, 2025
88940a1
orchestrator is used to spawn threads
orxfun Sep 11, 2025
c7145c9
minor
orxfun Sep 11, 2025
412baf7
Merge pull request #108 from orxfun/update-parallel-runner-to-use-scope
orxfun Sep 11, 2025
66a992b
redesign thread-pool and orchestrator traits
orxfun Sep 11, 2025
484e890
Merge pull request #109 from orxfun/thread-pool-first-draft
orxfun Sep 11, 2025
9591fce
reorganize default os pool
orxfun Sep 11, 2025
0533c94
pool wip
orxfun Sep 12, 2025
35e63d2
thread pool gets a mut reference
orxfun Sep 12, 2025
b57f765
wip
orxfun Sep 12, 2025
03f5ab5
immut
orxfun Sep 12, 2025
2308366
wip
orxfun Sep 13, 2025
f15eead
NumSpawned new type
orxfun Sep 13, 2025
2a6eab3
use orchestrator run for ordered collection
orxfun Sep 13, 2025
03ac473
minor revision
orxfun Sep 13, 2025
54e7a56
orchestrator run is defined, ordered collection is simplified
orxfun Sep 13, 2025
d66b321
arbitrary collection is simplified
orxfun Sep 13, 2025
6a82e7f
Merge pull request #110 from orxfun/an-actual-std-thread-pool
orxfun Sep 13, 2025
9478a44
generic map for all pools
orxfun Sep 14, 2025
a9cb604
minor renaming
orxfun Sep 14, 2025
13b5211
map defined on orchestrator
orxfun Sep 14, 2025
59fc98f
collect ordered uses generic orchestrator map
orxfun Sep 14, 2025
0a00543
clean up
orxfun Sep 14, 2025
73220f6
rname orch methods
orxfun Sep 14, 2025
0191b27
collect arbitrary uses orchestrator map
orxfun Sep 14, 2025
3969f4d
todo test concurrent bag
orxfun Sep 14, 2025
53c1dd6
ParThreadPool::max_num_threads is defined
orxfun Sep 14, 2025
5823711
max_num_threads_for_computation is defined
orxfun Sep 14, 2025
fdc4646
map bag has capacity wrt the pool and computation
orxfun Sep 14, 2025
76ef97f
map_all is defined over all fallibility impl
orxfun Sep 14, 2025
3701cd8
clean up
orxfun Sep 14, 2025
ccf7af2
type alias to simplify fn signature
orxfun Sep 14, 2025
1d70444
map reduce is computed via orchestrator
orxfun Sep 14, 2025
44bd475
map reduce uses orchestrator map
orxfun Sep 15, 2025
11cfbf7
xap reduce uses orchestrator map
orxfun Sep 15, 2025
c7c1645
wip
orxfun Sep 15, 2025
8f1d1d9
next and next-any uses orchestrator map
orxfun Sep 15, 2025
1052723
thread pool clean up
orxfun Sep 15, 2025
de559c6
clean up handle and scope
orxfun Sep 15, 2025
ab1e76f
NumSpawned documentaiton
orxfun Sep 15, 2025
faf5450
ParThreadPoolCompute is separated from ParThreadPool
orxfun Sep 15, 2025
abf567a
clean up
orxfun Sep 15, 2025
27fa3d1
impl ParThreadPool for rayon::ThreadPool
orxfun Sep 15, 2025
f100124
ParThreadPool is implemented for all references
orxfun Sep 15, 2025
13f571c
RayonOrchestrator is defined
orxfun Sep 15, 2025
8ce491c
rayon thread pool tests implemented, passing
orxfun Sep 15, 2025
a24ee4c
auto impl orch for all &mut orch
orxfun Sep 15, 2025
0beb563
Merge pull request #111 from orxfun/map-over-pool-
orxfun Sep 15, 2025
09609ab
fix dependency
orxfun Sep 16, 2025
54d1690
feature flags added for rayon
orxfun Sep 16, 2025
f213f7f
mod reorganization
orxfun Sep 16, 2025
bd1b1ee
code reorganization
orxfun Sep 16, 2025
62d2843
ScopedThreadPoolOrchestrator is implemented
orxfun Sep 16, 2025
0234881
rayon orch tests extracted
orxfun Sep 16, 2025
5dae5a8
scoped threadpool tests added
orxfun Sep 16, 2025
b0477b7
default orchestrator is refactored
orxfun Sep 16, 2025
214cc5e
thread pool module flattened
orxfun Sep 16, 2025
929716c
std feature is defined
orxfun Sep 16, 2025
a49a51e
no-std orch tests
orxfun Sep 16, 2025
3122db5
no-std orchestrator
orxfun Sep 16, 2025
45359bc
no std thread pool
orxfun Sep 16, 2025
91d46f7
no-std generic values
orxfun Sep 16, 2025
6f761ed
no std fallibility
orxfun Sep 16, 2025
cd5845e
no-std heap sort
orxfun Sep 16, 2025
603d61b
no-std ordered collection
orxfun Sep 16, 2025
dbd2c4c
no std collect into
orxfun Sep 16, 2025
5f87cea
updates towards no std
orxfun Sep 16, 2025
8c897c9
minor
orxfun Sep 16, 2025
cd7455e
Merge pull request #112 from orxfun/enable-other-pools
orxfun Sep 16, 2025
f6ae78f
wip
orxfun Sep 16, 2025
e166d1f
re-init ParIterUsing
orxfun Sep 16, 2025
2243382
export ParIterUsing
orxfun Sep 16, 2025
0628967
default-fn mod reorganization
orxfun Sep 16, 2025
30de1f4
using xap map is implemented
orxfun Sep 16, 2025
41a051c
all transformations on using-xap are implemented
orxfun Sep 16, 2025
7894319
module refactoring
orxfun Sep 17, 2025
99ce399
reorganization
orxfun Sep 17, 2025
c73104b
init collect computations
orxfun Sep 17, 2025
17af237
thread reduce computation for using
orxfun Sep 17, 2025
f5f775c
init thread using modules
orxfun Sep 17, 2025
19fb23b
next thread computation for using
orxfun Sep 17, 2025
2374c70
thread next-any computation for using
orxfun Sep 17, 2025
cb9f7a5
collect_ordered thread computation for using
orxfun Sep 17, 2025
fef7a07
collect_arbitrary thread computation of using
orxfun Sep 17, 2025
05d9f8f
wip
orxfun Sep 17, 2025
1eba058
pool run in scope takes ownership of work
orxfun Sep 17, 2025
ad1a973
thread pool run closure uses num spawned before
orxfun Sep 17, 2025
baa911e
pool map closure uses num spawned
orxfun Sep 17, 2025
a9cee0d
rvert numspawned in closure
orxfun Sep 17, 2025
10294b2
first working collect-arbitrary for map with mutable variable
orxfun Sep 17, 2025
5fcd69a
unify run in scope with owned work
orxfun Sep 17, 2025
5cd6424
unify pool run fn for both regular and using computations
orxfun Sep 17, 2025
22cdbae
unify map_in_pool for using and regular computations
orxfun Sep 17, 2025
fa5c160
clean up
orxfun Sep 17, 2025
b6e4526
xap collect arbitrary using mutable variable
orxfun Sep 17, 2025
900c25b
using collect ordered with new orchestrator
orxfun Sep 17, 2025
3a0733e
orchestrator using next-any
orxfun Sep 17, 2025
2e77848
using next over orchestrator
orxfun Sep 17, 2025
e591d2d
using reduce with orchestrator
orxfun Sep 17, 2025
771d7ff
reduce and first implemented for xap using
orxfun Sep 17, 2025
7ab7bbc
clean up
orxfun Sep 17, 2025
02fb478
using map collect into
orxfun Sep 17, 2025
b15afff
xap collect into for using computations
orxfun Sep 17, 2025
1ec1faf
collect into implementations for vec and pinned vecs
orxfun Sep 17, 2025
9b4b71e
complete UParXap implementation
orxfun Sep 17, 2025
a8651f4
complete UParMap part-iter implementation
orxfun Sep 17, 2025
edc2c81
complete UPar implementation
orxfun Sep 17, 2025
aa9a637
using transformations fixed
orxfun Sep 17, 2025
21bfffb
wip
orxfun Sep 17, 2025
3efbf14
clean up old using code
orxfun Sep 17, 2025
43635ee
wip fn sync issue
orxfun Sep 17, 2025
b42cedf
fix tests
orxfun Sep 17, 2025
c16229b
fix thread index (0-based) in using computations
orxfun Sep 17, 2025
88a68a0
dependency update, std to core
orxfun Sep 17, 2025
e808cbd
upgrade pinned vec dependencies
orxfun Sep 17, 2025
e0b955b
re-enabled parallelization tests on pinned vectors
orxfun Sep 17, 2025
3a26e10
upgrade criterion dep
orxfun Sep 17, 2025
14ba446
upgrade dependencies
orxfun Sep 17, 2025
fd692fb
clean up
orxfun Sep 17, 2025
813547f
remove std dependency
orxfun Sep 17, 2025
3291955
no feature dependencies fixed
orxfun Sep 17, 2025
8bfd366
upgrade criterion dep
orxfun Sep 17, 2025
f9406a9
fix tests: checkpoint -> no-std compiles
orxfun Sep 17, 2025
10da62a
renaming
orxfun Sep 17, 2025
bcaeb09
renaming
orxfun Sep 17, 2025
39d90ba
renaming
orxfun Sep 17, 2025
e50629d
renaming
orxfun Sep 17, 2025
fb39581
renaming
orxfun Sep 17, 2025
2587833
reorganization
orxfun Sep 17, 2025
b61ea49
renaming
orxfun Sep 17, 2025
b02bf0e
renaming
orxfun Sep 17, 2025
5b01ee8
renaming
orxfun Sep 17, 2025
5abecdd
renaming
orxfun Sep 17, 2025
383c09a
document ParThreadPool
orxfun Sep 17, 2025
5d20330
documentation
orxfun Sep 17, 2025
3c76424
documentation
orxfun Sep 17, 2025
0ddb653
revise features and exports
orxfun Sep 17, 2025
ab9b0ca
std feature
orxfun Sep 17, 2025
709ab2f
set default features as std
orxfun Sep 17, 2025
be1628c
clippy fixes
orxfun Sep 17, 2025
d69f803
clippy fixes
orxfun Sep 17, 2025
ccc47c1
fmt
orxfun Sep 17, 2025
09a6f05
Merge pull request #113 from orxfun/using-computations-to-use-pool-ab…
orxfun Sep 17, 2025
22d20da
exclude rayon pool from miri tests
orxfun Sep 18, 2025
50add43
update benches script
orxfun Sep 18, 2025
8cc43e7
implement RunnerWithScopedPool
orxfun Sep 18, 2025
21cf80d
expose RunnerWithScopedPool
orxfun Sep 18, 2025
114746a
RunnerWithScopedPool tests added
orxfun Sep 18, 2025
26a69dc
implement runner with poolite pool
orxfun Sep 18, 2025
6a00f49
poolite runner tests added
orxfun Sep 18, 2025
5974c17
RunnerWithYastlPool is implemented
orxfun Sep 18, 2025
3d06102
yastl documentation
orxfun Sep 18, 2025
7160da2
yastl tests added
orxfun Sep 18, 2025
aaa7968
runner with pond pool is implemented
orxfun Sep 18, 2025
e34ed43
pond tests are added
orxfun Sep 18, 2025
2500106
pool dependency requires rayon-core, not rayon
orxfun Sep 18, 2025
0e2d384
fix double mod definition
orxfun Sep 18, 2025
aab9820
example benchmark for pools
orxfun Sep 18, 2025
3d7ad59
revise example
orxfun Sep 18, 2025
8716768
revise pools example
orxfun Sep 18, 2025
da2bf75
generic RunnerWithPool struct is introduced
orxfun Sep 18, 2025
490f886
pools bench example uses generic RunnerWithPool
orxfun Sep 18, 2025
8248ea5
refactoring to use a generic RunnerWithPool
orxfun Sep 18, 2025
75dab83
fix pool doc-tests
orxfun Sep 18, 2025
f3dc795
Document StdDefaultPool
orxfun Sep 18, 2025
ad8e76f
document SequentialPool
orxfun Sep 18, 2025
b50cf02
RunnerWithPool tests
orxfun Sep 18, 2025
32b0d30
document into_inner_pool
orxfun Sep 18, 2025
f5dc044
fix doc-test
orxfun Sep 18, 2025
1519605
clean up
orxfun Sep 18, 2025
fc7bd74
clippy fixes
orxfun Sep 18, 2025
e6681fd
fix doc
orxfun Sep 18, 2025
4a1a0ca
with_runner doc test
orxfun Sep 18, 2025
c04b8dd
with_executor is added to RunnerWithPool
orxfun Sep 18, 2025
39e4c6a
with_pool is documented
orxfun Sep 18, 2025
d8e0658
doc
orxfun Sep 18, 2025
d154682
with_pool documentation
orxfun Sep 18, 2025
288a239
with_pool transformation for fallible iterators
orxfun Sep 18, 2025
4083ea5
with_pool for optional iterator
orxfun Sep 18, 2025
2b81f83
with-pool transformation for using iterators
orxfun Sep 18, 2025
a672233
fix docs
orxfun Sep 18, 2025
51319ea
fix default
orxfun Sep 18, 2025
c3448eb
Merge pull request #114 from orxfun/new-pools
orxfun Sep 18, 2025
43b735a
clippy fix
orxfun Sep 18, 2025
5feb4d1
readme is updated for pools
orxfun Sep 18, 2025
a0c5bb9
update contributing
orxfun Sep 18, 2025
16bd926
Merge pull request #115 from orxfun/revise-readme-for-pools
orxfun Sep 18, 2025
f800454
performance note on pool performance
orxfun Sep 18, 2025
012ba2f
different thread pools are added to some of the benchmarks.
orxfun Sep 18, 2025
7466587
update readme
orxfun Sep 18, 2025
71ca681
Merge pull request #116 from orxfun/add-orx-parallel-with-different-p…
orxfun Sep 18, 2025
fc8f0ae
--all-features is added to benchmark script
orxfun Sep 18, 2025
2d6403e
increment version
orxfun Sep 19, 2025
9a2ef21
simplify yastl thread pool impl
orxfun Sep 19, 2025
0c38489
remove self-or dependency
orxfun Sep 19, 2025
517efa4
revise example parameters
orxfun Sep 19, 2025
bc409be
fix 32-bit error in test
orxfun Sep 19, 2025
4682901
version number is set
orxfun Sep 21, 2025
4 changes: 2 additions & 2 deletions .scripts/run_benchmark.sh
```diff
@@ -1,10 +1,10 @@
-original_bench=find
+original_bench=find_iter_into_par
 bench=$1
 
 sed -i "s/$original_bench/$bench/g" Cargo.toml
 
 rm -f benches/results/$bench.txt
 
-cargo bench >> benches/results/$bench.txt
+cargo bench --all-features >> benches/results/$bench.txt
 
 sed -i "s/$bench/$original_bench/g" Cargo.toml
```
22 changes: 12 additions & 10 deletions .scripts/run_benchmarks.sh
```diff
@@ -1,36 +1,38 @@
 # allBenches=(collect_filter sum)
 
 allBenches=(
+chain_collect_map
+chain3_collect_map
+chain4_collect_map
 collect_filter
 collect_filtermap
 collect_flatmap
 collect_iter_into_par
 collect_long_chain
+collect_map
+collect_map_filter
 collect_map_filter_hash_set
-collect_result
-collect_map_filter
-collect_map
 count_filtermap
 count_flatmap
+count_map
 count_map_filter
-count_map
 drain_vec_collect_map_filter
+find
 find_any
 find_flatmap
 find_iter_into_par
 find_map_filter
-find
 mut_for_each_iter
 mut_for_each_slice
+reduce
 reduce_iter_into_par
 reduce_long_chain
+reduce_map
 reduce_map_filter
-sum
-reduce_map
-reduce
+result_collect_map
+result_reduce_map
-sum_filtermap
-sum_flatmap
 sum_map_filter
+sum
 vec_deque_collect_map_filter
 vec_deque_collect_map_filter_owned
 )
```
48 changes: 31 additions & 17 deletions Cargo.toml
```diff
@@ -1,6 +1,6 @@
 [package]
 name = "orx-parallel"
-version = "3.2.0"
+version = "3.3.0"
 edition = "2024"
 authors = ["orxfun <orx.ugur.arikan@gmail.com>"]
 readme = "README.md"
@@ -11,33 +11,47 @@ keywords = ["parallel", "concurrency", "performance", "thread", "iterator"]
 categories = ["concurrency", "algorithms"]
 
 [dependencies]
-orx-pseudo-default = { version = "2.1.0", default-features = false }
 orx-pinned-vec = { version = "3.17.0", default-features = false }
-orx-fixed-vec = { version = "3.18.0", default-features = false }
-orx-split-vec = { version = "3.18.0", default-features = false }
-orx-pinned-concurrent-col = { version = "2.14.0", default-features = false }
-orx-concurrent-bag = { version = "3.0.0", default-features = false }
-orx-concurrent-ordered-bag = { version = "3.0.0", default-features = false }
+orx-fixed-vec = { version = "3.19.0", default-features = false }
+orx-split-vec = { version = "3.19.0", default-features = false }
+orx-concurrent-iter = { version = "3.1.0", default-features = false }
+orx-concurrent-bag = { version = "3.1.0", default-features = false }
+orx-concurrent-ordered-bag = { version = "3.1.0", default-features = false }
 orx-iterable = { version = "1.3.0", default-features = false }
+orx-pinned-concurrent-col = { version = "2.15.0", default-features = false }
 orx-priority-queue = { version = "1.7.0", default-features = false }
-orx-concurrent-iter = { version = "3.1.0", default-features = false }
-rayon = { version = "1.10.0", optional = true }
+orx-pseudo-default = { version = "2.1.0", default-features = false }
+
+# optional: generic iterator
+rayon = { version = "1.11.0", optional = true, default-features = false }
+
+# optional: thread pool
+pond = { version = "0.3.1", optional = true, default-features = false }
+poolite = { version = "0.7.1", optional = true, default-features = false }
+rayon-core = { version = "1.13.0", optional = true, default-features = false }
+scoped-pool = { version = "1.0.0", optional = true, default-features = false }
+scoped_threadpool = { version = "0.1.9", optional = true, default-features = false }
+yastl = { version = "0.1.2", optional = true, default-features = false }
 
 [dev-dependencies]
-chrono = "0.4.39"
-clap = { version = "4.5.36", features = ["derive"] }
-criterion = "0.5.1"
+chrono = "0.4.42"
+clap = { version = "4.5.47", features = ["derive"] }
+criterion = "0.7.0"
 orx-concurrent-option = { version = "1.5.0", default-features = false }
-orx-concurrent-vec = "3.6.0"
-rand = "0.9"
+orx-concurrent-vec = "3.8.0"
+rand = "0.9.2"
 rand_chacha = "0.9"
-rayon = "1.10.0"
+rayon = "1.11.0"
 test-case = "3.3.1"
 
 [[bench]]
-name = "find"
+name = "find_iter_into_par"
 harness = false
 
+[package.metadata.docs.rs]
+all-features = true
+
 [features]
-default = []
+default = ["std"]
+std = []
 generic_iterator = ["rayon"]
```
100 changes: 93 additions & 7 deletions README.md
@@ -12,7 +12,7 @@
* [Fallible Parallel Iterators](#fallible-parallel-iterators)
* [Using Mutable Variables](#using-mutable-variables)
* [Configurations](#configurations)
* [Underlying Approach and Parallel Runners](#underlying-approach-and-parallel-runners)
* [Runner: Pools and Executors](#runner-pools-and-executors)
* [Contributing](#contributing)

## Parallel Computation by Iterators
@@ -420,16 +420,101 @@ This is guaranteed by the fact that both consuming computation calls and configu
Additionally, the maximum number of threads that can be used by parallel computations can be globally bounded by the environment variable `ORX_PARALLEL_MAX_NUM_THREADS`. Please see the corresponding [example](https://github.com/orxfun/orx-parallel/blob/main/examples/max_num_threads_config.rs) for details.


## Underlying Approach and Parallel Runners
## Runner: Pools and Executors

This crate defines parallel computation by combining two basic components.

* Pulling **inputs** in parallel is achieved through [`ConcurrentIter`](https://crates.io/crates/orx-concurrent-iter). Concurrent iterator implementations are lock-free, efficient and support pull-by-chunks optimization to reduce the parallelization overhead. A thread can pull any number of inputs from the concurrent iterator every time it becomes idle. This provides the means to dynamically decide on the chunk sizes.
* Writing **outputs** in parallel is handled using thread-safe containers such as [`ConcurrentBag`](https://crates.io/crates/orx-concurrent-bag) and [`ConcurrentOrderedBag`](https://crates.io/crates/orx-concurrent-ordered-bag). Similarly, these are lock-free collections that aim for high performance collection of results.
**Pulling inputs**
* Pulling inputs in parallel is achieved through [`ConcurrentIter`](https://crates.io/crates/orx-concurrent-iter). Concurrent iterator implementations are lock-free, efficient and support pull-by-chunks optimization to reduce the parallelization overhead. A thread can pull any number of inputs from the concurrent iterator every time it becomes idle. This provides the means to dynamically decide on the chunk sizes.
* Furthermore, this reduces the overhead of defining and creating tasks. To illustrate, provided that the computation will be handled by `n` threads, a closure holding a reference to the input concurrent iterator is defined to represent the computation. This same closure is passed to `n` threads; i.e., `n` spawn calls are made. Each of these threads keeps pulling elements from the input until the computation is completed, without requiring another task to be defined. A simplified model of this idea is sketched below.
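
To make this concrete, the following is a minimal, hypothetical sketch of the single-closure approach using plain `std` threads; the shared `AtomicUsize` index stands in for the lock-free `ConcurrentIter` (which additionally supports pulling elements in chunks):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

// Simplified model: every thread runs the very same closure and keeps
// pulling the next input until the shared inputs are exhausted.
// The atomic index is only a stand-in for a real ConcurrentIter.
fn parallel_sum(inputs: &[u64], n_threads: usize) -> u64 {
    let next = AtomicUsize::new(0);
    std::thread::scope(|s| {
        let handles: Vec<_> = (0..n_threads)
            .map(|_| {
                s.spawn(|| {
                    let mut local_sum = 0;
                    loop {
                        // lock-free claim of the next input index
                        let i = next.fetch_add(1, Ordering::Relaxed);
                        match inputs.get(i) {
                            Some(x) => local_sum += *x, // process the input
                            None => break,              // inputs exhausted
                        }
                    }
                    local_sum
                })
            })
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).sum()
    })
}
```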

Finally, [`ParallelRunner`](https://docs.rs/orx-parallel/latest/orx_parallel/runner/trait.ParallelRunner.html) trait manages parallelization of the given computation with desired configuration. The objective of the parallel runner is to optimize the chunk sizes to solve the tradeoff between impact of heterogeneity of individual computations and overhead of parallelization.
**Writing outputs**
* When we collect results, writing outputs is handled using lock-free containers such as [`ConcurrentBag`](https://crates.io/crates/orx-concurrent-bag) and [`ConcurrentOrderedBag`](https://crates.io/crates/orx-concurrent-ordered-bag) which aim for high performance collection of results.

Since it is a trait, parallel runner is customizable. It is possible to implement and use your *own runner* by calling [`with_runner`](https://docs.rs/orx-parallel/latest/orx_parallel/trait.ParIter.html#tymethod.with_runner) transformation method on the parallel iterator. Default parallel runner targets to be efficient in general. When we have a use case with special characteristics, we can implement a `ParallelRunner` optimized for this scenario and use with the parallel iterators.
There are two main decisions to be taken while executing these components:
* how many threads do we use?
* what is the chunk size; i.e., how many input items does a thread pull each time?

A [`ParallelRunner`](https://docs.rs/orx-parallel/latest/orx_parallel/trait.ParallelRunner) is a combination of a `ParThreadPool` and a `ParallelExecutor` that are responsible for these decisions, respectively.

### ParThreadPool: number of threads

The [`ParThreadPool`](https://docs.rs/orx-parallel/latest/orx_parallel/trait.ParThreadPool) trait generalizes thread pools that can be used for parallel computations. This allows the parallel computation to be generic over thread pools.

When not explicitly set, [`DefaultPool`](https://docs.rs/orx-parallel/latest/orx_parallel/type.DefaultPool) is used:
* When the **std** feature is enabled, the default pool is the [`StdDefaultPool`](https://docs.rs/orx-parallel/latest/orx_parallel/struct.StdDefaultPool). In other words, all available native threads can be used by the parallel computation. This number can be globally bounded by the "ORX_PARALLEL_MAX_NUM_THREADS" environment variable when set.
* When working in a **no-std** environment, the default pool is the [`SequentialPool`](https://docs.rs/orx-parallel/latest/orx_parallel/struct.SequentialPool). As the name suggests, this pool executes the parallel computation sequentially on the main thread. It can be considered a placeholder to be overwritten by the `with_pool` or `with_runner` methods to achieve parallelism.

*Note that the thread pool defines the available resources, i.e., an upper bound on the number of threads. This upper bound can further be tightened by the [`num_threads`](https://docs.rs/orx-parallel/latest/orx_parallel/trait.ParIter.html#tymethod.num_threads) configuration, as illustrated below. Finally, the parallel executor might choose not to use all available threads if it decides that the computation is small enough.*
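
For instance, assuming the **std** feature is enabled so that the default pool is `StdDefaultPool`, the following bounds a single computation without touching the pool itself:

```rust
use orx_parallel::*;

let inputs: Vec<_> = (0..42).collect();

// the default pool may own many native threads;
// this particular computation will use at most 4 of them
let sum = inputs.par().num_threads(4).sum();
assert_eq!(sum, (0..42).sum());
```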

To overwrite the defaults and explicitly set the thread pool to be used for the computation, [`with_pool`](https://docs.rs/orx-parallel/latest/orx_parallel/trait.ParIter.html#tymethod.with_pool) or [`with_runner`](https://docs.rs/orx-parallel/latest/orx_parallel/trait.ParIter.html#tymethod.with_runner) methods are used.

```rust
use orx_parallel::*;

let inputs: Vec<_> = (0..42).collect();

// uses the DefaultPool
// assuming "std" enabled, StdDefaultPool will be used; i.e., native threads
let sum = inputs.par().sum();

// equivalent to:
let sum2 = inputs.par().with_pool(StdDefaultPool::default()).sum();
assert_eq!(sum, sum2);

#[cfg(feature = "scoped_threadpool")]
{
    let mut pool = scoped_threadpool::Pool::new(8);
    // uses the scoped_threadpool::Pool created with 8 threads
    let sum2 = inputs.par().with_pool(&mut pool).sum();
    assert_eq!(sum, sum2);
}

#[cfg(feature = "rayon-core")]
{
    let pool = rayon_core::ThreadPoolBuilder::new()
        .num_threads(8)
        .build()
        .unwrap();
    // uses the rayon-core::ThreadPool created with 8 threads
    let sum2 = inputs.par().with_pool(&pool).sum();
    assert_eq!(sum, sum2);
}

#[cfg(feature = "yastl")]
{
    let pool = YastlPool::new(8);
    // uses the yastl::Pool created with 8 threads
    let sum2 = inputs.par().with_pool(&pool).sum();
    assert_eq!(sum, sum2);
}
```

`ParThreadPool` implementations of several thread pools are provided in this crate as optional features (see [features](#features) section). Provided that the pool supports scoped computations, it is trivial to implement this trait in most cases (see [implementations](https://github.com/orxfun/orx-parallel/tree/main/src/runner/implementations) for examples).

In most cases, *rayon-core*, *scoped_threadpool* and *scoped-pool* perform better than the others, and get close to the native-thread performance of `StdDefaultPool`.

Since parallel computations are generic over the thread pools, their performance can be conveniently compared for specific use cases. An example benchmark can be found in the [collect_filter_map](https://github.com/orxfun/orx-parallel/blob/main/benches/collect_filter_map.rs) file. For quick tests, you may also use the example [benchmark_pools](https://github.com/orxfun/orx-parallel/blob/main/examples/benchmark_pools.rs).

### ParallelExecutor: chunk size

Once the thread pool provides the computational resources, it is [`ParallelExecutor`](https://docs.rs/orx-parallel/latest/orx_parallel/trait.ParallelExecutor)'s task to distribute the work to the available threads. As mentioned above, all threads receive exactly the same closure. This closure keeps pulling elements from the input concurrent iterator and operating on the inputs until all elements are processed.

The critical decision that the parallel executor makes is the chunk size. Depending on the state of the computation, it can dynamically decide on the number of elements to pull from the input iterator. The tradeoff it tries to solve is as follows:

* the larger the chunk size,
* the smaller the parallelization overhead; but also
* the larger the risk of imbalance in cases of heterogeneity.
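
To illustrate this tradeoff with a hypothetical schedule (not necessarily the crate's actual executor logic), a chunk size that starts small and grows geometrically up to a cap keeps early pulls balanced while amortizing the synchronization cost of later pulls:

```rust
/// Hypothetical chunk-size schedule: starts small and doubles up to a cap.
/// Illustrates the tradeoff above; not the actual ParallelExecutor logic.
struct ChunkSchedule {
    current: usize,
    max: usize,
}

impl ChunkSchedule {
    fn new(min: usize, max: usize) -> Self {
        Self { current: min.max(1), max }
    }

    /// Number of elements an idle thread should pull next.
    fn next_chunk(&mut self) -> usize {
        let chunk = self.current;
        // small early chunks limit imbalance under heterogeneity;
        // doubling reduces per-pull parallelization overhead later on
        self.current = (self.current * 2).min(self.max);
        chunk
    }
}

let mut schedule = ChunkSchedule::new(1, 64);
let sizes: Vec<usize> = (0..8).map(|_| schedule.next_chunk()).collect();
assert_eq!(sizes, [1, 2, 4, 8, 16, 32, 64, 64]);
```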

## Features

* **std**: This is a **no-std** crate, with *std* included as a default feature. Please use the `--no-default-features` flag for no-std use cases. The **std** feature enables `StdDefaultPool`, the default thread provider which uses native threads.
* **rayon-core**: This feature enables using `rayon_core::ThreadPool` for parallel computations.
* **scoped_threadpool**: This feature enables using `scoped_threadpool::Pool`.
* **scoped-pool**: This feature enables using `scoped-pool::Pool`.
* **yastl**: This feature enables using `yastl::Pool`.
* **pond**: This feature enables using `pond::Pool`.
* **poolite**: This feature enables using `poolite::Pool`.

## Contributing

@@ -439,7 +524,8 @@ Please open an [issue](https://github.com/orxfun/orx-parallel/issues/new) or cre

* if you notice an error,
* have a question or think something could be improved,
* have an input collection or generator that needs to be parallelized, or
* have an input collection or generator that needs to be parallelized,
* want to use a particular thread pool with parallel iterators,
* having trouble representing a particular parallel computation with parallel iterators,
* or anything else:)

3 changes: 2 additions & 1 deletion benches/chain3_collect_map.rs
```diff
@@ -1,9 +1,10 @@
-use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use orx_parallel::*;
 use orx_split_vec::SplitVec;
 use rand::prelude::*;
 use rand_chacha::ChaCha8Rng;
 use rayon::iter::IntoParallelIterator;
+use std::hint::black_box;
 
 const TEST_LARGE_OUTPUT: bool = false;
```
3 changes: 2 additions & 1 deletion benches/chain4_collect_map.rs
```diff
@@ -1,9 +1,10 @@
-use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use orx_parallel::*;
 use orx_split_vec::SplitVec;
 use rand::prelude::*;
 use rand_chacha::ChaCha8Rng;
 use rayon::iter::IntoParallelIterator;
+use std::hint::black_box;
 
 const TEST_LARGE_OUTPUT: bool = false;
```
3 changes: 2 additions & 1 deletion benches/chain_collect_map.rs
```diff
@@ -1,9 +1,10 @@
-use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use orx_parallel::*;
 use orx_split_vec::SplitVec;
 use rand::prelude::*;
 use rand_chacha::ChaCha8Rng;
 use rayon::iter::IntoParallelIterator;
+use std::hint::black_box;
 
 const TEST_LARGE_OUTPUT: bool = false;
```
58 changes: 57 additions & 1 deletion benches/collect_filter.rs
```diff
@@ -1,9 +1,10 @@
-use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use orx_parallel::*;
 use orx_split_vec::SplitVec;
 use rand::prelude::*;
 use rand_chacha::ChaCha8Rng;
 use rayon::iter::IntoParallelIterator;
+use std::hint::black_box;
 
 const TEST_LARGE_OUTPUT: bool = false;
@@ -83,6 +84,11 @@ fn orx_into_split_vec(inputs: &[Output]) -> SplitVec<&Output> {
     inputs.into_par().filter(filter).collect()
 }
 
+#[allow(dead_code)]
+fn orx_into_vec_with<P: ParThreadPool>(inputs: &[Output], pool: P) -> Vec<&Output> {
+    inputs.into_par().with_pool(pool).filter(filter).collect()
+}
+
 fn run(c: &mut Criterion) {
     let treatments = [65_536 * 2];
@@ -113,6 +119,56 @@
            assert_eq!(&expected, &orx_into_split_vec(&input));
            b.iter(|| orx_into_split_vec(black_box(&input)))
        });
+
+        #[cfg(feature = "rayon-core")]
+        group.bench_with_input(
+            BenchmarkId::new("orx-vec (rayon-core::ThreadPool)", n),
+            n,
+            |b, _| {
+                let pool = rayon_core::ThreadPoolBuilder::new()
+                    .num_threads(32)
+                    .build()
+                    .unwrap();
+                assert_eq!(&expected, &orx_into_vec_with(&input, &pool));
+                b.iter(|| orx_into_vec_with(black_box(&input), &pool))
+            },
+        );
+
+        #[cfg(feature = "scoped-pool")]
+        group.bench_with_input(
+            BenchmarkId::new("orx-vec (scoped-pool::Pool)", n),
+            n,
+            |b, _| {
+                let pool = scoped_pool::Pool::new(32);
+                assert_eq!(&expected, &orx_into_vec_with(&input, &pool));
+                b.iter(|| orx_into_vec_with(black_box(&input), &pool))
+            },
+        );
+
+        #[cfg(feature = "scoped_threadpool")]
+        group.bench_with_input(
+            BenchmarkId::new("orx-vec (scoped_threadpool::Pool)", n),
+            n,
+            |b, _| {
+                let pool = || scoped_threadpool::Pool::new(32);
+                assert_eq!(&expected, &orx_into_vec_with(&input, pool()));
+                b.iter(|| orx_into_vec_with(black_box(&input), pool()))
+            },
+        );
+
+        #[cfg(feature = "yastl")]
+        group.bench_with_input(BenchmarkId::new("orx-vec (yastl::Pool)", n), n, |b, _| {
+            let pool = YastlPool::new(32);
+            assert_eq!(&expected, &orx_into_vec_with(&input, &pool));
+            b.iter(|| orx_into_vec_with(black_box(&input), &pool))
+        });
+
+        #[cfg(feature = "pond")]
+        group.bench_with_input(BenchmarkId::new("orx-vec (pond::Pool)", n), n, |b, _| {
+            let pool = || PondPool::new_threads_unbounded(32);
+            assert_eq!(&expected, &orx_into_vec_with(&input, pool()));
+            b.iter(|| orx_into_vec_with(black_box(&input), pool()))
+        });
     }
 
     group.finish();
```
3 changes: 2 additions & 1 deletion benches/collect_filtermap.rs
```diff
@@ -1,9 +1,10 @@
-use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use orx_parallel::*;
 use orx_split_vec::SplitVec;
 use rand::prelude::*;
 use rand_chacha::ChaCha8Rng;
 use rayon::iter::IntoParallelIterator;
+use std::hint::black_box;
 
 const TEST_LARGE_OUTPUT: bool = false;
```