From 1cbbe3cc0fadb1bb5d2af65e54102a6233dabedd Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Wed, 11 Oct 2023 09:09:36 +0000 Subject: [PATCH 01/45] WIP --- text/0000-iter-fn.md | 207 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 text/0000-iter-fn.md diff --git a/text/0000-iter-fn.md b/text/0000-iter-fn.md new file mode 100644 index 00000000000..a5d572546f6 --- /dev/null +++ b/text/0000-iter-fn.md @@ -0,0 +1,207 @@ +- Feature Name: `iter-fn` +- Start Date: 2023-10-10 +- RFC PR: [rust-lang/rfcs#0000](https://github.com/rust-lang/rfcs/pull/0000) +- Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000) + +# Summary +[summary]: #summary + +Add `iter {}` blocks to the language. These blocks implement `Iterator` and +enable writing iterators in regular code by `yield`ing elements instead of having +to implement `Iterator` for a custom struct and manually writing an `Iterator::next` +method body. This is a change similar to adding `async {}` blocks that implement +`Future` instead of having to manually write futures and their state machines. + +Furthermore, add `iter fn` to the language. `iter fn foo(arg: X) -> Y` desugars to +`fn foo(arg: X) -> impl Iterator`. + +# Motivation +[motivation]: #motivation + +Writing iterators manually can be very painful. Many iterators can be written by +chaining `Iterator` methods, but some need to be written as a `struct` and have +`Iterator` implemented for them. Some of the code that is written this way pushes +people to instead not use iterators, but just run a `for` loop and write to mutable +state. With this RFC, you could write the `for` loop, without mutable state, and get +an iterator out of it again. 
+ +As an example, here are four ways to write an iterator that walks over something containing integers, +keeps only the odd integers, and multiplies each of them by 2: + +```rust +// `Iterator` methods +fn odd_dup(values: impl Iterator<Item = u32>) -> impl Iterator<Item = u32> { + values.filter(|value| value.is_odd()).map(|value| value * 2) +} +// `struct` and manual `impl` +fn odd_dup(values: impl Iterator<Item = u32>) -> impl Iterator<Item = u32> { + struct Foo<T>(T); + impl<T: Iterator<Item = u32>> Iterator for Foo<T> { + type Item = u32; + fn next(&mut self) -> Option<u32> { + loop { + let value = self.0.next()?; + if value.is_odd() { + return Some(value * 2) + } + } + } + } + Foo(values) +} +// `iter block` +fn odd_dup(values: impl Iterator<Item = u32>) -> impl Iterator<Item = u32> { + iter { + for value in values { + if value.is_odd() { + yield value * 2; + } + } + } +} + +// `iter fn` +iter fn odd_dup(values: impl Iterator<Item = u32>) -> u32 { + for value in values { + if value.is_odd() { + yield value * 2; + } + } +} +``` + +# Guide-level explanation +[guide-level-explanation]: #guide-level-explanation + +- Introducing new named concepts. +- Explaining the feature largely in terms of examples. +- Explaining how Rust programmers should *think* about the feature, and how it should impact the way they use Rust. It should explain the impact as concretely as possible. +- If applicable, provide sample error messages, deprecation warnings, or migration guidance. +- If applicable, describe the differences between teaching this to existing Rust programmers and new Rust programmers. +- Discuss how this impacts the ability to read, understand, and maintain Rust code. Code is read and modified far more often than written; will the proposed feature make code easier to maintain? + +For implementation-oriented RFCs (e.g. for compiler internals), this section should focus on how compiler contributors should think about the change, and give examples of its concrete impact. For policy RFCs, this section should provide an example-driven introduction to the policy, and explain its impact in concrete terms. 
+ +## Returning/finishing an iterator + +`iter` blocks' trailing expression must be of unit type or the block must diverge before reaching its end. + +### Diverging iterators + +For example, an `iter` block that produces the sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` +from `next`, and only drop its captured data when the iterator is dropped. + +```rust +iter { + loop { + yield 0; + yield 1; + } +} +``` + +If an `iter` panics, the behavior is very similar to `return`, except that `next` doesn't return `None`, but unwinds. + +## Error handling + +Within `iter` blocks, the `?` operator desugars differently from how it desugars outside of `iter` blocks. +Instead of returning the `Err` variant, `foo?` yields the `Err` variant and then `return`s immediately afterwards. +This has the effect of it being an iterator with `Iterator::Item`'s type being `Result`, and once a `Some(Err(e))` +is produced via `?`, the iterator returns `None` next. + +`iter` blocks do not need to have a trailing `Ok(x)` expression, because returning from an `iter` block will make the `Iterator` return `None` from now, which needs no value. + +# Reference-level explanation +[reference-level-explanation]: #reference-level-explanation + +This is the technical portion of the RFC. Explain the design in sufficient detail that: + +- Its interaction with other features is clear. +- It is reasonably clear how the feature would be implemented. +- Corner cases are dissected by example. + +The section should return to the examples given in the previous section, and explain more fully how the detailed proposal makes those examples work. + +## Error handling + +`?` desugars to + +# Drawbacks +[drawbacks]: #drawbacks + +Why should we *not* do this? + +# Rationale and alternatives +[rationale-and-alternatives]: #rationale-and-alternatives + +- Why is this design the best in the space of possible designs? +- What other designs have been considered and what is the rationale for not choosing them? 
+- What is the impact of not doing this? +- If this is a language proposal, could this be done in a library or macro instead? Does the proposed change make Rust code easier or harder to read, understand, and maintain? + +## Keyword + +We could also use `gen` (for `generator`) as a keyword. The reason I chose `iter` in this RFC, is that people (including me) connect generators with a more powerful +scheme than just plain `Iterator`s. The `Generator` trait can do everything that `iter` blocks and `async` blocks can do, and more. I believe connecting the `Iterator` +trait with `iter` blocks is the right choice, but I also don't feel too strongly about it. + +## Non-Contextual keyword + +We could forbid `iter` from being used as an identifier anywhere. + +I believe blocking `iter` (or even just `gen`) from being used as module, type and function names is not feasible. +The standard library contains an `iter` module and many +data structures have `iter` methods implemented for them. + +## Do not do this + +The alternative is to keep adding more helper methods to `Iterator`. It is already rather hard for new Rustaceans to get a hold of all the options they have on `Iterator`. +Some such methods would also need to be very generic (not an `Iterator` example, but https://doc.rust-lang.org/std/primitive.array.html#method.try_map on arrays is something +that has very complex diagnostics that are hard to improve, even if it's nice once it works). + +Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) instead, which work on stable and give you `gen!` blocks that behave pretty mostly +like `iter` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. + +# Prior art +[prior-art]: #prior-art + +## Python + +Python has `iter fn`: any funciton that uses `yield` internally. +These work pretty much like the `iter` functions proposed in this PR. 
The main difference is that raising an +exception automatically passes the exception outwards, instead of yielding an `Err()` element. + +```python +def odd_dup(values): + for value in values: + if is_odd(value): + yield value * 2 +``` + +# Unresolved questions +[unresolved-questions]: #unresolved-questions + +- What parts of the design do you expect to resolve through the RFC process before this gets merged? +- What parts of the design do you expect to resolve through the implementation of this feature before stabilization? +- What related issues do you consider out of scope for this RFC that could be addressed in the future independently of the solution that comes out of this RFC? + +# Future possibilities +[future-possibilities]: #future-possibilities + +Think about what the natural extension and evolution of your proposal would +be and how it would affect the language and project as a whole in a holistic +way. Try to use this section as a tool to more fully consider all possible +interactions with the project and language in your proposal. +Also consider how this all fits into the roadmap for the project +and of the relevant sub-team. + +This is also a good place to "dump ideas", if they are out of scope for the +RFC you are writing but otherwise related. + +If you have tried and cannot think of any future possibilities, +you may simply state that you cannot think of anything. + +Note that having something written down in the future-possibilities section +is not a reason to accept the current or a future RFC; such notes should be +in the section on motivation or rationale in this or subsequent RFCs. +The section merely provides additional information. 
From 6b3726e1310fa5aa0d870b65ad621531e8bce3af Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Wed, 11 Oct 2023 11:53:39 +0000 Subject: [PATCH 02/45] iter -> gen --- text/{0000-iter-fn.md => 0000-gen-fn.md} | 58 ++++++++++++++---------- 1 file changed, 34 insertions(+), 24 deletions(-) rename text/{0000-iter-fn.md => 0000-gen-fn.md} (74%) diff --git a/text/0000-iter-fn.md b/text/0000-gen-fn.md similarity index 74% rename from text/0000-iter-fn.md rename to text/0000-gen-fn.md index a5d572546f6..04b2bb813c3 100644 --- a/text/0000-iter-fn.md +++ b/text/0000-gen-fn.md @@ -1,4 +1,4 @@ -- Feature Name: `iter-fn` +- Feature Name: `gen-fn` - Start Date: 2023-10-10 - RFC PR: [rust-lang/rfcs#0000](https://github.com/rust-lang/rfcs/pull/0000) - Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000) @@ -6,13 +6,13 @@ # Summary [summary]: #summary -Add `iter {}` blocks to the language. These blocks implement `Iterator` and +Add `gen {}` blocks to the language. These blocks implement `Iterator` and enable writing iterators in regular code by `yield`ing elements instead of having to implement `Iterator` for a custom struct and manually writing an `Iterator::next` method body. This is a change similar to adding `async {}` blocks that implement `Future` instead of having to manually write futures and their state machines. -Furthermore, add `iter fn` to the language. `iter fn foo(arg: X) -> Y` desugars to +Furthermore, add `gen fn` to the language. `gen fn foo(arg: X) -> Y` desugars to `fn foo(arg: X) -> impl Iterator<Item = Y>`. 
# Motivation @@ -49,9 +49,9 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { } Foo(values) } -// `iter block` +// `gen block` fn odd_dup(values: impl Iterator) -> impl Iterator { - iter { + gen { for value in values { if value.is_odd() { yield value * 2; @@ -60,8 +60,8 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { } } -// `iter fn` -iter fn odd_dup(values: impl Iterator) -> u32 { +// `gen fn` +gen fn odd_dup(values: impl Iterator) -> u32 { for value in values { if value.is_odd() { yield value * 2; @@ -82,17 +82,21 @@ iter fn odd_dup(values: impl Iterator) -> u32 { For implementation-oriented RFCs (e.g. for compiler internals), this section should focus on how compiler contributors should think about the change, and give examples of its concrete impact. For policy RFCs, this section should provide an example-driven introduction to the policy, and explain its impact in concrete terms. +## New keyword + +Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, ... named `gen` must be renamed. + ## Returning/finishing an iterator -`iter` blocks' trailing expression must be of unit type or the block must diverge before reaching its end. +`gen` blocks' trailing expression must be of unit type or the block must diverge before reaching its end. ### Diverging iterators -For example, an `iter` block that produces the sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` +For example, an `gen` block that produces the sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` from `next`, and only drop its captured data when the iterator is dropped. ```rust -iter { +gen { loop { yield 0; yield 1; @@ -100,16 +104,16 @@ iter { } ``` -If an `iter` panics, the behavior is very similar to `return`, except that `next` doesn't return `None`, but unwinds. 
+If an `gen` panics, the behavior is very similar to `return`, except that `next` doesn't return `None`, but unwinds. ## Error handling -Within `iter` blocks, the `?` operator desugars differently from how it desugars outside of `iter` blocks. +Within `gen` blocks, the `?` operator desugars differently from how it desugars outside of `gen` blocks. Instead of returning the `Err` variant, `foo?` yields the `Err` variant and then `return`s immediately afterwards. This has the effect of it being an iterator with `Iterator::Item`'s type being `Result`, and once a `Some(Err(e))` is produced via `?`, the iterator returns `None` next. -`iter` blocks do not need to have a trailing `Ok(x)` expression, because returning from an `iter` block will make the `Iterator` return `None` from now, which needs no value. +`gen` blocks do not need to have a trailing `Ok(x)` expression, because returning from an `gen` block will make the `Iterator` return `None` from now, which needs no value. # Reference-level explanation [reference-level-explanation]: #reference-level-explanation @@ -141,17 +145,23 @@ Why should we *not* do this? ## Keyword -We could also use `gen` (for `generator`) as a keyword. The reason I chose `iter` in this RFC, is that people (including me) connect generators with a more powerful +We could also use `iter` as a keyword. I would prefer `iter` in because I connect generators with a more powerful scheme than just plain `Iterator`s. The `Generator` trait can do everything that `iter` blocks and `async` blocks can do, and more. I believe connecting the `Iterator` -trait with `iter` blocks is the right choice, but I also don't feel too strongly about it. +trait with `iter` blocks is the right choice, but that would require us to carve out many exceptions for this keyword, +as `iter` is used for module names and method names everywhere (including libstd/libcore). 
+ +## Contextual keyword + +We allow `gen` as an identifier for function names and module names, without that conflicting with `gen` blocks, but that makes the syntax more complicated than necessary, for not too much gain. + +## 2021 edition -## Non-Contextual keyword +We could allow `gen` blocks on the 2021 edition via `k#gen {}` syntax. +We can allow `gen fn` on all editions. -We could forbid `iter` from being used as an identifier anywhere. +## `gen` identifiers on 2024 edition -I believe blocking `iter` (or even just `gen`) from being used as module, type and function names is not feasible. -The standard library contains an `iter` module and many -data structures have `iter` methods implemented for them. +We can allow `i#gen` identifiers in the 2024 edition in order to refer to items named `gen` in previous edition crates. ## Do not do this @@ -159,16 +169,16 @@ The alternative is to keep adding more helper methods to `Iterator`. It is alrea Some such methods would also need to be very generic (not an `Iterator` example, but https://doc.rust-lang.org/std/primitive.array.html#method.try_map on arrays is something that has very complex diagnostics that are hard to improve, even if it's nice once it works). -Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) instead, which work on stable and give you `gen!` blocks that behave pretty mostly -like `iter` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. +Users can use crates like [`genawagen`](https://crates.io/crates/genawagen) instead, which work on stable and give you `gen!` blocks that behave pretty mostly +like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. # Prior art [prior-art]: #prior-art ## Python -Python has `iter fn`: any funciton that uses `yield` internally. -These work pretty much like the `iter` functions proposed in this PR. 
The main difference is that raising an +Python has `gen fn`: any function that uses `yield` internally. +These work pretty much like the `gen` functions proposed in this PR. The main difference is that raising an exception automatically passes the exception outwards, instead of yielding an `Err()` element. ```python From 585afb97003b14db2cf4b47cc222e852fd9bf5e0 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Wed, 11 Oct 2023 12:02:51 +0000 Subject: [PATCH 03/45] `?` desugaring --- text/0000-gen-fn.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 04b2bb813c3..d6cc9efbd13 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -126,14 +126,28 @@ This is the technical portion of the RFC. Explain the design in sufficient detai The section should return to the examples given in the previous section, and explain more fully how the detailed proposal makes those examples work. +## New keyword + +In the 2024 edition we reserve `gen` as a keyword. Previous editions need to use `k#gen` to get the same features. + ## Error handling -`?` desugars to +`foo?` in `gen` blocks desugars to + +```rust +match foo { + Err(err) => { + yield Err(err.into()); + return; + }, + Ok(val) => val, +} +``` # Drawbacks [drawbacks]: #drawbacks -Why should we *not* do this? +It's another language feature for something that can already be written entirely in user code. 
# Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives From c905ce07a57becc03a5154b836775b8a0bb47f32 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Wed, 11 Oct 2023 12:37:05 +0000 Subject: [PATCH 04/45] Use correct desugaring to support all `Try` types --- text/0000-gen-fn.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index d6cc9efbd13..365234b3f1c 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -113,7 +113,9 @@ Instead of returning the `Err` variant, `foo?` yields the `Err` variant and then This has the effect of it being an iterator with `Iterator::Item`'s type being `Result`, and once a `Some(Err(e))` is produced via `?`, the iterator returns `None` next. -`gen` blocks do not need to have a trailing `Ok(x)` expression, because returning from an `gen` block will make the `Iterator` return `None` from now, which needs no value. +`gen` blocks do not need to have a trailing `Ok(x)` expression, because returning from an `gen` block will make the `Iterator` return `None` from now, which needs no value. Instead all `yield` operations must be given a `Result`. + +Similarly the `?` operator on `Option`s will `yield None` if it is `None`, and require passing an `Option` to all `yield` operations. # Reference-level explanation [reference-level-explanation]: #reference-level-explanation @@ -135,15 +137,18 @@ In the 2024 edition we reserve `gen` as a keyword. Previous editions need to use `foo?` in `gen` blocks desugars to ```rust -match foo { - Err(err) => { - yield Err(err.into()); +match foo.branch() { + ControlFlow::Break(err) => { + yield R::from_residual(err); return; }, - Ok(val) => val, + ControlFlow::Continue(val) => val, } ``` +which will stop iteration after the first error. 
This is the same behaviour that `collect::<Result<_, _>>()` performs +on any iterator over `Result`s. + # Drawbacks [drawbacks]: #drawbacks From e10c2a648e0c72f8d12c54fca436318a71cb6997 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 12 Oct 2023 08:55:28 +0000 Subject: [PATCH 05/45] Unresolved questions and future ideas --- text/0000-gen-fn.md | 48 ++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 365234b3f1c..aa2137a17e3 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -214,23 +214,43 @@ def odd_dup(values): - What parts of the design do you expect to resolve through the implementation of this feature before stabilization? - What related issues do you consider out of scope for this RFC that could be addressed in the future independently of the solution that comes out of this RFC? +## Panicking + +What happens when a `gen` block that panicked gets `next` called again? Do we need to poison the iterator? + +## Fusing + +Are `gen` blocks fused? Or may they behave erratically after returning `None` the first time? + # Future possibilities [future-possibilities]: #future-possibilities -Think about what the natural extension and evolution of your proposal would -be and how it would affect the language and project as a whole in a holistic -way. Try to use this section as a tool to more fully consider all possible -interactions with the project and language in your proposal. -Also consider how this all fits into the roadmap for the project -and of the relevant sub-team. +## `yield from` (forwarding operation) + +Python has the ability to `yield from` an iterator. +Effectively this is syntax sugar for looping over all elements of the iterator and yielding them individually. 
+There are infinite options to choose from if we want such a feature, so I'm just going to list the general ideas below: -This is also a good place to "dump ideas", if they are out of scope for the -RFC you are writing but otherwise related. +### Do nothing, just use loops + +```rust +for x in iter { + yield x +} +``` -If you have tried and cannot think of any future possibilities, -you may simply state that you cannot think of anything. +### Language support -Note that having something written down in the future-possibilities section -is not a reason to accept the current or a future RFC; such notes should be -in the section on motivation or rationale in this or subsequent RFCs. -The section merely provides additional information. +We could do something like postfix `yield` or an entirely new keyword, or... + +```rust +iter.yield +``` + +### stdlib macro + +We could add a macro to the standard library and prelude, the macro would just expand to the for loop + yield. + +```rust +yield_all!(iter) +``` From 625826d09f45820052551adf872e68242ac1f72d Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 12 Oct 2023 09:02:19 +0000 Subject: [PATCH 06/45] replace-all failure --- text/0000-gen-fn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index aa2137a17e3..7fdd3e1a1c8 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -188,7 +188,7 @@ The alternative is to keep adding more helper methods to `Iterator`. It is alrea Some such methods would also need to be very generic (not an `Iterator` example, but https://doc.rust-lang.org/std/primitive.array.html#method.try_map on arrays is something that has very complex diagnostics that are hard to improve, even if it's nice once it works). 
-Users can use crates like [`genawagen`](https://crates.io/crates/genawagen) instead, which work on stable and give you `gen!` blocks that behave pretty mostly +Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) instead, which work on stable and give you `gen!` blocks that behave pretty mostly like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. # Prior art From 3f3799a0ba98bef8be2d747388865f350b09292b Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 12 Oct 2023 09:13:01 +0000 Subject: [PATCH 07/45] Motivation and generators --- text/0000-gen-fn.md | 47 ++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 7fdd3e1a1c8..2d3201e36a4 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -18,6 +18,12 @@ Furthermore, add `gen fn` to the language. `gen fn foo(arg: X) -> Y` desugars to # Motivation [motivation]: #motivation +The main motivation of this RFC is to reserve a new keyword in the 2024 edition. +The feature used by this keyword described here should be treated as an e-RFC for +experimentation on nightly with this new keyword. I would like to avoid too much +discussion of the semantics provided here, and instead discuss the semantics during +the experimental implementation work. + Writing iterators manually can be very painful. Many iterators can be written by chaining `Iterator` methods, but some need to be written as a `struct` and have `Iterator` implemented for them. Some of the code that is written this way pushes @@ -167,21 +173,13 @@ It's another language feature for something that can already be written entirely We could also use `iter` as a keyword. I would prefer `iter` in because I connect generators with a more powerful scheme than just plain `Iterator`s. The `Generator` trait can do everything that `iter` blocks and `async` blocks can do, and more. 
I believe connecting the `Iterator` trait with `iter` blocks is the right choice, but that would require us to carve out many exceptions for this keyword, -as `iter` is used for module names and method names everywhere (including libstd/libcore). - -## Contextual keyword - -We allow `gen` as an identifier for function names and module names, without that conflicting with `gen` blocks, but that makes the syntax more complicated than necessary, for not too much gain. +as `iter` is used for module names and method names everywhere (including libstd/libcore). It may not be much worse than `gen` (see also [#unresolved-questions]) ## 2021 edition We could allow `gen` blocks on the 2021 edition via `k#gen {}` syntax. We can allow `gen fn` on all editions. -## `gen` identifiers on 2024 edition - -We can allow `i#gen` identifiers in the 2024 edition in order to refer to items named `gen` in previous edition crates. - ## Do not do this The alternative is to keep adding more helper methods to `Iterator`. It is already rather hard for new Rustaceans to get a hold of all the options they have on `Iterator`. @@ -210,10 +208,6 @@ def odd_dup(values): # Unresolved questions [unresolved-questions]: #unresolved-questions -- What parts of the design do you expect to resolve through the RFC process before this gets merged? -- What parts of the design do you expect to resolve through the implementation of this feature before stabilization? -- What related issues do you consider out of scope for this RFC that could be addressed in the future independently of the solution that comes out of this RFC? - ## Panicking What happens when a `gen` block that panicked gets `next` called again? Do we need to poison the iterator? @@ -222,6 +216,19 @@ What happens when a `gen` block that panicked gets `next` called again? Do we ne Are `gen` blocks fused? Or may they behave eratically after returning `None` the first time? 
+## Contextual keyword + +Popular crates (like `rand`) have methods called `gen` (https://docs.rs/rand/latest/rand/trait.Rng.html#method.gen). If we forbid those, we are forcing those crates to make a major version bump when they update their edition, and we are requiring any users of those crates to use `r#gen` instead of `gen` when calling that method. + +We could instead choose to use a contextual keyword and only forbid + +* bindings, +* field names (due to destructuring bindings), +* enum variants, +* and type names + +to be `gen`. This should avoid any parsing issues around `gen` followed by `{` in expressions. + # Future possibilities [future-possibilities]: #future-possibilities @@ -254,3 +261,17 @@ We could add a macro to the standard library and prelude, the macro would just e ```rust yield_all!(iter) ``` + +## Full-on `Generator` support + +We already have a `Generator` trait on nightly that is much more powerful than the `Iterator` +API could possibly be. + +1. it uses `Pin<&mut Self>`, allowing self-references in the generator across yield points +2. it has arguments (`yield` returns the arguments passed to it in the subsequent invocations) + +Similar (but definitely not the same) to ideas around `async` closures, I think we could argue for `Generator`s to be `gen` closures, +while `gen` blocks are the simpler concept that has no arguments and just captures variables. + +Either way, support for full `Generator`s should (in my opinion) be discussed and implemented separately, +as there are many more open questions around them than around just a simpler way to write `Iterator`s. 
From 4146a83d7b18051a4b175494b7bd6d4973f12959 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 12 Oct 2023 09:29:10 +0000 Subject: [PATCH 08/45] Add implementation section --- text/0000-gen-fn.md | 52 +++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 2d3201e36a4..be3036e61cf 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -79,15 +79,6 @@ gen fn odd_dup(values: impl Iterator) -> u32 { # Guide-level explanation [guide-level-explanation]: #guide-level-explanation -- Introducing new named concepts. -- Explaining the feature largely in terms of examples. -- Explaining how Rust programmers should *think* about the feature, and how it should impact the way they use Rust. It should explain the impact as concretely as possible. -- If applicable, provide sample error messages, deprecation warnings, or migration guidance. -- If applicable, describe the differences between teaching this to existing Rust programmers and new Rust programmers. -- Discuss how this impacts the ability to read, understand, and maintain Rust code. Code is read and modified far more often than written; will the proposed feature make code easier to maintain? - -For implementation-oriented RFCs (e.g. for compiler internals), this section should focus on how compiler contributors should think about the change, and give examples of its concrete impact. For policy RFCs, this section should provide an example-driven introduction to the policy, and explain its impact in concrete terms. - ## New keyword Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, ... named `gen` must be renamed. 
@@ -125,15 +116,6 @@ Similarly the `?` operator on `Option`s will `yield None` if it is `None`, and r # Reference-level explanation [reference-level-explanation]: #reference-level-explanation - -This is the technical portion of the RFC. Explain the design in sufficient detail that: - -- Its interaction with other features is clear. -- It is reasonably clear how the feature would be implemented. -- Corner cases are dissected by example. - -The section should return to the examples given in the previous section, and explain more fully how the detailed proposal makes those examples work. - ## New keyword In the 2024 edition we reserve `gen` as a keyword. Previous editions need to use `k#gen` to get the same features. @@ -155,6 +137,30 @@ match foo.branch() { which will stop iteration after the first error. This is the same behaviour that `collect::>()` performs on any iterator over `Result`s +## Implementation + +This feature is mostly implemented via existing generators, we'll just need some desugarings and then lots of work to get good diagnostics. + +### Gen fn + +`gen fn` desugars to the function itself, with its return type replaced by `impl Iterator` and its body wrapped in a `gen` block. +So a `gen fn`'s "return type" is in fact its iterator's `yield` type. + +A `gen fn` captures all lifetimes and generic parameters into the `impl Iterator` return type (just like `async fn`). +If you want more control over your captures, you'll need to use type alias impl trait when that becomes stable. + +Just like all other uses of `impl Trait`, auto traits are revealed without being specified. + +### Gen blocks + +`gen` blocks are effectively the same as an unstable generator + +* without arguments, +* with an additional check forbidding holding borrows across `yield` points, +* and an automatic `Iterator` implementation. 
+ +We'll probably be able to modularize the generator impl and make it more robust (on the impl and diagnostics side) for the `gen` block case, but I believe the initial implementation should just be a HIR lowering to a generator and wrapping that generator in `std::iterator::from_generator`. + # Drawbacks [drawbacks]: #drawbacks @@ -162,18 +168,14 @@ It's another language feature for something that can already be written entirely # Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives - -- Why is this design the best in the space of possible designs? -- What other designs have been considered and what is the rationale for not choosing them? -- What is the impact of not doing this? -- If this is a language proposal, could this be done in a library or macro instead? Does the proposed change make Rust code easier or harder to read, understand, and maintain? - ## Keyword We could also use `iter` as a keyword. I would prefer `iter` in because I connect generators with a more powerful scheme than just plain `Iterator`s. The `Generator` trait can do everything that `iter` blocks and `async` blocks can do, and more. I believe connecting the `Iterator` trait with `iter` blocks is the right choice, but that would require us to carve out many exceptions for this keyword, -as `iter` is used for module names and method names everywhere (including libstd/libcore). It may not be much worse than `gen` (see also [#unresolved-questions]) +as `iter` is used for module names and method names everywhere (including libstd/libcore). It may not be much worse than `gen` (see also [#unresolved-questions]). + +One argument for `iter` is also that we may want to use `gen` for full on generators in the future. 
## 2021 edition From ca7bb0118090672ba99e8ca340738f412c5a454a Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 12 Oct 2023 10:45:33 +0000 Subject: [PATCH 09/45] Some details about interactions with other features --- text/0000-gen-fn.md | 61 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index be3036e61cf..4c036268b83 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -76,6 +76,10 @@ gen fn odd_dup(values: impl Iterator) -> u32 { } ``` +Iterators created with `gen` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). +See [#unresolved-questions] for whether `gen` iterators are fused or may behave strangely after having returned `None` once. +Under no circumstances will it be undefined behavior if `next` is invoked again after having gotten a `None`. + # Guide-level explanation [guide-level-explanation]: #guide-level-explanation @@ -166,6 +170,10 @@ We'll probably be able to modularize the generator impl and make it more robust It's another language feature for something that can already be written entirely in user code. +In contrast to `Generator`, `gen` blocks that produce `Iterator`s cannot hold references across `yield` points. +See also https://doc.rust-lang.org/std/iter/fn.from_generator.html, which has an `Unpin` bound on the generator it takes +to produce an `Iterator`. + # Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives ## Keyword @@ -191,6 +199,40 @@ that has very complex diagnostics that are hard to improve, even if it's nice on Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) instead, which work on stable and give you `gen!` blocks that behave pretty mostly like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. 
+## `return` statements `yield` one last element + +Similarly to `try` blocks, trailing expresisons could yield their element. + +But then have no way to terminate iteration, as `return` statements would similarly have to have a +value that needs to get `yield`ed before terminating iteration. + +We could do something magical where returning `()` terminates the iteration, so + +```rust +gen fn foo() -> i32 { + 42 +} +``` + +could be a way to specify `std::iter::once(42)`. The issue I see with this is that + +```rust +gen fn foo() -> i32 { + 42; // note the semicolon +} +``` + +would then not return a value. + +Furthermore this would make it unclear what the behaviour of + +```rust +gen fn foo() {} +``` + +is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()` + + # Prior art [prior-art]: #prior-art @@ -277,3 +319,22 @@ while `gen` blocks are the simpler concept that has no arguments and just captur Either way, support for full `Generator`s should (in my opinion) be discussed and implemented separately, as there are many more open questions around them than around just a simpler way to write `Iterator`s. + +## `async` interactions + +We could support using `await` in `gen` blocks, similar to how we support `?` being used within them. +This is not trivially possible due to the fact that `Iterator::next` takes `&mut self` and not `Pin<&mut self>`. 
+ +There are a few options forward for this: + +* Add a separate trait for pinned iteration that is also usable with `gen` and `for` + * downside: very similar traits for the same thing +* backwards compatibly add a way to change the argument type of `Iterator::next` + * downside: unclear if possible +* implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block) + * downside: the thing being iterated over must now be pinned for the entire iteration, instead of for each iteration + +## `try` interactions + +We could allow `try gen fn foo() -> i32` to actually mean something akin to `gen fn foo() -> Result`. +Whatever we do here, it should mirror whatever `try fn` will mean in the future. From e07b76622d8ee0a1819f69ea679d07251cbd0142 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 12 Oct 2023 10:54:12 +0000 Subject: [PATCH 10/45] Explain lack of fusing --- text/0000-gen-fn.md | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 4c036268b83..1e21d2dec89 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -118,6 +118,12 @@ is produced via `?`, the iterator returns `None` next. Similarly the `?` operator on `Option`s will `yield None` if it is `None`, and require passing an `Option` to all `yield` operations. +## Fusing + +Just like `Generators`, Iterators produced by `gen` panic when invoked again after they have returned `None` once. +This can probably be fixed by special casing the generator impl if `Generator::Return = ()`, as we can trivally +produce infinite values of `()` type. + # Reference-level explanation [reference-level-explanation]: #reference-level-explanation ## New keyword @@ -252,13 +258,35 @@ def odd_dup(values): # Unresolved questions [unresolved-questions]: #unresolved-questions +## Keyword + +Should we use `iter` as a keyword instead, as we're producing `Iterator`s. 
+We can also use `gen` like proposed in this RFC and later extend its abilities to more powerful generators. + +[playground](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=efeacb803158c2ebd57d43b4e606c0b5) + +```rust +#![feature(generators)] +#![feature(iter_from_generator)] + +fn main() { + let mut it = std::iter::from_generator(|| { + yield 1 + }); + + assert_eq!(it.next(), Some(1)); + assert_eq!(it.next(), None); + it.next(); // panics +} +``` + ## Panicking What happens when a `gen` block that panicked gets `next` called again? Do we need to poison the iterator? ## Fusing -Are `gen` blocks fused? Or may they behave eratically after returning `None` the first time? +Should we make `gen` blocks fused? Right now they'd panic (which is what the generator impl does): ## Contextual keyword From 1bb969fced1cf0c6f1421ce6c19158a132c9e315 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 12 Oct 2023 11:04:29 +0000 Subject: [PATCH 11/45] Actually, the `async gen` troubles aren't anything beyond the `self-referential` `gen` block issues --- text/0000-gen-fn.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 1e21d2dec89..01d8c7ef430 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -350,17 +350,22 @@ as there are many more open questions around them than around just a simpler way ## `async` interactions -We could support using `await` in `gen` blocks, similar to how we support `?` being used within them. -This is not trivially possible due to the fact that `Iterator::next` takes `&mut self` and not `Pin<&mut self>`. +We could support using `await` in `async gen` blocks, similar to how we support `?` being used within `gen` blocks. 
+This is not possible in general due to the fact that `Iterator::next` takes `&mut self` and not `Pin<&mut self>`, but +it should be possible if no references are held across the `await` point, similar to how we disallow holding +references across `yield` points in this RFC. -There are a few options forward for this: + +## self-referential `gen` bloocks + +There are a few options forward: * Add a separate trait for pinned iteration that is also usable with `gen` and `for` * downside: very similar traits for the same thing * backwards compatibly add a way to change the argument type of `Iterator::next` * downside: unclear if possible * implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block) - * downside: the thing being iterated over must now be pinned for the entire iteration, instead of for each iteration + * downside: the thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. ## `try` interactions From 7d06af4e66c90fe1676191b08ee51439de68dedf Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 12 Oct 2023 09:15:30 -0400 Subject: [PATCH 12/45] feedback (#2) * minor typos * intro * motivation * guide * reference * implementation * alternates * rationale * future * whitespace --- text/0000-gen-fn.md | 171 +++++++++++++++++++++++--------------------- 1 file changed, 91 insertions(+), 80 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 01d8c7ef430..3478a2990b5 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -6,11 +6,12 @@ # Summary [summary]: #summary -Add `gen {}` blocks to the language. These blocks implement `Iterator` and -enable writing iterators in regular code by `yield`ing elements instead of having -to implement `Iterator` for a custom struct and manually writing an `Iterator::next` -method body. 
This is a change similar to adding `async {}` blocks that implement -`Future` instead of having to manually write futures and their state machines. +Add `gen {}` blocks to the language. These implement `Iterator` by `yield`ing +elements. This is simpler and more intuitive than creating a custom type and +manually implementing `Iterator` for that type, which requires writing an +explicit `Iterator::next` method body. This is a change similar to adding `async +{}` blocks that implement `Future` instead of having to manually write futures +and their state machines. Furthermore, add `gen fn` to the language. `gen fn foo(arg: X) -> Y` desugars to `fn foo(arg: X) -> impl Iterator`. @@ -19,19 +20,19 @@ Furthermore, add `gen fn` to the language. `gen fn foo(arg: X) -> Y` desugars to [motivation]: #motivation The main motivation of this RFC is to reserve a new keyword in the 2024 edition. -The feature used by this keyword described here should be treated as an e-RFC for -experimentation on nightly with this new keyword. I would like to avoid too much -discussion of the semantics provided here, and instead discuss the semantics during -the experimental implementation work. +The feature used by the keyword described here should be treated as an e-RFC for +experimentation on nightly. I would like to avoid discussion of the semantics +provided here, deferring that discussion until during the experimental +implementation work. Writing iterators manually can be very painful. Many iterators can be written by chaining `Iterator` methods, but some need to be written as a `struct` and have -`Iterator` implemented for them. Some of the code that is written this way pushes -people to instead not use iterators, but just run a `for` loop and write to mutable -state. With this RFC, you could write the `for` loop, without mutable state, and get -an iterator out of it again. +`Iterator` implemented for them. 
Some of the code that is written this way +pushes people to avoid iterators and instead execute a `for` loop that eagerly +writes values to mutable state. With this RFC, one can write the `for` loop +and still get a lazy iterator of values. -As an example, here are three ways to write an iterator over something that contains integers, +As an example, here are multiple ways to write an iterator over something that contains integers, only keep the odd integers, and multiply all of them by 2: ```rust @@ -39,6 +40,7 @@ only keep the odd integers, and multiply all of them by 2: fn odd_dup(values: impl Iterator) -> impl Iterator { values.filter(|value| value.is_odd()).map(|value| value * 2) } + // `struct` and manual `impl` fn odd_dup(values: impl Iterator) -> impl Iterator { struct Foo(T); @@ -55,6 +57,7 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { } Foo(values) } + // `gen block` fn odd_dup(values: impl Iterator) -> impl Iterator { gen { @@ -77,7 +80,7 @@ gen fn odd_dup(values: impl Iterator) -> u32 { ``` Iterators created with `gen` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). -See [#unresolved-questions] for whether `gen` iterators are fused or may behave strangely after having returned `None` once. +See [the unresolved questions][#unresolved-questions] for whether `gen` iterators are fused or may behave strangely after having returned `None` once. Under no circumstances will it be undefined behavior if `next` is invoked again after having gotten a `None`. # Guide-level explanation @@ -85,15 +88,15 @@ Under no circumstances will it be undefined behavior if `next` is invoked again ## New keyword -Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, ... named `gen` must be renamed. 
+Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, ... named `gen` must be renamed. ## Returning/finishing an iterator -`gen` blocks' trailing expression must be of unit type or the block must diverge before reaching its end. +A `gen` block's trailing expression must be of the unit type, or the block must diverge before reaching its end. ### Diverging iterators -For example, an `gen` block that produces the sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` +For example, a `gen` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` from `next`, and only drop its captured data when the iterator is dropped. ```rust @@ -105,34 +108,36 @@ gen { } ``` -If an `gen` panics, the behavior is very similar to `return`, except that `next` doesn't return `None`, but unwinds. +If a `gen` block panics, the behavior is very similar to `return`, except that `next` unwinds instead of returning `None`. ## Error handling Within `gen` blocks, the `?` operator desugars differently from how it desugars outside of `gen` blocks. Instead of returning the `Err` variant, `foo?` yields the `Err` variant and then `return`s immediately afterwards. -This has the effect of it being an iterator with `Iterator::Item`'s type being `Result`, and once a `Some(Err(e))` -is produced via `?`, the iterator returns `None` next. +This creates an iterator with `Iterator::Item`'s type being `Result`. +Once a `Some(Err(e))` is produced via `?`, the iterator returns `None` on the subsequent call to `Iterator::next`. -`gen` blocks do not need to have a trailing `Ok(x)` expression, because returning from an `gen` block will make the `Iterator` return `None` from now, which needs no value. Instead all `yield` operations must be given a `Result`. +`gen` blocks do not need to have a trailing `Ok(x)` expression.
+Returning from a `gen` block will make the `Iterator` return `None`, which needs no value. +Instead, all `yield` operations must be given a `Result`. -Similarly the `?` operator on `Option`s will `yield None` if it is `None`, and require passing an `Option` to all `yield` operations. +The `?` operator on `Option`s will `yield None` if it is `None`, and require passing an `Option` to all `yield` operations. ## Fusing -Just like `Generators`, Iterators produced by `gen` panic when invoked again after they have returned `None` once. -This can probably be fixed by special casing the generator impl if `Generator::Return = ()`, as we can trivally -produce infinite values of `()` type. +Like `Generator`s, `Iterator`s produced by `gen` panic when invoked again after they have returned `None` once. +This can probably be fixed by special casing the generator impl if `Generator::Return = ()`, as we can trivially +produce infinite values of the unit type. # Reference-level explanation [reference-level-explanation]: #reference-level-explanation ## New keyword -In the 2024 edition we reserve `gen` as a keyword. Previous editions need to use `k#gen` to get the same features. +In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `k#gen` to get the same features. ## Error handling -`foo?` in `gen` blocks desugars to +`foo?` in `gen` blocks will stop iteration after the first error by desugaring to ```rust match foo.branch() { @@ -144,32 +149,33 @@ match foo.branch() { } ``` -which will stop iteration after the first error. This is the same behaviour that `collect::>()` performs -on any iterator over `Result`s +This is the same behaviour that `collect::<Result<_, _>>()` performs +on iterators over `Result`s. ## Implementation -This feature is mostly implemented via existing generators, we'll just need some desugarings and then lots of work to get good diagnostics. +This feature is mostly implemented via existing generators.
+We'll need additional desugarings and lots of work to get good diagnostics. -### Gen fn +### `gen fn` -`gen fn` desugars to the function itself, with its return type replaced by `impl Iterator` and its body wrapped in a `gen` block. -So a `gen fn`'s "return type" is in fact its iterator's `yield` type. +`gen fn` desugars to the function itself with the return type replaced by `impl Iterator` and its body wrapped in a `gen` block. +A `gen fn`'s "return type" is its iterator's `yield` type. A `gen fn` captures all lifetimes and generic parameters into the `impl Iterator` return type (just like `async fn`). -If you want more control over your captures, you'll need to use type alias impl trait when that becomes stable. +If more control over captures is needed, type alias impl trait can be used when it is stabilized. -Just like all other uses of `impl Trait`, auto traits are revealed without being specified. +Like other uses of `impl Trait`, auto traits are revealed without being specified. -### Gen blocks +### `gen` blocks -`gen` blocks are effectively the same as an unstable generator +`gen` blocks are the same as an unstable generator * without arguments, * with an additional check forbidding holding borrows across `yield` points, * and an automatic `Iterator` implementation. -We'll probably be able to modularize the generator impl and make it more robust (on the impl and diagnostics side) for the `gen` block case, but I believe the initial implementation should just be a HIR lowering to a generator and wrapping that generator in `std::iterator::from_generator`. +We'll probably be able to modularize the generator implementation and make it more robust on the implementation and diagnostics side for the `gen` block case, but I believe the initial implementation should be a HIR lowering to a generator and wrapping that generator in [`from_generator`][]. 
# Drawbacks [drawbacks]: #drawbacks @@ -177,19 +183,21 @@ We'll probably be able to modularize the generator impl and make it more robust It's another language feature for something that can already be written entirely in user code. In contrast to `Generator`, `gen` blocks that produce `Iterator`s cannot hold references across `yield` points. -See also https://doc.rust-lang.org/std/iter/fn.from_generator.html, which has an `Unpin` bound on the generator it takes -to produce an `Iterator`. +See [`from_generator`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`. + +[`from_generator`]: https://doc.rust-lang.org/std/iter/fn.from_generator.html # Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives ## Keyword -We could also use `iter` as a keyword. I would prefer `iter` in because I connect generators with a more powerful -scheme than just plain `Iterator`s. The `Generator` trait can do everything that `iter` blocks and `async` blocks can do, and more. I believe connecting the `Iterator` -trait with `iter` blocks is the right choice, but that would require us to carve out many exceptions for this keyword, -as `iter` is used for module names and method names everywhere (including libstd/libcore). It may not be much worse than `gen` (see also [#unresolved-questions]). - -One argument for `iter` is also that we may want to use `gen` for full on generators in the future. +We could use `iter` as the keyword. +I prefer `iter` because I connect generators with a more powerful scheme than plain `Iterator`s. +The `Generator` trait can do everything that `iter` blocks and `async` blocks can do and more. +I believe connecting the `Iterator` trait with `iter` blocks is the right choice, +but that would require us to carve out many exceptions for this keyword as `iter` is used for module names and method names everywhere (including libstd/libcore). 
+It may not be much worse than `gen` (see also [the unresolved questions][#unresolved-questions]). +We may want to use `gen` for full on generators in the future. ## 2021 edition @@ -198,19 +206,22 @@ We can allow `gen fn` on all editions. ## Do not do this -The alternative is to keep adding more helper methods to `Iterator`. It is already rather hard for new Rustaceans to get a hold of all the options they have on `Iterator`. -Some such methods would also need to be very generic (not an `Iterator` example, but https://doc.rust-lang.org/std/primitive.array.html#method.try_map on arrays is something -that has very complex diagnostics that are hard to improve, even if it's nice once it works). +One alternative is to keep adding more helper methods to `Iterator`. +It is already hard for new Rustaceans to be aware of all the capabilities of `Iterator`. +Some of these new methods would need to be very generic. +While it's not an `Iterator` example, [`array::try_map`][] is something that has very complex diagnostics that are hard to improve, even if it's nice once it works. -Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) instead, which work on stable and give you `gen!` blocks that behave pretty mostly -like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. +Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) instead. +This crate works on stable and provides `gen!` macro blocks that behave like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. + +[`array::try_map`]: https://doc.rust-lang.org/std/primitive.array.html#method.try_map ## `return` statements `yield` one last element -Similarly to `try` blocks, trailing expresisons could yield their element. +Similarly to `try` blocks, trailing expressions could yield their element. 
-But then have no way to terminate iteration, as `return` statements would similarly have to have a -value that needs to get `yield`ed before terminating iteration. +There would then be no way to terminate iteration as `return` statements would have to have a +value that is `yield`ed before terminating iteration. We could do something magical where returning `()` terminates the iteration, so @@ -238,15 +249,13 @@ gen fn foo() {} is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()` - # Prior art [prior-art]: #prior-art ## Python -Python has `gen fn`: any function that uses `yield` internally. -These work pretty much like the `gen` functions proposed in this PR. The main difference is that raising an -exception automatically passes the exception outwards, instead of yielding an `Err()` element. +Python has equivalent functionality to `gen fn`: any function that uses `yield` internally. +The main difference is that raising an exception automatically passes the exception outwards, instead of yielding an `Err()` element. ```python def odd_dup(values): @@ -260,8 +269,8 @@ def odd_dup(values): ## Keyword -Should we use `iter` as a keyword instead, as we're producing `Iterator`s. -We can also use `gen` like proposed in this RFC and later extend its abilities to more powerful generators. +Should we use `iter` as the keyword, as we're producing `Iterator`s? +We could use `gen` as proposed in this RFC and later extend its abilities to more powerful generators. [playground](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=efeacb803158c2ebd57d43b4e606c0b5) @@ -282,24 +291,26 @@ fn main() { ## Panicking -What happens when a `gen` block that panicked gets `next` called again? Do we need to poison the iterator? +What happens when `Iterator::next` is called again on a `gen` block that panicked? Do we need to poison the iterator? ## Fusing -Should we make `gen` blocks fused? 
Right now they'd panic (which is what the generator impl does): +Should we make `gen` blocks fused? Right now they'd panic (which is what the generator implementation does): ## Contextual keyword -Popular crates (like `rand`) have methods called `gen` (https://docs.rs/rand/latest/rand/trait.Rng.html#method.gen). If we forbid those, we are forcing those crates to make a major version bump when they update their edition, and we are requiring any users of those crates to use `r#gen` instead of `gen` when calling that method. +Popular crates (like `rand`) have methods called [`gen`][Rng::gen]. If we forbid those, we are forcing those crates to make a major version bump when they update their edition, and we are requiring any users of those crates to use `r#gen` instead of `gen` when calling that method. -We could instead choose to use a contextual keyword and only forbid +We could choose to use a contextual keyword and only forbid `gen` in * bindings, * field names (due to destructuring bindings), * enum variants, * and type names -to be `gen`. This should avoid any parsing issues around `gen` followed by `{` in expressions. +This should avoid any parsing issues around `gen` followed by `{` in expressions. + +[Rng::gen]: https://docs.rs/rand/latest/rand/trait.Rng.html#method.gen # Future possibilities [future-possibilities]: #future-possibilities @@ -308,7 +319,7 @@ to be `gen`. This should avoid any parsing issues around `gen` followed by `{` i Python has the ability to `yield from` an iterator. Effectively this is syntax sugar for looping over all elements of the iterator and yielding them individually. 
-There are infinite options to choose from if we want such a feature, so I'm just going to list the general ideas below: +There are infinite options to choose from if we want such a feature, so I'm listing general ideas: ### Do nothing, just use loops @@ -318,7 +329,7 @@ for x in iter { } ``` -### language support +### Language support we could do something like postfix `yield` or an entirely new keyword, or... @@ -326,27 +337,28 @@ we could do something like postfix `yield` or an entirely new keyword, or... iter.yield ``` -### stlib macro +### stdlib macro -We could add a macro to the standard library and prelude, the macro would just expand to the for loop + yield. +We could add a macro to the standard library and prelude. +The macro would expand to a `for` loop + `yield`. ```rust yield_all!(iter) ``` -## Full on `Generator` support +## Complete `Generator` support -We already have a `Generator` trait on nightly that is much more powerful than the `Iterator` +We already have a `Generator` trait on nightly that is more powerful than the `Iterator` API could possibly be. 1. it uses `Pin<&mut Self>`, allowing self-references in the generator across yield points 2. it has arguments (`yield` returns the arguments passed to it in the subsequent invocations) -Similar (but def not the same) to ideas around `async` closures, I think we could argue for `Generators` to be `gen` closures, -while `gen` blocks are the simpler concept that has no arguments and just captures variables. +Similar to the ideas around `async` closures, +I think we could argue for `Generators` to be `gen` closures while `gen` blocks are a simpler concept that has no arguments and only captures variables. -Either way, support for full `Generator`s should (in my opinion) be discussed and implemented separately, -as there are many more open questions around them than around just a simpler way to write `Iterator`s. 
+Either way, support for full `Generator`s should be discussed and implemented separately, +as there are many more open questions around them beyond a simpler way to write `Iterator`s. ## `async` interactions @@ -355,8 +367,7 @@ This is not possible in general due to the fact that `Iterator::next` takes `&mu it should be possible if no references are held across the `await` point, similar to how we disallow holding references across `yield` points in this RFC. - -## self-referential `gen` bloocks +## self-referential `gen` blocks There are a few options forward: @@ -369,5 +380,5 @@ There are a few options forward: ## `try` interactions -We could allow `try gen fn foo() -> i32` to actually mean something akin to `gen fn foo() -> Result`. -Whatever we do here, it should mirror whatever `try fn` will mean in the future. +We could allow `try gen fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. +Whatever we do here, it should mirror whatever `try fn` means in the future. From 08c07d2941098ce23538fe238abdd43feb883e81 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Fri, 13 Oct 2023 11:32:36 +0000 Subject: [PATCH 13/45] Address concerns around my phrasing of `async gen` blocks --- text/0000-gen-fn.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 3478a2990b5..e92beaee1e1 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -363,13 +363,18 @@ as there are many more open questions around them beyond a simpler way to write ## `async` interactions We could support using `await` in `async gen` blocks, similar to how we support `?` being used within `gen` blocks. -This is not possible in general due to the fact that `Iterator::next` takes `&mut self` and not `Pin<&mut self>`, but -it should be possible if no references are held across the `await` point, similar to how we disallow holding -references across `yield` points in this RFC. 
+We'd have similar limitations holding references held across `await` points as we do have with `yield` points. +The solution space for `async gen` is large enough that I will not explore it here. +This RFC's design is forward compatible with anything we decide on. + +At present it is only possible to have a `gen` block yield futures, but not `await` within it, similar to how +you cannot write iterators that `await`, but that return futures from `next`. ## self-referential `gen` blocks -There are a few options forward: +We can allow `gen` blocks to hold borrows across `yield` points in the future. + +There are a few options forward (though this list is probably not complete): * Add a separate trait for pinned iteration that is also usable with `gen` and `for` * downside: very similar traits for the same thing @@ -378,6 +383,8 @@ There are a few options forward: * implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block) * downside: the thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. +This RFC is forward compatible with any such designs, so I will not explore it here. + ## `try` interactions We could allow `try gen fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. From d89eb261616996cc7a2cf5666aaff3e838261738 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Fri, 13 Oct 2023 11:44:41 +0000 Subject: [PATCH 14/45] Address some concerns --- text/0000-gen-fn.md | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index e92beaee1e1..c7353c11965 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -117,11 +117,19 @@ Instead of returning the `Err` variant, `foo?` yields the `Err` variant and then This creates an iterator with `Iterator::Item`'s type being `Result`. 
Once a `Some(Err(e))` is produced via `?`, the iterator returns `None` on the subsequent call to `Iterator::next`. -`gen` blocks do not need to have a trailing `Ok(x)` expression. +In contrast to other code where you can use `?`, `gen` blocks do not need to have a trailing `Ok(x)` or `x` expression. Returning from a `gen` block will make the `Iterator` return `None`, which needs no value. Instead, all `yield` operations must be given a `Result`. The `?` operator on `Option`s will `yield None` if it is `None`, and require passing an `Option` to all `yield` operations. +As an example: + +```rust +let x = some_option?; +yield Some(x + 1) +``` + +will yield `None` if `some_option` is `None`, but `Some(x + 1)` otherwise. ## Fusing @@ -249,6 +257,25 @@ gen fn foo() {} is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()` +## different syntax for `gen fn`: + +There are many options to choose from, and we can decide on it while implementing the feature and before stabilization. +Some options are: + +```rust +fn foo(args) yield item +fn foo(args) yields item +fn foo(args) => item +fn* foo(args) -> item // or any of the `fn foo` variants for the item type +gen fn foo(args) // or any of the above variants for the item type +gen foo(args) // or any of the above variants for the item type +generator fn foo(args) // or any of the above variants for the item type +``` + +The design space here is very large. I propose to use `gen fn foo(args) -> item` for now as +experimental syntax, as it requries the least parser changes. Or even use `#[rustc_gen]` to require +no parser changes, but still reserve the `gen` keyword in the 2024 edition for `gen` blocks. 
+ # Prior art [prior-art]: #prior-art From e06745e8b302b5cfb0ee38fad8be9306600d51c0 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Fri, 13 Oct 2023 15:48:01 +0000 Subject: [PATCH 15/45] No syntax, only semantics --- text/0000-gen-fn.md | 110 ++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index c7353c11965..8e2343c0bc2 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -6,14 +6,18 @@ # Summary [summary]: #summary -Add `gen {}` blocks to the language. These implement `Iterator` by `yield`ing +Reserve the `gen` keyword in the 2024 edition and start experimenting on +implementing generator functions and blocks that allow implementing `Iterator`s +without writing `Iterator::next` impls under placeholder syntax. + +Add `#[rustc_gen] {}` blocks to the language. These implement `Iterator` by `yield`ing elements. This is simpler and more intuitive than creating a custom type and manually implementing `Iterator` for that type, which requires writing an explicit `Iterator::next` method body. This is a change similar to adding `async {}` blocks that implement `Future` instead of having to manually write futures and their state machines. -Furthermore, add `gen fn` to the language. `gen fn foo(arg: X) -> Y` desugars to +Furthermore, add `#[rustc_gen] fn` to the language. `#[rustc_gen] fn foo(arg: X) -> Y` desugars to `fn foo(arg: X) -> impl Iterator`. 
# Motivation @@ -60,7 +64,7 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { // `gen block` fn odd_dup(values: impl Iterator) -> impl Iterator { - gen { + #[rustc_gen] { for value in values { if value.is_odd() { yield value * 2; @@ -70,7 +74,8 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { } // `gen fn` -gen fn odd_dup(values: impl Iterator) -> u32 { +#[rustc_gen] +fn odd_dup(values: impl Iterator) -> u32 { for value in values { if value.is_odd() { yield value * 2; @@ -79,8 +84,8 @@ gen fn odd_dup(values: impl Iterator) -> u32 { } ``` -Iterators created with `gen` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). -See [the unresolved questions][#unresolved-questions] for whether `gen` iterators are fused or may behave strangely after having returned `None` once. +Iterators created with `#[rustc_gen]` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). +See [the unresolved questions][#unresolved-questions] for whether `#[rustc_gen]` iterators are fused or may behave strangely after having returned `None` once. Under no circumstances will it be undefined behavior if `next` is invoked again after having gotten a `None`. # Guide-level explanation @@ -92,15 +97,15 @@ Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming ## Returning/finishing an iterator -`gen` block's trailing expression must be of the unit type or the block must diverge before reaching its end. +`#[rustc_gen]` block's trailing expression must be of the unit type or the block must diverge before reaching its end. ### Diverging iterators -For example, a `gen` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` +For example, a `#[rustc_gen]` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` from `next`, and only drop its captured data when the iterator is dropped. 
```rust -gen { +#[rustc_gen] { loop { yield 0; yield 1; @@ -108,17 +113,17 @@ gen { } ``` -If a `gen` block panics, the behavior is very similar to `return`, except that `next` unwinds instead of returning `None`. +If a `#[rustc_gen]` block panics, the behavior is very similar to `return`, except that `next` unwinds instead of returning `None`. ## Error handling -Within `gen` blocks, the `?` operator desugars differently from how it desugars outside of `gen` blocks. +Within `#[rustc_gen]` blocks, the `?` operator desugars differently from how it desugars outside of `#[rustc_gen]` blocks. Instead of returning the `Err` variant, `foo?` yields the `Err` variant and then `return`s immediately afterwards. This creates an iterator with `Iterator::Item`'s type being `Result`. Once a `Some(Err(e))` is produced via `?`, the iterator returns `None` on the subsequent call to `Iterator::next`. -In contrast to other code where you can use `?`, `gen` blocks do not need to have a trailing `Ok(x)` or `x` expression. -Returning from a `gen` block will make the `Iterator` return `None`, which needs no value. +In contrast to other code where you can use `?`, `#[rustc_gen]` blocks do not need to have a trailing `Ok(x)` or `x` expression. +Returning from a `#[rustc_gen]` block will make the `Iterator` return `None`, which needs no value. Instead, all `yield` operations must be given a `Result`. The `?` operator on `Option`s will `yield None` if it is `None`, and require passing an `Option` to all `yield` operations. @@ -133,8 +138,8 @@ will yield `None` if `some_option` is `None`, but `Some(x + 1)` otherwise. ## Fusing -Like `Generators`, `Iterator`s produced by `gen` panic when invoked again after they have returned `None` once. -This can probably be fixed by special casing the generator impl if `Generator::Return = ()`, as we can trivially +Like `Generators`, `Iterator`s produced by `#[rustc_gen]` panic when invoked again after they have returned `None` once. 
+This will probably be fixed by special casing the generator impl if `Generator::Return = ()`, as we can trivially produce infinite values of the unit type. # Reference-level explanation @@ -145,7 +150,7 @@ In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `k ## Error handling -`foo?` in `gen` blocks will stop iteration after the first error by desugaring to +`foo?` in `#[rustc_gen]` blocks will stop iteration after the first error by desugaring to ```rust match foo.branch() { @@ -165,32 +170,37 @@ on iterators over `Result`s This feature is mostly implemented via existing generators. We'll need additional desugarings and lots of work to get good diagnostics. -### `gen fn` +### `#[rustc_gen] fn` -`gen fn` desugars to the function itself with the return type replaced by `impl Iterator` and its body wrapped in a `gen` block. -A `gen fn`'s "return type" is its iterator's `yield` type. +`#[rustc_gen] fn` desugars to the function itself with the return type replaced by `impl Iterator` and its body wrapped in a `#[rustc_gen]` block. +A `#[rustc_gen] fn`'s "return type" is its iterator's `yield` type. -A `gen fn` captures all lifetimes and generic parameters into the `impl Iterator` return type (just like `async fn`). +A `#[rustc_gen] fn` captures all lifetimes and generic parameters into the `impl Iterator` return type (just like `async fn`). If more control over captures is needed, type alias impl trait can be used when it is stabilized. Like other uses of `impl Trait`, auto traits are revealed without being specified. -### `gen` blocks +### `#[rustc_gen]` blocks -`gen` blocks are the same as an unstable generator +`#[rustc_gen]` blocks are the same as an unstable generator * without arguments, * with an additional check forbidding holding borrows across `yield` points, * and an automatic `Iterator` implementation. 
-We'll probably be able to modularize the generator implementation and make it more robust on the implementation and diagnostics side for the `gen` block case, but I believe the initial implementation should be a HIR lowering to a generator and wrapping that generator in [`from_generator`][]. +We'll probably be able to modularize the generator implementation and make it more robust on the implementation and diagnostics side for the `#[rustc_gen]` block case, but I believe the initial implementation should be a HIR lowering to a generator and wrapping that generator in [`from_generator`][]. + +## Fusing + +Special case the generator implementation if `Generator::Return = ()` to not panic, but +repeatedly produce values of the unit type. # Drawbacks [drawbacks]: #drawbacks It's another language feature for something that can already be written entirely in user code. -In contrast to `Generator`, `gen` blocks that produce `Iterator`s cannot hold references across `yield` points. +In contrast to `Generator`, `#[rustc_gen]` blocks that produce `Iterator`s cannot hold references across `yield` points. See [`from_generator`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`. [`from_generator`]: https://doc.rust-lang.org/std/iter/fn.from_generator.html @@ -207,11 +217,6 @@ but that would require us to carve out many exceptions for this keyword as `iter It may not be much worse than `gen` (see also [the unresolved questions][#unresolved-questions]). We may want to use `gen` for full on generators in the future. -## 2021 edition - -We could allow `gen` blocks on the 2021 edition via `k#gen {}` syntax. -We can allow `gen fn` on all editions. - ## Do not do this One alternative is to keep adding more helper methods to `Iterator`. @@ -219,8 +224,9 @@ It is already hard for new Rustaceans to be aware of all the capabilities of `It Some of these new methods would need to be very generic. 
While it's not an `Iterator` example, [`array::try_map`][] is something that has very complex diagnostics that are hard to improve, even if it's nice once it works. -Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) instead. -This crate works on stable and provides `gen!` macro blocks that behave like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. +Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) or [`propane`](https://crates.io/crates/propane) instead. +`genawaiter` works on stable and provides `gen!` macro blocks that behave like `#[rustc_gen]` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` crate uses the `Generator` trait from nightly and works mostly +like `#[rustc_gen]` would. [`array::try_map`]: https://doc.rust-lang.org/std/primitive.array.html#method.try_map @@ -234,7 +240,7 @@ value that is `yield`ed before terminating iteration. We could do something magical where returning `()` terminates the iteration, so ```rust -gen fn foo() -> i32 { +#[rustc_gen] fn foo() -> i32 { 42 } ``` @@ -242,7 +248,7 @@ gen fn foo() -> i32 { could be a way to specify `std::iter::once(42)`. The issue I see with this is that ```rust -gen fn foo() -> i32 { +#[rustc_gen] fn foo() -> i32 { 42; // note the semicolon } ``` @@ -252,14 +258,16 @@ would then not return a value. Furthermore this would make it unclear what the behaviour of ```rust -gen fn foo() {} +#[rustc_gen] fn foo() {} ``` is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()` -## different syntax for `gen fn`: +## different syntax for `#[rustc_gen] fn`: -There are many options to choose from, and we can decide on it while implementing the feature and before stabilization. 
+This RFC explicitly picks an attribute, as that has no conflicts with any other syntax, even within macros, and +does not pick any option that may influence how experimental users think about syntax. +There are many options to choose from, and we'll have to decide on one before stabilization. Some options are: ```rust @@ -272,16 +280,14 @@ gen foo(args) // or any of the above variants for the item type generator fn foo(args) // or any of the above variants for the item type ``` -The design space here is very large. I propose to use `gen fn foo(args) -> item` for now as -experimental syntax, as it requries the least parser changes. Or even use `#[rustc_gen]` to require -no parser changes, but still reserve the `gen` keyword in the 2024 edition for `gen` blocks. +The design space here is very large, but either way, I propose to reserve the `gen` keyword. # Prior art [prior-art]: #prior-art ## Python -Python has equivalent functionality to `gen fn`: any function that uses `yield` internally. +Python has equivalent functionality to `#[rustc_gen] fn`: any function that uses `yield` internally. The main difference is that raising an exception automatically passes the exception outwards, instead of yielding an `Err()` element. ```python @@ -318,11 +324,7 @@ fn main() { ## Panicking -What happens when `Iterator::next` is called again on a `gen` block that panicked? Do we need to poison the iterator? - -## Fusing - -Should we make `gen` blocks fused? Right now they'd panic (which is what the generator implementation does): +What happens when `Iterator::next` is called again on a `#[rustc_gen]` block that panicked? Do we need to poison the iterator? ## Contextual keyword @@ -382,37 +384,37 @@ API could possibly be. 2. 
it has arguments (`yield` returns the arguments passed to it in the subsequent invocations) Similar to the ideas around `async` closures, -I think we could argue for `Generators` to be `gen` closures while `gen` blocks are a simpler concept that has no arguments and only captures variables. +I think we could argue for `Generators` to be `#[rustc_gen]` closures while `#[rustc_gen]` blocks are a simpler concept that has no arguments and only captures variables. Either way, support for full `Generator`s should be discussed and implemented separately, as there are many more open questions around them beyond a simpler way to write `Iterator`s. ## `async` interactions -We could support using `await` in `async gen` blocks, similar to how we support `?` being used within `gen` blocks. +We could support using `await` in `#[rustc_gen] async` blocks, similar to how we support `?` being used within `#[rustc_gen]` blocks. We'd have similar limitations holding references held across `await` points as we do have with `yield` points. -The solution space for `async gen` is large enough that I will not explore it here. +The solution space for `#[rustc_gen] async` is large enough that I will not explore it here. This RFC's design is forward compatible with anything we decide on. -At present it is only possible to have a `gen` block yield futures, but not `await` within it, similar to how +At present it is only possible to have a `#[rustc_gen]` block yield futures, but not `await` within it, similar to how you cannot write iterators that `await`, but that return futures from `next`. -## self-referential `gen` blocks +## self-referential `#[rustc_gen]` blocks -We can allow `gen` blocks to hold borrows across `yield` points in the future. +We can allow `#[rustc_gen]` blocks to hold borrows across `yield` points in the future. 
There are a few options forward (though this list is probably not complete): -* Add a separate trait for pinned iteration that is also usable with `gen` and `for` +* Add a separate trait for pinned iteration that is also usable with `#[rustc_gen]` and `for` * downside: very similar traits for the same thing * backwards compatibly add a way to change the argument type of `Iterator::next` * downside: unclear if possible -* implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block) +* implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `#[rustc_gen]` block) * downside: the thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. This RFC is forward compatible with any such designs, so I will not explore it here. ## `try` interactions -We could allow `try gen fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. +We could allow `#[rustc_gen] try fn foo() -> i32` to mean something akin to `#[rustc_gen] fn foo() -> Result`. Whatever we do here, it should mirror whatever `try fn` means in the future. From 09c3ff1f0f7da6c13a45fa55dc639433e986f62b Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Fri, 13 Oct 2023 16:03:07 +0000 Subject: [PATCH 16/45] `Iterator::size_hint` --- text/0000-gen-fn.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 8e2343c0bc2..c64b242c927 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -341,6 +341,15 @@ This should avoid any parsing issues around `gen` followed by `{` in expressions [Rng::gen]: https://docs.rs/rand/latest/rand/trait.Rng.html#method.gen +## `Iterator::size_hint` + +Should we try to compute a conservative `size_hint`? This will reveal information from the body of a generator, +but at least for simple cases users will likely expect `size_hint` to not just be the default. 
+ +## Implement other `Iterator` traits. + +Is there a possibility for implementing traits like `DoubleEndedIterator`, `ExactSizeIterator` at all? + # Future possibilities [future-possibilities]: #future-possibilities From 45ce4db98d91886917e46b6f27eeac21c6047c18 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Fri, 13 Oct 2023 16:07:26 +0000 Subject: [PATCH 17/45] Update RFC PR id --- text/0000-gen-fn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index c64b242c927..cb410afbcba 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -1,6 +1,6 @@ - Feature Name: `gen-fn` - Start Date: 2023-10-10 -- RFC PR: [rust-lang/rfcs#0000](https://github.com/rust-lang/rfcs/pull/0000) +- RFC PR: [rust-lang/rfcs#3513](https://github.com/rust-lang/rfcs/pull/3513) - Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000) # Summary From e9bbc6eff6ce78939ae9b027ed743b536be54f22 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 13 Oct 2023 18:58:25 +0000 Subject: [PATCH 18/45] Fix and improve capitalization and punctuation There were some places with erroneous or unclear punctuation and capitalization. Let's fix those and make some related typographic and linguistic improvements. --- text/0000-gen-fn.md | 66 +++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index cb410afbcba..974b9181aff 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -36,8 +36,8 @@ pushes people to avoid iterators and instead execute a `for` loop that eagerly writes values to mutable state. With this RFC, one can write the `for` loop and still get a lazy iterator of values. 
-As an example, here are multiple ways to write an iterator over something that contains integers, -only keep the odd integers, and multiply all of them by 2: +As an example, here are multiple ways to write an iterator over something that contains integers +while only keeping the odd integers and multiplying each by 2: ```rust // `Iterator` methods @@ -93,7 +93,7 @@ Under no circumstances will it be undefined behavior if `next` is invoked again ## New keyword -Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, ... named `gen` must be renamed. +Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, etc. named `gen` must be renamed. ## Returning/finishing an iterator @@ -102,7 +102,7 @@ Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming ### Diverging iterators For example, a `#[rustc_gen]` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` -from `next`, and only drop its captured data when the iterator is dropped. +from `next`, and only drop its captured data when the iterator is dropped: ```rust #[rustc_gen] { @@ -150,7 +150,7 @@ In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `k ## Error handling -`foo?` in `#[rustc_gen]` blocks will stop iteration after the first error by desugaring to +`foo?` in `#[rustc_gen]` blocks will stop iteration after the first error by desugaring to: ```rust match foo.branch() { @@ -163,7 +163,7 @@ match foo.branch() { ``` This is the same behaviour that `collect::>()` performs -on iterators over `Result`s +on iterators over `Result`s. 
## Implementation @@ -182,11 +182,11 @@ Like other uses of `impl Trait`, auto traits are revealed without being specifie ### `#[rustc_gen]` blocks -`#[rustc_gen]` blocks are the same as an unstable generator +`#[rustc_gen]` blocks are the same as an unstable generator... -* without arguments, -* with an additional check forbidding holding borrows across `yield` points, -* and an automatic `Iterator` implementation. +* ...without arguments, +* ...with an additional check forbidding holding borrows across `yield` points, +* ...and with an automatic `Iterator` implementation. We'll probably be able to modularize the generator implementation and make it more robust on the implementation and diagnostics side for the `#[rustc_gen]` block case, but I believe the initial implementation should be a HIR lowering to a generator and wrapping that generator in [`from_generator`][]. @@ -237,7 +237,7 @@ Similarly to `try` blocks, trailing expressions could yield their element. There would then be no way to terminate iteration as `return` statements would have to have a value that is `yield`ed before terminating iteration. -We could do something magical where returning `()` terminates the iteration, so +We could do something magical where returning `()` terminates the iteration, so this code... ```rust #[rustc_gen] fn foo() -> i32 { @@ -245,7 +245,7 @@ We could do something magical where returning `()` terminates the iteration, so } ``` -could be a way to specify `std::iter::once(42)`. The issue I see with this is that +...could be a way to specify `std::iter::once(42)`. The issue I see with this is that this... ```rust #[rustc_gen] fn foo() -> i32 { @@ -253,17 +253,17 @@ could be a way to specify `std::iter::once(42)`. The issue I see with this is th } ``` -would then not return a value. +...would then not return a value. -Furthermore this would make it unclear what the behaviour of +Furthermore this would make it unclear what the behaviour of this... 
```rust #[rustc_gen] fn foo() {} ``` -is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()` +...is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()`. -## different syntax for `#[rustc_gen] fn`: +## Different syntax for `#[rustc_gen] fn`: This RFC explicitly picks an attribute, as that has no conflicts with any other syntax, even within macros, and does not pick any option that may influence how experimental users think about syntax. @@ -330,12 +330,12 @@ What happens when `Iterator::next` is called again on a `#[rustc_gen]` block tha Popular crates (like `rand`) have methods called [`gen`][Rng::gen]. If we forbid those, we are forcing those crates to make a major version bump when they update their edition, and we are requiring any users of those crates to use `r#gen` instead of `gen` when calling that method. -We could choose to use a contextual keyword and only forbid `gen` in +We could choose to use a contextual keyword and only forbid `gen` in: -* bindings, -* field names (due to destructuring bindings), -* enum variants, -* and type names +* bindings +* field names (due to destructuring bindings) +* enum variants +* type names This should avoid any parsing issues around `gen` followed by `{` in expressions. @@ -369,12 +369,14 @@ for x in iter { ### Language support -we could do something like postfix `yield` or an entirely new keyword, or... +We could do something like postfix `yield`: ```rust iter.yield ``` +Or we could use an entirely new keyword. + ### stdlib macro We could add a macro to the standard library and prelude. @@ -387,10 +389,10 @@ yield_all!(iter) ## Complete `Generator` support We already have a `Generator` trait on nightly that is more powerful than the `Iterator` -API could possibly be. +API could possibly be: -1. it uses `Pin<&mut Self>`, allowing self-references in the generator across yield points -2. 
it has arguments (`yield` returns the arguments passed to it in the subsequent invocations) +1. It uses `Pin<&mut Self>`, allowing self-references in the generator across yield points. +2. It has arguments (`yield` returns the arguments passed to it in the subsequent invocations). Similar to the ideas around `async` closures, I think we could argue for `Generators` to be `#[rustc_gen]` closures while `#[rustc_gen]` blocks are a simpler concept that has no arguments and only captures variables. @@ -408,18 +410,18 @@ This RFC's design is forward compatible with anything we decide on. At present it is only possible to have a `#[rustc_gen]` block yield futures, but not `await` within it, similar to how you cannot write iterators that `await`, but that return futures from `next`. -## self-referential `#[rustc_gen]` blocks +## Self-referential `#[rustc_gen]` blocks We can allow `#[rustc_gen]` blocks to hold borrows across `yield` points in the future. There are a few options forward (though this list is probably not complete): -* Add a separate trait for pinned iteration that is also usable with `#[rustc_gen]` and `for` - * downside: very similar traits for the same thing -* backwards compatibly add a way to change the argument type of `Iterator::next` - * downside: unclear if possible -* implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `#[rustc_gen]` block) - * downside: the thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. +* Add a separate trait for pinned iteration that is also usable with `#[rustc_gen]` and `for`. + * *Downside*: We would have very similar traits for the same thing. +* Backward-compatibly add a way to change the argument type of `Iterator::next`. + * *Downside*: It's unclear whether this is possible. 
+* Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `#[rustc_gen]` block). + * *Downside*: The thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. This RFC is forward compatible with any such designs, so I will not explore it here. From 118b777fbd1c4833918adac47b63054da297ef28 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 13 Oct 2023 22:06:43 +0000 Subject: [PATCH 19/45] Fix links to the unresolved questions section We had said `[text][#foo]` when we had meant to say `[text][foo]`, leading to broken internal link elements when referencing the unresolved questions section. Let's fix that. --- text/0000-gen-fn.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 974b9181aff..d50078914fa 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -85,7 +85,7 @@ fn odd_dup(values: impl Iterator) -> u32 { ``` Iterators created with `#[rustc_gen]` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). -See [the unresolved questions][#unresolved-questions] for whether `#[rustc_gen]` iterators are fused or may behave strangely after having returned `None` once. +See [the unresolved questions][unresolved-questions] for whether `#[rustc_gen]` iterators are fused or may behave strangely after having returned `None` once. Under no circumstances will it be undefined behavior if `next` is invoked again after having gotten a `None`. # Guide-level explanation @@ -214,7 +214,7 @@ I prefer `iter` because I connect generators with a more powerful scheme than pl The `Generator` trait can do everything that `iter` blocks and `async` blocks can do and more. 
I believe connecting the `Iterator` trait with `iter` blocks is the right choice, but that would require us to carve out many exceptions for this keyword as `iter` is used for module names and method names everywhere (including libstd/libcore). -It may not be much worse than `gen` (see also [the unresolved questions][#unresolved-questions]). +It may not be much worse than `gen` (see also [the unresolved questions][unresolved-questions]). We may want to use `gen` for full on generators in the future. ## Do not do this From 00335a20bf11d4224fc5ab9b1f242867c5f3f188 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 13 Oct 2023 19:32:18 +0000 Subject: [PATCH 20/45] Add section noting C# prior art The C# (CSharp) language has a `yield` statement. Let's describe this in a new prior art section. (Thanks to @scottmcm for pointing this out.) --- text/0000-gen-fn.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index d50078914fa..3a9990b157b 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -297,6 +297,35 @@ def odd_dup(values): yield value * 2 ``` +## C# ## + +In C#, within an [`iterator`][c-sharp-iterators], the [`yield`][c-sharp-yield] +statement is used to either yield the next value or to stop iteration. E.g.: + +```csharp +IEnumerable OddDupUntilNegative(IEnumerable numbers) +{ + foreach (int n in numbers) + { + if (n < 0) + { + yield break; + } + else if (n % 2 == 1) + { + yield return n * 2; + } + } +} +``` + +Analogously with this RFC and with `async` blocks in Rust (but unlike `async +Task` in C#), execution of C# iterators does not start until they are +iterated. 
+ +[c-sharp-iterators]: https://learn.microsoft.com/en-us/dotnet/csharp/iterators +[c-sharp-yield]: https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/statements/yield + # Unresolved questions [unresolved-questions]: #unresolved-questions From 94096d56392a83f0c597bd43b0c8e05bd98715c9 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 13 Oct 2023 20:30:29 +0000 Subject: [PATCH 21/45] Add example and mention of `std::iter::from_fn` In the motivation, we enumerate some ways that people can create an iterator. One way we didn't demonstrate was the use of `std::iter::from_fn`. Let's do that, and let's add a mention of this in the section about the ever-present option of doing nothing. (Thanks to @estebank for pointing this out.) --- text/0000-gen-fn.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 3a9990b157b..258b7ed0b65 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -45,6 +45,18 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { values.filter(|value| value.is_odd()).map(|value| value * 2) } +// `std::iter::from_fn` +fn odd_dup(mut values: impl Iterator) -> impl Iterator { + std::iter::from_fn(move || { + loop { + let value = values.next()?; + if value % 2 == 1 { + return Some(value * 2); + } + } + }) +} + // `struct` and manual `impl` fn odd_dup(values: impl Iterator) -> impl Iterator { struct Foo(T); @@ -228,6 +240,11 @@ Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) or `genawaiter` works on stable and provides `gen!` macro blocks that behave like `#[rustc_gen]` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` crate uses the `Generator` trait from nightly and works mostly like `#[rustc_gen]` would. 
+The standard library includes [`std::iter::from_fn`][], which can be used in +some cases, but as we saw in the example [above][motivation], often the +improvement over writing out a manual implementation of `Iterator` is limited. + +[`std::iter::from_fn`]: https://doc.rust-lang.org/std/iter/fn.from_fn.html [`array::try_map`]: https://doc.rust-lang.org/std/primitive.array.html#method.try_map ## `return` statements `yield` one last element From a8295c8c1e8882910d96cb69ff84c03f319a0eb7 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 13 Oct 2023 20:40:58 +0000 Subject: [PATCH 22/45] Clarify rules on the valid return type of a `gen` block We're specifying that `gen` blocks must diverge or return the unit type. Let's articulate exactly what that means. (Thanks to @scottmcm for pointing this out.) --- text/0000-gen-fn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 258b7ed0b65..20f7ba8f279 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -109,7 +109,7 @@ Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming ## Returning/finishing an iterator -`#[rustc_gen]` block's trailing expression must be of the unit type or the block must diverge before reaching its end. +`#[rustc_gen]` blocks must diverge or return the unit type. Specifically, the trailing expression must be of the unit or `!` type, and any `return` statements in the block must either be given no argument at all or given an argument of the unit or `!` type. ### Diverging iterators From e5c803c40d5b1f24022ee344cf59445284dd8183 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 13 Oct 2023 21:35:36 +0000 Subject: [PATCH 23/45] Clarify section on error handling The section on error handling discussed very specifically the behavior with `Option` and `Result` in a way that may have made the general case less clear.
E.g., it did not discuss the handling of `ControlFlow`, leaving unclear whether arbitrary types that implement the `Try` trait could be used. It also did not discuss whether `From::from` was called as usual in case of short-circuiting. Let's simplify this section a bit by first discussing the desugaring, discussing the non-effect on the required type of trailing elements or arguments to `return` in the block, and discussing the effect on the type of the arguments required by `yield`. Notably, we won't discuss here the details of how `None` is returned to end iteration. This behavior is the same as when `?` is not used, and the desugaring we describe fills in all of the needed details. (Thanks to @scottmcm for pointing out the ambiguous nature of this section.) --- text/0000-gen-fn.md | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 20f7ba8f279..6f537ba62ac 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -129,24 +129,23 @@ If a `#[rustc_gen]` block panics, the behavior is very similar to `return`, exce ## Error handling -Within `#[rustc_gen]` blocks, the `?` operator desugars differently from how it desugars outside of `#[rustc_gen]` blocks. -Instead of returning the `Err` variant, `foo?` yields the `Err` variant and then `return`s immediately afterwards. -This creates an iterator with `Iterator::Item`'s type being `Result`. -Once a `Some(Err(e))` is produced via `?`, the iterator returns `None` on the subsequent call to `Iterator::next`. - -In contrast to other code where you can use `?`, `#[rustc_gen]` blocks do not need to have a trailing `Ok(x)` or `x` expression. -Returning from a `#[rustc_gen]` block will make the `Iterator` return `None`, which needs no value. -Instead, all `yield` operations must be given a `Result`. - -The `?` operator on `Option`s will `yield None` if it is `None`, and require passing an `Option` to all `yield` operations. 
-As an example: - -```rust -let x = some_option?; -yield Some(x + 1) -``` - -will yield `None` if `some_option` is `None`, but `Some(x + 1)` otherwise. +Within `#[rustc_gen]` blocks, the `?` operator desugars as follows. When its +argument returns a value indicating "do not short circuit" +(e.g. `Option::Some(..)`, `Result::Ok(..)`, `ControlFlow::Continue(..)`), that +value becomes the result of the expression as usual. When its argument +returns a value indicating that short-circuiting is desired +(e.g. `Option::None`, `Result::Err(..)`, `ControlFlow::Break(..)`), the value +is first yielded (after being converted by `From::from` as usual), then the +block returns immediately. + +Even when `?` is used within a `#[rustc_gen]` block, the block must return a +value of type unit or `!`. That is, it does not return a value of `Some(..)`, +`Ok(..)`, or `Continue(..)` as other such blocks might. + +However, note that when `?` is used within a `#[rustc_gen]` block, all `yield` +statements will need to be given an argument of a compatible type. For +example, if `None?` is used in an expression, then all `yield` statements will +need to be given arguments of type `Option`. ## Fusing From b949fec3f23269c74ab6fc6e86bca1b687959fe1 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Mon, 16 Oct 2023 05:34:28 +0000 Subject: [PATCH 24/45] Add more full history of generators in prior art Generators, iterators, and the `yield` keyword have a long history in the programming language literature. Let's describe some of this history in the prior art section and add more examples of languages with these features. 
--- text/0000-gen-fn.md | 343 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 328 insertions(+), 15 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 6f537ba62ac..f13bde8a02b 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -301,35 +301,209 @@ The design space here is very large, but either way, I propose to reserve the `g # Prior art [prior-art]: #prior-art +## CLU, Alphard + +The idea of generators that yield their values goes back at least as far as +the Alphard language from circa 1975 (see ["Alphard: Form and +Content"][alphard], Mary Shaw, 1981). This was later refined into the idea of +iterators in the CLU language (see ["A History of CLU"][clu-history], Barbara +Liskov, 1992 and ["CLU Reference Manual"][clu-ref], Liskov et al., 1979). + +The CLU language opened an iterator context with the `iter` keyword and +produced values with `yield` statements. E.g.: + +``` +odds = iter () yields (int) + x: int := 1 + while x <= 20 do + yield x + x := x + 2 + end +end odds +``` + +[alphard]: https://web.archive.org/web/20150926014020/http://repository.cmu.edu/cgi/viewcontent.cgi?article=1868&context=isr +[clu-history]: https://web.archive.org/web/20030917041834/http://www.lcs.mit.edu/publications/pubs/pdf/MIT-LCS-TR-561.pdf +[clu-ref]: https://web.archive.org/web/20211105171453/https://pmg.csail.mit.edu/ftp.lcs.mit.edu/pub/pclu/CLU/3.Documents/MIT-LCS-TR-225.pdf + +## Icon + +In [Icon][icon-language] (introduced circa 1977), generators are woven deeply +into the language, and any function can return a sequence of values. When done +explicitly, the `suspend` keyword is used. E.g.: + +``` +procedure range(i, j) + while i < j do { + suspend i + i +:= 1 + } + fail +end +``` + +[icon-language]: https://web.archive.org/web/20230721102710/https://www2.cs.arizona.edu/icon/ftp/doc/lb1up.pdf + ## Python -Python has equivalent functionality to `#[rustc_gen] fn`: any function that uses `yield` internally. 
-The main difference is that raising an exception automatically passes the exception outwards, instead of yielding an `Err()` element. +In Python, any function that contains a `yield` statement returns a +generator. E.g.: ```python -def odd_dup(values): - for value in values: - if is_odd(value): - yield value * 2 +def odd_dup(xs): + for x in xs: + if x % 2 == 1: + yield x * 2 +``` + +## ECMAScript / JavaScript + +In JavaScript, `yield` can be used within [`function*`][javascript-function*] +generator functions. E.g.: + +```javascript +function* oddDupUntilNegative(xs) { + for (const x of xs) { + if (x < 0) { + return; + } else if (x % 2 == 1) { + yield x * 2; + } + } +} +``` + +These generator functions are general coroutines. `yield` forms an expression +that returns the value passed to `next`. E.g.: + +```javascript +function* dup(x) { + while (true) { + x = yield x * 2; + } +} + +const g = dup(2); +console.assert(g.next().value === 4); +console.assert(g.next(3).value === 6); +``` + +[javascript-function*]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/function* + +## Ruby + +In Ruby, `yield` can be used with the [`Enumerator`][ruby-enumerator] class to +implement an iterator. E.g.: + +```ruby +def odd_dup_until_negative xs + Enumerator.new do |y| + xs.each do |x| + if x < 0 + return + elsif x % 2 == 1 + y.yield x * 2 + end + end + end +end ``` +Ruby also uses `yield` for a general coroutine mechanism with the +[`Fiber`][ruby-fiber] class. E.g.: + +```ruby +def dup + Fiber.new do |x| + while true + x = Fiber.yield x * 2 + end + end +end + +g = dup +4 == (g.resume 2) +6 == (g.resume 3) +``` + +[ruby-enumerator]: https://ruby-doc.org/3.2.2/Enumerator.html +[ruby-fiber]: https://ruby-doc.org/3.2.2/Fiber.html + +## Kotlin + +In Kotlin, a lazy [`Sequence`][kotlin-sequences] can be built using `sequence` +expressions and `yield`. 
E.g.: + +```kotlin +fun oddDup(xs: Iterable): Sequence { + return sequence { + for (x in xs) { + if (x % 2 == 1) { + yield(x * 2); + } + } + }; +} + +fun main() { + for (x in oddDup(listOf(1, 2, 3, 4, 5))) { + println(x); + } +} +``` + +[kotlin-sequences]: https://kotlinlang.org/docs/sequences.html#from-elements + +## Swift + +In Swift, [`AsyncStream`][swift-asyncstream] is used with `yield` to produce +asynchronous generators. E.g.: + +```swift +import Foundation + +let sequence = AsyncStream { k in + for x in 0..<20 { + if x % 2 == 1 { + k.yield(x * 2) + } + } + k.finish() +} + +let semaphore = DispatchSemaphore(value: 0) +Task { + for await elem in sequence { + print(elem) + } + semaphore.signal() +} +semaphore.wait() +``` + +Synchronous generators are not yet available in Swift, but [may +be][swift-sync-gen] something they are planning. + +[swift-asyncstream]: https://developer.apple.com/documentation/swift/asyncstream +[swift-sync-gen]: https://forums.swift.org/t/is-it-possible-to-make-an-iterator-that-yelds/53995/7 + ## C# ## -In C#, within an [`iterator`][c-sharp-iterators], the [`yield`][c-sharp-yield] -statement is used to either yield the next value or to stop iteration. E.g.: +In C#, within an [`iterator`][csharp-iterators], the [`yield`][csharp-yield] +statement is used to either yield the next value or to stop iteration. E.g.: ```csharp -IEnumerable OddDupUntilNegative(IEnumerable numbers) +IEnumerable OddDupUntilNegative(IEnumerable xs) { - foreach (int n in numbers) + foreach (int x in xs) { - if (n < 0) + if (x < 0) { yield break; } - else if (n % 2 == 1) + else if (x % 2 == 1) { - yield return n * 2; + yield return x * 2; } } } @@ -339,8 +513,147 @@ Analogously with this RFC and with `async` blocks in Rust (but unlike `async Task` in C#), execution of C# iterators does not start until they are iterated. 
-[c-sharp-iterators]: https://learn.microsoft.com/en-us/dotnet/csharp/iterators -[c-sharp-yield]: https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/statements/yield +[csharp-iterators]: https://learn.microsoft.com/en-us/dotnet/csharp/iterators +[csharp-yield]: https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/statements/yield + +## D + +In D, `yield` is used when constructing a +[`Generator`][dlang-generators]. E.g.: + +```dlang +import std.concurrency; +import std.stdio: writefln; + +auto odd_dup(int[] xs) { + return new Generator!int({ + foreach(x; xs) { + if (x % 2 == 1) { + yield(x * 2); + } + } + }); +} + +void main() { + auto xs = odd_dup([1, 2, 3, 4, 5]); + foreach (x; xs) { + writefln("%d", x); + } +} +``` + +As in Ruby, generators in D are built on top of a more general +[`Fiber`][dlang-fibers] class that also uses `yield`. + +[dlang-generators]: https://dlang.org/library/std/concurrency/generator.html +[dlang-fibers]: https://dlang.org/library/core/thread/fiber/fiber.html + +## Dart + +In Dart, there are both synchronous and asynchronous [generator +functions][dart-generators]. Synchronous generator functions return an +`Iterable`. E.g.: + +```dart +Iterable oddDup(Iterable xs) sync* { + for (final x in xs) { + if (x % 2 == 1) { + yield x * 2; + } + } +} + +void main() { + oddDup(List.generate(20, (x) => x + 1)).forEach(print); +} +``` + +Asynchronous generator functions return a `Stream` object. E.g.: + +```dart +Stream oddDup(Iterable xs) async* { + for (final x in xs) { + if (x % 2 == 1) { + yield x * 2; + } + } +} + +void main() { + oddDup(List.generate(20, (x) => x + 1)).forEach(print); +} +``` + +[dart-generators]: https://dart.dev/language/functions#generators + +## F# ## + +In F#, generators can be expressed with [sequence +expressions][fsharp-sequences] using `yield`. E.g.: + +```fsharp +let oddDup xs = seq { + for x in xs do + if x % 2 = 1 then + yield x * 2 } + +for x in oddDup (seq { 1 ..
20 }) do + printfn "%d" x +``` + +[fsharp-sequences]: https://learn.microsoft.com/en-us/dotnet/fsharp/language-reference/sequences + +## Racket + +In Racket, generators can be built using [`generator`][racket-generators] and +`yield`. E.g.: + +```racket +#lang racket +(require racket/generator) + +(define (odd-dup xs) + (generator () + (for ([x xs]) + (when (odd? x) + (yield (* 2 x)))))) + +(define g (odd-dup '(1 2 3 4 5))) +(= (g) 2) +(= (g) 6) +(= (g) 10) +``` + +Note that because of the expressive power of [`call/cc`][racket-callcc] (and +continuations in general), generators can be written in Racket as a normal +library. + +[racket-callcc]: https://docs.racket-lang.org/reference/cont.html +[racket-generators]: https://docs.racket-lang.org/reference/Generators.html + +## Haskell, Idris, Clean, etc. + +In [Haskell][] (and in similar languages such as [Idris][idris-lang], +[Clean][clean-lang], etc.), all functions are lazy unless specially annotated. +Consequently, Haskell does not need a special `yield` operator. Any function +can be a generator by recursively building a list of elements that will be +lazily returned one at a time. E.g.: + +```haskell +oddDup :: (Integral x) => [x] -> [x] +oddDup [] = [] +oddDup (x:xs) + | odd x = x * 2 : oddDup xs + | otherwise = oddDup xs + +main :: IO () +main = putStrLn $ show $ take 5 $ oddDup [1..20] +``` + +[haskell]: https://www.haskell.org/ +[clean-lang]: https://wiki.clean.cs.ru.nl/Clean +[idris-lang]: https://www.idris-lang.org/ # Unresolved questions [unresolved-questions]: #unresolved-questions From c982935b9dd9ff719ccb962d646c08a482d5ce9c Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Oct 2023 15:44:16 +0000 Subject: [PATCH 25/45] Switch back to using `gen` keyword throughout RFC In the 2023-10-18 T-lang triage meeting, the team decided that this RFC should settle of the question of the keyword. 
Consequently, let's commit to the `gen` keyword and use it throughout the document rather than using an attribute placeholder. --- text/0000-gen-fn.md | 79 ++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index f13bde8a02b..7378482abab 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -10,14 +10,14 @@ Reserve the `gen` keyword in the 2024 edition and start experimenting on implementing generator functions and blocks that allow implementing `Iterator`s without writing `Iterator::next` impls under placeholder syntax. -Add `#[rustc_gen] {}` blocks to the language. These implement `Iterator` by `yield`ing +Add `gen {}` blocks to the language. These implement `Iterator` by `yield`ing elements. This is simpler and more intuitive than creating a custom type and manually implementing `Iterator` for that type, which requires writing an explicit `Iterator::next` method body. This is a change similar to adding `async {}` blocks that implement `Future` instead of having to manually write futures and their state machines. -Furthermore, add `#[rustc_gen] fn` to the language. `#[rustc_gen] fn foo(arg: X) -> Y` desugars to +Furthermore, add `gen fn` to the language. `gen fn foo(arg: X) -> Y` desugars to `fn foo(arg: X) -> impl Iterator`. 
# Motivation @@ -76,7 +76,7 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { // `gen block` fn odd_dup(values: impl Iterator) -> impl Iterator { - #[rustc_gen] { + gen { for value in values { if value.is_odd() { yield value * 2; @@ -86,8 +86,7 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { } // `gen fn` -#[rustc_gen] -fn odd_dup(values: impl Iterator) -> u32 { +gen fn odd_dup(values: impl Iterator) -> u32 { for value in values { if value.is_odd() { yield value * 2; @@ -96,8 +95,8 @@ fn odd_dup(values: impl Iterator) -> u32 { } ``` -Iterators created with `#[rustc_gen]` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). -See [the unresolved questions][unresolved-questions] for whether `#[rustc_gen]` iterators are fused or may behave strangely after having returned `None` once. +Iterators created with `gen` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). +See [the unresolved questions][unresolved-questions] for whether `gen` iterators are fused or may behave strangely after having returned `None` once. Under no circumstances will it be undefined behavior if `next` is invoked again after having gotten a `None`. # Guide-level explanation @@ -109,15 +108,15 @@ Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming ## Returning/finishing an iterator -`#[rustc_gen]` blocks must diverge or return the unit type. Specifically, the trailing expression must be of the unit or `!` type, and any `return` statements in the block must either be given no argument at all or given an argument of the unit or `!` type. +`gen` blocks must diverge or return the unit type. Specifically, the trailing expression must be of the unit or `!` type, and any `return` statements in the block must either be given no argument at all or given an argument of the unit or `!` type. 
### Diverging iterators -For example, a `#[rustc_gen]` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` +For example, a `gen` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` from `next`, and only drop its captured data when the iterator is dropped: ```rust -#[rustc_gen] { +gen { loop { yield 0; yield 1; @@ -125,11 +124,11 @@ from `next`, and only drop its captured data when the iterator is dropped: } ``` -If a `#[rustc_gen]` block panics, the behavior is very similar to `return`, except that `next` unwinds instead of returning `None`. +If a `gen` block panics, the behavior is very similar to `return`, except that `next` unwinds instead of returning `None`. ## Error handling -Within `#[rustc_gen]` blocks, the `?` operator desugars as follows. When its +Within `gen` blocks, the `?` operator desugars as follows. When its argument returns a value indicating "do not short circuit" (e.g. `Option::Some(..)`, `Result::Ok(..)`, `ControlFlow::Continue(..)`), that value becomes the result of the expression as usual. When its argument @@ -138,18 +137,18 @@ returns a value indicating that short-circuiting is desired is first yielded (after being converted by `From::from` as usual), then the block returns immediately. -Even when `?` is used within a `#[rustc_gen]` block, the block must return a +Even when `?` is used within a `gen` block, the block must return a value of type unit or `!`. That is, it does not return a value of `Some(..)`, `Ok(..)`, or `Continue(..)` as other such blocks might. -However, note that when `?` is used within a `#[rustc_gen]` block, all `yield` +However, note that when `?` is used within a `gen` block, all `yield` statements will need to be given an argument of a compatible type. For example, if `None?` is used in an expression, then all `yield` statements will need to be given arguments of type `Option`. 
## Fusing -Like `Generators`, `Iterator`s produced by `#[rustc_gen]` panic when invoked again after they have returned `None` once. +Like `Generators`, `Iterator`s produced by `gen` panic when invoked again after they have returned `None` once. This will probably be fixed by special casing the generator impl if `Generator::Return = ()`, as we can trivially produce infinite values of the unit type. @@ -161,7 +160,7 @@ In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `k ## Error handling -`foo?` in `#[rustc_gen]` blocks will stop iteration after the first error by desugaring to: +`foo?` in `gen` blocks will stop iteration after the first error by desugaring to: ```rust match foo.branch() { @@ -181,25 +180,25 @@ on iterators over `Result`s. This feature is mostly implemented via existing generators. We'll need additional desugarings and lots of work to get good diagnostics. -### `#[rustc_gen] fn` +### `gen fn` -`#[rustc_gen] fn` desugars to the function itself with the return type replaced by `impl Iterator` and its body wrapped in a `#[rustc_gen]` block. -A `#[rustc_gen] fn`'s "return type" is its iterator's `yield` type. +`gen fn` desugars to the function itself with the return type replaced by `impl Iterator` and its body wrapped in a `gen` block. +A `gen fn`'s "return type" is its iterator's `yield` type. -A `#[rustc_gen] fn` captures all lifetimes and #[rustc_gen]eric parameters into the `impl Iterator` return type (just like `async fn`). +A `gen fn` captures all lifetimes and generic parameters into the `impl Iterator` return type (just like `async fn`). If more control over captures is needed, type alias impl trait can be used when it is stabilized. Like other uses of `impl Trait`, auto traits are revealed without being specified. -### `#[rustc_gen]` blocks +### `gen` blocks -`#[rustc_gen]` blocks are the same as an unstable generator... +`gen` blocks are the same as an unstable generator... 
* ...without arguments, * ...with an additional check forbidding holding borrows across `yield` points, * ...and with an automatic `Iterator` implementation. -We'll probably be able to modularize the generator implementation and make it more robust on the implementation and diagnostics side for the `#[rustc_gen]` block case, but I believe the initial implementation should be a HIR lowering to a generator and wrapping that generator in [`from_generator`][]. +We'll probably be able to modularize the generator implementation and make it more robust on the implementation and diagnostics side for the `gen` block case, but I believe the initial implementation should be a HIR lowering to a generator and wrapping that generator in [`from_generator`][]. ## Fusing @@ -211,7 +210,7 @@ repeatedly produce values of the unit type. It's another language feature for something that can already be written entirely in user code. -In contrast to `Generator`, `#[rustc_gen]` blocks that produce `Iterator`s cannot hold references across `yield` points. +In contrast to `Generator`, `gen` blocks that produce `Iterator`s cannot hold references across `yield` points. See [`from_generator`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`. [`from_generator`]: https://doc.rust-lang.org/std/iter/fn.from_generator.html @@ -236,8 +235,8 @@ Some of these new methods would need to be very generic. While it's not an `Iterator` example, [`array::try_map`][] is something that has very complex diagnostics that are hard to improve, even if it's nice once it works. Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) or [`propane`](https://crates.io/crates/propane) instead. -`genawaiter` works on stable and provides `gen!` macro blocks that behave like `#[rustc_gen]` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. 
The `propane` crate uses the `Generator` trait from nightly and works mostly -like `#[rustc_gen]` would. +`genawaiter` works on stable and provides `gen!` macro blocks that behave like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` crate uses the `Generator` trait from nightly and works mostly +like `gen` would. The standard library includes [`std::iter::from_fn`][], which can be used in some cases, but as we saw in the example [above][motivation], often the @@ -256,7 +255,7 @@ value that is `yield`ed before terminating iteration. We could do something magical where returning `()` terminates the iteration, so this code... ```rust -#[rustc_gen] fn foo() -> i32 { +gen fn foo() -> i32 { 42 } ``` @@ -264,7 +263,7 @@ We could do something magical where returning `()` terminates the iteration, so ...could be a way to specify `std::iter::once(42)`. The issue I see with this is that this... ```rust -#[rustc_gen] fn foo() -> i32 { +gen fn foo() -> i32 { 42; // note the semicolon } ``` @@ -274,12 +273,12 @@ We could do something magical where returning `()` terminates the iteration, so Furthermore this would make it unclear what the behaviour of this... ```rust -#[rustc_gen] fn foo() {} +gen fn foo() {} ``` ...is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()`. -## Different syntax for `#[rustc_gen] fn`: +## Different syntax for `gen fn`: This RFC explicitly picks an attribute, as that has no conflicts with any other syntax, even within macros, and does not pick any option that may influence how experimental users think about syntax. @@ -682,7 +681,7 @@ fn main() { ## Panicking -What happens when `Iterator::next` is called again on a `#[rustc_gen]` block that panicked? Do we need to poison the iterator? +What happens when `Iterator::next` is called again on a `gen` block that panicked? Do we need to poison the iterator? 
## Contextual keyword @@ -753,37 +752,37 @@ API could possibly be: 2. It has arguments (`yield` returns the arguments passed to it in the subsequent invocations). Similar to the ideas around `async` closures, -I think we could argue for `Generators` to be `#[rustc_gen]` closures while `#[rustc_gen]` blocks are a simpler concept that has no arguments and only captures variables. +I think we could argue for `Generators` to be `gen` closures while `gen` blocks are a simpler concept that has no arguments and only captures variables. Either way, support for full `Generator`s should be discussed and implemented separately, as there are many more open questions around them beyond a simpler way to write `Iterator`s. ## `async` interactions -We could support using `await` in `#[rustc_gen] async` blocks, similar to how we support `?` being used within `#[rustc_gen]` blocks. +We could support using `await` in `gen async` blocks, similar to how we support `?` being used within `gen` blocks. We'd have similar limitations holding references held across `await` points as we do have with `yield` points. -The solution space for `#[rustc_gen] async` is large enough that I will not explore it here. +The solution space for `gen async` is large enough that I will not explore it here. This RFC's design is forward compatible with anything we decide on. -At present it is only possible to have a `#[rustc_gen]` block yield futures, but not `await` within it, similar to how +At present it is only possible to have a `gen` block yield futures, but not `await` within it, similar to how you cannot write iterators that `await`, but that return futures from `next`. -## Self-referential `#[rustc_gen]` blocks +## Self-referential `gen` blocks -We can allow `#[rustc_gen]` blocks to hold borrows across `yield` points in the future. +We can allow `gen` blocks to hold borrows across `yield` points in the future. 
There are a few options forward (though this list is probably not complete): -* Add a separate trait for pinned iteration that is also usable with `#[rustc_gen]` and `for`. +* Add a separate trait for pinned iteration that is also usable with `gen` and `for`. * *Downside*: We would have very similar traits for the same thing. * Backward-compatibly add a way to change the argument type of `Iterator::next`. * *Downside*: It's unclear whether this is possible. -* Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `#[rustc_gen]` block). +* Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block). * *Downside*: The thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. This RFC is forward compatible with any such designs, so I will not explore it here. ## `try` interactions -We could allow `#[rustc_gen] try fn foo() -> i32` to mean something akin to `#[rustc_gen] fn foo() -> Result`. +We could allow `gen try fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. Whatever we do here, it should mirror whatever `try fn` means in the future. From ffcf8811e5fd88280c23bd8225cbbce36d92542c Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Oct 2023 16:50:38 +0000 Subject: [PATCH 26/45] Remove language about an experiment Some of the language in this RFC implied that this is an experiment and that another RFC would be required. This isn't how we want to frame things. Another RFC could always be required, but it's also possible that stabilization could occur on the basis of this RFC. This is a normal RFC. Let's remove all language about this being an experiment. 
--- text/0000-gen-fn.md | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 7378482abab..7de569ece91 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -6,9 +6,7 @@ # Summary [summary]: #summary -Reserve the `gen` keyword in the 2024 edition and start experimenting on -implementing generator functions and blocks that allow implementing `Iterator`s -without writing `Iterator::next` impls under placeholder syntax. +Reserve the `gen` keyword in the 2024 edition. Add `gen {}` blocks to the language. These implement `Iterator` by `yield`ing elements. This is simpler and more intuitive than creating a custom type and @@ -23,11 +21,12 @@ Furthermore, add `gen fn` to the language. `gen fn foo(arg: X) -> Y` desugars to # Motivation [motivation]: #motivation -The main motivation of this RFC is to reserve a new keyword in the 2024 edition. -The feature used by the keyword described here should be treated as an e-RFC for -experimentation on nightly. I would like to avoid discussion of the semantics -provided here, deferring that discussion until during the experimental -implementation work. +The main motivation of this RFC is to reserve a new keyword in the 2024 +edition. We will discuss the semantic questions of generators in this +document, but we do not have to settle them with this RFC. We'll describe +current thinking on the semantics, but some questions will be left open to be +answered at a later time after we gain more experience with the +implementation. Writing iterators manually can be very painful. 
Many iterators can be written by chaining `Iterator` methods, but some need to be written as a `struct` and have @@ -280,10 +279,8 @@ gen fn foo() {} ## Different syntax for `gen fn`: -This RFC explicitly picks an attribute, as that has no conflicts with any other syntax, even within macros, and -does not pick any option that may influence how experimental users think about syntax. -There are many options to choose from, and we'll have to decide on one before stabilization. -Some options are: +This RFC selects `gen` as the keyword. But there are other options we might +pick. Here are some alternatives: ```rust fn foo(args) yield item From b524348b8b024b48426ce4ccd2bf266e0fcd6342 Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 29 Feb 2024 08:36:16 +0000 Subject: [PATCH 27/45] Address T-lang feedback --- text/0000-gen-fn.md | 99 ++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 65 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 7de569ece91..a0b5e9a841f 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -15,9 +15,6 @@ explicit `Iterator::next` method body. This is a change similar to adding `async {}` blocks that implement `Future` instead of having to manually write futures and their state machines. -Furthermore, add `gen fn` to the language. `gen fn foo(arg: X) -> Y` desugars to -`fn foo(arg: X) -> impl Iterator`. - # Motivation [motivation]: #motivation @@ -83,31 +80,24 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { } } } - -// `gen fn` -gen fn odd_dup(values: impl Iterator) -> u32 { - for value in values { - if value.is_odd() { - yield value * 2; - } - } -} ``` Iterators created with `gen` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). -See [the unresolved questions][unresolved-questions] for whether `gen` iterators are fused or may behave strangely after having returned `None` once. 
-Under no circumstances will it be undefined behavior if `next` is invoked again after having gotten a `None`. +`gen` iterators are fused, so after returning `None` once, they will keep returning `None` forever. # Guide-level explanation [guide-level-explanation]: #guide-level-explanation ## New keyword -Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, etc. named `gen` must be renamed. +Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. +This means during the migration to the 2024 edition, all variables, functions, modules, types, etc. named `gen` must be renamed +or be referred to via `r#gen`. ## Returning/finishing an iterator -`gen` blocks must diverge or return the unit type. Specifically, the trailing expression must be of the unit or `!` type, and any `return` statements in the block must either be given no argument at all or given an argument of the unit or `!` type. +`gen` blocks must diverge or return the unit type. +Specifically, the trailing expression must be of the unit or `!` type, and any `return` statements in the block must either be given no argument at all or given an argument of the unit or `!` type. ### Diverging iterators @@ -147,15 +137,14 @@ need to be given arguments of type `Option`. ## Fusing -Like `Generators`, `Iterator`s produced by `gen` panic when invoked again after they have returned `None` once. -This will probably be fixed by special casing the generator impl if `Generator::Return = ()`, as we can trivially -produce infinite values of the unit type. +`Iterator`s produced by `gen` keep returning `None` when invoked again after they have returned `None` once. +They do not implement `FusedIterator`, as that is not a language item, but may implement it in the future. 
# Reference-level explanation [reference-level-explanation]: #reference-level-explanation ## New keyword -In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `k#gen` to get the same features. +In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `r#gen` to get the same features. ## Error handling @@ -176,18 +165,7 @@ on iterators over `Result`s. ## Implementation -This feature is mostly implemented via existing generators. -We'll need additional desugarings and lots of work to get good diagnostics. - -### `gen fn` - -`gen fn` desugars to the function itself with the return type replaced by `impl Iterator` and its body wrapped in a `gen` block. -A `gen fn`'s "return type" is its iterator's `yield` type. - -A `gen fn` captures all lifetimes and generic parameters into the `impl Iterator` return type (just like `async fn`). -If more control over captures is needed, type alias impl trait can be used when it is stabilized. - -Like other uses of `impl Trait`, auto traits are revealed without being specified. +This feature is mostly implemented via existing generators, though there are some special cases. ### `gen` blocks @@ -196,13 +174,7 @@ Like other uses of `impl Trait`, auto traits are revealed without being specifie * ...without arguments, * ...with an additional check forbidding holding borrows across `yield` points, * ...and with an automatic `Iterator` implementation. - -We'll probably be able to modularize the generator implementation and make it more robust on the implementation and diagnostics side for the `gen` block case, but I believe the initial implementation should be a HIR lowering to a generator and wrapping that generator in [`from_generator`][]. - -## Fusing - -Special case the generator implementation if `Generator::Return = ()` to not panic, but -repeatedly produce values of the unit type. 
+* ...do not panic if invoked again after returning # Drawbacks [drawbacks]: #drawbacks @@ -212,6 +184,8 @@ It's another language feature for something that can already be written entirely In contrast to `Generator`, `gen` blocks that produce `Iterator`s cannot hold references across `yield` points. See [`from_generator`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`. +The `gen` keyword causes some fallout in the community, mostly around the `rand` crate, which has `gen` methods on its traits. + [`from_generator`]: https://doc.rust-lang.org/std/iter/fn.from_generator.html # Rationale and alternatives @@ -254,16 +228,16 @@ value that is `yield`ed before terminating iteration. We could do something magical where returning `()` terminates the iteration, so this code... ```rust -gen fn foo() -> i32 { - 42 +fn foo() -> impl Iterator { + gen { 42 } } ``` ...could be a way to specify `std::iter::once(42)`. The issue I see with this is that this... ```rust -gen fn foo() -> i32 { - 42; // note the semicolon +fn foo() -> impl Iterator { + gen { 42; } // note the semicolon } ``` @@ -272,28 +246,11 @@ gen fn foo() -> i32 { Furthermore this would make it unclear what the behaviour of this... ```rust -gen fn foo() {} +fn foo() -> impl Iterator { gen {} } ``` ...is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()`. -## Different syntax for `gen fn`: - -This RFC selects `gen` as the keyword. But there are other options we might -pick. 
Here are some alternatives: - -```rust -fn foo(args) yield item -fn foo(args) yields item -fn foo(args) => item -fn* foo(args) -> item // or any of the `fn foo` variants for the item type -gen fn foo(args) // or any of the above variants for the item type -gen foo(args) // or any of the above variants for the item type -generator fn foo(args) // or any of the above variants for the item type -``` - -The design space here is very large, but either way, I propose to reserve the `gen` keyword. - # Prior art [prior-art]: #prior-art @@ -676,10 +633,6 @@ fn main() { } ``` -## Panicking - -What happens when `Iterator::next` is called again on a `gen` block that panicked? Do we need to poison the iterator? - ## Contextual keyword Popular crates (like `rand`) have methods called [`gen`][Rng::gen]. If we forbid those, we are forcing those crates to make a major version bump when they update their edition, and we are requiring any users of those crates to use `r#gen` instead of `gen` when calling that method. @@ -699,6 +652,7 @@ This should avoid any parsing issues around `gen` followed by `{` in expressions Should we try to compute a conservative `size_hint`? This will reveal information from the body of a generator, but at least for simple cases users will likely expect `size_hint` to not just be the default. +It is backwards compatible to later add support for opportunistically implementing `size_hint`. ## Implement other `Iterator` traits. @@ -783,3 +737,18 @@ This RFC is forward compatible with any such designs, so I will not explore it h We could allow `gen try fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. Whatever we do here, it should mirror whatever `try fn` means in the future. + +## `gen fn`: + +This RFC does not introduce `gen fn`. The syntax design space for it is fairly large +and there are open questions around the difference between returning or yielding a type.
+ +```rust +fn foo(args) yield item +fn foo(args) yields item +fn foo(args) => item +fn* foo(args) -> item // or any of the `fn foo` variants for the item type +gen fn foo(args) // or any of the above variants for the item type +gen foo(args) // or any of the above variants for the item type +generator fn foo(args) // or any of the above variants for the item type +``` From 349c517d81d965762d62e786914c46d1a0de64ec Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 29 Feb 2024 08:45:44 +0000 Subject: [PATCH 28/45] Give some examples of self-referential generators and why they don't work. --- text/0000-gen-fn.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index a0b5e9a841f..2cff0a5a52b 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -140,6 +140,32 @@ need to be given arguments of type `Option`. `Iterator`s produced by `gen` keep returning `None` when invoked again after they have returned `None` once. They do not implement `FusedIterator`, as that is not a language item, but may implement it in the future. +## Holding borrows across yields + +Since the `Iterator::next` method takes `&mut self` instead of `Pin<&mut self>`, we cannot create self-referential +`gen` blocks. Self-referential `gen` blocks occur when you hold a borrow to a local variable across a yield point: + +```rust +gen { + let x = vec![1, 2, 3, 4]; + let mut y = x.iter(); + yield y.next(); + yield Some(42); + yield y.next(); +} +``` + +or as a more common example: + +```rust +gen { + let x = vec![1, 2, 3, 4]; + for z in x.iter() { + yield z * 2; + } +} +``` + # Reference-level explanation [reference-level-explanation]: #reference-level-explanation ## New keyword @@ -730,6 +756,7 @@ There are a few options forward (though this list is probably not complete): * *Downside*: It's unclear whether this is possible. 
* Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block). * *Downside*: The thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. + * *Downside*: Now the `next` method takes a double-indirection as an argument `&mut Pin<&mut G>`, which may not optimize well sometimes. This RFC is forward compatible with any such designs, so I will not explore it here. From b836635a4a2c83156b1ec386e0e40098c411aa05 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 13 Mar 2024 14:18:13 +0000 Subject: [PATCH 29/45] Make self-referential `gen` blocks an open question We had listed self-referential `gen` blocks as a future possibility, but in discussion with T-lang, it's become clear that this should instead be listed as an open question, so let's do that. --- text/0000-gen-fn.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 2cff0a5a52b..b2a0423a355 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -637,6 +637,22 @@ main = putStrLn $ show $ take 5 $ oddDup [1..20] # Unresolved questions [unresolved-questions]: #unresolved-questions +## Self-referential `gen` blocks + +We can allow `gen` blocks to hold borrows across `yield` points. Should this be part of the initial stabilization? + +There are a few options for how to do this, either before or after stabilization (though this list is probably not complete): + +* Add a separate trait for pinned iteration that is also usable with `gen` and `for`. + * *Downside*: We would have very similar traits for the same thing. +* Backward-compatibly add a way to change the argument type of `Iterator::next`. + * *Downside*: It's unclear whether this is possible. +* Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block). 
+ * *Downside*: The thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. + * *Downside*: Now the `next` method takes a double-indirection as an argument `&mut Pin<&mut G>`, which may not optimize well sometimes. + +This RFC is forward compatible with any such designs. However, if we were to stabilize `gen` blocks that could not hold borrows across `yield` points, this would be a serious usability limitation that users might find surprising. Consequently, whether we should choose to address this before stabilization is an open question. + ## Keyword Should we use `iter` as the keyword, as we're producing `Iterator`s? @@ -744,22 +760,6 @@ This RFC's design is forward compatible with anything we decide on. At present it is only possible to have a `gen` block yield futures, but not `await` within it, similar to how you cannot write iterators that `await`, but that return futures from `next`. -## Self-referential `gen` blocks - -We can allow `gen` blocks to hold borrows across `yield` points in the future. - -There are a few options forward (though this list is probably not complete): - -* Add a separate trait for pinned iteration that is also usable with `gen` and `for`. - * *Downside*: We would have very similar traits for the same thing. -* Backward-compatibly add a way to change the argument type of `Iterator::next`. - * *Downside*: It's unclear whether this is possible. -* Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block). - * *Downside*: The thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. - * *Downside*: Now the `next` method takes a double-indirection as an argument `&mut Pin<&mut G>`, which may not optimize well sometimes. - -This RFC is forward compatible with any such designs, so I will not explore it here. 
- ## `try` interactions We could allow `gen try fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. From 6478947eb43406859d5a63dd728d1b2969726471 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Thu, 14 Mar 2024 23:09:31 +0000 Subject: [PATCH 30/45] Adjust for `Generator -> Coroutine` rename For many years, we had a trait in nightly Rust called `Generator`. We've now renamed this to `Coroutine`, but this RFC still referred to it as `Generator`. Let's use the new name and make a note of the old one. --- text/0000-gen-fn.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index b2a0423a355..3751d483292 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -191,11 +191,11 @@ on iterators over `Result`s. ## Implementation -This feature is mostly implemented via existing generators, though there are some special cases. +This feature is mostly implemented via existing coroutines, though there are some special cases. ### `gen` blocks -`gen` blocks are the same as an unstable generator... +`gen` blocks are the same as an unstable coroutine... * ...without arguments, * ...with an additional check forbidding holding borrows across `yield` points, @@ -207,7 +207,7 @@ This feature is mostly implemented via existing generators, though there are som It's another language feature for something that can already be written entirely in user code. -In contrast to `Generator`, `gen` blocks that produce `Iterator`s cannot hold references across `yield` points. +In contrast to `Coroutine`s (currently unstable), `gen` blocks that produce iterators cannot hold references across `yield` points. See [`from_generator`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`. The `gen` keyword causes some fallout in the community, mostly around the `rand` crate, which has `gen` methods on its traits. 
@@ -220,7 +220,7 @@ The `gen` keyword causes some fallout in the community, mostly around the `rand` We could use `iter` as the keyword. I prefer `iter` because I connect generators with a more powerful scheme than plain `Iterator`s. -The `Generator` trait can do everything that `iter` blocks and `async` blocks can do and more. +The unstable `Coroutine` trait (which was previously called `Generator`) can do everything that `iter` blocks and `async` blocks can do and more. I believe connecting the `Iterator` trait with `iter` blocks is the right choice, but that would require us to carve out many exceptions for this keyword as `iter` is used for module names and method names everywhere (including libstd/libcore). It may not be much worse than `gen` (see also [the unresolved questions][unresolved-questions]). @@ -234,7 +234,7 @@ Some of these new methods would need to be very generic. While it's not an `Iterator` example, [`array::try_map`][] is something that has very complex diagnostics that are hard to improve, even if it's nice once it works. Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) or [`propane`](https://crates.io/crates/propane) instead. -`genawaiter` works on stable and provides `gen!` macro blocks that behave like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` crate uses the `Generator` trait from nightly and works mostly +`genawaiter` works on stable and provides `gen!` macro blocks that behave like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` crate uses the `Coroutine` trait from nightly and works mostly like `gen` would. The standard library includes [`std::iter::from_fn`][], which can be used in @@ -736,18 +736,18 @@ The macro would expand to a `for` loop + `yield`. 
yield_all!(iter) ``` -## Complete `Generator` support +## Complete `Coroutine` support -We already have a `Generator` trait on nightly that is more powerful than the `Iterator` +We already have a `Coroutine` trait on nightly (previously called `Generator`) that is more powerful than the `Iterator` API could possibly be: -1. It uses `Pin<&mut Self>`, allowing self-references in the generator across yield points. +1. It uses `Pin<&mut Self>`, allowing self-references across yield points. 2. It has arguments (`yield` returns the arguments passed to it in the subsequent invocations). Similar to the ideas around `async` closures, -I think we could argue for `Generators` to be `gen` closures while `gen` blocks are a simpler concept that has no arguments and only captures variables. +I think we could argue for coroutines to be `gen` closures while `gen` blocks are a simpler concept that has no arguments and only captures variables. -Either way, support for full `Generator`s should be discussed and implemented separately, +Either way, support for full coroutines should be discussed and implemented separately, as there are many more open questions around them beyond a simpler way to write `Iterator`s. ## `async` interactions From a2bf47319c3cb0a5040678dd4fea36adce082575 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Thu, 14 Mar 2024 23:49:53 +0000 Subject: [PATCH 31/45] Add unresolved question about implemented traits To give us better options for supporting self-referential generators we may not want the type returned by `gen` blocks to implement `Iterator` directly. Let's call this out as an open question and weaken claims throughout the document related to this. 
--- text/0000-gen-fn.md | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 3751d483292..c4ad4511d0a 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -6,14 +6,7 @@ # Summary [summary]: #summary -Reserve the `gen` keyword in the 2024 edition. - -Add `gen {}` blocks to the language. These implement `Iterator` by `yield`ing -elements. This is simpler and more intuitive than creating a custom type and -manually implementing `Iterator` for that type, which requires writing an -explicit `Iterator::next` method body. This is a change similar to adding `async -{}` blocks that implement `Future` instead of having to manually write futures -and their state machines. +This RFC reserves the `gen` keyword in the Rust 2024 edition for generators and adds `gen { .. }` blocks to the language. Similar to how `async` blocks produce values that can be awaited with `.await`, `gen` blocks produce values that can be iterated over using `for` loops. # Motivation [motivation]: #motivation @@ -78,7 +71,7 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { yield value * 2; } } - } + }.into() } ``` @@ -137,13 +130,13 @@ need to be given arguments of type `Option`. ## Fusing -`Iterator`s produced by `gen` keep returning `None` when invoked again after they have returned `None` once. +Iterators produced by `gen` keep returning `None` when invoked again after they have returned `None` once. They do not implement `FusedIterator`, as that is not a language item, but may implement it in the future. ## Holding borrows across yields Since the `Iterator::next` method takes `&mut self` instead of `Pin<&mut self>`, we cannot create self-referential -`gen` blocks. Self-referential `gen` blocks occur when you hold a borrow to a local variable across a yield point: +`gen` blocks (but see the open questions). 
Self-referential `gen` blocks occur when you hold a borrow to a local variable across a yield point: ```rust gen { @@ -199,7 +192,7 @@ This feature is mostly implemented via existing coroutines, though there are som * ...without arguments, * ...with an additional check forbidding holding borrows across `yield` points, -* ...and with an automatic `Iterator` implementation. +* ...and with an automatic implementation of a trait allowing the type to be used in `for` loops (see the open questions). * ...do not panic if invoked again after returning # Drawbacks @@ -637,6 +630,10 @@ main = putStrLn $ show $ take 5 $ oddDup [1..20] # Unresolved questions [unresolved-questions]: #unresolved-questions +## Whether to implement `Iterator` + +There may be benefits to having the type returned by `gen` blocks *not* implement `Iterator` directly. Instead, these blocks would return a type that implements either `IntoIterator` or a new `IntoGenerator` trait. Such a design could leave us more appealing options for supporting self-referential `gen` blocks. We leave this as an open question. + ## Self-referential `gen` blocks We can allow `gen` blocks to hold borrows across `yield` points. Should this be part of the initial stabilization? From caf93bad67c5fb6f58e679f9cdddf1bbafbf91d0 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 15 Mar 2024 01:52:26 +0000 Subject: [PATCH 32/45] Fix `k#gen` The language about how to refer to the `gen` keyword in older editions was changed in a recent commit from `k#gen` to `r#gen`. This was probably a search/replace error. Let's fix that. 
--- text/0000-gen-fn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index c4ad4511d0a..3388dcbd16c 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -85,7 +85,7 @@ Iterators created with `gen` return `None` once they `return` (implicitly at the Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, etc. named `gen` must be renamed -or be referred to via `r#gen`. +or be referred to via `k#gen`. ## Returning/finishing an iterator From 8846955ab67251647721027b07e9550349584be2 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 15 Mar 2024 03:12:41 +0000 Subject: [PATCH 33/45] Fix where we fixed `k#gen` We meant to change `r#gen` to `k#gen` on one line to fix an apparent earlier search/replace error, and we erroneously changed it on a different line. Let's fix the correct line and fix the mistaken fix. --- text/0000-gen-fn.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index 3388dcbd16c..c4513863d67 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -85,7 +85,7 @@ Iterators created with `gen` return `None` once they `return` (implicitly at the Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, etc. named `gen` must be renamed -or be referred to via `k#gen`. +or be referred to via `r#gen`. ## Returning/finishing an iterator @@ -163,7 +163,7 @@ gen { [reference-level-explanation]: #reference-level-explanation ## New keyword -In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `r#gen` to get the same features. +In the 2024 edition we reserve `gen` as a keyword. 
Previous editions will use `k#gen` to get the same features. ## Error handling From 32ffb27eb7f859be40f86d686e17e9916484835e Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 27 Mar 2024 17:34:58 +0000 Subject: [PATCH 34/45] Add tracking issue for RFC 3513 We had already opened a tracking issue for this work, so let's fill that in here. --- text/0000-gen-fn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-gen-fn.md b/text/0000-gen-fn.md index c4513863d67..8b0047b189d 100644 --- a/text/0000-gen-fn.md +++ b/text/0000-gen-fn.md @@ -1,7 +1,7 @@ - Feature Name: `gen-fn` - Start Date: 2023-10-10 - RFC PR: [rust-lang/rfcs#3513](https://github.com/rust-lang/rfcs/pull/3513) -- Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000) +- Tracking Issue: [rust-lang/rust#117078](https://github.com/rust-lang/rust/issues/117078) # Summary [summary]: #summary From 3f0159d43fe79149486b47557df6b378a8502e66 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 27 Mar 2024 17:37:10 +0000 Subject: [PATCH 35/45] Rename file for RFC 3513 In addition to giving the file the correct number, let's call this `gen-blocks` rather than `gen-fn` since we removed `gen fn` from the main body of this RFC. --- text/{0000-gen-fn.md => 3513-gen-blocks.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename text/{0000-gen-fn.md => 3513-gen-blocks.md} (100%) diff --git a/text/0000-gen-fn.md b/text/3513-gen-blocks.md similarity index 100% rename from text/0000-gen-fn.md rename to text/3513-gen-blocks.md From 787e0e906662501130dccbb1c907c873367470c6 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 27 Mar 2024 18:09:29 +0000 Subject: [PATCH 36/45] Update feature name to `gen_blocks` The feature name in the draft was a placeholder. Let's update this to the actual feature name now in use. 
--- text/3513-gen-blocks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index 8b0047b189d..7b6959ea4cd 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -1,4 +1,4 @@ -- Feature Name: `gen-fn` +- Feature Name: `gen_blocks` - Start Date: 2023-10-10 - RFC PR: [rust-lang/rfcs#3513](https://github.com/rust-lang/rfcs/pull/3513) - Tracking Issue: [rust-lang/rust#117078](https://github.com/rust-lang/rust/issues/117078) From ad74905583f206de626c45585749aae6c858f01d Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 27 Mar 2024 18:05:04 +0000 Subject: [PATCH 37/45] Unwrap lines We had a mix between hard wrapped lines of various widths and unwrapped lines. Let's unwrap all lines. --- text/3513-gen-blocks.md | 209 ++++++++++++---------------------------- 1 file changed, 60 insertions(+), 149 deletions(-) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index 7b6959ea4cd..f688db595bd 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -11,22 +11,11 @@ This RFC reserves the `gen` keyword in the Rust 2024 edition for generators and # Motivation [motivation]: #motivation -The main motivation of this RFC is to reserve a new keyword in the 2024 -edition. We will discuss the semantic questions of generators in this -document, but we do not have to settle them with this RFC. We'll describe -current thinking on the semantics, but some questions will be left open to be -answered at a later time after we gain more experience with the -implementation. - -Writing iterators manually can be very painful. Many iterators can be written by -chaining `Iterator` methods, but some need to be written as a `struct` and have -`Iterator` implemented for them. Some of the code that is written this way -pushes people to avoid iterators and instead execute a `for` loop that eagerly -writes values to mutable state. 
With this RFC, one can write the `for` loop -and still get a lazy iterator of values. - -As an example, here are multiple ways to write an iterator over something that contains integers -while only keeping the odd integers and multiplying each by 2: +The main motivation of this RFC is to reserve a new keyword in the 2024 edition. We will discuss the semantic questions of generators in this document, but we do not have to settle them with this RFC. We'll describe current thinking on the semantics, but some questions will be left open to be answered at a later time after we gain more experience with the implementation. + +Writing iterators manually can be very painful. Many iterators can be written by chaining `Iterator` methods, but some need to be written as a `struct` and have `Iterator` implemented for them. Some of the code that is written this way pushes people to avoid iterators and instead execute a `for` loop that eagerly writes values to mutable state. With this RFC, one can write the `for` loop and still get a lazy iterator of values. + +As an example, here are multiple ways to write an iterator over something that contains integers while only keeping the odd integers and multiplying each by 2: ```rust // `Iterator` methods @@ -75,27 +64,22 @@ fn odd_dup(values: impl Iterator) -> impl Iterator { } ``` -Iterators created with `gen` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). -`gen` iterators are fused, so after returning `None` once, they will keep returning `None` forever. +Iterators created with `gen` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). `gen` iterators are fused, so after returning `None` once, they will keep returning `None` forever. # Guide-level explanation [guide-level-explanation]: #guide-level-explanation ## New keyword -Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. 
-This means during the migration to the 2024 edition, all variables, functions, modules, types, etc. named `gen` must be renamed -or be referred to via `r#gen`. +Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. This means during the migration to the 2024 edition, all variables, functions, modules, types, etc. named `gen` must be renamed or be referred to via `r#gen`. ## Returning/finishing an iterator -`gen` blocks must diverge or return the unit type. -Specifically, the trailing expression must be of the unit or `!` type, and any `return` statements in the block must either be given no argument at all or given an argument of the unit or `!` type. +`gen` blocks must diverge or return the unit type. Specifically, the trailing expression must be of the unit or `!` type, and any `return` statements in the block must either be given no argument at all or given an argument of the unit or `!` type. ### Diverging iterators -For example, a `gen` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` -from `next`, and only drop its captured data when the iterator is dropped: +For example, a `gen` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` from `next`, and only drop its captured data when the iterator is dropped: ```rust gen { @@ -110,33 +94,19 @@ If a `gen` block panics, the behavior is very similar to `return`, except that ` ## Error handling -Within `gen` blocks, the `?` operator desugars as follows. When its -argument returns a value indicating "do not short circuit" -(e.g. `Option::Some(..)`, `Result::Ok(..)`, `ControlFlow::Continue(..)`), that -value becomes the result of the expression as usual. When its argument -returns a value indicating that short-circuiting is desired -(e.g. 
`Option::None`, `Result::Err(..)`, `ControlFlow::Break(..)`), the value -is first yielded (after being converted by `From::from` as usual), then the -block returns immediately. +Within `gen` blocks, the `?` operator desugars as follows. When its argument returns a value indicating "do not short circuit" (e.g. `Option::Some(..)`, `Result::Ok(..)`, `ControlFlow::Continue(..)`), that value becomes the result of the expression as usual. When its argument returns a value indicating that short-circuiting is desired (e.g. `Option::None`, `Result::Err(..)`, `ControlFlow::Break(..)`), the value is first yielded (after being converted by `From::from` as usual), then the block returns immediately. -Even when `?` is used within a `gen` block, the block must return a -value of type unit or `!`. That is, it does not return a value of `Some(..)`, -`Ok(..)`, or `Continue(..)` as other such blocks might. +Even when `?` is used within a `gen` block, the block must return a value of type unit or `!`. That is, it does not return a value of `Some(..)`, `Ok(..)`, or `Continue(..)` as other such blocks might. -However, note that when `?` is used within a `gen` block, all `yield` -statements will need to be given an argument of a compatible type. For -example, if `None?` is used in an expression, then all `yield` statements will -need to be given arguments of type `Option`. +However, note that when `?` is used within a `gen` block, all `yield` statements will need to be given an argument of a compatible type. For example, if `None?` is used in an expression, then all `yield` statements will need to be given arguments of type `Option`. ## Fusing -Iterators produced by `gen` keep returning `None` when invoked again after they have returned `None` once. -They do not implement `FusedIterator`, as that is not a language item, but may implement it in the future. +Iterators produced by `gen` keep returning `None` when invoked again after they have returned `None` once. 
They do not implement `FusedIterator`, as that is not a language item, but may implement it in the future. ## Holding borrows across yields -Since the `Iterator::next` method takes `&mut self` instead of `Pin<&mut self>`, we cannot create self-referential -`gen` blocks (but see the open questions). Self-referential `gen` blocks occur when you hold a borrow to a local variable across a yield point: +Since the `Iterator::next` method takes `&mut self` instead of `Pin<&mut self>`, we cannot create self-referential `gen` blocks (but see the open questions). Self-referential `gen` blocks occur when you hold a borrow to a local variable across a yield point: ```rust gen { @@ -161,9 +131,10 @@ gen { # Reference-level explanation [reference-level-explanation]: #reference-level-explanation + ## New keyword -In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `k#gen` to get the same features. +In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `k#gen` to get the same features. ## Error handling @@ -179,8 +150,7 @@ match foo.branch() { } ``` -This is the same behaviour that `collect::<Result<_, _>>()` performs -on iterators over `Result`s. +This is the same behaviour that `collect::<Result<_, _>>()` performs on iterators over `Result`s. ## Implementation @@ -200,8 +170,7 @@ This feature is mostly implemented via existing coroutines, though there are som It's another language feature for something that can already be written entirely in user code. -In contrast to `Coroutine`s (currently unstable), `gen` blocks that produce iterators cannot hold references across `yield` points. -See [`from_generator`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`. +In contrast to `Coroutine`s (currently unstable), `gen` blocks that produce iterators cannot hold references across `yield` points. See [`from_generator`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`.
The `gen` keyword causes some fallout in the community, mostly around the `rand` crate, which has `gen` methods on its traits. @@ -209,30 +178,18 @@ The `gen` keyword causes some fallout in the community, mostly around the `rand` # Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives + ## Keyword -We could use `iter` as the keyword. -I prefer `iter` because I connect generators with a more powerful scheme than plain `Iterator`s. -The unstable `Coroutine` trait (which was previously called `Generator`) can do everything that `iter` blocks and `async` blocks can do and more. -I believe connecting the `Iterator` trait with `iter` blocks is the right choice, -but that would require us to carve out many exceptions for this keyword as `iter` is used for module names and method names everywhere (including libstd/libcore). -It may not be much worse than `gen` (see also [the unresolved questions][unresolved-questions]). -We may want to use `gen` for full on generators in the future. +We could use `iter` as the keyword. I prefer `iter` because I connect generators with a more powerful scheme than plain `Iterator`s. The unstable `Coroutine` trait (which was previously called `Generator`) can do everything that `iter` blocks and `async` blocks can do and more. I believe connecting the `Iterator` trait with `iter` blocks is the right choice, but that would require us to carve out many exceptions for this keyword as `iter` is used for module names and method names everywhere (including libstd/libcore). It may not be much worse than `gen` (see also [the unresolved questions][unresolved-questions]). We may want to use `gen` for full on generators in the future. ## Do not do this -One alternative is to keep adding more helper methods to `Iterator`. -It is already hard for new Rustaceans to be aware of all the capabilities of `Iterator`. -Some of these new methods would need to be very generic. 
-While it's not an `Iterator` example, [`array::try_map`][] is something that has very complex diagnostics that are hard to improve, even if it's nice once it works. +One alternative is to keep adding more helper methods to `Iterator`. It is already hard for new Rustaceans to be aware of all the capabilities of `Iterator`. Some of these new methods would need to be very generic. While it's not an `Iterator` example, [`array::try_map`][] is something that has very complex diagnostics that are hard to improve, even if it's nice once it works. -Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) or [`propane`](https://crates.io/crates/propane) instead. -`genawaiter` works on stable and provides `gen!` macro blocks that behave like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` crate uses the `Coroutine` trait from nightly and works mostly -like `gen` would. +Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) or [`propane`](https://crates.io/crates/propane) instead. `genawaiter` works on stable and provides `gen!` macro blocks that behave like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` crate uses the `Coroutine` trait from nightly and works mostly like `gen` would. -The standard library includes [`std::iter::from_fn`][], which can be used in -some cases, but as we saw in the example [above][motivation], often the -improvement over writing out a manual implementation of `Iterator` is limited. +The standard library includes [`std::iter::from_fn`][], which can be used in some cases, but as we saw in the example [above][motivation], often the improvement over writing out a manual implementation of `Iterator` is limited. 
[`std::iter::from_fn`]: https://doc.rust-lang.org/std/iter/fn.from_fn.html [`array::try_map`]: https://doc.rust-lang.org/std/primitive.array.html#method.try_map @@ -241,8 +198,7 @@ improvement over writing out a manual implementation of `Iterator` is limited. Similarly to `try` blocks, trailing expressions could yield their element. -There would then be no way to terminate iteration as `return` statements would have to have a -value that is `yield`ed before terminating iteration. +There would then be no way to terminate iteration as `return` statements would have to have a value that is `yield`ed before terminating iteration. We could do something magical where returning `()` terminates the iteration, so this code... @@ -252,7 +208,7 @@ fn foo() -> impl Iterator { } ``` -...could be a way to specify `std::iter::once(42)`. The issue I see with this is that this... +...could be a way to specify `std::iter::once(42)`. The issue I see with this is that this... ```rust fn foo() -> impl Iterator { @@ -275,14 +231,9 @@ fn foo() -> impl Iterator { gen {} } ## CLU, Alphard -The idea of generators that yield their values goes back at least as far as -the Alphard language from circa 1975 (see ["Alphard: Form and -Content"][alphard], Mary Shaw, 1981). This was later refined into the idea of -iterators in the CLU language (see ["A History of CLU"][clu-history], Barbara -Liskov, 1992 and ["CLU Reference Manual"][clu-ref], Liskov et al., 1979). +The idea of generators that yield their values goes back at least as far as the Alphard language from circa 1975 (see ["Alphard: Form and Content"][alphard], Mary Shaw, 1981). This was later refined into the idea of iterators in the CLU language (see ["A History of CLU"][clu-history], Barbara Liskov, 1992 and ["CLU Reference Manual"][clu-ref], Liskov et al., 1979). -The CLU language opened an iterator context with the `iter` keyword and -produced values with `yield` statements.
E.g.: +The CLU language opened an iterator context with the `iter` keyword and produced values with `yield` statements. E.g.: ``` odds = iter () yields (int) @@ -300,9 +251,7 @@ end odds ## Icon -In [Icon][icon-language] (introduced circa 1977), generators are woven deeply -into the language, and any function can return a sequence of values. When done -explicitly, the `suspend` keyword is used. E.g.: +In [Icon][icon-language] (introduced circa 1977), generators are woven deeply into the language, and any function can return a sequence of values. When done explicitly, the `suspend` keyword is used. E.g.: ``` procedure range(i, j) @@ -318,8 +267,7 @@ end ## Python -In Python, any function that contains a `yield` statement returns a -generator. E.g.: +In Python, any function that contains a `yield` statement returns a generator. E.g.: ```python def odd_dup(xs): @@ -330,8 +278,7 @@ def odd_dup(xs): ## ECMAScript / JavaScript -In JavaScript, `yield` can be used within [`function*`][javascript-function*] -generator functions. E.g.: +In JavaScript, `yield` can be used within [`function*`][javascript-function*] generator functions. E.g.: ```javascript function* oddDupUntilNegative(xs) { @@ -345,8 +292,7 @@ function* oddDupUntilNegative(xs) { } ``` -These generator functions are general coroutines. `yield` forms an expression -that returns the value passed to `next`. E.g.: +These generator functions are general coroutines. `yield` forms an expression that returns the value passed to `next`. E.g.: ```javascript function* dup(x) { @@ -364,8 +310,7 @@ console.assert(g.next(3).value === 6); ## Ruby -In Ruby, `yield` can be used with the [`Enumerator`][ruby-enumerator] class to -implement an iterator. E.g.: +In Ruby, `yield` can be used with the [`Enumerator`][ruby-enumerator] class to implement an iterator. 
E.g.: ```ruby def odd_dup_until_negative xs @@ -381,8 +326,7 @@ def odd_dup_until_negative xs end ``` -Ruby also uses `yield` for a general coroutine mechanism with the -[`Fiber`][ruby-fiber] class. E.g.: +Ruby also uses `yield` for a general coroutine mechanism with the [`Fiber`][ruby-fiber] class. E.g.: ```ruby def dup @@ -403,8 +347,7 @@ g = dup ## Kotlin -In Kotlin, a lazy [`Sequence`][kotlin-sequences] can be built using `sequence` -expressions and `yield`. E.g.: +In Kotlin, a lazy [`Sequence`][kotlin-sequences] can be built using `sequence` expressions and `yield`. E.g.: ```kotlin fun oddDup(xs: Iterable): Sequence { @@ -428,8 +371,7 @@ fun main() { ## Swift -In Swift, [`AsyncStream`][swift-asyncstream] is used with `yield` to produce -asynchronous generators. E.g.: +In Swift, [`AsyncStream`][swift-asyncstream] is used with `yield` to produce asynchronous generators. E.g.: ```swift import Foundation @@ -453,16 +395,14 @@ Task { semaphore.wait() ``` -Synchronous generators are not yet available in Swift, but [may -be][swift-sync-gen] something they are planning. +Synchronous generators are not yet available in Swift, but [may be][swift-sync-gen] something they are planning. [swift-asyncstream]: https://developer.apple.com/documentation/swift/asyncstream [swift-sync-gen]: https://forums.swift.org/t/is-it-possible-to-make-an-iterator-that-yelds/53995/7 ## C# ## -In C#, within an [`iterator`][csharp-iterators], the [`yield`][csharp-yield] -statement is used to either yield the next value or to stop iteration. E.g.: +In C#, within an [`iterator`][csharp-iterators], the [`yield`][csharp-yield] statement is used to either yield the next value or to stop iteration. E.g.: ```csharp IEnumerable OddDupUntilNegative(IEnumerable xs) @@ -481,17 +421,14 @@ IEnumerable OddDupUntilNegative(IEnumerable xs) } ``` -Analogously with this RFC and with `async` blocks in Rust (but unlike `async -Task` in C#), execution of C# iterators does not start until they are -iterated. 
+Analogously with this RFC and with `async` blocks in Rust (but unlike `async Task` in C#), execution of C# iterators does not start until they are iterated. [csharp-iterators]: https://learn.microsoft.com/en-us/dotnet/csharp/iterators [csharp-yield]: https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/statements/yield ## D -In D, `yield` is used when constructing a -[`Generator`][dlang-generators]. E.g.: +In D, `yield` is used when constructing a [`Generator`][dlang-generators]. E.g.: ```dlang import std.concurrency; @@ -515,17 +452,14 @@ void main() { } ``` -As in Ruby, generators in D are built on top of a more general -[`Fiber`][dlang-fibers] class that also uses `yield`. +As in Ruby, generators in D are built on top of a more general [`Fiber`][dlang-fibers] class that also uses `yield`. [dlang-generators]: https://dlang.org/library/std/concurrency/generator.html [dlang-fibers]: https://dlang.org/library/core/thread/fiber/fiber.html ## Dart -In Dart, there are both synchronous and asynchronous [generator -functions][dart-generators]. Synchronous generator functions return an -`Iteratable`. E.g.: +In Dart, there are both synchronous and asynchronous [generator functions][dart-generators]. Synchronous generator functions return an `Iterable`. E.g.: ```dart Iterable oddDup(Iterable xs) sync* { @@ -541,7 +475,7 @@ void main() { } ``` -Asynchronous generator functions return a `Stream` object. E.g.: +Asynchronous generator functions return a `Stream` object. E.g.: ```dart Stream oddDup(Iterable xs) async* { @@ -561,8 +495,7 @@ void main() { ## F# ## -In F#, generators can be expressed with [sequence -expressions][fsharp-sequences] using `yield`. E.g.: +In F#, generators can be expressed with [sequence expressions][fsharp-sequences] using `yield`. E.g.: ```fsharp let oddDup xs = seq { @@ -578,8 +511,7 @@ for x in oddDup (seq { 1 .. 20 }) do ## Racket -In Racket, generators can be built using [`generator`][racket-generators] and -`yield`.
E.g.: +In Racket, generators can be built using [`generator`][racket-generators] and `yield`. E.g.: ```racket #lang racket @@ -597,20 +529,14 @@ In Racket, generators can be built using [`generator`][racket-generators] and (= (g) 10) ``` -Note that because of the expressive power of [`call/cc`][racket-callcc] (and -continuations in general), generators can be written in Racket as a normal -library. +Note that because of the expressive power of [`call/cc`][racket-callcc] (and continuations in general), generators can be written in Racket as a normal library. [racket-callcc]: https://docs.racket-lang.org/reference/cont.html [racket-generators]: https://docs.racket-lang.org/reference/Generators.html ## Haskell, Idris, Clean, etc. -In [Haskell][] (and in similar languages such as [Idris][idris-lang], -[Clean][clean-lang], etc.), all functions are lazy unless specially annotated. -Consequently, Haskell does not need a special `yield` operator. Any function -can be a generator by recursively building a list of elements that will be -lazily returned one at a time. E.g.: +In [Haskell][] (and in similar languages such as [Idris][idris-lang], [Clean][clean-lang], etc.), all functions are lazy unless specially annotated. Consequently, Haskell does not need a special `yield` operator. Any function can be a generator by recursively building a list of elements that will be lazily returned one at a time. E.g.: ```haskell oddDup :: (Integral x) => [x] -> [x] @@ -636,7 +562,7 @@ There may be benefits to having the type returned by `gen` blocks *not* implemen ## Self-referential `gen` blocks -We can allow `gen` blocks to hold borrows across `yield` points. Should this be part of the initial stabilization? +We can allow `gen` blocks to hold borrows across `yield` points. Should this be part of the initial stabilization? 
There are a few options for how to do this, either before or after stabilization (though this list is probably not complete): @@ -648,12 +574,11 @@ There are a few options for how to do this, either before or after stabilization * *Downside*: The thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. * *Downside*: Now the `next` method takes a double-indirection as an argument `&mut Pin<&mut G>`, which may not optimize well sometimes. -This RFC is forward compatible with any such designs. However, if we were to stabilize `gen` blocks that could not hold borrows across `yield` points, this would be a serious usability limitation that users might find surprising. Consequently, whether we should choose to address this before stabilization is an open question. +This RFC is forward compatible with any such designs. However, if we were to stabilize `gen` blocks that could not hold borrows across `yield` points, this would be a serious usability limitation that users might find surprising. Consequently, whether we should choose to address this before stabilization is an open question. ## Keyword -Should we use `iter` as the keyword, as we're producing `Iterator`s? -We could use `gen` as proposed in this RFC and later extend its abilities to more powerful generators. +Should we use `iter` as the keyword, as we're producing `Iterator`s? We could use `gen` as proposed in this RFC and later extend its abilities to more powerful generators. [playground](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=efeacb803158c2ebd57d43b4e606c0b5) @@ -674,7 +599,7 @@ fn main() { ## Contextual keyword -Popular crates (like `rand`) have methods called [`gen`][Rng::gen]. If we forbid those, we are forcing those crates to make a major version bump when they update their edition, and we are requiring any users of those crates to use `r#gen` instead of `gen` when calling that method. 
+Popular crates (like `rand`) have methods called [`gen`][Rng::gen]. If we forbid those, we are forcing those crates to make a major version bump when they update their edition, and we are requiring any users of those crates to use `r#gen` instead of `gen` when calling that method. We could choose to use a contextual keyword and only forbid `gen` in: @@ -689,9 +614,7 @@ This should avoid any parsing issues around `gen` followed by `{` in expressions ## `Iterator::size_hint` -Should we try to compute a conservative `size_hint`? This will reveal information from the body of a generator, -but at least for simple cases users will likely expect `size_hint` to not just be the default. -It is backwards compatible to later add support for opportunistically implementing `size_hint`. +Should we try to compute a conservative `size_hint`? This will reveal information from the body of a generator, but at least for simple cases users will likely expect `size_hint` to not just be the default. It is backwards compatible to later add support for opportunistically implementing `size_hint`. ## Implement other `Iterator` traits. @@ -702,9 +625,7 @@ Is there a possibility for implementing traits like `DoubleEndedIterator`, `Exac ## `yield from` (forwarding operation) -Python has the ability to `yield from` an iterator. -Effectively this is syntax sugar for looping over all elements of the iterator and yielding them individually. -There are infinite options to choose from if we want such a feature, so I'm listing general ideas: +Python has the ability to `yield from` an iterator. Effectively this is syntax sugar for looping over all elements of the iterator and yielding them individually. There are infinite options to choose from if we want such a feature, so I'm listing general ideas: ### Do nothing, just use loops @@ -726,8 +647,7 @@ Or we could use an entirely new keyword. ### stdlib macro -We could add a macro to the standard library and prelude. 
-The macro would expand to a `for` loop + `yield`. +We could add a macro to the standard library and prelude. The macro would expand to a `for` loop + `yield`. ```rust yield_all!(iter) @@ -735,37 +655,28 @@ yield_all!(iter) ## Complete `Coroutine` support -We already have a `Coroutine` trait on nightly (previously called `Generator`) that is more powerful than the `Iterator` -API could possibly be: +We already have a `Coroutine` trait on nightly (previously called `Generator`) that is more powerful than the `Iterator` API could possibly be: 1. It uses `Pin<&mut Self>`, allowing self-references across yield points. 2. It has arguments (`yield` returns the arguments passed to it in the subsequent invocations). -Similar to the ideas around `async` closures, -I think we could argue for coroutines to be `gen` closures while `gen` blocks are a simpler concept that has no arguments and only captures variables. +Similar to the ideas around `async` closures, I think we could argue for coroutines to be `gen` closures while `gen` blocks are a simpler concept that has no arguments and only captures variables. -Either way, support for full coroutines should be discussed and implemented separately, -as there are many more open questions around them beyond a simpler way to write `Iterator`s. +Either way, support for full coroutines should be discussed and implemented separately, as there are many more open questions around them beyond a simpler way to write `Iterator`s. ## `async` interactions -We could support using `await` in `gen async` blocks, similar to how we support `?` being used within `gen` blocks. -We'd have similar limitations holding references held across `await` points as we do have with `yield` points. -The solution space for `gen async` is large enough that I will not explore it here. -This RFC's design is forward compatible with anything we decide on. 
+We could support using `await` in `gen async` blocks, similar to how we support `?` being used within `gen` blocks. We'd have similar limitations on holding references across `await` points as we have with `yield` points. The solution space for `gen async` is large enough that I will not explore it here. This RFC's design is forward compatible with anything we decide on. -At present it is only possible to have a `gen` block yield futures, but not `await` within it, similar to how -you cannot write iterators that `await`, but that return futures from `next`. +At present it is only possible to have a `gen` block yield futures, but not `await` within it, similar to how you cannot write iterators that `await`, but that return futures from `next`. ## `try` interactions -We could allow `gen try fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. -Whatever we do here, it should mirror whatever `try fn` means in the future. +We could allow `gen try fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. Whatever we do here, it should mirror whatever `try fn` means in the future. ## `gen fn`: -This does not introduce `gen fn`. The syntax design for them is fairly large -and there are open questions around the difference between returning or yielding a type. +This does not introduce `gen fn`. The syntax design for them is fairly large and there are open questions around the difference between returning or yielding a type. ```rust fn foo(args) yield item From f6cd712389cbcc8c649ddc4a516b23baad24ad5d Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 27 Mar 2024 18:42:25 +0000 Subject: [PATCH 38/45] Add future possibility of implementing `FusedIterator` The main body of the RFC discusses how we might implement `FusedIterator` for the iterators produced by `gen` blocks, but this was not listed as a future possibility. Let's do that.
--- text/3513-gen-blocks.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index f688db595bd..2225d48a34e 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -687,3 +687,7 @@ gen fn foo(args) // or any of the above variants for the item type gen foo(args) // or any of the above variants for the item type generator fn foo(args) // or any of the above variants for the item type ``` + +## Implement `FusedIterator` + +The iterators produced by `gen` blocks are fused but do not implement `FusedIterator` because it is not a language item. We may in the future want for these iterators to implement `FusedIterator`. From 4bf6106c8060bc8542d44340473ab0643541a5ac Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 27 Mar 2024 20:17:45 +0000 Subject: [PATCH 39/45] Remove incorrect statement about pinning There was a statement in the draft about, as a downside, something needing to be pinned for the entire iteration rather than just for each call to `next`. But, of course, under the pinning guarantees, these are equivalent. Once something is pinned, unless it is `Unpin`, it must be treated as pinned until it is destructed. Let's remove this statement. --- text/3513-gen-blocks.md | 1 - 1 file changed, 1 deletion(-) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index 2225d48a34e..e31aed65812 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -571,7 +571,6 @@ There are a few options for how to do this, either before or after stabilization * Backward-compatibly add a way to change the argument type of `Iterator::next`. * *Downside*: It's unclear whether this is possible. * Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block). - * *Downside*: The thing being iterated over must now be pinned for the entire iteration, instead of for each invocation of `next`. 
* *Downside*: Now the `next` method takes a double-indirection as an argument `&mut Pin<&mut G>`, which may not optimize well sometimes. This RFC is forward compatible with any such designs. However, if we were to stabilize `gen` blocks that could not hold borrows across `yield` points, this would be a serious usability limitation that users might find surprising. Consequently, whether we should choose to address this before stabilization is an open question. From 59be7f9b7f48b4f410f4829d7553ab24eb9b5b17 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Thu, 28 Mar 2024 01:24:56 +0000 Subject: [PATCH 40/45] Add open question about Rust 2015 and Rust 2018 In RFC 3101 we reserved in Rust 2021 prefixed identifiers such as `prefix#ident`. For this reason, we can make `gen` blocks available in Rust 2021 using `k#gen` as was anticipated in the (currently pending) RFC 3098. It's less clear what to do about Rust 2015 and Rust 2018, however, so let's mark this as an open question. (Thanks to tmandry for raising this point.) --- text/3513-gen-blocks.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index e31aed65812..c5fd0d024fd 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -134,7 +134,7 @@ gen { ## New keyword -In the 2024 edition we reserve `gen` as a keyword. Previous editions will use `k#gen` to get the same features. +In the 2024 edition we reserve `gen` as a keyword. Rust 2021 will use `k#gen` to access the same feature. What to do about earlier editions is left as an open question. ## Error handling @@ -619,6 +619,15 @@ Should we try to compute a conservative `size_hint`? This will reveal informatio Is there a possibility for implementing traits like `DoubleEndedIterator`, `ExactSizeIterator` at all? +## What to do about Rust 2015 and Rust 2018 + +In [RFC 3101][] we reserved prefixed identifiers such as `prefix#ident`. 
For this reason, we can make `gen` blocks available in Rust 2021 using `k#gen` as was anticipated in the (currently pending) [RFC 3098][]. + +Whether and how to make this feature available in Rust 2015 and Rust 2018, however, we leave as an open question. + +[RFC 3098]: https://github.com/rust-lang/rfcs/pull/3098 +[RFC 3101]: https://github.com/rust-lang/rfcs/pull/3101 + # Future possibilities [future-possibilities]: #future-possibilities From 31c7b288dd44e3c357a6b9ce9693fc0d2307d4d9 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 27 Mar 2024 17:41:15 +0000 Subject: [PATCH 41/45] Do a round of copyediting We had been meaning to do some final copyediting prior to this RFC being merged, so let's do that. In addition to making the text a bit more regular and precise, fixing some minor errors, removing outdated information, and adding references between sections, we've tried to "tighten it up" a bit where possible. We've been careful to not change anything of semantic significance or otherwise of significance to the consensus. --- text/3513-gen-blocks.md | 276 ++++++++++++++++++++-------------------- 1 file changed, 135 insertions(+), 141 deletions(-) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index c5fd0d024fd..5c39fe862ba 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -6,65 +6,64 @@ # Summary [summary]: #summary -This RFC reserves the `gen` keyword in the Rust 2024 edition for generators and adds `gen { .. }` blocks to the language. Similar to how `async` blocks produce values that can be awaited with `.await`, `gen` blocks produce values that can be iterated over using `for` loops. +This RFC reserves the `gen` keyword in the Rust 2024 edition for generators and adds `gen { .. }` blocks to the language. Similar to how `async` blocks produce values that can be awaited with `.await`, `gen` blocks produce values that can be iterated over with `for`. 
# Motivation [motivation]: #motivation -The main motivation of this RFC is to reserve a new keyword in the 2024 edition. We will discuss the semantic questions of generators in this document, but we do not have to settle them with this RFC. We'll describe current thinking on the semantics, but some questions will be left open to be answered at a later time after we gain more experience with the implementation. +Writing iterators manually can be painful. Many iterators can be written by chaining together iterator combinators, but some need to be written with a manual implementation of `Iterator`. This can push people to avoid iterators and do worse things such as writing loops that eagerly store values to mutable state. With `gen` blocks, we can now write a simple `for` loop and still get a lazy iterator of values. -Writing iterators manually can be very painful. Many iterators can be written by chaining `Iterator` methods, but some need to be written as a `struct` and have `Iterator` implemented for them. Some of the code that is written this way pushes people to avoid iterators and instead execute a `for` loop that eagerly writes values to mutable state. With this RFC, one can write the `for` loop and still get a lazy iterator of values. - -As an example, here are multiple ways to write an iterator over something that contains integers while only keeping the odd integers and multiplying each by 2: +By way of example, consider these ways of expressing the same function: ```rust -// `Iterator` methods -fn odd_dup(values: impl Iterator) -> impl Iterator { - values.filter(|value| value.is_odd()).map(|value| value * 2) +// This example uses iterator combinators. +fn odd_dup(xs: impl IntoIterator) -> impl Iterator { + xs.into_iter().filter(|x| x.is_odd()).map(|x| x * 2) } -// `std::iter::from_fn` -fn odd_dup(mut values: impl Iterator) -> impl Iterator { +// This example uses `iter::from_fn`. 
+fn odd_dup(xs: impl IntoIterator) -> impl Iterator { + let mut xs = xs.into_iter(); std::iter::from_fn(move || { - loop { - let value = values.next()?; - if value % 2 == 1 { - return Some(value * 2); + while let Some(x) = xs.next() { + if x.is_odd() { + return Some(x * 2); } } + None }) } -// `struct` and manual `impl` -fn odd_dup(values: impl Iterator) -> impl Iterator { - struct Foo(T); - impl> Iterator for Foo { +// This example uses a manual implementation of `Iterator`. +fn odd_dup(xs: impl IntoIterator) -> impl Iterator { + struct OddDup(T); + impl> Iterator for OddDup { type Item = u32; fn next(&mut self) -> Option { - loop { - let value = self.0.next()?; - if value.is_odd() { + while let Some(x) = self.0.next() { + if x.is_odd() { return Some(x * 2) } } + None } } - Foo(values) + OddDup(xs.into_iter()) } -// `gen block` -fn odd_dup(values: impl Iterator) -> impl Iterator { +// This example uses `gen` blocks, introduced in this RFC. +fn odd_dup(xs: impl IntoIterator) -> impl Iterator { gen { - for value in values { - if value.is_odd() { - yield value * 2; + for x in xs { + if x.is_odd() { + yield x * 2; } } - }.into() + }.into_iter() } ``` -Iterators created with `gen` return `None` once they `return` (implicitly at the end of the scope or explicitly with `return`). `gen` iterators are fused, so after returning `None` once, they will keep returning `None` forever. +Iterators created with `gen` blocks return `None` from `next` once the `gen` block has returned (either implicitly at the end of the scope or explicitly with the `return` statement) and are fused (after `next` returns `None` once, it will keep returning `None` forever). # Guide-level explanation [guide-level-explanation]: #guide-level-explanation @@ -73,13 +72,13 @@ Iterators created with `gen` return `None` once they `return` (implicitly at the Starting in the 2024 edition, `gen` is a keyword that cannot be used for naming any items or bindings. 
This means during the migration to the 2024 edition, all variables, functions, modules, types, etc. named `gen` must be renamed or be referred to via `r#gen`. -## Returning/finishing an iterator +## Return value `gen` blocks must diverge or return the unit type. Specifically, the trailing expression must be of the unit or `!` type, and any `return` statements in the block must either be given no argument at all or given an argument of the unit or `!` type. -### Diverging iterators +### Diverging -For example, a `gen` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ...`, will never return `None` from `next`, and only drop its captured data when the iterator is dropped: +For example, a `gen` block that produces the infinite sequence `0, 1, 0, 1, 0, 1, ..` will never return `None` from `next` and will only drop its captured state when the iterator is dropped. E.g.: ```rust gen { @@ -90,45 +89,36 @@ gen { } ``` -If a `gen` block panics, the behavior is very similar to `return`, except that `next` unwinds instead of returning `None`. +If a `gen` block panics, the behavior is similar to that of `return`, except that the call to `next` unwinds instead of returning `None`. ## Error handling -Within `gen` blocks, the `?` operator desugars as follows. When its argument returns a value indicating "do not short circuit" (e.g. `Option::Some(..)`, `Result::Ok(..)`, `ControlFlow::Continue(..)`), that value becomes the result of the expression as usual. When its argument returns a value indicating that short-circuiting is desired (e.g. `Option::None`, `Result::Err(..)`, `ControlFlow::Break(..)`), the value is first yielded (after being converted by `From::from` as usual), then the block returns immediately. +Within `gen` blocks, the `?` operator behaves as follows. When its argument is a value indicating "do not short circuit" (e.g. `Option::Some(..)`, `Result::Ok(..)`, `ControlFlow::Continue(..)`), that value becomes the result of the expression as usual. 
When its argument is a value indicating that short-circuiting is desired (e.g. `Option::None`, `Result::Err(..)`, `ControlFlow::Break(..)`), the value is first yielded (after being converted by `FromResidual::from_residual` as usual), then the block returns immediately. Even when `?` is used within a `gen` block, the block must return a value of type unit or `!`. That is, it does not return a value of `Some(..)`, `Ok(..)`, or `Continue(..)` as other such blocks might. -However, note that when `?` is used within a `gen` block, all `yield` statements will need to be given an argument of a compatible type. For example, if `None?` is used in an expression, then all `yield` statements will need to be given arguments of type `Option`. +However, note that when `?` is used within a `gen` block, all `yield` statements will need to be given an argument of a compatible type. For example, if `None?` is used in an expression, then all `yield` statements will need to be given arguments of some `Option` type (or of the `!` type). ## Fusing -Iterators produced by `gen` keep returning `None` when invoked again after they have returned `None` once. They do not implement `FusedIterator`, as that is not a language item, but may implement it in the future. +Iterators produced by `gen` return `None` from `next` repeatedly after having once returned `None` from `next`. However, they do not implement `FusedIterator`, as that is not a language item, but may do so in the future (see the future possibilities). ## Holding borrows across yields -Since the `Iterator::next` method takes `&mut self` instead of `Pin<&mut self>`, we cannot create self-referential `gen` blocks (but see the open questions).
Self-referential `gen` blocks occur when you hold a borrow to a local variable across a yield point: - -```rust -gen { - let x = vec![1, 2, 3, 4]; - let mut y = x.iter(); - yield y.next(); - yield Some(42); - yield y.next(); -} -``` - -or as a more common example: +Since the `Iterator::next` method takes `&mut self` instead of `Pin<&mut Self>`, we cannot create self-referential `gen` blocks without taking other steps (see the open questions). Self-referential `gen` blocks occur when holding a borrow to a local variable across a yield point. E.g.: ```rust gen { - let x = vec![1, 2, 3, 4]; - for z in x.iter() { - yield z * 2; + let xs = vec![1, 2, 3, 4]; + for x in xs.iter() { + yield x * 2; } + //~^ ERROR borrow may still be in use when `gen` block yields } ``` +This may in fact be a severe and surprising limitation, and whether we should take the steps necessary to address this before stabilization is left as an open question. + # Reference-level explanation [reference-level-explanation]: #reference-level-explanation @@ -138,69 +128,82 @@ In the 2024 edition we reserve `gen` as a keyword. Rust 2021 will use `k#gen` t ## Error handling -`foo?` in `gen` blocks will stop iteration after the first error by desugaring to: +`foo?` in `gen` blocks will stop iteration after the first error as if it desugared to: ```rust match foo.branch() { ControlFlow::Break(err) => { - yield R::from_residual(err); - return; + yield <_ as FromResidual>::from_residual(err); + return }, ControlFlow::Continue(val) => val, } ``` -This is the same behaviour that `collect::>()` performs on iterators over `Result`s. - ## Implementation This feature is mostly implemented via existing coroutines, though there are some special cases. -### `gen` blocks +We could say that `gen` blocks are the same as unstable coroutines... -`gen` blocks are the same as an unstable coroutine... 
- -* ...without arguments, -* ...with an additional check forbidding holding borrows across `yield` points, -* ...and with an automatic implementation of a trait allowing the type to be used in `for` loops (see the open questions). -* ...do not panic if invoked again after returning +- ...without arguments, +- ...with an additional check forbidding holding borrows across `yield` points, +- ...with an automatic implementation of a trait allowing the type to be used in `for` loops (see the open questions), +- ...that do not panic if invoked again after returning. # Drawbacks [drawbacks]: #drawbacks -It's another language feature for something that can already be written entirely in user code. +The main drawback is that this adds a language feature for something that can already be written entirely (if more painfully) in user code. -In contrast to `Coroutine`s (currently unstable), `gen` blocks that produce iterators cannot hold references across `yield` points. See [`from_generator`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`. +In contrast to full coroutines (currently unstable), `gen` blocks cannot hold references across `yield` points (see the open questions, and see [`from_coroutine`][] which has an `Unpin` bound on the generator it takes to produce an `Iterator`). -The `gen` keyword causes some fallout in the community, mostly around the `rand` crate, which has `gen` methods on its traits. +Reserving the `gen` keyword will require some adaptation from the ecosystem mostly due to the `rand` crate which has `gen` methods on its traits. -[`from_generator`]: https://doc.rust-lang.org/std/iter/fn.from_generator.html +[`from_coroutine`]: https://doc.rust-lang.org/1.77.0/core/iter/fn.from_coroutine.html # Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives ## Keyword -We could use `iter` as the keyword. I prefer `iter` because I connect generators with a more powerful scheme than plain `Iterator`s. 
The unstable `Coroutine` trait (which was previously called `Generator`) can do everything that `iter` blocks and `async` blocks can do and more. I believe connecting the `Iterator` trait with `iter` blocks is the right choice, but that would require us to carve out many exceptions for this keyword as `iter` is used for module names and method names everywhere (including libstd/libcore). It may not be much worse than `gen` (see also [the unresolved questions][unresolved-questions]). We may want to use `gen` for full on generators in the future. +We could use `iter` as the keyword. + +Due to unstable coroutines having originally been named "generators" within `rustc` and nightly Rust, some of the authors connect "generators" with this more powerful control flow construct that can do everything that `gen` blocks and `async` blocks can do and more. + +There is some appeal in syntactically connecting the `Iterator` trait with `iter` blocks, but that would require us to carve out many exceptions for this keyword as `iter` is widely used for module names and method names, not just in the ecosystem, but also in `libstd` and `libcore`. To what degree this might be worse than the situation for the `gen` keyword we leave as an open question. + +Not using the `gen` keyword now would leave open the possibility of using the `gen` keyword in the future for a kind of block that might produce types that implement a more powerful `Generator` trait (perhaps one that takes `self` by pinned reference) or that implement `Coroutine`. ## Do not do this -One alternative is to keep adding more helper methods to `Iterator`. It is already hard for new Rustaceans to be aware of all the capabilities of `Iterator`. Some of these new methods would need to be very generic. While it's not an `Iterator` example, [`array::try_map`][] is something that has very complex diagnostics that are hard to improve, even if it's nice once it works. 
+### Add more combinators + +One alternative is to instead add more helper methods to `Iterator`. -Users can use crates like [`genawaiter`](https://crates.io/crates/genawaiter) or [`propane`](https://crates.io/crates/propane) instead. `genawaiter` works on stable and provides `gen!` macro blocks that behave like `gen` blocks, but don't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` crate uses the `Coroutine` trait from nightly and works mostly like `gen` would. +However, it is already difficult for new users of Rust to become familiar with all of the many existing methods on `Iterator`. Further, some of the new methods we might want would need to be quite generic (similar to [`array::try_map`][]). -The standard library includes [`std::iter::from_fn`][], which can be used in some cases, but as we saw in the example [above][motivation], often the improvement over writing out a manual implementation of `Iterator` is limited. +[`array::try_map`]: https://doc.rust-lang.org/1.77.0/std/primitive.array.html#method.try_map -[`std::iter::from_fn`]: https://doc.rust-lang.org/std/array/fn.from_fn.html -[`array::try_map`]: https://doc.rust-lang.org/std/primitive.array.html#method.try_map +### Use crates -## `return` statements `yield` one last element +We could suggest that people use crates like [`genawaiter`][], [`propane`][], or [`iterator_item`][] instead. `genawaiter` works on stable Rust and provides `gen!` macro blocks that behave like `gen` blocks, but it doesn't have compiler support for nice diagnostics or language support for the `?` operator. The `propane` and `iterator_item` crates use the `Coroutine` trait from nightly and work mostly like `gen` would (but therefore require unstable Rust). -Similarly to `try` blocks, trailing expressions could yield their element. 
+[`genawaiter`]: https://crates.io/crates/genawaiter +[`propane`]: https://crates.io/crates/propane +[`iterator_item`]: https://crates.io/crates/iterator_item -There would then be no way to terminate iteration as `return` statements would have to have a value that is `yield`ed before terminating iteration. +### Use `iter::from_fn` -We could do something magical where returning `()` terminates the iteration, so this code... +The standard library includes [`std::iter::from_fn`][] which can be used in some cases, but as we saw in the [motivating example][motivation], often the improvement over writing out a manual implementation of `Iterator` is limited. + +[`std::iter::from_fn`]: https://doc.rust-lang.org/1.77.0/std/iter/fn.from_fn.html + +## Have trailing expressions yield one last element + +Trailing expressions could have a meaningful value that is yielded before terminating iteration. + +However, if we were to do this, we would need to add some other way to immediately terminate iteration without yielding a value. We could do something magical where returning `()` terminates the iteration, so that this code... ```rust fn foo() -> impl Iterator { @@ -208,23 +211,23 @@ fn foo() -> impl Iterator { } ``` -...could be a way to specify `std::iter::once(42)`. The issue I see with this is that this... +...could be a way to specify `std::iter::once(42)`. However, then logically this code... ```rust fn foo() -> impl Iterator { - gen { 42; } // note the semicolon + gen { 42; } // Note the semicolon. } ``` -...would then not return a value. +...would then not return a value due to the semicolon. -Furthermore this would make it unclear what the behaviour of this... +Further, this would make it unclear what the behavior of this... ```rust fn foo() -> impl Iterator { gen {} } ``` -...is supposed to be, as it could be either `std::iter::once(())` or `std::iter::empty::<()>()`. +...should be, as it could reasonably be either `std::iter::once(())` or `std::iter::empty::<()>()`.
# Prior art [prior-art]: #prior-art @@ -529,7 +532,7 @@ In Racket, generators can be built using [`generator`][racket-generators] and `y (= (g) 10) ``` -Note that because of the expressive power of [`call/cc`][racket-callcc] (and continuations in general), generators can be written in Racket as a normal library. +Note that because of the expressive power of [`call/cc`][racket-callcc] (and continuations in general), generators can be written in Racket (and in other Scheme dialects) as a normal library. [racket-callcc]: https://docs.racket-lang.org/reference/cont.html [racket-generators]: https://docs.racket-lang.org/reference/Generators.html @@ -564,60 +567,47 @@ There may be benefits to having the type returned by `gen` blocks *not* implemen We can allow `gen` blocks to hold borrows across `yield` points. Should this be part of the initial stabilization? -There are a few options for how to do this, either before or after stabilization (though this list is probably not complete): +Here are some options for how we might do this, either before or after stabilization: -* Add a separate trait for pinned iteration that is also usable with `gen` and `for`. - * *Downside*: We would have very similar traits for the same thing. -* Backward-compatibly add a way to change the argument type of `Iterator::next`. - * *Downside*: It's unclear whether this is possible. -* Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (whatever `G` is here, but it could be a `gen` block). - * *Downside*: Now the `next` method takes a double-indirection as an argument `&mut Pin<&mut G>`, which may not optimize well sometimes. +- Add a separate trait for pinned iteration that is also usable with `gen` and `for`. + - *Downside*: We would have very similar traits for the same thing. +- Backward compatibly add a way to change the argument type of `Iterator::next`. + - *Downside*: It's unclear whether this is possible. 
+- Implement `Iterator` for `Pin<&mut G>` instead of for `G` directly (for some type `G` that can be produced by `gen` blocks). + - *Downside*: The `next` method would take a double indirected reference of the form `&mut Pin<&mut G>` which may present challenges for optimization. -This RFC is forward compatible with any such designs. However, if we were to stabilize `gen` blocks that could not hold borrows across `yield` points, this would be a serious usability limitation that users might find surprising. Consequently, whether we should choose to address this before stabilization is an open question. +If we were to stabilize `gen` blocks that could not hold borrows across `yield` points, this would be a serious usability limitation that users might find surprising. Consequently, whether we should choose to address this before stabilization is an open question. ## Keyword -Should we use `iter` as the keyword, as we're producing `Iterator`s? We could use `gen` as proposed in this RFC and later extend its abilities to more powerful generators. +Should we use `iter` as the keyword since we're producing `Iterator`s? -[playground](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=efeacb803158c2ebd57d43b4e606c0b5) - -```rust -#![feature(generators)] -#![feature(iter_from_generator)] - -fn main() { - let mut it = std::iter::from_generator(|| { - yield 1 - }); - - assert_eq!(it.next(), Some(1)); - assert_eq!(it.next(), None); - it.next(); // panics -} -``` +Alternatively, we could use `gen` as proposed in this RFC and then later extend its abilities to include those of more powerful generators or coroutines, thereby justifying use of the more general name. ## Contextual keyword -Popular crates (like `rand`) have methods called [`gen`][Rng::gen]. 
If we forbid those, we are forcing those crates to make a major version bump when they update their edition, and we are requiring any users of those crates to use `r#gen` instead of `gen` when calling that method. - -We could choose to use a contextual keyword and only forbid `gen` in: +Popular crates (like `rand`) have methods named [`gen`][Rng::gen]. If we reserve `gen` as a full keyword, users of Rust 2024 and later editions would need to call these methods as `r#gen` until these crates update to make some accommodation. -* bindings -* field names (due to destructuring bindings) -* enum variants -* type names +We could instead choose to make `gen` a contextual keyword and only forbid it in: -This should avoid any parsing issues around `gen` followed by `{` in expressions. +- bindings +- field names (due to destructuring bindings) +- enum variants +- type names [Rng::gen]: https://docs.rs/rand/latest/rand/trait.Rng.html#method.gen ## `Iterator::size_hint` -Should we try to compute a conservative `size_hint`? This will reveal information from the body of a generator, but at least for simple cases users will likely expect `size_hint` to not just be the default. It is backwards compatible to later add support for opportunistically implementing `size_hint`. +Should we try to compute a conservative `size_hint`? -## Implement other `Iterator` traits. +Doing this would reveal information from the body of a generator. But, at least for simple cases, users would likely expect `size_hint` to not just be the default. -Is there a possibility for implementing traits like `DoubleEndedIterator`, `ExactSizeIterator` at all? +It is backward compatible to later add support for opportunistically implementing `size_hint`. + +## Implement other `Iterator` traits + +Might we later want to or be able to implement traits such as `DoubleEndedIterator`, `ExactSizeIterator`, etc.? 
## What to do about Rust 2015 and Rust 2018 @@ -631,12 +621,14 @@ Whether and how to make this feature available in Rust 2015 and Rust 2018, howev # Future possibilities [future-possibilities]: #future-possibilities -## `yield from` (forwarding operation) +## `yield from` (forwarding operator) -Python has the ability to `yield from` an iterator. Effectively this is syntax sugar for looping over all elements of the iterator and yielding them individually. There are infinite options to choose from if we want such a feature, so I'm listing general ideas: +Python has the ability to `yield from` an iterator. Effectively this is syntactic sugar for looping over all elements of the iterator and yielding each individually. There is a wide design space here, but some options are included in the following subsections. ### Do nothing, just use loops +Instead of adding special support for this, we could expect that users would write, e.g.: + ```rust for x in iter { yield x @@ -645,17 +637,17 @@ for x in iter { ### Language support -We could do something like postfix `yield`: +We could do something like postfix `yield`, e.g.: ```rust iter.yield ``` -Or we could use an entirely new keyword. +Alternatively, we could use an entirely new keyword. ### stdlib macro -We could add a macro to the standard library and prelude. The macro would expand to a `for` loop + `yield`. +We could add a macro to the standard library and prelude. The macro would expand to a `for` loop + `yield`. E.g.: ```rust yield_all!(iter) @@ -663,37 +655,39 @@ yield_all!(iter) ## Complete `Coroutine` support -We already have a `Coroutine` trait on nightly (previously called `Generator`) that is more powerful than the `Iterator` API could possibly be: +We have a `Coroutine` trait on nightly (previously called `Generator`) that is more powerful than the `Iterator` API could possibly be: -1. It uses `Pin<&mut Self>`, allowing self-references across yield points. -2. 
It has arguments (`yield` returns the arguments passed to it in the subsequent invocations). +1. `resume` takes `Pin<&mut Self>`, allowing self-references across yield points. +2. `yield` returns the argument passed to `resume`. -Similar to the ideas around `async` closures, I think we could argue for coroutines to be `gen` closures while `gen` blocks are a simpler concept that has no arguments and only captures variables. +We could perhaps argue for coroutines to be `gen` closures while leaving `gen` blocks as a simpler concept. -Either way, support for full coroutines should be discussed and implemented separately, as there are many more open questions around them beyond a simpler way to write `Iterator`s. +There are many open questions here, so we leave this to future work. ## `async` interactions -We could support using `await` in `gen async` blocks, similar to how we support `?` being used within `gen` blocks. We'd have similar limitations holding references held across `await` points as we do have with `yield` points. The solution space for `gen async` is large enough that I will not explore it here. This RFC's design is forward compatible with anything we decide on. +We could support using `await` in `gen async` blocks in a similar way to how we support `?` being used within `gen` blocks. Without a solution for self-referential generators, we'd have the limitation that these blocks could not hold references across `await` points. -At present it is only possible to have a `gen` block yield futures, but not `await` within it, similar to how you cannot write iterators that `await`, but that return futures from `next`. +The solution space here is large. This RFC is forward compatible with the solutions we can foresee, so we leave this to later work. ## `try` interactions We could allow `gen try fn foo() -> i32` to mean something akin to `gen fn foo() -> Result`. Whatever we do here, it should mirror whatever `try fn` means in the future. 
-## `gen fn`: +## `gen fn` -This does not introduce `gen fn`. The syntax design for them is fairly large and there are open questions around the difference between returning or yielding a type. +This RFC does not introduce `gen fn`. The syntax design space for this is large and there are open questions around the difference between returning or yielding a type. The options currently known include, e.g.: ```rust -fn foo(args) yield item -fn foo(args) yields item -fn foo(args) => item -fn* foo(args) -> item // or any of the `fn foo` variants for the item type -gen fn foo(args) // or any of the above variants for the item type -gen foo(args) // or any of the above variants for the item type -generator fn foo(args) // or any of the above variants for the item type +fn foo(..) yield .. { .. } +fn foo(..) yields .. { .. } +fn foo(..) => .. { .. } +// Each of the below may instead be combined +// with `yield`, `yields`, or `=>`. +fn* foo(..) -> .. { .. } +gen fn foo(..) -> .. { .. } +gen foo(..) -> .. { .. } +generator fn foo(..) -> .. { .. } ``` ## Implement `FusedIterator` From 7eacd063df88d949c1c6834e1750d9312a2a9e26 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Thu, 28 Mar 2024 05:50:29 +0000 Subject: [PATCH 42/45] Add Koka example to prior art The Koka language provides an interesting alternative data point for how generators and other powerful control flow constructs could work in a typed language such as Rust. Let's include an example in the prior art section. (Thanks to zesterer for asking for this.) 
--- text/3513-gen-blocks.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index 5c39fe862ba..2defa4a09bc 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -556,6 +556,32 @@ main = putStrLn $ show $ take 5 $ oddDup [1..20] [clean-lang]: https://wiki.clean.cs.ru.nl/Clean [idris-lang]: https://www.idris-lang.org/ +## Koka + +The [Koka][] language, by contrast, does not lean on laziness. Instead, like Scheme, Koka provides powerful general control flow constructs from which generators, async, coroutines, and other such things fall out naturally. Unlike Scheme, these powerful control flow constructs are *typed* and are called effect handlers. E.g.: + +```koka +effect yield + fun yield(x : a) : () + +fun odd_dup(xs : list) : yield () + match xs + Cons(x,xx) -> + if x % 2 == 1 then + yield(x * 2) + odd_dup(xx) + Nil -> () + +fun main() : console () + with fun yield(i : int) + println(i.show) + list(1,20).odd_dup +``` + +Note that there is no library being used here and that `yield` is not a keyword or feature of the language. In Koka, the code above is all that is needed to express generators. + +[koka]: https://koka-lang.github.io/ + # Unresolved questions [unresolved-questions]: #unresolved-questions From a02190547f16286ae5a4fefa1f7e89a12675ff24 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Thu, 28 Mar 2024 19:17:44 +0000 Subject: [PATCH 43/45] Add Rust example to prior art Using the no-op `Waker`, we can express generators and coroutines in Rust. Let's close our list of prior art examples with that. 
--- text/3513-gen-blocks.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index 2defa4a09bc..bfff8676c7e 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -582,6 +582,31 @@ Note that there is no library being used here and that `yield` is not a keyword [koka]: https://koka-lang.github.io/ +## Rust + +In Rust, `async` blocks are built on top of the coroutine transformation. Using a no-op `Waker`, it's possible to expose this transformation. With that, we can build generators. Without the assistance of macros, the result looks like this: + +```rust +let odd_dup = |xs| { + Gen::new(async move |mut y| { + for x in xs { + if x % 2 == 1 { + y.r#yield(x * 2).await; + } + } + }) +}; + +let odd_dup = pin!(odd_dup(1u8..20)); +let odd_dup = odd_dup.init(); + +for (i, x) in odd_dup.enumerate() { + assert_eq!((i as u8 * 2 + 1) * 2, x); +} +``` + +Crates such as [`genawaiter`][] use this technique. + # Unresolved questions [unresolved-questions]: #unresolved-questions From 354abf6626fab98787bce1c7e44e2bb7d974b762 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Fri, 29 Mar 2024 05:43:52 +0000 Subject: [PATCH 44/45] Add note about `return yield EXPR` Under this RFC, it's possible to yield one last value concisely with `return yield EXPR`. Let's make a note of that. (Thanks to Nemo157 for pointing this out and to pnkfelix for suggesting that this be noted in the RFC.) --- text/3513-gen-blocks.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index bfff8676c7e..fb86109aa76 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -229,6 +229,8 @@ fn foo() -> impl Iterator { gen {} } ...should be, as it could reasonably be either `std::iter::once(())` or `std::iter::empty::<()>()`. 
+Note that, under this RFC, because `return` within `gen` blocks accepts an argument of type `()` and `yield` within `gen` blocks returns the `()` type, it is possible to yield one last element concisely with `return yield EXPR`. + # Prior art [prior-art]: #prior-art From 22f726707af999eddf6105d64bdbc78173e8b3ea Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Sat, 30 Mar 2024 05:26:55 +0000 Subject: [PATCH 45/45] Use RLE as a stronger motivating example The motivating example we had given for `gen` blocks admitted too easy an implementation with existing stable iterator combinators. Let's make the example more *motivating* by showing a simple algorithm, run-length encoding, that's more difficult to implement in other ways. (Thanks to Ralf Jung for pointing out the need for a better example.) --- text/3513-gen-blocks.md | 119 +++++++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 32 deletions(-) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index fb86109aa76..fe27a819115 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -13,53 +13,108 @@ This RFC reserves the `gen` keyword in the Rust 2024 edition for generators and Writing iterators manually can be painful. Many iterators can be written by chaining together iterator combinators, but some need to be written with a manual implementation of `Iterator`. This can push people to avoid iterators and do worse things such as writing loops that eagerly store values to mutable state. With `gen` blocks, we can now write a simple `for` loop and still get a lazy iterator of values. -By way of example, consider these ways of expressing the same function: +By way of example, consider these alternate ways of expressing [run-length encoding][]: -```rust -// This example uses iterator combinators. 
-fn odd_dup(xs: impl IntoIterator) -> impl Iterator { - xs.into_iter().filter(|x| x.is_odd()).map(|x| x * 2) -} +[run-length encoding]: https://en.wikipedia.org/wiki/Run-length_encoding -// This example uses `iter::from_fn`. -fn odd_dup(xs: impl IntoIterator) -> impl Iterator { - let mut xs = xs.into_iter(); - std::iter::from_fn(move || { - while let Some(x) = xs.next() { - if x.is_odd() { - return Some(x * 2); +```rust +// This example uses `gen` blocks, introduced in this RFC. +fn rl_encode>( + xs: I, +) -> impl Iterator { + gen { + let mut xs = xs.into_iter(); + let (Some(mut cur), mut n) = (xs.next(), 0) else { return }; + for x in xs { + if x == cur && n < u8::MAX { + n += 1; + } else { + yield n; yield cur; + (cur, n) = (x, 0); } } - None - }) + yield n; yield cur; + }.into_iter() } // This example uses a manual implementation of `Iterator`. -fn odd_dup(xs: impl IntoIterator) -> impl Iterator { - struct OddDup(T); - impl> Iterator for OddDup { - type Item = u32; - fn next(&mut self) -> Option { - while let Some(x) = self.0.next() { - if x.is_odd() { - return Some(x * 2) +fn rl_encode>( + xs: I, +) -> impl Iterator { + struct RlEncode> { + into_xs: Option, + xs: Option<::IntoIter>, + cur: Option<::Item>, + n: u8, + yield_x: Option<::Item>, + } + impl> Iterator for RlEncode { + type Item = u8; + fn next(&mut self) -> Option { + let xs = self.xs.get_or_insert_with(|| unsafe { + self.into_xs.take().unwrap_unchecked().into_iter() + }); + if let Some(x) = self.yield_x.take() { + return Some(x); + } + loop { + match (xs.next(), self.cur) { + (Some(x), Some(cx)) + if x == cx && self.n < u8::MAX => self.n += 1, + (Some(x), Some(cx)) => { + let n_ = self.n; + (self.cur, self.n) = (Some(x), 0); + self.yield_x = Some(cx); + return Some(n_); + } + (Some(x), None) => { + (self.cur, self.n) = (Some(x), 0); + } + (None, Some(cx)) => { + self.cur = None; + self.yield_x = Some(cx); + return Some(self.n); + } + (None, None) => return None, } } - None } } - 
OddDup(xs.into_iter()) + RlEncode { + into_xs: Some(xs), xs: None, cur: None, n: 0, yield_x: None, + } } -// This example uses `gen` blocks, introduced in this RFC. -fn odd_dup(xs: impl IntoIterator) -> impl Iterator { - gen { - for x in xs { - if x.is_odd() { - yield x * 2; +// This example uses `iter::from_fn`. +fn rl_encode>( + xs: I, +) -> impl Iterator { + let (mut cur, mut n, mut yield_x) = (None, 0, None); + let (mut into_xs, mut xs) = (Some(xs), None); + core::iter::from_fn(move || loop { + let xs = xs.get_or_insert_with(|| unsafe { + into_xs.take().unwrap_unchecked().into_iter() + }); + if let Some(x) = yield_x.take() { + return Some(x); + } + match (xs.next(), cur) { + (Some(x), Some(cx)) if x == cx && n < u8::MAX => n += 1, + (Some(x), Some(cx)) => { + let n_ = n; + (cur, n) = (Some(x), 0); + yield_x = Some(cx); + return Some(n_); + } + (Some(x), None) => (cur, n) = (Some(x), 0), + (None, Some(cx)) => { + cur = None; + yield_x = Some(cx); + return Some(n); } + (None, None) => return None, } - }.into_iter() + }) } ```