From 6e0289c97b0593be2b5628ac2c769559a8c57954 Mon Sep 17 00:00:00 2001 From: Jason Walton Date: Mon, 24 Apr 2023 15:40:28 -0400 Subject: [PATCH] More proofreading. --- docs/ch00-intro.md | 2 +- docs/ch09-error-handling.md | 6 + docs/ch12-io-project-cli.md | 2 +- docs/ch15-smart-pointers.md | 128 +++++---- docs/ch16-fearless-concurrency.md | 42 +-- docs/ch17-object-oriented-features.md | 40 ++- docs/ch18-patterns-and-matching.md | 9 +- docs/ch19/ch19-01-unsafe.md | 18 +- docs/ch19/ch19-02-advanced-traits.md | 54 ++-- docs/ch19/ch19-03-advanced-types.md | 10 +- ...ch19-04-advanced-functions-and-closures.md | 4 +- docs/ch19/ch19-05-macros.md | 16 +- .../ch20-01-single-threaded-web-server.md | 10 +- .../ch20/ch20-02-multi-threaded-web-server.md | 23 +- docs/ch20/ch20-03-graceful-shutdown.md | 7 +- docs/ch21-async.md | 32 +-- docs/images/conslist.svg | 271 ++++++++++++++++++ examples/ch20-graceful-shutdown/src/lib.rs | 1 + examples/ch20-graceful-shutdown/src/main.rs | 2 +- .../ch20-multi-threaded-web-server/src/lib.rs | 1 + .../src/main.rs | 2 +- .../src/main.rs | 2 +- 22 files changed, 493 insertions(+), 189 deletions(-) create mode 100755 docs/images/conslist.svg diff --git a/docs/ch00-intro.md b/docs/ch00-intro.md index 7dfa658..c5d7854 100644 --- a/docs/ch00-intro.md +++ b/docs/ch00-intro.md @@ -7,7 +7,7 @@ hide_title: true

The Rust Book (Abridged)

-

v0.0.2 - Draft

+

v0.1.0 - Draft

By Jason Walton

Based on "The Rust Programming Language" by Steve Klabnik and Carol Nichols.

diff --git a/docs/ch09-error-handling.md b/docs/ch09-error-handling.md index d7bc57d..9a0bc8a 100644 --- a/docs/ch09-error-handling.md +++ b/docs/ch09-error-handling.md @@ -23,6 +23,12 @@ fn main() { When a panic occurs in the main thread, it halts the program. If the `RUST_BACKTRACE=1` environment variable is set, then the program will also print a stack trace showing where the panic happened, although this only works if the binary contains debug symbols. (If a panic occurs in another thread, it will only halt that thread. See [chapter 16][chap16].) +:::tip + +There's also a `todo!` macro which works just like the `panic!` macro, which can be used to mark places in your code where you have yet to fill in an implementation. + +::: + ### Unwinding the Stack or Aborting in Response to a Panic There are two options for what happens when a panic occurs. By default, the program starts _unwinding_, which means it starts walking back up the stack, freeing memory and cleaning up data. The alternative is _aborting_ in which the program just immediately halts and lets the OS clean up everything (if you've ever written a C program, you've probably at some point seen the dreaded message "segmentation fault (core dumped)" - aborting is a bit like this). diff --git a/docs/ch12-io-project-cli.md b/docs/ch12-io-project-cli.md index 4a6e93b..909ad90 100644 --- a/docs/ch12-io-project-cli.md +++ b/docs/ch12-io-project-cli.md @@ -160,7 +160,7 @@ use std::{error::Error, fs}; pub fn run(config: Config) -> Result<(), Box> { let contents = fs::read_to_string(config.file_path)?; - // TODO: Implement me! + todo!("Implement me!"); Ok(()) } diff --git a/docs/ch15-smart-pointers.md b/docs/ch15-smart-pointers.md index 6af49fb..cb5b367 100644 --- a/docs/ch15-smart-pointers.md +++ b/docs/ch15-smart-pointers.md @@ -2,15 +2,15 @@ In C++, whenever we want to store an object on the heap, we `new` that object to allocate some memory. At some later point in time, we have to `delete` that memory. 
This is much like `malloc` and `free` in standard C. -C++ has a few different "smart pointers" that will delete that memory for you at the appropriate time. The most commonly used is probably `shared_ptr`, which keeps a "reference count". Every time your copy a `shared_ptr` it increments the reference count, and every time one is destroyed it decrements the count. Once the count reaches 0, `shared_ptr` knows there are no more references to the underlying memory it is safe to be freed. +C++ has a few different "smart pointers" that will delete that memory for you at the appropriate time. The most commonly used is probably `shared_ptr`, which keeps a _reference count_ on the heap. Every time you clone a `shared_ptr` it increments the reference count (which is shared between all the clones), and every time one is destroyed it decrements the count. Once the count reaches 0, `shared_ptr` knows there are no more references to the underlying memory so it is safe to be freed. -Rust has a variety of smart pointer objects as well, which allow us to store values on the heap, including `Rc` which works much like `shared_ptr` and allows us to share ownership of a value across multiple variables in code. This chapter will explore a few of the different smart pointer implementations in Rust and where you might want to use them. +Rust has a variety of smart pointer objects as well, which allow us to store values on the heap, including `Rc` which works much like C++'s `shared_ptr` and allows us to effectively share ownership of a value across multiple variables in code. This chapter will explore a few of the different smart pointer implementations in Rust and where you might want to use them. Smart pointers in Rust generally implement the `Drop` trait (so they can run some custom code when they are dropped, like decrementing a reference count) and the `Deref` trait (which lets a smart pointer be used in place of a reference to the underlying value). 
## 15.1 - Using `Box` to Point to Data on the Heap -`Box` is not a very exciting smart pointer. It's perhaps the "least smart" of the smart pointers. `Box` lets us store a single piece of data on the heap instead of on the stack: +`Box` is perhaps the "least smart" of the smart pointers. `Box` lets us store a single piece of data on the heap instead of on the stack: ```rust title="src/main.rs" fn main() { @@ -21,11 +21,11 @@ fn main() { Here "5" gets stored as four bytes on the heap instead of as four bytes on the stack. Notice that we can use `b` exactly like a `&i32` when we pass it to `println!`. -Why would we want to do this, though? When we're passing data around on the stack, Rust has to know the size of that data at compile time. When we pass an `i32` as a parameter, for example, Rust knows that it's going to need 4 bytes on the stack to hold that parameter. But sometimes we don't know the size of a value ahead of time, and this is where `Box` is useful - examples would be recursive data structures (which can be "infinitely" large since they can contain more of themselves) and trait objects, where we want to claim that a parameter implements a specific trait but we don't care what concrete type the parameter is (we'll talk more about these in [chapter 17](./ch17-object-oriented-features.md#172---using-trait-objects-that-allow-for-values-of-different-types)). +Why would we want to do this? When we're passing data around on the stack, Rust has to know the size of that data at compile time. When we pass an `i32` as a parameter, for example, Rust knows that it's going to need 4 bytes on the stack to hold that parameter. 
But sometimes we don't know the size of a value ahead of time, and this is where `Box` is useful - examples would be recursive data structures (which can be "infinitely" large since they can contain more of themselves) and trait objects, where we want to claim that a parameter implements a specific trait but we don't care what concrete type the parameter is (we'll talk more about these in [chapter 17](./ch17-object-oriented-features.md#172---using-trait-objects-that-allow-for-values-of-different-types)). -In these cases, instead of passing the value directly on the stack, we pass the `Box` on the stack and put the unknown-sized value on the heap. +In these cases, instead of passing the value directly on the stack, we pass the `Box` on the stack and put the unknown-sized value on the heap. The size of `Box` is known at compile time, so the compiler can do its thing. -Another example where `Box` would be useful is where you have some particularly large piece of data that you want to pass around. Values passed on the stack are passed-by-copy, and copying large amounts of data can be inefficient. Storing the data on the heap lets us pass around copies of the relatively small `Box` instead. +Another example where `Box` would be useful is where you have some particularly large piece of data that you want to pass around. Values passed on the stack are pass-by-copy, and copying large amounts of data can be inefficient. Storing the data on the heap lets us pass around copies of the relatively small `Box` instead. ### Enabling Recursive Types with Boxes @@ -50,7 +50,7 @@ fn main() { This is probably not a data structure you'd actually want to use in Rust, but it's a recursive data structure that's convenient for this example. If you try to compile the above, it will fail, because Rust can't work out the size of the `list` variable to store it on the stack. -For an enum, Rust will allocate enough memory to store the largest of the enum's variants. 
Here the largest is going to be `Cons`, which can hold an `i32` and a `List`, so it's four bytes long plus the size of a `List`. But this is a recursive definition - `sizeof(List) = 4 + sizeof(List)`. This makes `rustc` unhappy. +For an enum, Rust will allocate enough memory to store the largest of the enum's variants. Here the largest is going to be `Cons`, which can hold an `i32` and a `List`, so it's four bytes long plus the size of a `List`. But this is a recursive definition - `sizeof(List) = 4 + sizeof(List)`. This makes `rustc` an unhappy compiler. The solution is to move this to the heap: @@ -67,11 +67,11 @@ fn main() { } ``` -Now `sizeof(Cons) = 4 + sizeof(Box)`, and the size of `Box` doesn't depend on the size of `` (since that part is stored on the heap), so this is something we can work out. +Now `sizeof(Cons) = 4 + sizeof(Box)`, and the size of `Box` doesn't depend on the size of `` (since that part is stored on the heap), so the size of the `list` variable is known at compile time. -## 15.2 Treating Smart Pointers Like Regular References with the Deref Trait +## 15.2 Treating Smart Pointers Like Regular References with the `Deref` Trait -In this section we're going to implement our own smart pointer called `MyBox`. Our smart pointer won't actually store anything on the heap, it will just store things on the stack, because what we really want to do here is explore the `Deref` trait. +In this section we're going to implement our own smart pointer called `MyBox`. Our smart pointer won't actually store anything on the heap, it will just store things on the stack. It will be even less smart than a regular `Box`, but it will give us a chance to explore the `Deref` trait. ### Following the Pointer to the Value @@ -87,11 +87,11 @@ fn main() { } ``` -Here `x` is of type `i32`, but `y` is of type `&i32`. `y` is essentially a pointer to `x`. 
We can assert that x is equal to 5, but in order to get to the value of `y` we have to _dereference_ it to get to the value that `y` points to. +Here `x` is of type `i32`, but `y` is of type `&i32`. `y` is essentially a pointer to `x`. We can assert that x is equal to 5, but in order to get to the value of `y` we have to _dereference_ it to get to the value that `y` points to. Rust will automatically dereference a value for you in many places, so the `*` operator doesn't get much use in Rust, but there are places (like in this example) where it is required. :::info -If you're coming from a language like C or Go, this is probably second nature to you. If you're coming from JavaScript, this might be a new concept. `y` here points to the memory that stores the `x` value, so `*y` is basically an alias for `x`. If `x` and `y` were mutable, we could use `*y` to change x: +If you're coming from a language like C or Go, this is probably second nature to you. If you're coming from JavaScript, this might be a new concept. Because `y` here points to the memory that stores the `x` value, you can think about `*y` as basically an alias for `x`. If `x` and `y` were mutable, we could use `*y` to change x because it points to the memory where `x` is stored: ```rust title="src/main.rs" fn main() { @@ -102,25 +102,29 @@ fn main() { } ``` -A reference in Rust is a little bit like a `Ref` from React that points to an object, if that helps: +Another way to think about this is that a reference in Rust is a little bit like a `Ref` from React that points to an object: ```ts title="typescript.ts" +interface Num { + value: number; +} + interface Ref { current: T; } function main() { - const x = { value: 5 }; - const y = { current: x }; + const x: Num = { value: 5 }; + const y: Ref = { current: x }; assert.equal(x.value, 5); assert.equal(y.current.value, 5); } ``` -::: +In our Rust example, the `*y` is basically doing the same thing as `y.current` in our TypeScript example. 
-In our Rust example, the `*y` is basically doing the same thing as `y.current` in our TypeScript example. Rust will automatically dereference a value for you in many places, so the `*` operator doesn't get much use in Rust, but there are places (like in this example) where it is required. +::: ### Using `Box` Like a Reference @@ -138,7 +142,7 @@ fn main() { ### Defining Our Own Smart Pointer -Let's create our own smart pointer and learn how the `Deref` trait works. To do this we'll create a simple "pointer" that stores the value in a generic named tuple: +Let's create our own smart pointer so we can implement the `Deref` trait. To do this we'll create a simple "pointer" that stores a value in a generic named tuple: ```rust struct MyBox(T); @@ -150,7 +154,7 @@ impl MyBox { } ``` -And then, in order to let us use the `*` operator on `MyBox`, we implement the `Deref` trait. This trait has only one required method we have to implement called `deref` which borrows self and returns the inner value: +And then, in order to let us use the `*` operator on `MyBox`, we implement the `Deref` trait. This trait has only one required method for us to implement called `deref`, which borrows self and returns the inner value: ```rust use std::ops::Deref; @@ -171,14 +175,14 @@ And now we can do: ```rust title="src/main.rs" fn main() { let x = 5; - let y = Box::new(x); + let y = MyBox::new(x); assert_eq!(5, x); assert_eq!(5, *y); } ``` -When we write `*y` here, what's actually happening is Rust is going to replace this with `*(y.deref())`. +When we write `*y` here (or on any object that implements `Deref`), what's actually happening is Rust is going to replace this with `*(y.deref())`. ### Implicit Deref Coercions with Functions and Methods @@ -195,9 +199,9 @@ fn main() { } ``` -The reason this works is because of a feature called _deref coercion_. 
If we pass a ref of the wrong type to a function, but that ref implements `Deref`, then Rust will call `deref` on the value (possibly more than once) to convert it to the correct type. For example, `String` [implements the `Deref` trait](https://github.com/rust-lang/rust/blob/b0884a3528c45a5d575e182f407c759d243fdcba/library/alloc/src/string.rs#L2442-L2450) and returns a `&str`, so we can convert `&String` to `&str`. +The reason this works is because of a feature called _deref coercion_. If we call a function that takes a reference to type `X`, but instead we pass a reference to type `Y`, then if `Y` implements `Deref` Rust will call `deref` on the value we passed in (possibly more than once!) to convert it into a reference to the correct type. For example, `String` [implements the `Deref` trait](https://github.com/rust-lang/rust/blob/b0884a3528c45a5d575e182f407c759d243fdcba/library/alloc/src/string.rs#L2442-L2450) and returns a `&str`, so Rust can automatically convert a `&String` to `&str`. -If we were to pass a `&MyBox` to the `foo` function above, Rust would convert it to a `&String` via `MyBox`'s `deref` method, and then into a `&str` via `String`'s `deref` method. +If we were to pass a `&MyBox` to the `hello` function above, Rust would convert it to a `&String` via `MyBox`'s `deref` method, and then into a `&str` via `String`'s `deref` method. If Rust didn't implement deref coercion, we'd have to write something like: @@ -214,13 +218,17 @@ And no one wants to write that. The `Deref` trait only works with immutable references, but there is also a `DerefMut` trait for mutable references. Rust will do deref coercion in three cases: -Rust does deref coercion when it finds types and trait implementations in three cases. If you have a `&T` and you want an `&U`, then if `T` implements `Deref` to type `U`, then rust will take care of this for you, just like we saw above. 
The same is true if you have a `&mut T` and want an `&mut U`, but here the conversion will happen via the `DerefMut` trait instead. If you want to mix these, if you have a `&mut T` and you want a `&U`, then Rust will use the `Deref` trait on type `T` to convert to a `U`. Obviously ownership rules prevent Rust from automatically converting a `&T` to a `&mut U`. +- If you have a `&T` and you want an `&U`, then if `T` implements `Deref` to type `&U`, then Rust will take care of this for you, just like we saw above. +- If you have a `&mut T` and want an `&mut U`, this will happen in exactly the same way but here the conversion will happen via the `DerefMut` trait instead. +- If you have a `&mut T` and you want a `&U`, then Rust will use the `Deref` trait on type `T` to convert the mutable ref to an immutable `&U`. + +Obviously ownership rules prevent Rust from automatically converting a `&T` to a `&mut U`. ## 15.3 - Running Code on Cleanup with the `Drop` trait -The `Drop` trait allows us to specify some code that must be run whenever a struct is dropped (when it goes out of scope). The `Drop` trait is almost always used when implementing a smart pointer. `Box` implements `Drop` so it can clean up the memory it is using on the heap. The `Rc` type (which will talk about in the [next section](#154---rct-the-reference-counted-smart-pointer)) implements `Drop` so it can decrement the reference count. +The `Drop` trait allows us to specify some code that must be run whenever a struct is dropped (i.e. when it goes out of scope). The `Drop` trait is almost always used when implementing a smart pointer. `Box` implements `Drop` so it can clean up the memory it is using on the heap. The `Rc` type (which we'll talk about in the [next section](#154---rct-the-reference-counted-smart-pointer)) implements `Drop` so it can decrement a reference count. -`Drop` can also be used like [RAII in C++](https://en.cppreference.com/w/cpp/language/raii). 
If you have a struct that opens a network connection in its constructor, you can implement the `Drop` trait to ensure the network connection is closed when the struct is dropped, ensuring you won't leak any resources. +`Drop` can also be used to clean up other resources. If you have a struct that opens a network connection in its constructor, you can implement the `Drop` trait to ensure the network connection is closed when the struct is dropped, ensuring you won't leak any resources. This is a pattern borrowed from C++ called ["Resource Acquisition Is Initialization" or RAII](https://en.cppreference.com/w/cpp/language/raii). The `Drop` trait is included in the prelude, and has only one required method named `drop`. Let's see an example: @@ -243,6 +251,9 @@ fn main() { data: String::from("other stuff"), }; println!("CustomSmartPointers created."); + + // `drop` is called automatically on `c` + // and `d` here. } ``` @@ -250,7 +261,7 @@ If you run this, you'll see the `drop` method gets called automatically for `c` ### Dropping a Value Early with `std::mem::drop` -Sometimes we may want to drop a value earlier than it would normally get dropped at the end of the scope. For example, if we're using RAII to acquire some resource like a lock or a network connection, we may want to drop a value to release that resource before we reach the end of the function. +Sometimes we may want to drop a value earlier than it would normally get dropped at the end of the scope. For example, if we're using the RAII pattern to acquire some resource like a lock or a network connection, we may want to drop that value early to release that resource before we reach the end of the function. We cannot simply call the `drop` method on a type, however, as the Rust compiler is going to call it for us, and we don't want to _double free_ any memory or resources by calling `drop` twice. 
Instead we can call `std::mem::drop`, passing in the value we want to drop: @@ -267,13 +278,11 @@ fn main() { ## 15.4 - `Rc`, the Reference Counted Smart Pointer -`Rc` is a reference counting smart pointer (this is why it's named `Rc`), conceptually very similar to C++'s `shared_ptr`. Note that `Rc` isn't thread safe - we'll talk about how to solve this problem in a multithreaded program in [chapter 16][chap16]. -`Rc` is used in the case where we have some data we want to use in multiple places, but we're not sure at compile time who is going to be finished with this data first. +`Rc` is a _reference counting_ smart pointer (this is why it's named `Rc`), conceptually very similar to C++'s `shared_ptr`. Note that `Rc` isn't thread safe - we'll talk about a multithreaded alternative called `Arc` in [chapter 16][chap16]. `Rc` is used in the case where we have some data we want to use in multiple places, but we're not sure at compile time who is going to be finished with this data first. -If we model a graph as a collection of edges and nodes, then we might decide that an edge owns the nodes it connects to, but obviously any given node could connect to more than one edge, and in Rust any piece of data can only have one owner. But what we want want is for a node to be dropped once it's no longer attached to any edges. +If we model a graph as a collection of edges and nodes, then we might decide that an edge owns the nodes it connects to. But, any node could be connected to by multiple edges, and in Rust any piece of data can only have one owner. What we want is for a node to be dropped once it's no longer attached to any edges, so we want some kind of shared ownership. -The idea behind `Rc` is that it allocates some data on the heap and a counter on the heap, and sets that counter to 1. Whenever we make a copy of an `Rc`, the copy points to the same memory and the same counter, an increments the counter by one. 
Whenever an `Rc` is dropped, it decrements the counter by 1 and if the counter is 0 then it can safely free the memory on the heap. Each instance of `Rc` is only owned by one variable, just like normal Rust ownership rules, and since `Rc` is quite a small data structure - really just a pointer - it is quite inexpensive to copy, so we're free to copy it and have lots of owners. +The idea behind `Rc` is that it allocates some data on the heap and a counter on the heap, and sets that counter to 1. Whenever we make a clone of an `Rc`, the clone points to the same memory and the same counter, and increments the counter by one. Whenever an `Rc` is dropped, it decrements the counter by 1 and if the counter is 0 then it can safely free the memory on the heap. Each instance of `Rc` is only owned by one variable, just like normal Rust ownership rules. `Rc` is quite a small data structure - really just a pointer - so it is quite inexpensive to copy. The end result is something that looks and behaves a lot like multiple ownership. ### Using `Rc` to Share Data @@ -294,7 +303,11 @@ fn main() { } ``` -We have list `a`, and then we make this the tail of both list `b` and list `c`. The problem we're going to run into here is that the `Box` type owns the value we put in it, so when we create `b` we move `a` into a `Box`. When we try to create `c`, `a` has already been moved, so we can't move it again. +We have list `a`, and then we make this the tail of both list `b` and list `c`. We're essentially trying to create this data structure: + +![Diagram of Cons list](./images/conslist.svg) + +The problem we're going to run into here is that the `Box` type owns the value we put in it, so when we create `b` we move `a` into a `Box`. When we try to create `c`, `a` has already been moved, so we can't move it again. 
We could fix this particular example with some lifetime references, but that won't work in all situations, so instead we'll fix this with `Rc`: @@ -316,11 +329,15 @@ fn main() { } ``` -Now instead of `b` and `c` taking ownership of `a`, then make a clone of `a` which increments `Rc`'s internal reference count by one. We could have called `a.clone()` instead of `Rc::clone(&a)` here - these do the same thing. The reason we don't is more for reasons of convention. For most types, `a.clone()` would perform a deep copy of the value and all of it's data, so `a.clone()` stands out to the experienced Rust programmer as a potential performance problem. Here we use `Rc::clone(&a)` instead to signal to the reader "No, this is OK, we're just cloning an `Rc`. -We've also shown here that we can get the reference count out of an `Rc`. Try experimenting with the above code and see what the count is at various points during execution. If you create a scope around `c`, you can see the reference count decrement when `c` is dropped. +Now instead `a` is an `Rc`, and instead of `b` and `c` taking ownership of `a`, they each make a clone of `a` instead. Each clone increments `Rc`'s internal reference count by one. -You may have noticed that we're calling `Rc::strong_count` to get the reference count. If you know what a weak reference is, you'll be unsurprised to learn there's also an `Rc::weak_count`, which we'll hear about more a little later in this chapter. (TODO: link) +:::info + +We could have called `a.clone()` instead of `Rc::clone(&a)` here - these do the same thing. We use `Rc::clone` for reasons of convention. For most types, `a.clone()` would perform a deep copy of the value and all of its data, so a call to `a.clone()` stands out to the experienced Rust programmer as a potential performance problem. Here we use `Rc::clone(&a)` instead to signal to the reader "This is OK, we're just cloning an `Rc`."
+ +::: + +We've also shown here that we can get the reference count out of an `Rc`. Try experimenting with the above code and see what the count is at various points during execution. If you create a scope around `c`, you can see the reference count decrement when `c` is dropped. You may have noticed that we're calling `Rc::strong_count` to get the reference count. If you know what a weak reference is, you'll be unsurprised to learn there's also an `Rc::weak_count`, which we'll hear about more a little [later in this chapter](#preventing-reference-cycles-turning-an-rct-into-a-weakt). Since there are multiple references to the data held by `Rc`, then by Rust ownership rules, this data is going to be read only - we can't get a mutable reference to it. @@ -334,15 +351,13 @@ pub fn to_lowercase(&self) -> String { // --snip-- ``` -Oh... wait... `to_lowercase` borrows an immutable reference to `self`, so we can't mutate self. And obviously we can't change the signature of `to_lowercase` without breaking a lot of code. - -The borrow checker stops us here, because what we're doing isn't _safe_ - we're mutating an immutable data structure and this could cause a bug elsewhere in the code. But... you an I are smarter than the compiler here. You and I know that incrementing this counter and then reading it out somewhere else isn't going to hurt anything. From any code outside the standard library, this `String` will still look like an immutable `String`. +This would work in most languages, but in Rust `to_lowercase` borrows an immutable reference to `self`, so we can't mutate self. And obviously we can't change the signature of `to_lowercase` without breaking a lot of code. -Rust has a way of doing such things which is called writing _unsafe_ code, for cases where we're smarter than the compiler, and we know we can do something correctly. 
(Of course sometimes we only _think_ we're smarter than the compiler, and what we're doing is something that is both unsafe and incorrect, which will usually end in a panic at runtime, but in this case, totally correct.) +The borrow checker stops us here, because what we're doing isn't _safe_ - we're mutating an immutable data structure and this could cause a bug elsewhere in the code. But... you and I are smarter than the compiler here. You and I know that incrementing this counter and then reading it out somewhere else isn't going to hurt anything. From any code outside the standard library, this `String` will still look like an immutable `String`. This code isn't _safe_ from a Rust compiler perspective, but neither is it _incorrect_. -It's important to note that "unsafe" doesn't necessarily mean incorrect. There are plenty of C programs in the world performing useful tasks that are correct (or reasonably correct) and C doesn't even have a borrow checker, so all C code is unsafe as far as a Rust programmer is concerned. +Rust has a way of doing such things which is called writing _unsafe_ code, for cases where we're smarter than the compiler, and we know we can do something correctly. (Of course sometimes we only _think_ we're smarter than the compiler, and what we're doing is something that is both unsafe and incorrect, which will usually end in a panic at runtime. But in this case, where we're incrementing a counter, we're totally correct.) -In this section we're not going to write any unsafe code ourselves, but we're going to make use of `RefCell` which is implemented with unsafe code. `RefCell` is used in a patter called _interior mutability_ which is essentially exactly what we just described with our `to_lowercase` example. We have some object that we want to look like an immutable object from the outside world, but we want to have some part of it that's mutable for our own purposes on the interior. 
+In this section we're not going to write any unsafe code ourselves (see [chapter 19](./ch19/ch19-01-unsafe.md)), but we're going to make use of `RefCell` which is implemented with unsafe code. `RefCell` is used in a pattern called _interior mutability_ which is essentially exactly what we just described with our `to_lowercase` example. We have some object that we want to look like an immutable object from the outside world, but we want to have some internal state we can still mutate. ### A Use Case for Interior Mutability: Mock Objects @@ -393,7 +408,7 @@ where } ``` -We want to write a test case for `set_value`. To do this we'll create a `MockMessenger` that doesn't actually send a message, but just records all the messages it would have sent. We can create a private `Vec` to store all these messages for testing purposes. But just like our `to_lowercase` example above we have a problem: in order to implement the `Messenger` trait, the `send` method on our `MockMessenger` must borrow `self` immutably. We'll use `RefCell` to implement the interior mutability pattern here: +We want to write a test case for `set_value`. To do this we'll create a `MockMessenger` that doesn't actually send a message, but just records all the messages it would have sent. We can create a private `Vec` to store all these messages for testing purposes. But just like our `to_lowercase` example above we have a problem: in order to implement the `Messenger` trait, the `send` method on our `MockMessenger` must borrow `self` immutably, which means we can't mutate our vector. We'll use `RefCell` to implement the interior mutability pattern here: ```rust #[cfg(test)] @@ -428,17 +443,17 @@ mod tests { } ``` -`RefCell` is essentially a new kind of smart pointer. It stores some value on the heap, but it lets us call `borrow` to get an immutable reference to that something and `borrow_mut` to get a mutable reference, even though the `RefCell` itself is immutable. 
You can think of `RefCell` as two `Rc` in one - it has a reference count for immutable references, and a second reference count for mutable references (which is always either 0 or 1). +`RefCell` is essentially a new kind of smart pointer. It stores some value on the heap, but it lets us call `borrow` to get an immutable reference to that something and `borrow_mut` to get a mutable reference, even though the `RefCell` itself is immutable. -`RefCell` enforces the exact same safety rules as the borrow checker does. You can only have a single mutable reference at a time, and if you have one you can't also have any immutable references. The key difference is that normally these checks happen at compile time, but with `RefCell` they happen at runtime. If we get things wrong, instead of a compiler error before we ship, our users get a `panic`. +`RefCell` enforces the exact same safety rules as the borrow checker does. You can only have a single mutable reference at a time, and if you have one you can't also have any immutable references. The key difference is that normally these checks happen at compile time, but with `RefCell` they happen at runtime. If we get things wrong, instead of a compiler error before we ship, our users get a panic. You can think of `RefCell` as two `Rc` in one - it has a reference count for immutable references, and a second reference count for mutable references (which is always either 0 or 1). -Inside `RefCell` this is all managed with unsafe code, but it bundles it up behind an easy-to-understand API we can use. We say the `RefCell` provides a safe API around unsafe code. +Inside `RefCell` this is all managed with unsafe code, but it bundles it up behind an easy-to-understand API we can use. We say the `RefCell` provides a safe API around unsafe code, which is a common idiom for unsafe code in Rust. One final note about `RefCell` is that, like `Rc`, it is not thread safe. 
### Having Multiple Owners of Mutable Data by Combining `Rc` and `RefCell` -`Rc` lets us have multiple owners, `RefCell` lets us mutate internal state. We can combine these power together to make something mutable with multiple owners. Looking back to our cons list example: +`Rc` lets us have multiple owners, `RefCell` lets us mutate internal state. We can combine these powers together to make something mutable with multiple owners. Looking back to our cons list example: ```rust title="src/main.rs" #[derive(Debug)] @@ -459,6 +474,9 @@ fn main() { let b = Cons(Rc::new(RefCell::new(3)), Rc::clone(&a)); let c = Cons(Rc::new(RefCell::new(4)), Rc::clone(&a)); + // We can modify the value at the end of the list, + // even though there are multiple references + // to it. *value.borrow_mut() += 10; println!("a after = {:?}", a); @@ -469,9 +487,9 @@ fn main() { ## 15.6 - Reference Cycles Can Leak Memory -In C it's easy to create a memory leak; just `malloc` some memory and never free it. In a language like Rust it's not so simple, but it can definitely be done. One way to do it is with `Rc`, `RefCell`, and a circular reference. The problem is that `Rc` uses a simple reference count to know when memory is safe to free, but if we have two `Rc`s that point to each other (which isn't an easy thing to do), then even with no one else referencing them, they'll both have a reference count of 1. +In C it's easy to create a memory leak; just `malloc` some memory and never free it. In a language like Rust it's not so simple, but it can definitely be done. One way to do it is with `Rc`, `RefCell`, and a circular reference. The problem is that `Rc` uses a simple reference count to know when memory is safe to free, but if we have two `Rc`s that point to each other, then even with no one else referencing them, they'll both have a reference count of 1. -In a garbage collected language like Java or JavaScript, this problem is solved using _reachability_. 
The two values are reachable from each other, but neither is reachable from the _root set_. We have no garbage collector in Rust, and `Rc` is simply not smart enough to get out of this situation in its own, so we leak memory. +In a garbage collected language like Java or JavaScript, this problem is solved using _reachability_. The two values are reachable from each other, but neither is reachable from the _root set_. We have no garbage collector in Rust, and `Rc` is simply not smart enough to get out of this situation on its own, so we leak memory. ### Creating a Reference Cycle @@ -498,9 +516,9 @@ impl List { } fn main() { - // Create `a` which is essentially `[5]`. + // Create `a` which represents the list `[5]`. let a = Rc::new(Cons(5, RefCell::new(Rc::new(Nil)))); - // Create `b` which is essentially `[10, 5]` + // Create `b` which represents the list `[10, 5]` let b = Rc::new(Cons(10, RefCell::new(Rc::clone(&a)))); // Set a's `next` to be `b`. `a` is now the list `[5, 10, 5, 10, 5, 10...]`. @@ -520,11 +538,11 @@ fn main() { } ``` -Have a quick read through that example and you'll see that both `a` and `b` end pointing to each other. Both `a` and `b` end up with a `strong_count` of 2. When we hit the end of the `main` function, `a` will be dropped, reducing the ref count for a's `Rc` to 1 (the one from `b`), and the same will happen to `b`. As a result, even though there are no more `Rc` objects left using this memory, the memory will never be freed. +Have a quick read through that example and you'll see that both `a` and `b` end up pointing to each other. Both `a` and `b` end up with a `strong_count` of 2. When we hit the end of the `main` function, `a` will be dropped, reducing the ref count for a's `Rc` to 1 (the one from `b`), and the same will happen to `b`. As a result, even though there are no more `Rc` objects left using this memory, the count is never reduced to zero and the memory will never be freed. 
### Preventing Reference Cycles: Turning an `Rc` into a `Weak` -One way to solve the problem we presented in the previous section is to make it so some objects have ownership over the values pointed to with a smart point, and some do not. It doesn't lend itself well to the example we just used, so we're going to use a new example here, using a tree data structure. We're going to have `Node`s that have a mutable list of references to their children, and each child will have a reference to the parent. This structure is full of circular references: a parent node points to each child, and each child points back to the parent. +One way to solve the problem we presented in the previous section is to make it so some of these pointers confer ownership semantics and some do not. It doesn't lend itself well to the example we just used, so we're going to use a new example here, using a tree data structure. We're going to have `Node`s that have a mutable list of references to their children, and each child will have a reference to the parent. This structure is full of circular references: a parent node points to each child, and each child points back to the parent. To prevent a possible memory leak, here we'll make the parent references _strong_ and the child references _weak_. In other words, if a child has a reference to a parent, that reference won't count towards the reference count that `Rc` uses: @@ -556,7 +574,7 @@ fn main() { children: RefCell::new(vec![Rc::clone(&leaf)]), }); - // Wire up `leaf`'s parent to point `branch` + // Wire up `leaf`'s parent pointer *leaf.parent.borrow_mut() = Rc::downgrade(&branch); println!("leaf parent = {:?}", leaf.parent.borrow().upgrade()); @@ -565,7 +583,7 @@ fn main() { We already know that calling `rc::clone` will increment the `strong_count` for that `Rc` and return back a new `Rc` that points to the same memory. 
+This means that whenever we want to dereference a `Weak`, we have to check that there's still something in there, and the underlying memory hasn't been freed. We do this by calling `Weak::upgrade` on the `Weak`, which will return an `Option>`. If the underlying memory hasn't been cleaned up yet, then `Weak::upgrade` returns a `Some>` (the new `Rc` increments the `strong_count`, as you might expect) and if not, it returns a `None` to let you know your weak reference isn't valid anymore.

Since the relationship from child-to-parent is weak, if we drop a parent, its `strong_count` will drop to 0, and the entire tree will end up being freed. No more leaks!
+Concurrent programming has a lot of potential pitfalls - race conditions, thread-safe access to variables - in other languages these problems show up in production as tricky-to-reproduce bugs. Access to memory is handled through Rust's type system and ownership rules, and it turns out these rules can do an excellent job of catching many concurrency problems at compile time too.
+This book is intended for people who already know another language, and so we're skipping a lot of beginner concepts. However, JavaScript is one of the most popular languages in the world, and it doesn't deal much with threads, so we'll briefly cover some thread concepts here. If you know what "thread" and "mutex" mean, feel free to skip ahead to the next section. If not, this is far from a complete introduction to threads, but it will at least introduce the terminology you need to make it through this chapter.
+If you've used JavaScript much, you know that JavaScript has an event loop. In node.js, if you call `fs.readFile(filename, {encoding: "utf-8"}, cb)`, then node will ask the OS to open `filename` and read its contents, and once that data is available node will call into your callback. The actual reading of the file may or may not happen in some other thread, but your code all executes in a single thread, inside the event loop. Because of this, a calculation-heavy JavaScript program has a hard time making use of multiple CPUs. We say that JavaScript code can do a lot of things _concurrently_, but not so much in _parallel_. (At least, without using [web workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers).)
+When two threads want to read and write to the same memory, we need some way to synchronize access to that memory. If one thread is writing part of a data structure while another is reading it, then the reader might get a partially updated data structure, or one with pointers to data that has been allocated but hasn't been initialized yet. These introduce bugs in our program called _race conditions_. -One of the most common ways to synchronize access is called a _mutex_ (short for "mutual exclusion). Whenever a thread wants to read or write to shared memory, it _locks_ the mutex. Only one thread is allowed to own the lock on the mutex at a time, so if a second thread tries to lock the mutex, it will block until the mutex is available. +One of the most common ways to synchronize access is called a _mutex_ (short for "mutual exclusion"). Whenever a thread wants to read or write to shared memory, it _locks_ the mutex. Only one thread is allowed to own the lock on the mutex at a time, so if a second thread tries to lock the mutex, it will block until the mutex is available. -Sometimes a thread needs to access two different parts of memory, protected by two different mutexes. If a first thread tries to lock mutex `a` and then mutex `b`, and a second thread tries to lock mutex `b` and then mutex `a`, it's possible that the first thread will end up with `a` and get stuck waiting for `b`, while the second thread ends up with `b` and waits for `a`. This is called a _deadlock_ because both threads will end up waiting forever. +Sometimes a thread needs to access two different parts of memory, protected by two different mutexes. If a first thread tries to lock mutex `a` and then mutex `b`, and a second thread tries to lock mutex `b` and then mutex `a`, it's possible that the first thread will end up with `a` and get stuck waiting for `b`, while the second thread ends up with `b` and waits for `a`. This is called a _deadlock_ and both threads will end up waiting forever. 
-We're going to talk about how to use mutexes in Rust a little later on, and we're going to talk about channels, which are a way for threads to pass messages back and forth to each other (if you are familiar with web workers, this is a little bit like `postMessage`). +In this chapter we're going to talk about how to spawn threads, how to use mutexes to synchronize access, and we'll talk about channels which are a way for threads to pass messages back and forth to each other (if you are familiar with web workers, this is a little bit like `postMessage`). ## 16.1 - Using Threads to Run Code Simultaneously -If you're coming from a language like Go or Erlang, it's important to realize that threads in Rust are real, actual, OS level threads. In Go, _goroutines_ are "green threads", where multiple goroutines map to a single OS thread. In Rust, each thread is bare metal OS thread (although, it's worth noting that there are creates that implement other models of threading). +If you're coming from a language like Go or Erlang, it's important to realize that threads in Rust are real, actual, OS level threads. In Go, _goroutines_ are "green threads", where multiple goroutines map to a single OS thread. In Rust, each thread is bare metal OS thread (although, it's worth noting that there are crates that implement other models of threading). In [chapter 21][chap21] we'll talk about async programming where many I/O bound tasks (like many incoming requests to a web server) can be handled in parallel with a small number of threads, but in this chapter we're talking about plain old vanilla threads. 
### Creating a New Thread with `spawn` -We start a new thread by call `thread::spawn`, passing it a closure which will be run in the new thread: +We start a new thread by calling `thread::spawn`, passing it a closure which will be run in the new thread: ```rust title="src/main.rs" use std::thread; @@ -112,7 +112,7 @@ fn main() { } ``` -`mpsc` here stands for "multiple producers, single consumer", because this is how the channels from the standard library are implemented. The call to `mpsc::channel` returns a `Sender` and `Receiver` and `Receiver` in a tuple, which we're assigning to `tx` and `rx` via a destructuring assignment. As we saw in previous examples, we use `move` to move ownership of the `Sender` to the spawned thread. The thread needs to own the `Sender` in order to use it, which it does by calling `tx.send`. It's important to realize that `send` doesn't make a copy of the message being passed in, it takes ownership of the value and moves it to the receiving side. In effect the only thing being sent from sender to receiver is a pointer. @@ -152,7 +152,7 @@ fn main() { } ``` -Notice that the sending thread is sleeping between sending each value. The for loop here will call `recv`, so it will block and wait for each message. +Notice that the sending thread is sleeping between sending each value. The `for received in rx` loop here will call `recv`, so it will block and wait for each message. ## 16.3 - Shared-State Concurrency @@ -185,11 +185,9 @@ Mutexes in rust are _not_ reentrant - if you hold a lock and try to acquire it a ### Sharing a `Mutex` Between Multiple Threads -A Mutex on only one thread isn't very useful. If you recall back to how we shared a single `Sender` between multiple threads in our channels example, you might expect us to clone the mutex to pass it between multiple threads, but you'd be wrong - `Mutex` doesn't implement `Clone`. +A Mutex on only one thread isn't very useful. 
+But there's a thread-safe version of `Rc` called `Arc` - the "atomic reference counted" smart pointer. `Arc` uses the [std::sync::atomic](https://doc.rust-lang.org/stable/std/sync/atomic/index.html) library to atomically increment and decrement its reference count, which makes it thread safe. Why doesn't `Rc` use the atomic library? How come everything isn't thread safe? Because thread safety comes with a performance penalty here, so `Rc` is there when you don't need thread safety and `Arc` is there for when you do.
The `Arc` holds a reference to the mutex, not the mutex itself, so we're sharing a single mutex across multiple threads. We can do this without violating ownership rules because the reference is immutable. Much like with `RefCell`, a `Mutex` provides interior mutability, and we're allowed to change the value held in a mutex even if we have an immutable reference to the mutex. +Very similar to our channels example, we `clone` the `Arc` whenever we want to `move` it into a new thread (so you were right - there was a clone in there somewhere). Note that we're still not cloning the `Mutex` though. The `Arc` keeps ownership of the Mutex, allowing us to share a single mutex across multiple threads. + +Notice that the `counter` variable above is declared as immutable. Much like with `RefCell`, a `Mutex` provides interior mutability, and we're allowed to change the value held in a mutex even if we have an immutable reference to the mutex. ## 16.4 - Extensible Concurrency with the `Sync` and `Send` traits `Send` is a marker trait that indicates a type can be transferred between threads. Any type composed entirely of `Send` traits is automatically `Send`. Almost every type in Rust is `Send`, but we've already seen an example of one that isn't: `Rc` isn't `Send`, since if you cloned it and transferred it between threads, the clone and the original might try to modify the reference count concurrently resulting in a data race. If you give this a try, you'll get a compile-time error, since `Rc` is not `Send`. Raw pointers in rust (see [chapter 19][chap19]) are also not `Send`. -The closely related `Sync` marker trait is implemented by types that are safe to be referenced from multiple threads. To put it formally, a type `T` is `Sync` if `&T` is `Send`. If we can send an immutable reference to `T` to another thread, then `T` is `Sync`. `RefCell` is not `Sync`. 
`Mutex` is `Sync` which is why we can share an immutable reference across threads, as we did with `Arc` in the example above. +The closely related `Sync` marker trait is implemented by types that are safe to be referenced from multiple threads. To put it formally, a type `T` is `Sync` if `&T` is `Send`. If we can send an immutable reference to `T` to another thread, then `T` is `Sync`. `RefCell` is not `Sync`. `Mutex` is `Sync` which is why we can share an immutable reference across threads, as we did with `Arc` in the example above. -In general we never have to implement `Send` and `Sync` on a type ourselves. They are just marker traits with no methods, and they're implemented automatically on a type if that type is composed entirely of `Send`/`Sync` members. The only cases where you'd want to implement these yourself is if you're creating new concurrency primitives, in which case you'll be using some _unsafe_ code (see [chapter 19][chap19]). There are many safety guarantees you'll have to implement yourself if you're going down this path, and you should consult [The Rustonomicon](https://doc.rust-lang.org/stable/nomicon) if you want to learn about this. +In general we never have to implement `Send` and `Sync` on a type ourselves. They are just marker traits with no methods, and they're implemented automatically on a type if that type is composed entirely of `Send`/`Sync` members. The only cases where you'd want to implement these yourself is if you're creating new concurrency primitives, in which case you'll be using some _unsafe_ code (see [chapter 19][chap19]). There are many safety guarantees you'll have to implement yourself if you're going down this path, and you should consult [The Rustonomicon](https://doc.rust-lang.org/stable/nomicon) if you want to learn about this. 
-What's interesting to note here is that, aside from the `Sync` and `Send` traits, everything we've looked at in this chapter is implemented in the standard library instead of being part of the core Rust language. Many concurrency solutions are implemented in crates (such as the popular [parking_lot crate](https://crates.io/crates/parking_lot)). When we dig into async programming in [chapter 21][chap21] we'll see the same thing there, with Rust providing the `async` and `await` keywords, but with the actual runtime behavior being provided by a crate. +What's interesting to note here is that, aside from the `Sync` and `Send` traits, everything we've looked at in this chapter is implemented in the standard library instead of being part of the core Rust language. Many concurrency solutions are implemented in crates (such as the popular [parking_lot crate](https://crates.io/crates/parking_lot)). When we dig into async programming in [chapter 21][chap21] we'll see the same thing there, with Rust providing the `async` and `await` keywords, but with the actual runtime behavior being provided by a crate. Continue to [chapter 17][chap17]. diff --git a/docs/ch17-object-oriented-features.md b/docs/ch17-object-oriented-features.md index bb46d65..210ece5 100644 --- a/docs/ch17-object-oriented-features.md +++ b/docs/ch17-object-oriented-features.md @@ -11,23 +11,21 @@ What makes an Object Oriented language? There are many different definitions, bu It's easy to see how Rust borrows from these concepts and can be used as an OO language. `struct`s in Rust have data and can have methods defined on them and so are similar to objects. Members and methods of a `struct` can be `pub` or private (privacy in Rust is a little different than in other OO languages, but that's something that can be said of many OO languages). -Rust doesn't really have inheritance. 
But inheritance has fallen out of style in modern program design, often being replaced with composition instead, and traits allow us to provide default implementations for methods which allows a lot of the same sort of code reuse that inheritance traditionally gives us. +Rust doesn't really have inheritance. But inheritance has fallen out of style in modern software design, often being replaced with composition instead, and traits allow us to provide default implementations for methods which allows a lot of the same sort of code reuse that inheritance traditionally gives us. Using traits we can easily implement polymorphism in Rust, and we've already seen some examples of this; the `Iterator` trait allows us to pass any number of different types of objects to a `for` loop, for example. _Trait objects_ let us take this a step further. ## 17.2 - Using Trait Objects That Allow for Values of Different Types -In [chapter 8][chap8] we mentioned that a vector can only hold one type. We showed a workaround where we stored a `SpreadsheetCell` enum in a vector, and then used different variants of the enum to store different types. But let's suppose we were implementing a GUI library. We might want a vector of "components" we need to draw on the screen - buttons, select boxes, links, etc... We could use the `enum` trick here to represent all these different component types, but a common feature of GUI libraries is letting users define their own custom components. We can't possibly know all the custom component types ahead of time, so an enum here is going to let us down. +In [chapter 8][chap8] we mentioned that a vector can only hold one type. We showed a workaround where we stored a `SpreadsheetCell` enum in a vector, and then used different variants of the enum to store different types. But let's suppose we were implementing a GUI library. We might want a vector of "components" we need to draw on the screen - buttons, select boxes, links, etc... 
+For our GUI library, we'll create a `Draw` trait, with a single method called `draw`. Instead of storing a vector of buttons or a vector of dialogs, we'll store a vector of _trait objects_:
-The `Screen` struct here has a `components` which has some new syntax: it is a vector of `Box`, or in other words a vector of trait objects that implement the `Draw` trait. `Box` is a stand-in for any type inside a `Box` that implements `Draw`. `Screen` also has a `run` method which calls the draw method on each member of `components`. +The `Screen` struct has a `components` which has some new syntax: it is a vector of `Box`, or in other words a vector of trait objects that implement the `Draw` trait. `Box` is a stand-in for any type inside a `Box` that implements `Draw`. `Screen` also has a `run` method which calls the draw method on each member of `components`. It's important to note that a trait object is very different from a trait bound. If we'd implemented Screen as a generic type with a trait bound: @@ -92,6 +90,8 @@ Let's look at a crate that uses this library in _src/main.rs_: ```rust title="src/main.rs" use gui::{Button, Screen}; +// Define a custom SelectBox component, not +// in the gui library. struct SelectBox { width: u32, height: u32, @@ -146,15 +146,15 @@ let button = Button { button.draw(); ``` -Here, at compile time we know that `button` is of type `Button`, and we can work out which `draw` function to call at compile time. This is called _static dispatch_. +Here, the compiler knows that `button` is of type `Button`, and can work out which `draw` function to call at compile time. This is called _static dispatch_. There's a small performance impact to dynamic dispatch, since we have this extra pointer to follow at runtime. Also, in the static dispatch case we can do performance optimizations like inlining which are not available in the dynamic dispatch case. ## 17.3 - Implementing an Object-Oriented Design Pattern -In this chapter we're going to implement a simple blogging server. A post on the server can be in one of three states: when first created a post will be a "draft". 
Once the user is done creating the draft, they can ask for a review which will move the post to the "review" state. Finally once reviewer, the post will move to the "published" state. We want to make sure the text for a post isn't published to our blog site until the post is in the published state. +In this section we're going to implement a simple blogging server. A post on the server can be in one of three states: when first created a post will be a "draft". Once the user is done creating the draft, they can ask for a review which will move the post to the "review" state. Finally once reviewed, the post will move to the "published" state. We want to make sure the text for a post isn't published to our blog site until the post is in the published state. -This is a pretty simple example, and I'm sure you could easily imagine implementing this with a state enum and some methods on the Post, but since this is a chapter about OO design, we'll represent the state of the post using the [state pattern](https://en.wikipedia.org/wiki/State_pattern), one of the original twenty-three design patterns documented by the Gang of Four. (We're going to actually implement this twice - once using the OO pattern, and once in a way that's a bit more natural for Rust.) You can find the finished code for this example on [this book's github page](https://github.com/jwalton/rust-book-abridged/tree/master/examples/ch17-post-state-pattern]. +This is a pretty simple example, and I'm sure you could easily imagine implementing this with a state enum and some methods on the Post, but since this is a chapter about OO design, we'll represent the state of the post using the [state pattern](https://en.wikipedia.org/wiki/State_pattern), one of the original twenty-three design patterns documented by the [Gang of Four](https://en.wikipedia.org/wiki/Design_Patterns). (We're going to actually implement this twice - once using the OO pattern, and once in a way that's a bit more natural for Rust.) 
You can find the finished code for this example in [this book's github repo](https://github.com/jwalton/rust-book-abridged/tree/master/examples/ch17-post-state-pattern). As [Wikipedia](https://en.wikipedia.org/wiki/State_pattern) puts it: @@ -163,8 +163,6 @@ As [Wikipedia](https://en.wikipedia.org/wiki/State_pattern) puts it: > - An object should change its behavior when its internal state changes. > - State-specific behavior should be defined independently. That is, adding new states should not affect the behavior of existing states. -The advantage to using the state pattern here is that we can add new states without affecting the existing states. - In _src/lib.rs_, let's write a quick unit test to walk through what our API and workflow will look like: ```rust title="src/lib.rs" @@ -188,7 +186,7 @@ mod tests { } ``` -Notice that our public API doesn't event know anything about our state pattern. The idea here will be that `Post` has a `state` which will be a State object. There will be a `Draft` struct, a `PendingReview` struct, and a `Published` struct that represent our different states and all are going to implement the `State` trait. When you call into a method on `Post` like `post.request_review()`, this method will delegate to the current state by doing roughly the equivalent of `this.state = this.state.request_review()`, so the state can control what the next state will be. +Notice that our public API doesn't know anything about our state pattern. The idea here will be that `Post` has a `state` which will be a state object. There will be a `Draft` struct, a `PendingReview` struct, and a `Published` struct that represent our different states and all are going to implement the `State` trait. When you call into a method on `Post` like `post.request_review()`, this method will delegate to the current state by doing roughly the equivalent of `this.state = this.state.request_review()`, so the state can control what the next state will be. 
Here's the implementation of `Post`, also in _src/lib.rs_: @@ -236,11 +234,11 @@ impl Post { } ``` -The `State` trait has `request_review`, `approve`, and `content` methods. State has a default implementation of `content`, so not all the implementors have to reimplement it, but the others will need to be implemented by each `State` individually. The `request_review` and `approve` methods on `State` take a `self: Box` as their first parameter. This means this method will only be available on a `Box` holding the type. This also takes ownership of the `Box`, effectively invalidating the previous state. +The `State` trait has `request_review`, `approve`, and `content` methods. State has a default implementation of `content`, so not all the implementors have to reimplement it, but the other methods will need to be implemented by each `State` individually. The `request_review` and `approve` methods on `State` take a `self: Box` as their first parameter. This means this method will only be available on a `Box` holding the type. This also takes ownership of the `Box`, effectively invalidating the previous state. `Post` has some state and some content, both of which are private. We could have made `content` public, but we want to make the content of a `Post` hidden until the post is in the published state, so we created a `content` getter method which delegates to the current state. -Post's constructor creates a new `Draft` state, since this is the state we want to start out in. Since the `state` field is private, can't create a Post in any other state. Post's state is an optional trait object of type `Option>`. We'll talk about why it's an `Option` in just a second. +Post's constructor creates a new `Draft` state, since this is the state we want to start out in. Since the `state` field is private, we can't create a Post in any other state. Post's state is an optional trait object of type `Option>`. We'll talk about why it's an `Option` in just a second. 
The `add_text` method takes a mutable reference to self, since it modifies the content of the post. Notice it doesn't interact with the state at all, because no matter what state a post is in, we want to be able to add text. All the other methods on `Post` delegate to the current state. The `request_review` and `approve` methods look very similar - they call into the current state to get the new state, and set self.state. But... they're also a little wordy: @@ -252,7 +250,7 @@ The `add_text` method takes a mutable reference to self, since it modifies the c } ``` -Why not just `self.state = self.state.request_review()` here? The problem here is that `self.state.request_review()` would try to take ownership of `self.state`, but you can't take ownership of a _part_ of a struct. Remember that ownership is about controlling some allocated memory, and a struct is allocated as a single block of memory. If we took ownership of `self.state` and didn't fill it back in, what would be there in memory? To get around this we make `self.state` an `Option`, and then `self.state.take()` will take ownership of the value in the `Option` and replace it with `None` temporarily. It's never `None` for more than a moment. +Why not just `self.state = self.state.request_review()` here? The problem here is that `self.state.request_review()` would try to take ownership of `self.state`, but you can't take ownership of a _part_ of a struct. Remember that ownership is about controlling some allocated memory, and a struct is allocated as a single block of memory. If we took ownership of `self.state`, what would be there in memory in its place? To get around this we make `self.state` an `Option`, and then `self.state.take()` will take ownership of the value in the `Option` and replace it with `None` temporarily. Since after the `take` we immediately reassign it, it's never `None` for more than an instant. 
The `content` method also needs to deal with the fact that `self.state` is an `Option`: @@ -262,7 +260,7 @@ The `content` method also needs to deal with the fact that `self.state` is an `O } ``` -`as_ref` gives us back a reference to the contents of the `Option`. We call `as_ref` on the Option because we don't want to take ownership of the `Box` in the `Option`. Again, we can't here - `self.state` is an immutable reference, so we can't take ownership of the value inside the `Option` even if we wanted to, since `self`, and by extension `self.option` are both immutable in this context. We call `unwrap()` because we know that `self.state` will always contain a value. This is one of those examples of us knowing more than the compiler - we know `self.state` wil never be `None` here, so we can just panic instead of trying to deal with the case where it's `None`. After the `unwrap` we have a `&Box`, so deref coercion will take place until we ultimately call `content` on the current State's implementation. +We call `as_ref` on the Option to convert the owned value into a ref, because we don't want to take ownership of the `Box` in the `Option`. (`self.state` is an immutable reference, so we can't take ownership of the value inside the `Option` even if we wanted to, since `self`, and by extension `self.option` are both immutable in this context.) We call `unwrap()` because we know that `self.state` will always contain a value. This is one of those examples of us knowing more than the compiler - we know `self.state` will never be `None` here, so we can just panic instead of trying to deal with the case where it's `None`. After the `unwrap` we have a `&Box`, so deref coercion will take place until we ultimately call `content` on the current State's implementation. Let's have a look at the `Draft` state: @@ -314,7 +312,7 @@ impl State for Published { } ``` -The only thing interesting there is that `Published` overrides the default implementation of the `content` method. 
We need lifetime annotations on this method, since the returned reference will only be valid as long as the passed in `Post`. +The only thing interesting here is that `Published` overrides the default implementation of the `content` method. We need lifetime annotations on this method, since the returned reference will only be valid as long as the passed in `Post`. ### Trade-offs of the State Pattern @@ -354,7 +352,7 @@ I encourage you to give this a try, but you may find ownership rules will make t ### Encoding States and Behavior as Types -Let's take a look at another way of implementing the same behavior, but we're not going to implement it in exactly the same way we would in a traditional OO language. We're going to instead try to encode our state and associated behavior as explicit types. You can find the finished code for this example on [this book's github page](https://github.com/jwalton/rust-book-abridged/tree/master/examples/ch17-post-state-types]. First let's create a `Post` and a `DraftPost`: +Let's take a look at another way of implementing the same behavior, but we're not going to implement it in exactly the same way we would in a traditional OO language. We're going to instead try to encode our state and associated behavior as explicit types. You can find the finished code for this example in [this book's github repo](https://github.com/jwalton/rust-book-abridged/tree/master/examples/ch17-post-state-types). First let's create a `Post` and a `DraftPost`: ```rust title="src/lib.rs" pub struct Post { @@ -384,7 +382,7 @@ impl DraftPost { } ``` -We can still call `Post::new`, but this now returns a new `DraftPost` type. `DraftPost` doesn't even implement `content`, so we can't even ask for the content of a `DraftPost` without creating a compile time error. This is an example of "making invalid state unrepresentable" - we don't want to let you get the content of a draft post, and now it's impossible to even write the code that would do such a thing. 
We want to be able to request a review on our `DraftPost`, so let's add a method for that: +We can still call `Post::new`, but this now returns a new `DraftPost` type. `DraftPost` doesn't even implement `content`, so we can't even ask for the content of a `DraftPost` without generating a compile time error. This is an example of "making invalid state unrepresentable" - we don't want to let you get the content of a draft post, and now it's impossible to even write the code that would do such a thing. We want to be able to request a review on our `DraftPost`, so let's add a method for that: ```rust title="src/lib.rs" // --snip-- @@ -435,7 +433,7 @@ mod tests { } ``` -This isn't completely better than the previous example, rather there are trade offs here that are different. In this test case, you can see that whenever a post changes state, we have used variable shadowing to create a new variable with a new type. If we change our internal implementation so that `add_text` transitioned to a new state, then this called would break, so our implementation is not nearly as encapsulated as it was before. It's also a little more challenging in this model to create a vector of "posts" - we'd have to wrap these different post states in an enum or in some common trait and use a trait object to stuff them into a vector together, and both of those would "undo" some of the benefits we've just outlined in different ways. +This isn't better in every way than the previous example, rather there are trade offs here that are different. In this test case, you can see that whenever a post changes state, we have used variable shadowing to create a new variable with a new type. If we change our internal implementation so that `add_text` transitioned to a new state, then the caller here would break, so our implementation is not nearly as encapsulated as it was before. 
It's also a little more challenging in this model to create a vector of "posts" - we'd have to wrap these different post states in an enum or in some common trait and use a trait object to store them in a vector together, and both of those would "undo" some of the benefits we've just outlined in different ways. Which of these tradeoffs you make are going to depend heavily on what you're trying to implement, but hopefully this chapter has given you some new tools to use to approach different problems. diff --git a/docs/ch18-patterns-and-matching.md b/docs/ch18-patterns-and-matching.md index cb7e437..54790a4 100644 --- a/docs/ch18-patterns-and-matching.md +++ b/docs/ch18-patterns-and-matching.md @@ -18,7 +18,7 @@ match VALUE { The patterns in a `match` need to be _exhaustive_ - they need to cover every possibility. The `_` pattern will match anything and not bind to a variable, so it will often be used as a catch-all at the end of a `match`. -In this example, we extract a value from a `Some`. Note that value we extract shadows the outer variable: +In this example, we extract a value from a `Some`. Note that value we extract in this example will shadow the outer variable: ```rust match i { @@ -87,7 +87,7 @@ In a `for` loop, the bit immediately after the `for` keyword is actually a patte ### `let` statements -Simple let statements actually use patterns too: +Simple let statements use patterns too: ```rust let x = 5; @@ -134,7 +134,7 @@ There are some places where we're only allowed to use irrefutable patterns. For let Some(x) = value; ``` -Here if `value` is `Some(1)`, then we expect `x` to get the value 1. But if `value` were `None`, what would `x` be here? This statement makes no sense, and will result in a compiler error, because `let` needs an irrefutable pattern. (Although we could fix this with an `if let` instead.) +Here if `value` is `Some(1)`, then we expect `x` to get the value 1. But if `value` were `None`, what would `x` be here? 
This statement makes no sense, and will result in a compiler error, because an assignment needs an irrefutable pattern. (Although we could fix this with an `if let` instead.) There are also places where an irrefutable parameter is allowed, but is somewhat pointless, which will generate compiler warnings, such as this: @@ -432,7 +432,7 @@ One downside to match guards is that they generally require the match to have a match x { Some(x) if y => println!("{x}"), Some(x) if !y => println!("{x}"), - None => println!("no"), + None => panic!("Silly compiler"), } ``` @@ -450,6 +450,7 @@ enum Message { let msg = Message::Hello { id: 5 }; match msg { + // Match `id` and bind it to `id_variable`. Message::Hello { id: id_variable @ 3..=7, } => println!("Found an id in range: {}", id_variable), diff --git a/docs/ch19/ch19-01-unsafe.md b/docs/ch19/ch19-01-unsafe.md index ebcc89f..b834af3 100644 --- a/docs/ch19/ch19-01-unsafe.md +++ b/docs/ch19/ch19-01-unsafe.md @@ -6,7 +6,7 @@ Imagine we have a vector with six elements in it. We could create a mutable slic _Unsafe_ code in Rust is code where we're allowed to ignore or bypass some of the restrictions Rust places on us, and tell the compiler "Don't worry, I got this." Of course, sometimes we only think we know better than the compiler when in fact what we're actually doing is creating a hard-to-diagnose problem that we won't fund until our code is running in production. So it's not a bad idea to keep unsafe code to a minimum. -But it's important to realize that "unsafe" code isn't "dangerous" code, it's just code that hasn't been inspected by the eagle eye of the Rust compiler. If you could teach the Rust compiler to look at C code, then pretty much all C code would be considered unsafe, but there are plenty of C programs out there doing useful work every day. +But it's important to note that "unsafe" doesn't necessarily mean incorrect, it's just code that hasn't been inspected by the eagle eye of the Rust compiler. 
There are plenty of C programs in the world performing useful tasks that are correct (or reasonably correct) and C doesn't even have a borrow checker, so all C code is unsafe as far as a Rust programmer is concerned. We can write code inside an unsafe block or inside an unsafe function: @@ -64,7 +64,7 @@ let address = 0x012345usize; let r = address as *const i32; ``` -Note that we're actually allowed to create pointers outside of unsafe code. Creating a pointer never hurt anyone, it's dereferencing a pointer that gets us into trouble, so the dereference is only allowed to happen inside an `unsafe` block. +We're allowed to create pointers outside of unsafe code. Creating a pointer never hurt anyone, it's dereferencing a pointer that gets us into trouble, so the dereference is only allowed to happen inside an `unsafe` block. Why would you want to use a raw pointer instead of a reference? One case is for calling into C code. Another is when you want to build a "safe" abstraction that the borrow checker won't understand, like our "two mutable slices" example above. We'll see examples of both of these. @@ -97,9 +97,9 @@ assert_eq!(a, &mut [1, 2, 3]); assert_eq!(b, &mut [4, 5, 6]); ``` -Here `split_at_mut` is going to call unsafe code, but that doesn't mean that it also has to be unsafe. In fact, the above code works because vector has this method on it already! +`split_at_mut` is going to call unsafe code, but that doesn't mean that it also has to be unsafe. In fact, the above code works because vector has this method on it already! -What we're doing here is creating a "safe abstraction". This is a very common pattern - we hide away the unsafe stuff behind an API that's easy and safe to use. This makes it so we only have to reason about our small API. Here's the implementation: +What `split_at_mut` is doing here is creating a "safe abstraction". This is a very common pattern - we hide away the unsafe stuff behind an API that's easy and safe to use. 
This makes it so we only have to reason about our small API. Here's the implementation of `split_at_mut`: ```rust use std::slice; @@ -119,7 +119,7 @@ fn split_at_mut(values: &mut [i32], mid: usize) -> (&mut [i32], &mut [i32]) { } ``` -Here the `slice::from_raw_parts_mut` is unsafe (because it uses a raw pointer to the underlying slice) so we need to call this inside an `unsafe` block. +`slice::from_raw_parts_mut` is unsafe (because it uses a raw pointer to the underlying slice) so we need to call this inside an `unsafe` block. ### Using `extern` Functions to Call External Code @@ -164,7 +164,7 @@ fn main() { Static variables are similar to constants, but we name them in `SCREAMING_SNAKE_CASE`. These variables are always in the `'static` lifetime, and accessing an immutable static variable is considered safe. -Constants can be duplicated in memory, but static variables are always guaranteed to occupy the exact same memory, no matter where they are referenced in code. Unlike constants, static variables can also be `mut`, but accessing or modifying a mutable static variable is always unsafe: +When we use a constant in Rust, the compiler may duplicate the constant in multiple places in memory if they are referenced in multiple places. Static variables, on the other hand, are always guaranteed to occur once in memory, so no matter where they are referenced in code you'll get back the same instance. Unlike constants, static variables can also be `mut`, but accessing or modifying a mutable static variable is always unsafe: ```rust static mut COUNTER: u32 = 0; @@ -204,7 +204,9 @@ unsafe impl Foo for i32 { ## Accessing Fields of a Union -A `union` is like a `struct`, but each field in the union occupies the same memory. Only one of the fields is ever safe to access at a time, depending on what is stored in the union. 
This example, for instance, will be four bytes long and holds either a `u32` or an `f32`: +Unions are included in Rust mainly for calling into C code that uses them. If you want to access a union, it has to be done from an `unsafe` block. + +For the non-C programmers reading this, a `union` is like a `struct`, but each field in the union occupies the same memory. Only one of the fields is ever correct to access at a time, depending on what is stored in the union. This example, for instance, will be four bytes long and holds either a `u32` or an `f32`: ```rust #[repr(C)] @@ -215,5 +217,3 @@ union MyUnion { ``` Rust has no idea what's stored in this union, and you'll get back a `u32` or an `f32` depending on which one you access, but odds are only one of them contains a meaningful value. You can learn more about unions in [the Rust Reference](https://doc.rust-lang.org/stable/reference/items/unions.html). - -Unions are included in Rust mainly for calling into C code that uses them. If you want to access a union, it has to be done from an `unsafe` block. diff --git a/docs/ch19/ch19-02-advanced-traits.md b/docs/ch19/ch19-02-advanced-traits.md index b12bed3..f6d81ce 100644 --- a/docs/ch19/ch19-02-advanced-traits.md +++ b/docs/ch19/ch19-02-advanced-traits.md @@ -54,7 +54,7 @@ pub trait GenericIterator { } ``` -Well, actually, we _can_ do this. You can have generic traits, but there's an important difference: a trait with an associated type can only be implemented for a given struct once, but a trait with a generic type could be implemented for a given struct multiple times for different generic types. +Well, actually, we _can_ do this. You can have generic traits, but there's an important difference: a trait with an associated type can only be implemented for a given type once, but a trait with a generic type could be implemented for a given type multiple times for different generic types. 
This means, practically speaking, that if someone implemented `GenericIterator` then whenever we called `next`, we'd have to explicitly annotate the type of the return value so we'd know which version of `next` to call. @@ -85,6 +85,23 @@ This isn't a problem for associated types, because we know there can only ever b ## Default Generic Type Parameters and Operator Overloading +When we have a generic type, we can specify a _default type parameter_ that will be used if no generic type is specified: + +```rust +struct Point { + x: T, + y: T, +} + +// Don't need to specify `Point` here. +fn foo(p: Point) { + println!("{}, {}", p.x, p.y) +} + +``` + +Generally there are two cases where a default type parameter is useful. You can use it to make a non-generic type generic without breaking existing uses, and you can allow customization in places where most users won't need it. + _Operator overloading_ lets you define custom behavior for certain operators. For example, we all understand what happens when we apply the `+` operator to two `i32`s. But, what if we want to add two `Point`s together? ```rust @@ -119,7 +136,7 @@ impl Add for Point { } ``` -The [`std:ops` section of the standard library](https://doc.rust-lang.org/std/ops/index.html) describes what operators you can overload this way. If we have a look at the `Add` trait, it has an `Output` associated item, but the `Add` trait is also generic, and lets us specify the `Rhs` or "right-hand-side": +The [`std:ops` section of the standard library](https://doc.rust-lang.org/std/ops/index.html) describes what operators you can overload this way. If we have a look at the `Add` trait, it has an `Output` associated type, but the `Add` trait is also generic, and lets us specify the `Rhs` or "right-hand-side": ```rust trait Add { @@ -129,9 +146,9 @@ trait Add { } ``` -We didn't specify an `Rhs` above though when we wrote `impl Add for Point`. That's because the `Add` trait uses a _default type parameter_ for `Rhs`. 
Since we didn't specify one, it defaulted to `Self` (which in our case is another `Point`. +Again, this is an example of a generic with a default type parameter. We didn't specify an `Rhs` above so it defaults to `Self` (or in this case `Point`). Generally when you want to add a thing to another thing, they're going to be of the same type, so here the default saves us some typing. -This generic parameter lets us specify what happens when you try to add together two different types. Here's an example where we defined a `Millimeters` and `Meters` type, and specify how to add meters to millimeters: +But having the `Rhs` be a generic type means we can also implement `Add` for cases where we're adding together two different types. Here's an example where we define a `Millimeters` and `Meters` type, and specify how to add meters to millimeters: ```rust use std::ops::Add; @@ -148,13 +165,9 @@ impl Add for Millimeters { } ``` -There are two cases where a default type parameter is useful. You can use it to make a non-generic type generic without breaking existing uses, and you can allow customization in places where most users won't need it. - ## Fully Qualified Syntax for Disambiguation: Calling Methods with the Same Name -If you're like me, the first time you saw that `impl TRAIT for TYPE` syntax, you realized you could have two different traits that each defined a function called `foo`, and then you could create a type that implemented both traits. - -You absolutely can do this. In fact, you can also have a trait that defines a method named `foo` that differs from a method defined on the struct outside any trait also called `foo`. The `Human` struct in this next example has three different methods called `fly`: +The first time you saw that `impl TRAIT for TYPE` syntax, you probably realized you could have two different traits that each defined a function called `foo`, and then you could create a type that implemented both traits. 
In fact, you can also have a trait that defines a method named `foo` that differs from a method defined on the struct outside any trait also called `foo`. The `Human` struct in this next example has three different methods called `fly`: ```rust trait Pilot { @@ -204,9 +217,9 @@ fn main() { } ``` -When we define these methods, we always have that `self` parameter for the receiver. I like to think of this syntax as calling this like an associated function and explicitly passing in `self`. +When we call these methods explicitly like this, we have to pass in the `self` parameter, as if we were calling these like an associated function. (We've already seen an example of this syntax when we called `Rc::clone`, although we didn't know it at the time!) -Although, this brings up an interesting point. One thing we haven't done yet is to define an associated function on a trait: +Although, this brings up an interesting point; if we can call a method on a trait using the associated function syntax, can we define an associated function on a trait? ```rust trait Animal { @@ -226,7 +239,7 @@ fn main() { } ``` -But what happens here if `Dog` has an associated function also called `baby_name`? +But what happens here if `Dog` also has an associated function also called `baby_name`? ```rust impl Dog { @@ -252,9 +265,9 @@ You could use this same syntax in our `Human` example above: ::fly(&person); ``` -The general syntax is `::function(receiver_if_method, next_arg, ...)`, but you can omit any part of this that Rust can work out on it's own. +These are actually all different examples of the same thing. The general syntax is `::function(receiver_if_method, next_arg, ...)`, but you can omit any part of this that Rust can work out on it's own. -## Using Supertraits to Require One Trait’s Functionality Within Another Trait +## Using Supertraits to Require One Trait's Functionality Within Another Trait Let's say we want to define a trait called `OutlinePrint`. 
Any type that implements `OutlinePrint` will have a method called `outline_print` that will print the value with a box made of `*`s around it: @@ -266,7 +279,7 @@ Let's say we want to define a trait called `OutlinePrint`. Any type that impleme ********** ``` -We can provide a default implementation of `outline_print`, but in order to do so we'd have to call into `self.to_string()`, which means that `self` has to implement `fmt:Display`. +We can provide a default implementation of `outline_print`, but in order to do so we'd have to call into `self.fmt()`, which means that `self` has to implement `fmt::Display`. We can write this trait like this: @@ -286,7 +299,7 @@ trait OutlinePrint: fmt::Display { } ``` -We say here that `fmt::Display` is a _supertrait_ of `OutlinePrint`. This is kind of like adding a trait bounds to `OutlinePrint` - saying that in order to implement OutlinePrint, your type also has to implement `fmt::Display`. +We say here that `fmt::Display` is a _supertrait_ of `OutlinePrint`. This is kind of like adding a trait bound to `OutlinePrint` - saying that in order to implement OutlinePrint, your type also has to implement `fmt::Display`. It's also kind of like saying that `OutlinePrint` inherits from `fmt::Display`, which is why we call it a supertrait (although you can't define `fmt` in the `impl` block for `OutlinePrint`, so it's not quite like OO style inheritance). We can implement this on a `Point`: @@ -304,6 +317,9 @@ impl fmt::Display for Point { } } +// No need to implement the outline_print method as we get +// the default definition, which automatically calls into +// `fmt` above. impl OutlinePrint for Point {} ``` @@ -311,13 +327,15 @@ impl OutlinePrint for Point {} Back in [chapter 10](../ch10/ch10-02-traits.md#implementing-a-trait-on-a-type), we mentioned the "orphan rule". If you want to implement a trait on a type, then either the trait or the type (or both) need to be defined locally in your crate. 
-It's possible to get around this using the _newtype_ pattern. The basic idea is to create a tuple "wrapper" around the existing type. Let's suppose we want to implement `Display` on `Vec`. These are both from the standard library, so normally we couldn't do this. We'll use the newtype pattern here: +It's possible to get around this using the _newtype_ pattern (borrowed from Haskell). The basic idea is to create a tuple "wrapper" around the existing type. Let's suppose we want to implement `Display` on `Vec`. These are both from the standard library, so normally we couldn't do this. We'll use the newtype pattern here: ```rust title="src/main.rs" use std::fmt; +// Create a newtype wrapper around `Vec`. struct Wrapper(Vec); +// Implement `Display` trait on the wrapper. impl fmt::Display for Wrapper { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "[{}]", self.0.join(", ")) @@ -330,4 +348,4 @@ fn main() { } ``` -The disadvantage to this approach is that we have a new type `Wrapper` here, and we can't just treat `w` like we could a regular vector. Most of the methods we want to call on `Vec` aren't defined on Wrapper. We could redefine just the methods we want to call on `Wrapper`. We could also implement the `Deref` trait so we can treat a `w` like vector. +The disadvantage to this approach is that we have a new type `Wrapper` here, and we can't just treat `w` like we could a regular vector. Most of the methods we want to call on `Vec` aren't defined on Wrapper. We could redefine just the methods we want to call on `Wrapper` (which could be an advantage if we want to present a subset of its API as our API). We could also implement the `Deref` trait so we can treat `w` like a vector. 
diff --git a/docs/ch19/ch19-03-advanced-types.md b/docs/ch19/ch19-03-advanced-types.md index 6f5bff5..eb22096 100644 --- a/docs/ch19/ch19-03-advanced-types.md +++ b/docs/ch19/ch19-03-advanced-types.md @@ -4,7 +4,7 @@ In the [previous section](./ch19-02-advanced-traits.md#using-the-newtype-pattern-to-implement-external-traits-on-external-types), we discussed using the _newtype_ pattern to wrap an existing type in a tuple. -The newtype pattern is useful in a few other scenarios too. If we create a `Millimeter` type: +The newtype pattern is useful in a few other scenarios too. If we create a `Millisecond` type: ```rust struct Millisecond(u32); @@ -14,7 +14,7 @@ fn sleep(duration: Millisecond) { } ``` -It makes it very clear that `sleep` expects a value in milliseconds. The newtype pattern can also be used to wrap a type and give it a different public API, or to give a nicer API to a data structure. +This makes it very clear that `sleep` expects a value in milliseconds (although in this particular example you'd be better off using [`std::time::Duration`](https://doc.rust-lang.org/std/time/struct.Duration.html)). The newtype pattern can also be used to wrap a type and give it a different public API. ## Creating Type Synonyms with Type Aliases @@ -61,15 +61,13 @@ fn returns_long_type() -> Thunk { } ``` -A meaningful name for your alias can make your code much easier to read and write. -Another example of this is in `std::io`. Many functions here return a `Result` with a `std::io::Error` as the error type, so `std:io` defines: +A meaningful name for your alias can make your code much easier to read and write. Another example of this is in `std::io`. Many functions here return a `Result` with a `std::io::Error` as the error type, so `std::io` defines: ```rust type Result = std::result::Result; ``` -which shortens up a lot of code in this module. +which makes a lot of function signatures in this module much shorter and easier to read. 
## The Never Type that Never Returns diff --git a/docs/ch19/ch19-04-advanced-functions-and-closures.md b/docs/ch19/ch19-04-advanced-functions-and-closures.md index f60fc05..7e840b3 100644 --- a/docs/ch19/ch19-04-advanced-functions-and-closures.md +++ b/docs/ch19/ch19-04-advanced-functions-and-closures.md @@ -20,9 +20,9 @@ fn main() { } ``` -The `fn` type her is called a _function pointer_. You may recall [from chapter 13][chap13] that to declare a function that takes a closure as a parameter, we declared a generic function and used a trait bound on the generic type, just as `FnOnce()`. The difference between a closure and a function pointer is that the function pointer is a concrete type. +The `fn` type here is called a _function pointer_. You may recall [from chapter 13][chap13] that to pass a closure as a parameter, we declared a generic function and used a trait bound on the generic type to `FnOnce`, `FnMut`, or `Fn`. The difference between a closure and a function pointer is that the function pointer is a concrete type. -Function pointers implement all three generic traits (`FnOnce`, `FnMut`, and `Fn`) so you can always pass a function pointer to a function that expects a trait. For this reason, it's generally more flexible to write a function that takes a closure. If you're interacting with C code, you might have to use a function pointer however. +Function pointers implement all three generic traits (`FnOnce`, `FnMut`, and `Fn`) so you can always pass a function pointer to a function that expects a trait. For this reason, it's generally more flexible to write a function that takes a closure. You'll likely have to use a function pointer instead if you're interacting with C code. Here's an example of using a function in place of a closure: diff --git a/docs/ch19/ch19-05-macros.md b/docs/ch19/ch19-05-macros.md index 33d2568..773c733 100644 --- a/docs/ch19/ch19-05-macros.md +++ b/docs/ch19/ch19-05-macros.md @@ -6,7 +6,7 @@ Macros are a kind of "metaprogramming". 
When we write a Macro, we're actually wr - Macros run at compile time, so they have no runtime performance impact (although they can generate code that runs at runtime, which might). - Macros can take a variable number of parameters (such as the `println!` marco does) which normal Rust functions cannot. -- Macros must be brought into scope or defined before they are called. +- Macros must be brought into scope or defined locally before they are called. ## Declarative Macros with `macro_rules!` for General Metaprogramming @@ -26,7 +26,7 @@ fn main() { ``` -The `macro_rules! four` says we're going to declare a macro named `four!`. Inside the `{}`, the rest of this macro is a little similar to a `match` expression. Each rule in a `macro_rules!` is of the format `(MATCHER) => {EXPANSION};`. When we call a macro, we don't actually pass in parameters like `i32`s or `&str`s, instead we're passing in a snippet of Rust code. When the macro runs, it will try to match the passed in token tree to each matcher in turn. Once it finds a match, we'll replace the whole macro with whatever is in the expansion part. +The `macro_rules! four` says we're going to declare a macro named `four!`. Inside the `{}`, the rest of this macro is similar to a `match` expression (in this example we only have one arm). Each rule in a `macro_rules!` is of the format `(MATCHER) => {EXPANSION};`. When we call a macro, we don't actually pass in parameters like `i32`s or `&str`s, instead we're passing in a snippet of Rust code. When the macro runs, it will try to match the passed in token tree to each matcher in turn. Once it finds a match, we'll replace the whole macro with whatever is in the expansion part. In the case of our macro above, we just have a single "empty matcher". If you were to try calling `let x = four!("hello");`, you'd get an error telling you `` no rules expected the token `"hello"` ``. @@ -56,14 +56,12 @@ macro_rules! 
vec { ``` :::info -This is actually a slightly simplified version of `vec!`, because the original tries to preallocate the correct amount of data in the new vector, and this would only serve to make this example even more confusing than it already is. +This is actually a slightly simplified version of `vec!`. The original tries to preallocate the correct amount of data in the new vector. ::: First, notice we've added the `#[macro_export]` annotation. Without this annotation, this macro can't be used outside of the crate it is defined in. -The `$(),*` part of the matcher here is called a _repetition_. These have the form `$ (...) sep rep`, where `( ... )` is the part that's being repeated, `sep` is an optional separator token, and `rep` defines how many times the pattern can repeat - `?` for zero or one, `*` for zero or more, and `+` for one or more (like in a regular expression). - -So `( $( $x:expr ),* )` matches zero or more expressions, separated by commas. +The `$(),*` part of the matcher here is called a _repetition_. These have the form `$ (...) sep rep`, where `( ... )` is the part that's being repeated, `sep` is an optional separator token, and `rep` defines how many times the pattern can repeat - `?` for zero or one, `*` for zero or more, and `+` for one or more (like in a regular expression). So `( $( $x:expr ),* )` matches zero or more expressions, separated by commas, and each time through the repetition we assign the matched part to the `$x` metavariable. On the right hand side of the `=>` we have the code we're going to expand this to. Inside the `$()` is the repetition part - this code will be inserted once for each time the repetition matches on the matcher side. 
@@ -81,9 +79,7 @@ So if we were to write `vec![1, 2, 3]`, at compile time this would get replaced ## Procedural Macros for Generating Code from Attributes -A _procedural macro_ is a Rust function that takes in a `TokenStream` of some input source code and produces `TokenStream` of some generated code. There are three kinds of procedural macros: custom derive, attribute-like, and function-like. When we `#[derive()]` a trait, it's going through a custom-derive macro. - -Right now procedural macros need to be defined in their own special crate for technical reasons we're going to hand wave away for this book. +A _procedural macro_ is a Rust function that takes in a `TokenStream` of some input source code and produces a `TokenStream` of some generated code. There are three kinds of procedural macros: custom derive, attribute-like, and function-like. When we `#[derive()]` a trait, it's going through a custom-derive macro. Procedural macros need to be defined in their own special crate for technical reasons we're going to hand wave away for this book, although this will likely change in the future. ### How to Write a Custom `derive` Macro @@ -203,7 +199,7 @@ fn impl_hello_macro(ast: &syn::DeriveInput) -> TokenStream { } ``` -The `quote!` macro here helps us define the code we want to generate. Note the `#name` template inside of `quote!`. `quote!` has other cool template tricks, so be sure to [check out its documentation](https://docs.rs/quote/latest/quote/). The `stringify!` macro is built into rust and here turns an expression like `1 + 2` into a string like `"1 + 2"`, or here `Pancakes` into `"Pancakes"`. +The `quote!` macro here helps us define the code we want to generate. Note the `#name` template inside of `quote!`. `quote!` has other cool template tricks, so be sure to [check out its documentation](https://docs.rs/quote/latest/quote/). 
The `stringify!` macro is built into rust and turns an expression like `1 + 2` into a string like `"1 + 2"`, or here `Pancakes` into `"Pancakes"`. If you want to run this, there's just one thing left to do. In our _pancakes_ project, we need to add dependencies to _Cargo.toml_ so it can find our trait and macro: diff --git a/docs/ch20/ch20-01-single-threaded-web-server.md b/docs/ch20/ch20-01-single-threaded-web-server.md index 672e85f..115875a 100644 --- a/docs/ch20/ch20-01-single-threaded-web-server.md +++ b/docs/ch20/ch20-01-single-threaded-web-server.md @@ -1,6 +1,6 @@ # 20.1 - Building a Single-Threaded Web Server -In this chapter we're going to build a simple HTTP server to put together a number of things we've learned so far. As usual, the code for this project is [available on the GitHub repo](https://github.com/jwalton/rust-book-abridged/tree/master/examples/ch20-single-threaded-web-server). +In this chapter we're going to build a simple HTTP server to put together a number of things we've learned so far. As usual, the code for this project is [available in the GitHub repo](https://github.com/jwalton/rust-book-abridged/tree/master/examples/ch20-single-threaded-web-server). ## HTTP Requests @@ -102,7 +102,7 @@ fn handle_connection(mut stream: TcpStream) { let http_request: Vec<_> = buf_reader .lines() .map(|result| result.unwrap()) - .take_while(|line| !line.is_empty()) // Blank line is end of request. + .take_while(|line| !line.is_empty()) // Blank line is end of headers. .collect(); let request_line = &http_request[0]; @@ -136,15 +136,13 @@ Note that we're using `io::prelude::*`. The `io` library has it's own "prelude" ## Listening to the TCP Connection -Let's start with the `main` function. We call `TcpListener::bind` to start listening on a port. This returns a `TcpListener` instance, so it's basically a constructor for `TcpListener`. 
Note that we're binding to "127.0.0.1", so you'll only be able to access this web server from the same machine you're running it on. `bind` can fail for a variety of reasons. For example, if we tried to bind to port 80 and we weren't root, this would fail because we don't have sufficient permissions. We're glossing over all the error handling with a call to `unwrap`. +Let's start with the `main` function. We call `TcpListener::bind` to start listening on a port. This returns a `TcpListener` instance, so it's basically a constructor for `TcpListener`. Note that we're binding to "127.0.0.1", so you'll only be able to access this web server from the same machine you're running it on. We could bind to "0.0.0.0" - the [unspecified address](https://doc.rust-lang.org/std/net/struct.Ipv4Addr.html#method.is_unspecified) - to bind to all local interfaces. `bind` can fail for a variety of reasons. For example, if we tried to bind to port 80 and we weren't root, this would fail because we don't have sufficient permissions, or some other process might have already bound the port. We're glossing over all the error handling with a call to `unwrap`. Once we have out `TcpListener` we call `incoming` on it, which returns an iterator of `Result`. We'll get an item from this iterator every time a client tries to connect. Note this iterator will never return `None`! This loop is going to go on forever (or at least until we hit CTRL-C to terminate this program). A connection attempt can fail for a variety of reasons. In a production web server we'd want to handle these, but here we're once again just calling `unwrap`. Finally we hand of the connection to `handle_connection`. ## Parsing the Request -Our `handle_connection` function creates a new buffered reader to read the incoming bytes from the stream. We user our reader to read in the request, split it into lines, then collect lines into a vector until we reach an empty line. 
- -As we've seen before, calling `collect` requires us to annotate the type of `http_request` so `collect` will know what kind of collection to return. +Our `handle_connection` function creates a new buffered reader to read the incoming bytes from the stream. We use our reader to read in the request, split it into lines, then collect lines into a vector until we reach an empty line. As we've seen before, calling `collect` requires us to annotate the type of `http_request` so `collect` will know what kind of collection to return. Once we have our request, we call into `send_response` to generate an appropriate response back to the client. diff --git a/docs/ch20/ch20-02-multi-threaded-web-server.md b/docs/ch20/ch20-02-multi-threaded-web-server.md index 4ffb3d7..d86d362 100644 --- a/docs/ch20/ch20-02-multi-threaded-web-server.md +++ b/docs/ch20/ch20-02-multi-threaded-web-server.md @@ -35,9 +35,9 @@ We've switched from an `if` to a `match`, and added a "/sleep" route. We have to The important thing here is, if you open up your browser and try to load [http://localhost:7878/sleep](http://localhost:7878/sleep), it'll take about five seconds for the page to load. If you tap CTRL-R to reload the page twice in quick succession, it will take about 10 seconds! Your browser sent two requests, and is waiting for the second one to finish. -## Spawning New Threads +## Improving Throughput with a Thread Pool -We could solve this problem by just creating a new thread for each incoming connection: +We _could_ solve this problem by just creating a new thread for each incoming connection: ```rust for stream in listener.incoming() { @@ -49,8 +49,6 @@ for stream in listener.incoming() { } ``` -## Improving Throughput with a Thread Pool - Starting up an OS level thread has some costs associated with it, and if we start up too many of them we may run out of system resources, so a common pattern for a situation like this is to use a _thread pool_. 
We pre-allocate a number of threads that will be sitting idle, and then whenever a request comes in we hand it off to an idle worker from the pool. ```rust @@ -64,13 +62,11 @@ for stream in listener.incoming() { } ``` -That's all there is too it! Except... wait... Rust can't find the `ThreadPool` symbol. We'll have to bring it into scope to use it, but before that we'll have to build a ThreadPool! +That's all there is to it! Except Rust can't find the `ThreadPool` symbol. We'll have to bring it into scope to use it, but before that we'll have to build a ThreadPool! ## Building a ThreadPool -Before we show the code for a ThreadPool, let's take a moment to think through what it's going to look like. -We want to store a collection of threads. We won't know the number of threads until runtime so a vector is a reasonable choice here, but what exactly is being stored in the vector? How do you store a thread? If we have a look at the signature for `thread::spawn`: +Before we show the code for a ThreadPool, let's take a moment to think through what it's going to look like. We want to store a collection of threads. We won't know the number of threads until runtime so a vector is a reasonable choice here, but what exactly is being stored in the vector? How do you store a thread? If we have a look at the signature for `thread::spawn`: ```rust pub fn spawn(f: F) -> JoinHandle @@ -97,7 +93,7 @@ impl Worker { /// Create a new Worker with the given id. pub fn new(id: usize) -> Worker { let thread = thread::spawn(|| { - // TODO: ??? + todo!("Zhu Li, do the thing!"); }); Worker { id, thread } @@ -105,13 +101,13 @@ impl Worker { } ``` 
It will also need to be `Send` so we can transfer it to our worker thread, and `'static` because we don't know how long the thread will take to run. So we'll define a `Job` as: +We're going to execute jobs on these threads, but what's a job? We already know they are closures. Since we want our API to be similar to `thread::spawn`, a job is going to be the same type as `F` in `thread::spawn` above. It'll be `FnOnce()` since it's a function we want to call exactly once. It will also need to be `Send` so we can transfer it to our worker thread, and `'static` because we don't know how long the thread will take to run. So we'll define `Job` as an alias for: ```rust type Job = Box; ``` -Whenever we call `pool.execute` and pass in a job, we want that job to be run by a free thread from the pool. How does this happen? What happens inside the thread we spawn inside the Worker? We've conveniently left this out of our `Worker` above. There are many ways we could do this, but the approach we will use here is to send each job we want to execute to a worker over a channel. +Whenever we call `pool.execute` and pass in a job, we want that job to be run by a free thread from the pool. How does this happen? What happens inside the thread we spawn inside the Worker? We've conveniently left this out of our `Worker` above. There are many ways we could do this, but the approach we will use here is to send each job over a channel. Each `Worker` will hang on to the receiver side of a channel. The thread inside a `Worker` can just iterate on the channel and execute each job it receives in series. But you may recall that the channels we've been using are from the `mpsc` library, which stands for "multiple producers, single consumer". If we're creating four threads, we could create four channels and give one receiver from each to each worker. In this case, though, we'd have to decide which sender to send a new job to. How do we know which threads are free to accept new jobs? 
@@ -153,6 +149,7 @@ impl ThreadPool { // all of our threads. let mut workers = Vec::with_capacity(size); + // Create new workers and add them to the pool. for id in 0..size { workers.push(Worker::new(id, Arc::clone(&receiver))); } @@ -220,10 +217,10 @@ If you give this a try, it will appear to work, but our "double-reload" example }); ``` -We'll end up with one of our threads doing all the work. +One thread will take the mutex and then loop with it held, so one of our threads will end up doing all the work. There are also a few things wrong with this code as it stands. First, we're obviously glossing over some error handling, which is fine for this example. Second, if you reload the "/sleep" route many times, you'll find eventually it will start taking a long time to load. What's happening here is that we're queueing up jobs in the channel. -Ideally if all the workers are busy, we'd return a 503 to let the client know we are too busy to handle the request. We could do this in a few ways; we could use the `atomic` package to increment a counter when we start a job and decrement it when we finish one, so we know how many jobs are in progress. There's also a `channel::sync_channel` which allows creating a channel with a bounded size. The sender in this case has a `try_send` which will return an error if the channel is full. +Ideally if all the workers are busy, we'd return a 503 to let the client know we are too busy to handle the request. We could do this in a few ways; we could use the `atomic` package to increment a counter when we start a job and decrement it when we finish one, so we know how many jobs are in progress. There's also a `channel::sync_channel` which allows creating a channel with a bounded size. The sender in this case has a `try_send` which will return an error if the channel is full. This is left as an exercise for the reader. Next we'll look at how to adapt our web server to [shut down gracefully](ch20-03-graceful-shutdown.md). 
diff --git a/docs/ch20/ch20-03-graceful-shutdown.md b/docs/ch20/ch20-03-graceful-shutdown.md index 09dbde3..0e2dc5d 100644 --- a/docs/ch20/ch20-03-graceful-shutdown.md +++ b/docs/ch20/ch20-03-graceful-shutdown.md @@ -10,7 +10,7 @@ If you're looking for the full source for this project, it's [in the GitHub repo One problem we're going to run into is that, in order to call `thread.join()`, we're going to have to move the `thread` out of the `Worker`. We can't move _part_ of a struct, so we're going to have to use the same trick we did in [chapter 17][chap17] and store the thread in an `Option` so we can set it to `None`. -Calling `join` isn't enough though. This will wait until each thread quits, but right now the closure in each thread is an infinite loop! We need to somehow signal to the `Worker`'s thread that it should stop accepting new jobs. We can do this by dropping the `sender` half of the channel. This will cause the receiver to wake up and return an error. We'll have to pull the same trick we did with `thread` and store the `sender` in an `Option` to make this work. We'll also want to handle the error from `recv` correctly instead of just panicking. +Calling `join` isn't enough though. This will wait until each thread quits, but right now the closure in each thread is an infinite loop! We need to somehow signal to the `Worker`'s thread that it should stop accepting new jobs. We can do this by dropping the `sender` half of the channel. This will cause the receiver to wake up and return an error. We'll have to pull the same trick we did with `thread` and store the `sender` in an `Option` to make this work, otherwise there's no way for us to drop the sender. We'll also want to handle the error from `recv` correctly instead of just panicking. Here's the updated library: @@ -124,7 +124,7 @@ Now we just need some way to make the server shut down. A simple way to do this // --snip-- ``` -Now our server will shut down after two requests. 
+Now our server will shut down after two requests. Not exactly something we'd want to do in production, but it will prove our shutdown code is working here. ## Next Steps @@ -138,4 +138,7 @@ The original Rust book has some suggestions about places you could take this pro Another fun one might be to try to hook the SIGINT and SIGTERM signals so a CTRL-C will cause the server to shut down gracefully. +This is as far as the original Rust book went, but you can continue on to our [special bonus chapter][chap21] to find out how we can rewrite this web server using async Rust! + [chap17]: ../ch17-object-oriented-features.md "Chapter 17: Object Oriented Features of Rust" +[chap21]: ../ch21-async.md "Chapter 21: Bonus Chapter: Async Programming" diff --git a/docs/ch21-async.md b/docs/ch21-async.md index cae7e79..12df587 100644 --- a/docs/ch21-async.md +++ b/docs/ch21-async.md @@ -1,12 +1,10 @@ # 21 - Async Programming -In this section we're going to re-implement our web server from [chapter 20][chap20] using async functions. We're just going to give you enough here to get your feet wet. For further reading, check out [Asynchronous Programming in Rust](https://rust-lang.github.io/async-book/), and the [Tokio Tutorial](https://tokio.rs/tokio/tutorial). - -If you're looking for the full source for this project, it's [in the GitHub repo](https://github.com/jwalton/rust-book-abridged/tree/master/examples/ch21-async-web-server). +In this section we're going to re-implement our web server from [chapter 20][chap20] using async functions. We're just going to give you enough here to get your feet wet. For further reading, check out [Asynchronous Programming in Rust](https://rust-lang.github.io/async-book/), and the [Tokio Tutorial](https://tokio.rs/tokio/tutorial). As usual, if you're looking for the full source for this project, it's [in the GitHub repo](https://github.com/jwalton/rust-book-abridged/tree/master/examples/ch21-async-web-server). ## JavaScript -Wait... 
Isn't this supposed to be a book about Rust? It is, but we're going to start this chapter off talking about JavaScript. Love it or hate it, it's the most popular language in the world, and it is probably where most people were first exposed to the idea of async programming. +Wait... Isn't this supposed to be a book about Rust? It is, but we're going to start this chapter off talking about JavaScript. Love it or hate it, JavaScript is the most popular language in the world, and it is probably where most people were first exposed to the idea of async programming. ```js title="user.js" // JavaScript Code @@ -18,7 +16,7 @@ async function getUserName() { } ``` -Even if you don't know JavaScript, hopefully this example is simple enough that you can follow along. We're calling `fs.readFile` to read in a file, which is going to return a `Promise`. A `Promise` in JavaScript is the result of some calculation we don't know yet (similar to a `Future` in Java, or as we'll see in a moment a `Future` in Rust). The magic in this function happens at the `await` keyword. When we `await` a promise, the current function stops executing, allowing other functions to run. At some future point in time when the promise resolves, this function will continue from where it left off. +Even if you don't know JavaScript, hopefully this example is simple enough that you can follow along. We're calling `fs.readFile` to read in a file. In JavaScript this is going to return a `Promise`. A `Promise` in JavaScript is the result of some calculation we don't know yet (similar to a `Future` in Java, or as we'll see in a moment a `Future` in Rust). The magic in this function happens at the `await` keyword. When we `await` a promise, the current function stops executing, allowing other functions to run. At some future point in time when the promise resolves, this function will continue from where it left off. 
In JavaScript, the above is actually more or less syntactic sugar for: @@ -50,15 +48,15 @@ async fn get_user_name() -> Result<(), Box> { This is very similar to the JavaScript example in many ways. Here `fs::read_to_string` returns a type that implements the `Future` trait (specifically `Future>`). When we call `await` on the future, execution of this function is suspended, and at some future point someone will resume execution and the result of the `await` will be a `Result`. The `?` operator turns the `Result` into a `String`. -The important things to know here are that you can only use `await` inside a function that's declared `async`, and `await` will temporarily suspend execution of this function. +The important things to know here are that - in JavaScript or in Rust - you can only use `await` inside a function that's declared `async`, and `await` will temporarily suspend execution of this function. ## The Runtime -In our JavaScript example, we glossed over one important detail. Who calls into the closure we're passing to `then`? In JavaScript, everything runs in an event loop which is part of the JavaScript runtime. When the promise eventually resolves, it will queue a task and the event loop will pick it up and call into the closure. In our Rust example, we have the same problem; who takes care of restarting `get_user_name` when the `Future` from `fs::read_to_string` completes? Here again, it's the runtime. +In our JavaScript example, we glossed over one important detail. Someone calls into the closure we're passing to `then`, but who is this mysterious someone? In JavaScript, everything runs in an event loop which is part of the JavaScript runtime. When the promise eventually resolves, it will queue a task and the event loop will pick it up and call into the closure. In our Rust example, we have the same problem; who takes care of restarting `get_user_name` when the `Future` from `fs::read_to_string` completes? Here again, it's the runtime. -But... 
Rust doesn't have a runtime. In Rust, the only code that runs in your application is code you write or code you bring in from a crate, so you need to either write your own runtime or pull one in from a crate! The most popular at the moment is [Tokio](https://tokio.rs/), but there are other options. Also, unlike in JavaScript where everything is single threaded on the event loop, in Rust our async runtime could be implemented on a single thread or could be multithreaded (Tokio supports both). +Except of course that Rust doesn't have a runtime. In Rust, the only code that runs in your application is code you write or code you bring in from a crate, so you need to either write your own runtime or pull one in from a crate! The most popular at the moment is [Tokio](https://tokio.rs/), but there are other options. Also, unlike in JavaScript where everything is single threaded on the event loop, in Rust our async runtime could be implemented on a single thread or could be multithreaded (Tokio supports both). -Tokio provides us with a lot more than just a runtime. If you look at our Rust example above, you'll notice we're calling `tokio::fs::read_to_string` instead of `std::io::read_to_string`. The standard library version does the same thing, but it doesn't return a `Future`, it blocks until the file is read. If we were to use `std::io::read_to_string` here, it would block this thread for a while, potentially stopping other async code from running. Tokio provides async versions of many standard library functions in this way, and because of this, refactoring non-async code to async can be quite a lot of work. +Tokio provides us with a lot more than just a runtime. If you look at our Rust example above, you'll notice we're calling `tokio::fs::read_to_string` instead of `std::io::read_to_string`. The standard library version does the same thing, but it doesn't return a `Future`, it blocks until the file is read. 
If we were to use `std::io::read_to_string` here, it would block this thread for a while, potentially stopping other async code from running. Tokio provides async versions of many standard library functions in this way, and because of this, refactoring non-async code to async is usually not trivial. ## An `async` Web Server @@ -81,7 +79,7 @@ edition = "2021" tokio = { version = "1", features = ["full"] } ``` -Notice the `features = ["full"]` here. Features allow us to conditionally compile only the parts of Tokio we need. Tokio provides duplicates of most of the standard library, and if you don't need parts of it you can remove them here to make your binary smaller. Here's the code: +Notice the `features = ["full"]`. Features allow us to conditionally compile only the parts of Tokio we need. Tokio provides duplicates of most of the standard library, and if you don't need parts of it you can remove them here to make your binary smaller. Here's the code: ```rust title="src/main.rs" use std::{error::Error, time::Duration}; @@ -134,9 +132,9 @@ async fn send_response(mut stream: TcpStream, code: u16, reason: &str, filename: If you want to run this, you'll need the `hello.html` and `404.html` files from [chapter 20][chap20]. -This looks very similar to our previous single and multithreaded web servers. We have to `use tokio::io::AsyncBufReadExt` to be able to call `buf_reader.lines` in `handle_connection`, because in Tokio `lines` is defined on the `AsyncBufReadExt` trait, and similar for `tokio::io::AsyncWriteExt` and `stream.write_all` in `send_response`. We've also replaced some `for` loops as Rust doesn't (yet) support async for loops. +This looks very similar to our previous single and multithreaded web servers. 
We have to `use tokio::io::AsyncBufReadExt` to be able to call `buf_reader.lines` in `handle_connection`, because in Tokio `lines` is defined on the `AsyncBufReadExt` trait, and similar for `tokio::io::AsyncWriteExt` and `stream.write_all` in `send_response`. We've also replaced some `for` loops as Rust doesn't (yet) support async for loops. (We also simplified the code for parsing the request, since we weren't actually using any of the headers in our previous examples so we don't bother reading them here.) -This is also very similar to our single threaded version because if you try reloading the "/sleep" route a few times, you'll see that this is only handling a single request at once. The problem is that in our main loop, we're `await`ing `handle_connection`: +This is also very similar to our single threaded version because if you try reloading the "/sleep" route a few times, you'll see that this is only handling a single request at once. Isn't async supposed to fix that for us? The problem is that in our main loop, we're `await`ing `handle_connection`: ```rust loop { @@ -161,7 +159,7 @@ If you're interested in the internals of `async` and `Future`s in Rust, this is ::: -In order to fix this problem, we have to create this future, then let Tokio know we'd like it to be polled. Tokio's answer to this is something called a `Task`. We can spawn a task with `tokio::spawn`: +In order to fix this problem, we have to create this future, then let Tokio know we'd like it to be polled. Tokio's answer to this is something called a `Task`. We can spawn a task with `tokio::spawn`: ```rust loop { @@ -172,7 +170,7 @@ In order to fix this problem, we have to create this future, then let Tokio know } ``` -You might have expected `spawn` to take a closure, but it actually takes a future! Here we're using an `async` block to create a future, and the `move` keyword to move ownership of the `stream` into that block. 
We could also have rewritten this as: +You might have expected `spawn` to take a closure, but it actually takes a future! Here we're using an `async` block to create a future, and the `move` keyword to move ownership of the `stream` into that block. We could also have rewritten this as: ```rust loop { @@ -182,10 +180,10 @@ You might have expected `spawn` to take a closure, but it actually takes a futur } ``` -But the async block is more idiomatic. `spawn` returns a `tokio::task::JoinHandle` similar to the `JoinHandle` we get when you spawn a thread. You can `await` on this handle to wait for the underlying Future to complete. +But the async block is more idiomatic. `spawn` returns a `tokio::task::JoinHandle` similar to the `JoinHandle` we get when you spawn a thread. You can `await` on this handle to wait for the underlying Future to complete. -Tasks are a form of "green thread". Spawning a task is very lightweight, involving only a single allocation and 64 bytes of memory, so you can easily spawn thousands or millions of tasks (which would be ill-advised if we were talking about OS threads). +Tasks are a form of "green thread". Spawning a task is very lightweight, involving only a single allocation and 64 bytes of memory, so you can easily spawn thousands or millions of tasks (which would be ill-advised if we were talking about OS threads). -If you've read this far, you've made it to the end of the book. If you enjoyed it, please [give us a star on GitHub](https://github.com/jwalton/rust-book-abridged), or [buy me a coffee](https://github.com/sponsors/jwalton). +If you've read this far, you've made it to the end of the book. If you enjoyed it, please [star the book on GitHub](https://github.com/jwalton/rust-book-abridged), or [buy me a coffee](https://github.com/sponsors/jwalton). Happy Rusting! 
[chap20]: ./ch20/ch20-01-single-threaded-web-server.md "Chapter 20: Multithreaded Web Server" diff --git a/docs/images/conslist.svg b/docs/images/conslist.svg new file mode 100755 index 0000000..d66709e --- /dev/null +++ b/docs/images/conslist.svg @@ -0,0 +1,271 @@ + + + + + + + + + + + + + + + + List::Cons + 10 + + + + + + List::Nil + + List::Cons + 5 + + + + + List::Cons + 3 + + + + + List::Cons + 4 + + + + + + + diff --git a/examples/ch20-graceful-shutdown/src/lib.rs b/examples/ch20-graceful-shutdown/src/lib.rs index 0671211..85cbc4f 100644 --- a/examples/ch20-graceful-shutdown/src/lib.rs +++ b/examples/ch20-graceful-shutdown/src/lib.rs @@ -31,6 +31,7 @@ impl ThreadPool { // all of our threads. let mut workers = Vec::with_capacity(size); + // Create new workers and add them to the pool. for id in 0..size { workers.push(Worker::new(id, Arc::clone(&receiver))); } diff --git a/examples/ch20-graceful-shutdown/src/main.rs b/examples/ch20-graceful-shutdown/src/main.rs index 8bde08f..977796a 100644 --- a/examples/ch20-graceful-shutdown/src/main.rs +++ b/examples/ch20-graceful-shutdown/src/main.rs @@ -33,7 +33,7 @@ fn handle_connection(mut stream: TcpStream) { let http_request: Vec<_> = buf_reader .lines() .map(|result| result.unwrap()) - .take_while(|line| !line.is_empty()) // Blank line is end of request. + .take_while(|line| !line.is_empty()) // Blank line is end of headers. .collect(); let request_line = &http_request[0]; diff --git a/examples/ch20-multi-threaded-web-server/src/lib.rs b/examples/ch20-multi-threaded-web-server/src/lib.rs index cf08ef5..6531511 100644 --- a/examples/ch20-multi-threaded-web-server/src/lib.rs +++ b/examples/ch20-multi-threaded-web-server/src/lib.rs @@ -31,6 +31,7 @@ impl ThreadPool { // all of our threads. let mut workers = Vec::with_capacity(size); + // Create new workers and add them to the pool. 
for id in 0..size { workers.push(Worker::new(id, Arc::clone(&receiver))); } diff --git a/examples/ch20-multi-threaded-web-server/src/main.rs b/examples/ch20-multi-threaded-web-server/src/main.rs index ffbc0c8..1dd1a78 100644 --- a/examples/ch20-multi-threaded-web-server/src/main.rs +++ b/examples/ch20-multi-threaded-web-server/src/main.rs @@ -33,7 +33,7 @@ fn handle_connection(mut stream: TcpStream) { let http_request: Vec<_> = buf_reader .lines() .map(|result| result.unwrap()) - .take_while(|line| !line.is_empty()) // Blank line is end of request. + .take_while(|line| !line.is_empty()) // Blank line is end of headers. .collect(); let request_line = &http_request[0]; diff --git a/examples/ch20-single-threaded-web-server/src/main.rs b/examples/ch20-single-threaded-web-server/src/main.rs index b38bced..1300a58 100644 --- a/examples/ch20-single-threaded-web-server/src/main.rs +++ b/examples/ch20-single-threaded-web-server/src/main.rs @@ -27,7 +27,7 @@ fn handle_connection(mut stream: TcpStream) { let http_request: Vec<_> = buf_reader .lines() .map(|result| result.unwrap()) - .take_while(|line| !line.is_empty()) // Blank line is end of request. + .take_while(|line| !line.is_empty()) // Blank line is end of headers. .collect(); let request_line = &http_request[0];