From 73f860f7e8956a38de9c44097369eb1b781f133a Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 22 Sep 2025 09:37:41 +0900 Subject: [PATCH 01/16] adding ci files --- .github/workflows/run-blktests.yml | 175 +++++++++++++++ CODE_OF_CONDUCT.md | 134 ++++++++++++ LICENSE | 339 +++++++++++++++++++++++++++++ LICENSES/GPL-2.0-or-later.txt | 339 +++++++++++++++++++++++++++++ README.md | 11 + 5 files changed, 998 insertions(+) create mode 100644 .github/workflows/run-blktests.yml create mode 100644 CODE_OF_CONDUCT.md create mode 100644 LICENSE create mode 100644 LICENSES/GPL-2.0-or-later.txt create mode 100644 README.md diff --git a/.github/workflows/run-blktests.yml b/.github/workflows/run-blktests.yml new file mode 100644 index 0000000000000..002416262f14a --- /dev/null +++ b/.github/workflows/run-blktests.yml @@ -0,0 +1,175 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Copyright (c) 2025 Western Digital Corporation or its affiliates. +# +# Authors: Dennis Maisenbacher (dennis.maisenbacher@wdc.com) + +name: Run blktests + +on: + pull_request: + +concurrency: + group: ci-test-${{ github.ref_name }} + +env: + KERNEL_REF: "${{ github.event.pull_request.head.sha }}" + KERNEL_TREE: "https://github.com/${{ github.repository }}" + +#This workflow requires an actions-runner-controllers (ARC) to be active. +#The k8s cluster of this ARC needs KubeVirt to be installed. +jobs: + build-and-test-kernel: + #This step runs in a container in the k8s cluster + runs-on: arc-vm-linux-blktests + steps: + - name: Checkout blktests-ci + uses: actions/checkout@v4 + with: + repository: linux-blktests/blktests-ci + path: blktests-ci + + - name: Build kernel and package it into a containerimage + run: | + cd blktests-ci/playbooks/roles/kernel-builder-k8s-job/templates + docker build \ + --build-arg KERNEL_TREE=${KERNEL_TREE} \ + --build-arg KERNEL_REF=${KERNEL_REF} \ + -t linux-kernel-containerdisk \ + -f Dockerfile.linux-kernel-containerdisk . 2>&1 | tee build.log + #Setting KERNEL_VERSION var which is latern needed for notifying the VM what kernel to pick up + cat build.log | grep KERNEL_VERSION | awk '{print $3}' | grep KERNEL_VERSION >> $GITHUB_ENV + + - name: Push the new Fedora containerimage with the freshly build kernel + run: | + docker tag linux-kernel-containerdisk registry-service.docker-registry.svc.cluster.local/linux-kernel-containerdisk:${KERNEL_VERSION} + docker push registry-service.docker-registry.svc.cluster.local/linux-kernel-containerdisk:${KERNEL_VERSION} + + - name: Run in VM + uses: ./blktests-ci/.github/actions/kubevirt-action + with: + kernel_version: ${{ env.KERNEL_VERSION }} + vm_artifact_upload_dir: blktests/results + run_cmds: | + #Print VM debug info + uname -a + cat /etc/os-release + lsblk + + #Install build dependencies for blktests + sudo dnf install -y gcc \ + clang \ + make \ + util-linux \ + llvm \ + gawk \ + fio \ + udev \ + kmod \ + coreutils \ + gcc \ + gzip \ + e2fsprogs \ + xfsprogs \ + f2fs-tools \ + btrfs-progs \ + device-mapper-multipath \ + blktrace \ + kernel-headers \ + liburing \ + liburing-devel \ + nbd \ + device-mapper \ + ktls-utils \ + dosfstools \ + bc \ + libnl3-cli \ + cryptsetup \ + sg3_utils \ + pciutils \ + unzip \ + jq \ + nvme-cli \ + git \ + wget \ + pkgconf \ + libudev-devel + + git clone https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git + cd mdadm + git checkout d764c4829947923142a83251296d04edaee7d2f7 + make -j$(nproc) + sudo make install + + cd - + git clone https://github.com/linux-blktests/blktests.git + + cd blktests + git checkout ci + make + + #ATTENTION! This section formats all available NVMe devices. Be careful when changing the `runs-on` tag! + #This step runs in a VM with the previously compiled kernel in the k8s cluster + + failed=0 + rm -f results/all_failures + + # Run blktests block group + cat > config << EOF + TEST_DEVS=(${BDEV0}) + EOF + export RUN_ZONED_TESTS=1 + sudo ./check block || failed=1 + test -f results/failrues && cat results/failures >> results/all_failures + + # Run blktests nvme group + cat > config << EOF + TEST_DEVS=(${BDEV0}) + NVMET_TRTYPES="loop rdma tcp" + EOF + sudo ./check nvme || failed=1 + test -f results/failrues && cat results/failures >> results/all_failures + + # Run blktests scsi group + cat > config << EOF + TEST_DEVS=() + export RUN_ZONED_TESTS=1 + EOF + sudo ./check scsi || failed=1 + test -f results/failrues && cat results/failures >> results/all_failures + + # Run blktests misc groups + cat > config << EOF + TEST_DEVS=() + EOF + sudo ./check dm loop md nbd throtl ublk || failed=1 + test -f results/failrues && cat results/failures >> results/all_failures + + # Run ZBD blktests without TEST_DEVS + cat > config << EOF + TEST_DEVS=() + EOF + export RUN_ZONED_TESTS=1 + sudo ./check zbd || failed=1 + test -f results/failrues && cat results/failures >> results/all_failures + + # Run ZBD blktests with TEST_DEVS + cat > config << EOF + TEST_DEVS=(${ZBD0}) + DEVICE_ONLY=1 + EOF + + export RUN_ZONED_TESTS=1 + sudo ./check zbd || failed=1 + test -f results/failrues && cat results/failures >> results/all_failures + + # Run blktests srp group + cat > config << EOF + TEST_DEVS=() + EOF + sudo ./check srp || failed=1 + test -f results/failrues && cat results/failures >> results/all_failures + + test -f results/all_failures && echo "All Failures:" && cat results/all_failures + + exit $failed diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000..4edb70c22d479 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,134 @@ + +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +opensource@wdc.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000..d159169d10508 --- /dev/null +++ b/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/LICENSES/GPL-2.0-or-later.txt b/LICENSES/GPL-2.0-or-later.txt new file mode 100644 index 0000000000000..d159169d10508 --- /dev/null +++ b/LICENSES/GPL-2.0-or-later.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/README.md b/README.md new file mode 100644 index 0000000000000..339ce7da25ed4 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ + + +# blktests-kpd-ci +This is a collection of GitHub actions workflow scripts for Continuous +Integration (CI) of Linux kernel using blktests. The scripts are intended to be +triggered by [Kernel Patches Daemon](https://github.com/kernel-patches/kernel-patches-daemon). +To use the scripts, specify this repository and the main branch as "ci_repo" and +"ci_branch" parameters respectively in the Kernel Patches Daemon config file. From 525dbeae1aaf38c1ea47434d6ac546e4cefbdf66 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:11 -0700 Subject: [PATCH 02/16] iomap: move bio read logic into helper function Move the iomap_readpage_iter() bio read logic into a separate helper function, iomap_bio_read_folio_range(). This is needed to make iomap read/readahead more generically usable, especially for filesystems that do not require CONFIG_BLOCK. Additionally rename buffered write's iomap_read_folio_range() function to iomap_bio_read_folio_range_sync() to better describe its synchronous behavior. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 68 ++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index fd827398afd2f..05399aaa13619 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -357,36 +357,15 @@ struct iomap_readpage_ctx { struct readahead_control *rac; }; -static int iomap_readpage_iter(struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx) +static void iomap_bio_read_folio_range(const struct iomap_iter *iter, + struct iomap_readpage_ctx *ctx, loff_t pos, size_t plen) { + struct folio *folio = ctx->cur_folio; const struct iomap *iomap = &iter->iomap; - loff_t pos = iter->pos; + struct iomap_folio_state *ifs = folio->private; + size_t poff = offset_in_folio(folio, pos); loff_t length = iomap_length(iter); - struct folio *folio = ctx->cur_folio; - struct iomap_folio_state *ifs; - size_t poff, plen; sector_t sector; - int ret; - - if (iomap->type == IOMAP_INLINE) { - ret = iomap_read_inline_data(iter, folio); - if (ret) - return ret; - return iomap_iter_advance(iter, &length); - } - - /* zero post-eof blocks as the page may be mapped */ - ifs = ifs_alloc(iter->inode, folio, iter->flags); - iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); - if (plen == 0) - goto done; - - if (iomap_block_needs_zeroing(iter, pos)) { - folio_zero_range(folio, poff, plen); - iomap_set_range_uptodate(folio, poff, plen); - goto done; - } ctx->cur_folio_in_bio = true; if (ifs) { @@ -425,6 +404,37 @@ static int iomap_readpage_iter(struct iomap_iter *iter, ctx->bio->bi_end_io = iomap_read_end_io; bio_add_folio_nofail(ctx->bio, folio, plen, poff); } +} + +static int iomap_readpage_iter(struct iomap_iter *iter, + struct iomap_readpage_ctx *ctx) +{ + const struct iomap *iomap = &iter->iomap; + loff_t pos = iter->pos; + loff_t length = iomap_length(iter); + struct folio *folio = ctx->cur_folio; + size_t poff, plen; + int ret; + + if (iomap->type == IOMAP_INLINE) { + ret = iomap_read_inline_data(iter, folio); + if (ret) + return ret; + return iomap_iter_advance(iter, &length); + } + + /* zero post-eof blocks as the page may be mapped */ + ifs_alloc(iter->inode, folio, iter->flags); + iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); + if (plen == 0) + goto done; + + if (iomap_block_needs_zeroing(iter, pos)) { + folio_zero_range(folio, poff, plen); + iomap_set_range_uptodate(folio, poff, plen); + } else { + iomap_bio_read_folio_range(iter, ctx, pos, plen); + } done: /* @@ -549,7 +559,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) } EXPORT_SYMBOL_GPL(iomap_readahead); -static int iomap_read_folio_range(const struct iomap_iter *iter, +static int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t len) { const struct iomap *srcmap = iomap_iter_srcmap(iter); @@ -562,7 +572,7 @@ static int iomap_read_folio_range(const struct iomap_iter *iter, return submit_bio_wait(&bio); } #else -static int iomap_read_folio_range(const struct iomap_iter *iter, +static int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t len) { WARN_ON_ONCE(1); @@ -739,7 +749,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, status = write_ops->read_folio_range(iter, folio, block_start, plen); else - status = iomap_read_folio_range(iter, + status = iomap_bio_read_folio_range_sync(iter, folio, block_start, plen); if (status) return status; From b58f44a095584c598db4ad5b31e1dd7a1cd45e7b Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:12 -0700 Subject: [PATCH 03/16] iomap: move read/readahead bio submission logic into helper function Move the read/readahead bio submission logic into a separate helper. This is needed to make iomap read/readahead more generically usable, especially for filesystems that do not require CONFIG_BLOCK. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 05399aaa13619..ee96558b6d996 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -357,6 +357,14 @@ struct iomap_readpage_ctx { struct readahead_control *rac; }; +static void iomap_bio_submit_read(struct iomap_readpage_ctx *ctx) +{ + struct bio *bio = ctx->bio; + + if (bio) + submit_bio(bio); +} + static void iomap_bio_read_folio_range(const struct iomap_iter *iter, struct iomap_readpage_ctx *ctx, loff_t pos, size_t plen) { @@ -382,8 +390,7 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, gfp_t orig_gfp = gfp; unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); - if (ctx->bio) - submit_bio(ctx->bio); + iomap_bio_submit_read(ctx); if (ctx->rac) /* same as readahead_gfp_mask */ gfp |= __GFP_NORETRY | __GFP_NOWARN; @@ -478,13 +485,10 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_read_folio_iter(&iter, &ctx); - if (ctx.bio) { - submit_bio(ctx.bio); - WARN_ON_ONCE(!ctx.cur_folio_in_bio); - } else { - WARN_ON_ONCE(ctx.cur_folio_in_bio); + iomap_bio_submit_read(&ctx); + + if (!ctx.cur_folio_in_bio) folio_unlock(folio); - } /* * Just like mpage_readahead and block_read_full_folio, we always @@ -550,12 +554,10 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) while (iomap_iter(&iter, ops) > 0) iter.status = iomap_readahead_iter(&iter, &ctx); - if (ctx.bio) - submit_bio(ctx.bio); - if (ctx.cur_folio) { - if (!ctx.cur_folio_in_bio) - folio_unlock(ctx.cur_folio); - } + iomap_bio_submit_read(&ctx); + + if (ctx.cur_folio && !ctx.cur_folio_in_bio) + folio_unlock(ctx.cur_folio); } EXPORT_SYMBOL_GPL(iomap_readahead); From 61b546a44051c9f1347373fe8829e45ea665c950 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:13 -0700 Subject: [PATCH 04/16] iomap: store read/readahead bio generically Store the iomap_readpage_ctx bio generically as a "void *read_ctx". This makes the read/readahead interface more generic, which allows it to be used by filesystems that may not be block-based and may not have CONFIG_BLOCK set. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ee96558b6d996..2a1709e0757bd 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -353,13 +353,13 @@ static void iomap_read_end_io(struct bio *bio) struct iomap_readpage_ctx { struct folio *cur_folio; bool cur_folio_in_bio; - struct bio *bio; + void *read_ctx; struct readahead_control *rac; }; static void iomap_bio_submit_read(struct iomap_readpage_ctx *ctx) { - struct bio *bio = ctx->bio; + struct bio *bio = ctx->read_ctx; if (bio) submit_bio(bio); @@ -374,6 +374,7 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, size_t poff = offset_in_folio(folio, pos); loff_t length = iomap_length(iter); sector_t sector; + struct bio *bio = ctx->read_ctx; ctx->cur_folio_in_bio = true; if (ifs) { @@ -383,9 +384,8 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, } sector = iomap_sector(iomap, pos); - if (!ctx->bio || - bio_end_sector(ctx->bio) != sector || - !bio_add_folio(ctx->bio, folio, plen, poff)) { + if (!bio || bio_end_sector(bio) != sector || + !bio_add_folio(bio, folio, plen, poff)) { gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); gfp_t orig_gfp = gfp; unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); @@ -394,22 +394,21 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, if (ctx->rac) /* same as readahead_gfp_mask */ gfp |= __GFP_NORETRY | __GFP_NOWARN; - ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), - REQ_OP_READ, gfp); + bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ, + gfp); /* * If the bio_alloc fails, try it again for a single page to * avoid having to deal with partial page reads. This emulates * what do_mpage_read_folio does. */ - if (!ctx->bio) { - ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, - orig_gfp); - } + if (!bio) + bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp); if (ctx->rac) - ctx->bio->bi_opf |= REQ_RAHEAD; - ctx->bio->bi_iter.bi_sector = sector; - ctx->bio->bi_end_io = iomap_read_end_io; - bio_add_folio_nofail(ctx->bio, folio, plen, poff); + bio->bi_opf |= REQ_RAHEAD; + bio->bi_iter.bi_sector = sector; + bio->bi_end_io = iomap_read_end_io; + bio_add_folio_nofail(bio, folio, plen, poff); + ctx->read_ctx = bio; } } From ef3eb89b549883cec4d08e7829a98c800df28d3f Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:14 -0700 Subject: [PATCH 05/16] iomap: iterate over entire folio in iomap_readpage_iter() Iterate over all non-uptodate ranges in a single call to iomap_readpage_iter() instead of leaving the partial folio iteration to the caller. This will be useful for supporting caller-provided async folio read callbacks (added in later commit) because that will require tracking when the first and last async read request for a folio is sent, in order to prevent premature read completion of the folio. This additionally makes the iomap_readahead_iter() logic a bit simpler. Signed-off-by: Joanne Koong Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 69 ++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 2a1709e0757bd..0c4ba2a634909 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -420,6 +420,7 @@ static int iomap_readpage_iter(struct iomap_iter *iter, loff_t length = iomap_length(iter); struct folio *folio = ctx->cur_folio; size_t poff, plen; + loff_t count; int ret; if (iomap->type == IOMAP_INLINE) { @@ -431,39 +432,33 @@ static int iomap_readpage_iter(struct iomap_iter *iter, /* zero post-eof blocks as the page may be mapped */ ifs_alloc(iter->inode, folio, iter->flags); - iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); - if (plen == 0) - goto done; - if (iomap_block_needs_zeroing(iter, pos)) { - folio_zero_range(folio, poff, plen); - iomap_set_range_uptodate(folio, poff, plen); - } else { - iomap_bio_read_folio_range(iter, ctx, pos, plen); - } + length = min_t(loff_t, length, + folio_size(folio) - offset_in_folio(folio, pos)); + while (length) { + iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, + &plen); -done: - /* - * Move the caller beyond our range so that it keeps making progress. - * For that, we have to include any leading non-uptodate ranges, but - * we can skip trailing ones as they will be handled in the next - * iteration. - */ - length = pos - iter->pos + plen; - return iomap_iter_advance(iter, &length); -} + count = pos - iter->pos + plen; + if (WARN_ON_ONCE(count > length)) + return -EIO; -static int iomap_read_folio_iter(struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx) -{ - int ret; + if (plen == 0) + return iomap_iter_advance(iter, &count); - while (iomap_length(iter)) { - ret = iomap_readpage_iter(iter, ctx); + if (iomap_block_needs_zeroing(iter, pos)) { + folio_zero_range(folio, poff, plen); + iomap_set_range_uptodate(folio, poff, plen); + } else { + iomap_bio_read_folio_range(iter, ctx, pos, plen); + } + + length -= count; + ret = iomap_iter_advance(iter, &count); if (ret) return ret; + pos = iter->pos; } - return 0; } @@ -482,7 +477,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_read_folio_iter(&iter, &ctx); + iter.status = iomap_readpage_iter(&iter, &ctx); iomap_bio_submit_read(&ctx); @@ -504,16 +499,16 @@ static int iomap_readahead_iter(struct iomap_iter *iter, int ret; while (iomap_length(iter)) { - if (ctx->cur_folio && - offset_in_folio(ctx->cur_folio, iter->pos) == 0) { - if (!ctx->cur_folio_in_bio) - folio_unlock(ctx->cur_folio); - ctx->cur_folio = NULL; - } - if (!ctx->cur_folio) { - ctx->cur_folio = readahead_folio(ctx->rac); - ctx->cur_folio_in_bio = false; - } + if (ctx->cur_folio && !ctx->cur_folio_in_bio) + folio_unlock(ctx->cur_folio); + ctx->cur_folio = readahead_folio(ctx->rac); + /* + * We should never in practice hit this case since the iter + * length matches the readahead length. + */ + if (WARN_ON_ONCE(!ctx->cur_folio)) + return -EINVAL; + ctx->cur_folio_in_bio = false; ret = iomap_readpage_iter(iter, ctx); if (ret) return ret; From c954a29173eb7593a81d14859dbdb18c56463f2a Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:15 -0700 Subject: [PATCH 06/16] iomap: rename iomap_readpage_iter() to iomap_read_folio_iter() ->readpage was deprecated and reads are now on folios. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 0c4ba2a634909..d6266cb702e37 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -412,7 +412,7 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, } } -static int iomap_readpage_iter(struct iomap_iter *iter, +static int iomap_read_folio_iter(struct iomap_iter *iter, struct iomap_readpage_ctx *ctx) { const struct iomap *iomap = &iter->iomap; @@ -477,7 +477,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_readpage_iter(&iter, &ctx); + iter.status = iomap_read_folio_iter(&iter, &ctx); iomap_bio_submit_read(&ctx); @@ -509,7 +509,7 @@ static int iomap_readahead_iter(struct iomap_iter *iter, if (WARN_ON_ONCE(!ctx->cur_folio)) return -EINVAL; ctx->cur_folio_in_bio = false; - ret = iomap_readpage_iter(iter, ctx); + ret = iomap_read_folio_iter(iter, ctx); if (ret) return ret; } From 7af98cd6269ad223fb65cabe6fcb24a6ffc1b7f6 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:16 -0700 Subject: [PATCH 07/16] iomap: rename iomap_readpage_ctx struct to iomap_read_folio_ctx ->readpage was deprecated and reads are now on folios. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d6266cb702e37..6c5a631848b7f 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -350,14 +350,14 @@ static void iomap_read_end_io(struct bio *bio) bio_put(bio); } -struct iomap_readpage_ctx { +struct iomap_read_folio_ctx { struct folio *cur_folio; bool cur_folio_in_bio; void *read_ctx; struct readahead_control *rac; }; -static void iomap_bio_submit_read(struct iomap_readpage_ctx *ctx) +static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) { struct bio *bio = ctx->read_ctx; @@ -366,7 +366,7 @@ static void iomap_bio_submit_read(struct iomap_readpage_ctx *ctx) } static void iomap_bio_read_folio_range(const struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx, loff_t pos, size_t plen) + struct iomap_read_folio_ctx *ctx, loff_t pos, size_t plen) { struct folio *folio = ctx->cur_folio; const struct iomap *iomap = &iter->iomap; @@ -413,7 +413,7 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, } static int iomap_read_folio_iter(struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx) + struct iomap_read_folio_ctx *ctx) { const struct iomap *iomap = &iter->iomap; loff_t pos = iter->pos; @@ -469,7 +469,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) .pos = folio_pos(folio), .len = folio_size(folio), }; - struct iomap_readpage_ctx ctx = { + struct iomap_read_folio_ctx ctx = { .cur_folio = folio, }; int ret; @@ -494,7 +494,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) EXPORT_SYMBOL_GPL(iomap_read_folio); static int iomap_readahead_iter(struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx) + struct iomap_read_folio_ctx *ctx) { int ret; @@ -539,7 +539,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) .pos = readahead_pos(rac), .len = readahead_length(rac), }; - struct iomap_readpage_ctx ctx = { + struct iomap_read_folio_ctx ctx = { .rac = rac, }; From ef315e0c1dcc51f2fc6b24e3bdf504540cdd265a Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:17 -0700 Subject: [PATCH 08/16] iomap: track read/readahead folio ownership internally The purpose of "struct iomap_read_folio_ctx->cur_folio_in_bio" is to track folio ownership to know who is responsible for unlocking it. Rename "cur_folio_in_bio" to "cur_folio_owned" to better reflect this purpose and so that this can be generically used later on by filesystems that are not block-based. Since "struct iomap_read_folio_ctx" will be made a public interface later on when read/readahead takes in caller-provided callbacks, track the folio ownership state internally instead of exposing it in "struct iomap_read_folio_ctx" to make the interface simpler for end users. Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 6c5a631848b7f..587bbdbd24bcd 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -352,7 +352,6 @@ static void iomap_read_end_io(struct bio *bio) struct iomap_read_folio_ctx { struct folio *cur_folio; - bool cur_folio_in_bio; void *read_ctx; struct readahead_control *rac; }; @@ -376,7 +375,6 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, sector_t sector; struct bio *bio = ctx->read_ctx; - ctx->cur_folio_in_bio = true; if (ifs) { spin_lock_irq(&ifs->state_lock); ifs->read_bytes_pending += plen; @@ -413,7 +411,7 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, } static int iomap_read_folio_iter(struct iomap_iter *iter, - struct iomap_read_folio_ctx *ctx) + struct iomap_read_folio_ctx *ctx, bool *cur_folio_owned) { const struct iomap *iomap = &iter->iomap; loff_t pos = iter->pos; @@ -450,6 +448,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, folio_zero_range(folio, poff, plen); iomap_set_range_uptodate(folio, poff, plen); } else { + *cur_folio_owned = true; iomap_bio_read_folio_range(iter, ctx, pos, plen); } @@ -472,16 +471,22 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) struct iomap_read_folio_ctx ctx = { .cur_folio = folio, }; + /* + * If an external IO helper takes ownership of the folio, it is + * responsible for unlocking it when the read completes. + */ + bool cur_folio_owned = false; int ret; trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_read_folio_iter(&iter, &ctx); + iter.status = iomap_read_folio_iter(&iter, &ctx, + &cur_folio_owned); iomap_bio_submit_read(&ctx); - if (!ctx.cur_folio_in_bio) + if (!cur_folio_owned) folio_unlock(folio); /* @@ -494,12 +499,13 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) EXPORT_SYMBOL_GPL(iomap_read_folio); static int iomap_readahead_iter(struct iomap_iter *iter, - struct iomap_read_folio_ctx *ctx) + struct iomap_read_folio_ctx *ctx, + bool *cur_folio_owned) { int ret; while (iomap_length(iter)) { - if (ctx->cur_folio && !ctx->cur_folio_in_bio) + if (ctx->cur_folio && !*cur_folio_owned) folio_unlock(ctx->cur_folio); ctx->cur_folio = readahead_folio(ctx->rac); /* @@ -508,8 +514,8 @@ static int iomap_readahead_iter(struct iomap_iter *iter, */ if (WARN_ON_ONCE(!ctx->cur_folio)) return -EINVAL; - ctx->cur_folio_in_bio = false; - ret = iomap_read_folio_iter(iter, ctx); + *cur_folio_owned = false; + ret = iomap_read_folio_iter(iter, ctx, cur_folio_owned); if (ret) return ret; } @@ -542,15 +548,21 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) struct iomap_read_folio_ctx ctx = { .rac = rac, }; + /* + * If an external IO helper takes ownership of the folio, it is + * responsible for unlocking it when the read completes. + */ + bool cur_folio_owned = false; trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); while (iomap_iter(&iter, ops) > 0) - iter.status = iomap_readahead_iter(&iter, &ctx); + iter.status = iomap_readahead_iter(&iter, &ctx, + &cur_folio_owned); iomap_bio_submit_read(&ctx); - if (ctx.cur_folio && !ctx.cur_folio_in_bio) + if (ctx.cur_folio && !cur_folio_owned) folio_unlock(ctx.cur_folio); } EXPORT_SYMBOL_GPL(iomap_readahead); From d111b2d03ab24a15f9e6197c973e210575ca83f0 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:18 -0700 Subject: [PATCH 09/16] iomap: add public start/finish folio read helpers Move ifs read_bytes_pending increment logic into a separate helper, iomap_start_folio_read(), which will be needed later on by caller-provided read callbacks (added in a later commit) for read/readahead. This is the counterpart to the currently existing iomap_finish_folio_read(). Make iomap_start_folio_read() and iomap_finish_folio_read() publicly accessible. These need to be accessible in order for caller-provided read callbacks to use. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 26 +++++++++++++++++--------- include/linux/iomap.h | 3 +++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 587bbdbd24bcd..3794389703479 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -317,9 +317,20 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, return 0; } -#ifdef CONFIG_BLOCK -static void iomap_finish_folio_read(struct folio *folio, size_t off, - size_t len, int error) +void iomap_start_folio_read(struct folio *folio, size_t len) +{ + struct iomap_folio_state *ifs = folio->private; + + if (ifs) { + spin_lock_irq(&ifs->state_lock); + ifs->read_bytes_pending += len; + spin_unlock_irq(&ifs->state_lock); + } +} +EXPORT_SYMBOL_GPL(iomap_start_folio_read); + +void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, + int error) { struct iomap_folio_state *ifs = folio->private; bool uptodate = !error; @@ -339,7 +350,9 @@ static void iomap_finish_folio_read(struct folio *folio, size_t off, if (finished) folio_end_read(folio, uptodate); } +EXPORT_SYMBOL_GPL(iomap_finish_folio_read); +#ifdef CONFIG_BLOCK static void iomap_read_end_io(struct bio *bio) { int error = blk_status_to_errno(bio->bi_status); @@ -369,17 +382,12 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, { struct folio *folio = ctx->cur_folio; const struct iomap *iomap = &iter->iomap; - struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); loff_t length = iomap_length(iter); sector_t sector; struct bio *bio = ctx->read_ctx; - if (ifs) { - spin_lock_irq(&ifs->state_lock); - ifs->read_bytes_pending += plen; - spin_unlock_irq(&ifs->state_lock); - } + iomap_start_folio_read(folio, plen); sector = iomap_sector(iomap, pos); if (!bio || bio_end_sector(bio) != sector || diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 73dceabc21c8c..0938c4a57f4ca 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -467,6 +467,9 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len); int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error); +void iomap_start_folio_read(struct folio *folio, size_t len); +void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, + int error); void iomap_start_folio_write(struct inode *inode, struct folio *folio, size_t len); void iomap_finish_folio_write(struct inode *inode, struct folio *folio, From 6e4d5f3368d6c19f47be2b5012cdba1d28d821f0 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:19 -0700 Subject: [PATCH 10/16] iomap: add caller-provided callbacks for read and readahead Add caller-provided callbacks for read and readahead so that it can be used generically, especially by filesystems that are not block-based. In particular, this: * Modifies the read and readahead interface to take in a struct iomap_read_folio_ctx that is publicly defined as: struct iomap_read_folio_ctx { const struct iomap_read_ops *ops; struct folio *cur_folio; struct readahead_control *rac; void *read_ctx; }; where struct iomap_read_ops is defined as: struct iomap_read_ops { int (*read_folio_range)(const struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, size_t len); void (*read_submit)(struct iomap_read_folio_ctx *ctx); }; read_folio_range() reads in the folio range and is required by the caller to provide. read_submit() is optional and is used for submitting any pending read requests. * Modifies existing filesystems that use iomap for read and readahead to use the new API, through the new statically inlined helpers iomap_bio_read_folio() and iomap_bio_readahead(). There is no change in functinality for those filesystems. Signed-off-by: Joanne Koong --- .../filesystems/iomap/operations.rst | 45 ++++++++++++ block/fops.c | 5 +- fs/erofs/data.c | 5 +- fs/gfs2/aops.c | 6 +- fs/iomap/buffered-io.c | 69 +++++++++++-------- fs/xfs/xfs_aops.c | 5 +- fs/zonefs/file.c | 5 +- include/linux/iomap.h | 62 ++++++++++++++++- 8 files changed, 159 insertions(+), 43 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index 067ed8e14ef34..dbb193415c0e0 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -135,6 +135,29 @@ These ``struct kiocb`` flags are significant for buffered I/O with iomap: * ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``. +``struct iomap_read_ops`` +-------------------------- + +.. code-block:: c + + struct iomap_read_ops { + int (*read_folio_range)(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, size_t len); + void (*submit_read)(struct iomap_read_folio_ctx *ctx); + }; + +iomap calls these functions: + + - ``read_folio_range``: Called to read in the range. This must be provided + by the caller. The caller is responsible for calling + iomap_start_folio_read() and iomap_finish_folio_read() before and after + reading in the folio range. This should be done even if an error is + encountered during the read. This returns 0 on success or a negative error + on failure. + + - ``submit_read``: Submit any pending read requests. This function is + optional. + Internal per-Folio State ------------------------ @@ -182,6 +205,28 @@ The ``flags`` argument to ``->iomap_begin`` will be set to zero. The pagecache takes whatever locks it needs before calling the filesystem. +Both ``iomap_readahead`` and ``iomap_read_folio`` pass in a ``struct +iomap_read_folio_ctx``: + +.. code-block:: c + + struct iomap_read_folio_ctx { + const struct iomap_read_ops *ops; + struct folio *cur_folio; + struct readahead_control *rac; + void *read_ctx; + }; + +``iomap_readahead`` must set: + * ``ops->read_folio_range()`` and ``rac`` + +``iomap_read_folio`` must set: + * ``ops->read_folio_range()`` and ``cur_folio`` + +``ops->submit_read()`` and ``read_ctx`` are optional. ``read_ctx`` is used to +pass in any custom data the caller needs accessible in the ops callbacks for +fulfilling reads. + Buffered Writes --------------- diff --git a/block/fops.c b/block/fops.c index ddbc69c0922ba..a2c2391d8dfa9 100644 --- a/block/fops.c +++ b/block/fops.c @@ -533,12 +533,13 @@ const struct address_space_operations def_blk_aops = { #else /* CONFIG_BUFFER_HEAD */ static int blkdev_read_folio(struct file *file, struct folio *folio) { - return iomap_read_folio(folio, &blkdev_iomap_ops); + iomap_bio_read_folio(folio, &blkdev_iomap_ops); + return 0; } static void blkdev_readahead(struct readahead_control *rac) { - iomap_readahead(rac, &blkdev_iomap_ops); + iomap_bio_readahead(rac, &blkdev_iomap_ops); } static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc, diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 3b1ba571c7286..be4191b33321f 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -371,7 +371,8 @@ static int erofs_read_folio(struct file *file, struct folio *folio) { trace_erofs_read_folio(folio, true); - return iomap_read_folio(folio, &erofs_iomap_ops); + iomap_bio_read_folio(folio, &erofs_iomap_ops); + return 0; } static void erofs_readahead(struct readahead_control *rac) @@ -379,7 +380,7 @@ static void erofs_readahead(struct readahead_control *rac) trace_erofs_readahead(rac->mapping->host, readahead_index(rac), readahead_count(rac), true); - return iomap_readahead(rac, &erofs_iomap_ops); + iomap_bio_readahead(rac, &erofs_iomap_ops); } static sector_t erofs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 47d74afd63ac9..38d4f343187ac 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -424,11 +424,11 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) struct inode *inode = folio->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - int error; + int error = 0; if (!gfs2_is_jdata(ip) || (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) { - error = iomap_read_folio(folio, &gfs2_iomap_ops); + iomap_bio_read_folio(folio, &gfs2_iomap_ops); } else if (gfs2_is_stuffed(ip)) { error = stuffed_read_folio(ip, folio); } else { @@ -503,7 +503,7 @@ static void gfs2_readahead(struct readahead_control *rac) else if (gfs2_is_jdata(ip)) mpage_readahead(rac, gfs2_block_map); else - iomap_readahead(rac, &gfs2_iomap_ops); + iomap_bio_readahead(rac, &gfs2_iomap_ops); } /** diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 3794389703479..561378f2b9bb5 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -363,12 +363,6 @@ static void iomap_read_end_io(struct bio *bio) bio_put(bio); } -struct iomap_read_folio_ctx { - struct folio *cur_folio; - void *read_ctx; - struct readahead_control *rac; -}; - static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) { struct bio *bio = ctx->read_ctx; @@ -377,11 +371,12 @@ static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) submit_bio(bio); } -static void iomap_bio_read_folio_range(const struct iomap_iter *iter, - struct iomap_read_folio_ctx *ctx, loff_t pos, size_t plen) +static int iomap_bio_read_folio_range(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, size_t plen) { struct folio *folio = ctx->cur_folio; const struct iomap *iomap = &iter->iomap; + loff_t pos = iter->pos; size_t poff = offset_in_folio(folio, pos); loff_t length = iomap_length(iter); sector_t sector; @@ -416,8 +411,15 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter, bio_add_folio_nofail(bio, folio, plen, poff); ctx->read_ctx = bio; } + return 0; } +const struct iomap_read_ops iomap_bio_read_ops = { + .read_folio_range = iomap_bio_read_folio_range, + .submit_read = iomap_bio_submit_read, +}; +EXPORT_SYMBOL_GPL(iomap_bio_read_ops); + static int iomap_read_folio_iter(struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, bool *cur_folio_owned) { @@ -426,7 +428,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, loff_t length = iomap_length(iter); struct folio *folio = ctx->cur_folio; size_t poff, plen; - loff_t count; + loff_t delta; int ret; if (iomap->type == IOMAP_INLINE) { @@ -445,23 +447,30 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); - count = pos - iter->pos + plen; - if (WARN_ON_ONCE(count > length)) + delta = pos - iter->pos; + if (WARN_ON_ONCE(delta + plen > length)) return -EIO; + length -= delta + plen; + + ret = iomap_iter_advance(iter, &delta); + if (ret) + return ret; if (plen == 0) - return iomap_iter_advance(iter, &count); + return 0; if (iomap_block_needs_zeroing(iter, pos)) { folio_zero_range(folio, poff, plen); iomap_set_range_uptodate(folio, poff, plen); } else { *cur_folio_owned = true; - iomap_bio_read_folio_range(iter, ctx, pos, plen); + ret = ctx->ops->read_folio_range(iter, ctx, plen); + if (ret) + return ret; } - length -= count; - ret = iomap_iter_advance(iter, &count); + delta = plen; + ret = iomap_iter_advance(iter, &delta); if (ret) return ret; pos = iter->pos; @@ -469,16 +478,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, return 0; } -int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) +int iomap_read_folio(const struct iomap_ops *ops, + struct iomap_read_folio_ctx *ctx) { + struct folio *folio = ctx->cur_folio; struct iomap_iter iter = { .inode = folio->mapping->host, .pos = folio_pos(folio), .len = folio_size(folio), }; - struct iomap_read_folio_ctx ctx = { - .cur_folio = folio, - }; /* * If an external IO helper takes ownership of the folio, it is * responsible for unlocking it when the read completes. @@ -489,10 +497,11 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_read_folio_iter(&iter, &ctx, + iter.status = iomap_read_folio_iter(&iter, ctx, &cur_folio_owned); - iomap_bio_submit_read(&ctx); + if (ctx->ops->submit_read) + ctx->ops->submit_read(ctx); if (!cur_folio_owned) folio_unlock(folio); @@ -533,8 +542,8 @@ static int iomap_readahead_iter(struct iomap_iter *iter, /** * iomap_readahead - Attempt to read pages from a file. - * @rac: Describes the pages to be read. * @ops: The operations vector for the filesystem. + * @ctx: The ctx used for issuing readahead. * * This function is for filesystems to call to implement their readahead * address_space operation. @@ -546,16 +555,15 @@ static int iomap_readahead_iter(struct iomap_iter *iter, * function is called with memalloc_nofs set, so allocations will not cause * the filesystem to be reentered. */ -void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) +void iomap_readahead(const struct iomap_ops *ops, + struct iomap_read_folio_ctx *ctx) { + struct readahead_control *rac = ctx->rac; struct iomap_iter iter = { .inode = rac->mapping->host, .pos = readahead_pos(rac), .len = readahead_length(rac), }; - struct iomap_read_folio_ctx ctx = { - .rac = rac, - }; /* * If an external IO helper takes ownership of the folio, it is * responsible for unlocking it when the read completes. @@ -565,13 +573,14 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); while (iomap_iter(&iter, ops) > 0) - iter.status = iomap_readahead_iter(&iter, &ctx, + iter.status = iomap_readahead_iter(&iter, ctx, &cur_folio_owned); - iomap_bio_submit_read(&ctx); + if (ctx->ops->submit_read) + ctx->ops->submit_read(ctx); - if (ctx.cur_folio && !cur_folio_owned) - folio_unlock(ctx.cur_folio); + if (ctx->cur_folio && !cur_folio_owned) + folio_unlock(ctx->cur_folio); } EXPORT_SYMBOL_GPL(iomap_readahead); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a26f798155331..0c2ed00733f2a 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -742,14 +742,15 @@ xfs_vm_read_folio( struct file *unused, struct folio *folio) { - return iomap_read_folio(folio, &xfs_read_iomap_ops); + iomap_bio_read_folio(folio, &xfs_read_iomap_ops); + return 0; } STATIC void xfs_vm_readahead( struct readahead_control *rac) { - iomap_readahead(rac, &xfs_read_iomap_ops); + iomap_bio_readahead(rac, &xfs_read_iomap_ops); } static int diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index fd3a5922f6c34..4d6e7eb52966e 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -112,12 +112,13 @@ static const struct iomap_ops zonefs_write_iomap_ops = { static int zonefs_read_folio(struct file *unused, struct folio *folio) { - return iomap_read_folio(folio, &zonefs_read_iomap_ops); + iomap_bio_read_folio(folio, &zonefs_read_iomap_ops); + return 0; } static void zonefs_readahead(struct readahead_control *rac) { - iomap_readahead(rac, &zonefs_read_iomap_ops); + iomap_bio_readahead(rac, &zonefs_read_iomap_ops); } /* diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0938c4a57f4ca..4a168ebb40f5d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -16,6 +16,7 @@ struct inode; struct iomap_iter; struct iomap_dio; struct iomap_writepage_ctx; +struct iomap_read_folio_ctx; struct iov_iter; struct kiocb; struct page; @@ -339,8 +340,10 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private); -int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); -void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); +int iomap_read_folio(const struct iomap_ops *ops, + struct iomap_read_folio_ctx *ctx); +void iomap_readahead(const struct iomap_ops *ops, + struct iomap_read_folio_ctx *ctx); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); @@ -478,6 +481,35 @@ void iomap_finish_folio_write(struct inode *inode, struct folio *folio, int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio); int iomap_writepages(struct iomap_writepage_ctx *wpc); +struct iomap_read_folio_ctx { + const struct iomap_read_ops *ops; + struct folio *cur_folio; + struct readahead_control *rac; + void *read_ctx; +}; + +struct iomap_read_ops { + /* + * Read in a folio range. + * + * The caller is responsible for calling iomap_start_folio_read() and + * iomap_finish_folio_read() before and after reading in the folio + * range. This should be done even if an error is encountered during the + * read. + * + * Returns 0 on success or a negative error on failure. + */ + int (*read_folio_range)(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, size_t len); + + /* + * Submit any pending read requests. + * + * This is optional. + */ + void (*submit_read)(struct iomap_read_folio_ctx *ctx); +}; + /* * Flags for direct I/O ->end_io: */ @@ -543,4 +575,30 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, extern struct bio_set iomap_ioend_bioset; +#ifdef CONFIG_BLOCK +extern const struct iomap_read_ops iomap_bio_read_ops; + +static inline void iomap_bio_read_folio(struct folio *folio, + const struct iomap_ops *ops) +{ + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_bio_read_ops, + .cur_folio = folio, + }; + + iomap_read_folio(ops, &ctx); +} + +static inline void iomap_bio_readahead(struct readahead_control *rac, + const struct iomap_ops *ops) +{ + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_bio_read_ops, + .rac = rac, + }; + + iomap_readahead(ops, &ctx); +} +#endif /* CONFIG_BLOCK */ + #endif /* LINUX_IOMAP_H */ From 9e72ce3f1820a646758895b1cb630b37e562c7d3 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:20 -0700 Subject: [PATCH 11/16] iomap: add bias for async read requests Non-block-based filesystems will be using iomap read/readahead. If they handle reading in ranges asynchronously and fulfill those read requests on an ongoing basis (instead of all together at the end), then there is the possibility that the read on the folio may be prematurely ended if earlier async requests complete before the later ones have been issued. For example if there is a large folio and a readahead request for 16 pages in that folio, if doing readahead on those 16 pages is split into 4 async requests and the first request is sent off and then completed before we have sent off the second request, then when the first request calls iomap_finish_folio_read(), ifs->read_bytes_pending would be 0, which would end the read and unlock the folio prematurely. To mitigate this, a "bias" is added to ifs->read_bytes_pending before the first range is forwarded to the caller and removed after the last range has been forwarded. iomap writeback does this with their async requests as well to prevent prematurely ending writeback. Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 55 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 561378f2b9bb5..667a49cb5ae57 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -420,6 +420,38 @@ const struct iomap_read_ops iomap_bio_read_ops = { }; EXPORT_SYMBOL_GPL(iomap_bio_read_ops); +/* + * Add a bias to ifs->read_bytes_pending to prevent the read on the folio from + * being ended prematurely. + * + * Otherwise, if the ranges are read asynchronously and read requests are + * fulfilled on an ongoing basis, there is the possibility that the read on the + * folio may be prematurely ended if earlier async requests complete before the + * later ones have been issued. + */ +static void iomap_read_add_bias(struct folio *folio) +{ + iomap_start_folio_read(folio, 1); +} + +static void iomap_read_remove_bias(struct folio *folio, bool *cur_folio_owned) +{ + struct iomap_folio_state *ifs = folio->private; + bool finished, uptodate; + + if (ifs) { + spin_lock_irq(&ifs->state_lock); + ifs->read_bytes_pending -= 1; + finished = !ifs->read_bytes_pending; + if (finished) + uptodate = ifs_is_fully_uptodate(folio, ifs); + spin_unlock_irq(&ifs->state_lock); + if (finished) + folio_end_read(folio, uptodate); + *cur_folio_owned = true; + } +} + static int iomap_read_folio_iter(struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, bool *cur_folio_owned) { @@ -429,7 +461,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, struct folio *folio = ctx->cur_folio; size_t poff, plen; loff_t delta; - int ret; + int ret = 0; if (iomap->type == IOMAP_INLINE) { ret = iomap_read_inline_data(iter, folio); @@ -441,6 +473,8 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, /* zero post-eof blocks as the page may be mapped */ ifs_alloc(iter->inode, folio, iter->flags); + iomap_read_add_bias(folio); + length = min_t(loff_t, length, folio_size(folio) - offset_in_folio(folio, pos)); while (length) { @@ -448,16 +482,18 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, &plen); delta = pos - iter->pos; - if (WARN_ON_ONCE(delta + plen > length)) - return -EIO; + if (WARN_ON_ONCE(delta + plen > length)) { + ret = -EIO; + break; + } length -= delta + plen; ret = iomap_iter_advance(iter, &delta); if (ret) - return ret; + break; if (plen == 0) - return 0; + break; if (iomap_block_needs_zeroing(iter, pos)) { folio_zero_range(folio, poff, plen); @@ -466,16 +502,19 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, *cur_folio_owned = true; ret = ctx->ops->read_folio_range(iter, ctx, plen); if (ret) - return ret; + break; } delta = plen; ret = iomap_iter_advance(iter, &delta); if (ret) - return ret; + break; pos = iter->pos; } - return 0; + + iomap_read_remove_bias(folio, cur_folio_owned); + + return ret; } int iomap_read_folio(const struct iomap_ops *ops, From 05321a3d0f0c8695e7e0716541d220f90a40be82 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:21 -0700 Subject: [PATCH 12/16] iomap: move buffered io bio logic into new file Move bio logic in the buffered io code into its own file and remove CONFIG_BLOCK gating for iomap read/readahead. Signed-off-by: Christoph Hellwig Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/Makefile | 3 +- fs/iomap/bio.c | 90 ++++++++++++++++++++++++++++++++++++++++++ fs/iomap/buffered-io.c | 90 +----------------------------------------- fs/iomap/internal.h | 12 ++++++ 4 files changed, 105 insertions(+), 90 deletions(-) create mode 100644 fs/iomap/bio.c diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile index f7e1c8534c464..a572b8808524a 100644 --- a/fs/iomap/Makefile +++ b/fs/iomap/Makefile @@ -14,5 +14,6 @@ iomap-y += trace.o \ iomap-$(CONFIG_BLOCK) += direct-io.o \ ioend.o \ fiemap.o \ - seek.o + seek.o \ + bio.o iomap-$(CONFIG_SWAP) += swapfile.o diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c new file mode 100644 index 0000000000000..8a51c9d702686 --- /dev/null +++ b/fs/iomap/bio.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2010 Red Hat, Inc. + * Copyright (C) 2016-2023 Christoph Hellwig. + */ +#include +#include +#include "internal.h" +#include "trace.h" + +static void iomap_read_end_io(struct bio *bio) +{ + int error = blk_status_to_errno(bio->bi_status); + struct folio_iter fi; + + bio_for_each_folio_all(fi, bio) + iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error); + bio_put(bio); +} + +static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) +{ + struct bio *bio = ctx->read_ctx; + + if (bio) + submit_bio(bio); +} + +static int iomap_bio_read_folio_range(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, size_t plen) +{ + struct folio *folio = ctx->cur_folio; + const struct iomap *iomap = &iter->iomap; + loff_t pos = iter->pos; + size_t poff = offset_in_folio(folio, pos); + loff_t length = iomap_length(iter); + sector_t sector; + struct bio *bio = ctx->read_ctx; + + iomap_start_folio_read(folio, plen); + + sector = iomap_sector(iomap, pos); + if (!bio || bio_end_sector(bio) != sector || + !bio_add_folio(bio, folio, plen, poff)) { + gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); + gfp_t orig_gfp = gfp; + unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); + + if (bio) + submit_bio(bio); + + if (ctx->rac) /* same as readahead_gfp_mask */ + gfp |= __GFP_NORETRY | __GFP_NOWARN; + bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ, + gfp); + /* + * If the bio_alloc fails, try it again for a single page to + * avoid having to deal with partial page reads. This emulates + * what do_mpage_read_folio does. + */ + if (!bio) + bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp); + if (ctx->rac) + bio->bi_opf |= REQ_RAHEAD; + bio->bi_iter.bi_sector = sector; + bio->bi_end_io = iomap_read_end_io; + bio_add_folio_nofail(bio, folio, plen, poff); + ctx->read_ctx = bio; + } + return 0; +} + +const struct iomap_read_ops iomap_bio_read_ops = { + .read_folio_range = iomap_bio_read_folio_range, + .submit_read = iomap_bio_submit_read, +}; +EXPORT_SYMBOL_GPL(iomap_bio_read_ops); + +int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len) +{ + const struct iomap *srcmap = iomap_iter_srcmap(iter); + struct bio_vec bvec; + struct bio bio; + + bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ); + bio.bi_iter.bi_sector = iomap_sector(srcmap, pos); + bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos)); + return submit_bio_wait(&bio); +} diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 667a49cb5ae57..72258b0109ecc 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -8,6 +8,7 @@ #include #include #include +#include "internal.h" #include "trace.h" #include "../internal.h" @@ -352,74 +353,6 @@ void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, } EXPORT_SYMBOL_GPL(iomap_finish_folio_read); -#ifdef CONFIG_BLOCK -static void iomap_read_end_io(struct bio *bio) -{ - int error = blk_status_to_errno(bio->bi_status); - struct folio_iter fi; - - bio_for_each_folio_all(fi, bio) - iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error); - bio_put(bio); -} - -static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) -{ - struct bio *bio = ctx->read_ctx; - - if (bio) - submit_bio(bio); -} - -static int iomap_bio_read_folio_range(const struct iomap_iter *iter, - struct iomap_read_folio_ctx *ctx, size_t plen) -{ - struct folio *folio = ctx->cur_folio; - const struct iomap *iomap = &iter->iomap; - loff_t pos = iter->pos; - size_t poff = offset_in_folio(folio, pos); - loff_t length = iomap_length(iter); - sector_t sector; - struct bio *bio = ctx->read_ctx; - - iomap_start_folio_read(folio, plen); - - sector = iomap_sector(iomap, pos); - if (!bio || bio_end_sector(bio) != sector || - !bio_add_folio(bio, folio, plen, poff)) { - gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); - gfp_t orig_gfp = gfp; - unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); - - iomap_bio_submit_read(ctx); - - if (ctx->rac) /* same as readahead_gfp_mask */ - gfp |= __GFP_NORETRY | __GFP_NOWARN; - bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ, - gfp); - /* - * If the bio_alloc fails, try it again for a single page to - * avoid having to deal with partial page reads. This emulates - * what do_mpage_read_folio does. - */ - if (!bio) - bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp); - if (ctx->rac) - bio->bi_opf |= REQ_RAHEAD; - bio->bi_iter.bi_sector = sector; - bio->bi_end_io = iomap_read_end_io; - bio_add_folio_nofail(bio, folio, plen, poff); - ctx->read_ctx = bio; - } - return 0; -} - -const struct iomap_read_ops iomap_bio_read_ops = { - .read_folio_range = iomap_bio_read_folio_range, - .submit_read = iomap_bio_submit_read, -}; -EXPORT_SYMBOL_GPL(iomap_bio_read_ops); - /* * Add a bias to ifs->read_bytes_pending to prevent the read on the folio from * being ended prematurely. @@ -623,27 +556,6 @@ void iomap_readahead(const struct iomap_ops *ops, } EXPORT_SYMBOL_GPL(iomap_readahead); -static int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter, - struct folio *folio, loff_t pos, size_t len) -{ - const struct iomap *srcmap = iomap_iter_srcmap(iter); - struct bio_vec bvec; - struct bio bio; - - bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ); - bio.bi_iter.bi_sector = iomap_sector(srcmap, pos); - bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos)); - return submit_bio_wait(&bio); -} -#else -static int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter, - struct folio *folio, loff_t pos, size_t len) -{ - WARN_ON_ONCE(1); - return -EIO; -} -#endif /* CONFIG_BLOCK */ - /* * iomap_is_partially_uptodate checks whether blocks within a folio are * uptodate or not. diff --git a/fs/iomap/internal.h b/fs/iomap/internal.h index d05cb3aed96e7..7ab1033ab4b78 100644 --- a/fs/iomap/internal.h +++ b/fs/iomap/internal.h @@ -6,4 +6,16 @@ u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend); +#ifdef CONFIG_BLOCK +int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len); +#else +int iomap_bio_read_folio_range_sync(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len) +{ + WARN_ON_ONCE(1); + return -EIO; +} +#endif /* CONFIG_BLOCK */ + #endif /* _IOMAP_INTERNAL_H */ From 68c01d5459f75c0bea87ba28af2e52ff76ffe489 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:22 -0700 Subject: [PATCH 13/16] iomap: make iomap_read_folio() a void return No errors are propagated in iomap_read_folio(). Change iomap_read_folio() to a void return to make this clearer to callers. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 9 +-------- include/linux/iomap.h | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 72258b0109ecc..be535bd3aeca4 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -450,7 +450,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, return ret; } -int iomap_read_folio(const struct iomap_ops *ops, +void iomap_read_folio(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx) { struct folio *folio = ctx->cur_folio; @@ -477,13 +477,6 @@ int iomap_read_folio(const struct iomap_ops *ops, if (!cur_folio_owned) folio_unlock(folio); - - /* - * Just like mpage_readahead and block_read_full_folio, we always - * return 0 and just set the folio error flag on errors. This - * should be cleaned up throughout the stack eventually. - */ - return 0; } EXPORT_SYMBOL_GPL(iomap_read_folio); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4a168ebb40f5d..fa55ec611fff8 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -340,7 +340,7 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private); -int iomap_read_folio(const struct iomap_ops *ops, +void iomap_read_folio(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx); void iomap_readahead(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx); From c4d4b530a3582fa417746e4ccb0b8ed110b620fc Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:23 -0700 Subject: [PATCH 14/16] fuse: use iomap for read_folio Read folio data into the page cache using iomap. This gives us granular uptodate tracking for large folios, which optimizes how much data needs to be read in. If some portions of the folio are already uptodate (eg through a prior write), we only need to read in the non-uptodate portions. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/fuse/file.c | 81 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4adcf09d4b01a..4f27a3b0c20ab 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -828,23 +828,70 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio, return 0; } +static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) +{ + iomap->type = IOMAP_MAPPED; + iomap->length = length; + iomap->offset = offset; + return 0; +} + +static const struct iomap_ops fuse_iomap_ops = { + .iomap_begin = fuse_iomap_begin, +}; + +struct fuse_fill_read_data { + struct file *file; +}; + +static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, + size_t len) +{ + struct fuse_fill_read_data *data = ctx->read_ctx; + struct folio *folio = ctx->cur_folio; + loff_t pos = iter->pos; + size_t off = offset_in_folio(folio, pos); + struct file *file = data->file; + int ret; + + /* + * for non-readahead read requests, do reads synchronously since + * it's not guaranteed that the server can handle out-of-order reads + */ + iomap_start_folio_read(folio, len); + ret = fuse_do_readfolio(file, folio, off, len); + iomap_finish_folio_read(folio, off, len, ret); + return ret; +} + +static const struct iomap_read_ops fuse_iomap_read_ops = { + .read_folio_range = fuse_iomap_read_folio_range_async, +}; + static int fuse_read_folio(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; - int err; + struct fuse_fill_read_data data = { + .file = file, + }; + struct iomap_read_folio_ctx ctx = { + .cur_folio = folio, + .ops = &fuse_iomap_read_ops, + .read_ctx = &data, - err = -EIO; - if (fuse_is_bad(inode)) - goto out; + }; - err = fuse_do_readfolio(file, folio, 0, folio_size(folio)); - if (!err) - folio_mark_uptodate(folio); + if (fuse_is_bad(inode)) { + folio_unlock(folio); + return -EIO; + } + iomap_read_folio(&fuse_iomap_ops, &ctx); fuse_invalidate_atime(inode); - out: - folio_unlock(folio); - return err; + return 0; } static int fuse_iomap_read_folio_range(const struct iomap_iter *iter, @@ -1394,20 +1441,6 @@ static const struct iomap_write_ops fuse_iomap_write_ops = { .read_folio_range = fuse_iomap_read_folio_range, }; -static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - unsigned int flags, struct iomap *iomap, - struct iomap *srcmap) -{ - iomap->type = IOMAP_MAPPED; - iomap->length = length; - iomap->offset = offset; - return 0; -} - -static const struct iomap_ops fuse_iomap_ops = { - .iomap_begin = fuse_iomap_begin, -}; - static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; From eea80c1c97825467bb0eb4571aad833637cdd617 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:24 -0700 Subject: [PATCH 15/16] fuse: use iomap for readahead Do readahead in fuse using iomap. This gives us granular uptodate tracking for large folios, which optimizes how much data needs to be read in. If some portions of the folio are already uptodate (eg through a prior write), we only need to read in the non-uptodate portions. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/fuse/file.c | 220 ++++++++++++++++++++++++++++--------------------- 1 file changed, 124 insertions(+), 96 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4f27a3b0c20ab..db0b1f20fee4e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -844,8 +844,65 @@ static const struct iomap_ops fuse_iomap_ops = { struct fuse_fill_read_data { struct file *file; + + /* Fields below are used if sending the read request asynchronously */ + struct fuse_conn *fc; + struct fuse_io_args *ia; + unsigned int nr_bytes; }; +/* forward declarations */ +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos, + unsigned len, struct fuse_args_pages *ap, + unsigned cur_bytes, bool write); +static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file, + unsigned int count, bool async); + +static int fuse_handle_readahead(struct folio *folio, + struct readahead_control *rac, + struct fuse_fill_read_data *data, loff_t pos, + size_t len) +{ + struct fuse_io_args *ia = data->ia; + size_t off = offset_in_folio(folio, pos); + struct fuse_conn *fc = data->fc; + struct fuse_args_pages *ap; + unsigned int nr_pages; + + if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes, + false)) { + fuse_send_readpages(ia, data->file, data->nr_bytes, + fc->async_read); + data->nr_bytes = 0; + data->ia = NULL; + ia = NULL; + } + if (!ia) { + if (fc->num_background >= fc->congestion_threshold && + rac->ra->async_size >= readahead_count(rac)) + /* + * Congested and only async pages left, so skip the + * rest. + */ + return -EAGAIN; + + nr_pages = min(fc->max_pages, readahead_count(rac)); + data->ia = fuse_io_alloc(NULL, nr_pages); + if (!data->ia) + return -ENOMEM; + ia = data->ia; + } + folio_get(folio); + ap = &ia->ap; + ap->folios[ap->num_folios] = folio; + ap->descs[ap->num_folios].offset = off; + ap->descs[ap->num_folios].length = len; + data->nr_bytes += len; + ap->num_folios++; + + return 0; +} + static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, size_t len) @@ -857,18 +914,40 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter, struct file *file = data->file; int ret; - /* - * for non-readahead read requests, do reads synchronously since - * it's not guaranteed that the server can handle out-of-order reads - */ iomap_start_folio_read(folio, len); - ret = fuse_do_readfolio(file, folio, off, len); - iomap_finish_folio_read(folio, off, len, ret); + if (ctx->rac) { + ret = fuse_handle_readahead(folio, ctx->rac, data, pos, len); + /* + * If fuse_handle_readahead was successful, fuse_readpages_end + * will do the iomap_finish_folio_read, else we need to call it + * here + */ + if (ret) + iomap_finish_folio_read(folio, off, len, ret); + } else { + /* + * for non-readahead read requests, do reads synchronously + * since it's not guaranteed that the server can handle + * out-of-order reads + */ + ret = fuse_do_readfolio(file, folio, off, len); + iomap_finish_folio_read(folio, off, len, ret); + } return ret; } +static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx) +{ + struct fuse_fill_read_data *data = ctx->read_ctx; + + if (data->ia) + fuse_send_readpages(data->ia, data->file, data->nr_bytes, + data->fc->async_read); +} + static const struct iomap_read_ops fuse_iomap_read_ops = { .read_folio_range = fuse_iomap_read_folio_range_async, + .submit_read = fuse_iomap_read_submit, }; static int fuse_read_folio(struct file *file, struct folio *folio) @@ -930,7 +1009,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, } for (i = 0; i < ap->num_folios; i++) { - folio_end_read(ap->folios[i], !err); + iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset, + ap->descs[i].length, err); folio_put(ap->folios[i]); } if (ia->ff) @@ -940,7 +1020,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, } static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file, - unsigned int count) + unsigned int count, bool async) { struct fuse_file *ff = file->private_data; struct fuse_mount *fm = ff->fm; @@ -962,7 +1042,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file, fuse_read_args_fill(ia, file, pos, count, FUSE_READ); ia->read.attr_ver = fuse_get_attr_version(fm->fc); - if (fm->fc->async_read) { + if (async) { ia->ff = fuse_file_get(ff); ap->args.end = fuse_readpages_end; err = fuse_simple_background(fm, &ap->args, GFP_KERNEL); @@ -979,81 +1059,20 @@ static void fuse_readahead(struct readahead_control *rac) { struct inode *inode = rac->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - unsigned int max_pages, nr_pages; - struct folio *folio = NULL; + struct fuse_fill_read_data data = { + .file = rac->file, + .fc = fc, + }; + struct iomap_read_folio_ctx ctx = { + .ops = &fuse_iomap_read_ops, + .rac = rac, + .read_ctx = &data + }; if (fuse_is_bad(inode)) return; - max_pages = min_t(unsigned int, fc->max_pages, - fc->max_read / PAGE_SIZE); - - /* - * This is only accurate the first time through, since readahead_folio() - * doesn't update readahead_count() from the previous folio until the - * next call. Grab nr_pages here so we know how many pages we're going - * to have to process. This means that we will exit here with - * readahead_count() == folio_nr_pages(last_folio), but we will have - * consumed all of the folios, and read_pages() will call - * readahead_folio() again which will clean up the rac. - */ - nr_pages = readahead_count(rac); - - while (nr_pages) { - struct fuse_io_args *ia; - struct fuse_args_pages *ap; - unsigned cur_pages = min(max_pages, nr_pages); - unsigned int pages = 0; - - if (fc->num_background >= fc->congestion_threshold && - rac->ra->async_size >= readahead_count(rac)) - /* - * Congested and only async pages left, so skip the - * rest. - */ - break; - - ia = fuse_io_alloc(NULL, cur_pages); - if (!ia) - break; - ap = &ia->ap; - - while (pages < cur_pages) { - unsigned int folio_pages; - - /* - * This returns a folio with a ref held on it. - * The ref needs to be held until the request is - * completed, since the splice case (see - * fuse_try_move_page()) drops the ref after it's - * replaced in the page cache. - */ - if (!folio) - folio = __readahead_folio(rac); - - folio_pages = folio_nr_pages(folio); - if (folio_pages > cur_pages - pages) { - /* - * Large folios belonging to fuse will never - * have more pages than max_pages. - */ - WARN_ON(!pages); - break; - } - - ap->folios[ap->num_folios] = folio; - ap->descs[ap->num_folios].length = folio_size(folio); - ap->num_folios++; - pages += folio_pages; - folio = NULL; - } - fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT); - nr_pages -= pages; - } - if (folio) { - folio_end_read(folio, false); - folio_put(folio); - } + iomap_readahead(&fuse_iomap_ops, &ctx); } static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -2084,7 +2103,7 @@ struct fuse_fill_wb_data { struct fuse_file *ff; unsigned int max_folios; /* - * nr_bytes won't overflow since fuse_writepage_need_send() caps + * nr_bytes won't overflow since fuse_folios_need_send() caps * wb requests to never exceed fc->max_pages (which has an upper bound * of U16_MAX). */ @@ -2129,14 +2148,15 @@ static void fuse_writepages_send(struct inode *inode, spin_unlock(&fi->lock); } -static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos, - unsigned len, struct fuse_args_pages *ap, - struct fuse_fill_wb_data *data) +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos, + unsigned len, struct fuse_args_pages *ap, + unsigned cur_bytes, bool write) { struct folio *prev_folio; struct fuse_folio_desc prev_desc; - unsigned bytes = data->nr_bytes + len; + unsigned bytes = cur_bytes + len; loff_t prev_pos; + size_t max_bytes = write ? fc->max_write : fc->max_read; WARN_ON(!ap->num_folios); @@ -2144,8 +2164,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos, if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages) return true; - /* Reached max write bytes */ - if (bytes > fc->max_write) + if (bytes > max_bytes) return true; /* Discontinuity */ @@ -2155,11 +2174,6 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos, if (prev_pos != pos) return true; - /* Need to grow the pages array? If so, did the expansion fail? */ - if (ap->num_folios == data->max_folios && - !fuse_pages_realloc(data, fc->max_pages)) - return true; - return false; } @@ -2183,10 +2197,24 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc, return -EIO; } - if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) { - fuse_writepages_send(inode, data); - data->wpa = NULL; - data->nr_bytes = 0; + if (wpa) { + bool send = fuse_folios_need_send(fc, pos, len, ap, + data->nr_bytes, true); + + if (!send) { + /* + * Need to grow the pages array? If so, did the + * expansion fail? + */ + send = (ap->num_folios == data->max_folios) && + !fuse_pages_realloc(data, fc->max_pages); + } + + if (send) { + fuse_writepages_send(inode, data); + data->wpa = NULL; + data->nr_bytes = 0; + } } if (data->wpa == NULL) { From 23ff56dbb3e1d3dd0422ed208df9d7eef55d371a Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 16 Sep 2025 16:44:25 -0700 Subject: [PATCH 16/16] fuse: remove fc->blkbits workaround for partial writes Now that fuse is integrated with iomap for read/readahead, we can remove the workaround that was added in commit bd24d2108e9c ("fuse: fix fuseblk i_blkbits for iomap partial writes"), which was previously needed to avoid a race condition where an iomap partial write may be overwritten by a read if blocksize < PAGE_SIZE. Now that fuse does iomap read/readahead, this is protected against since there is granular uptodate tracking of blocks, which means this workaround can be removed. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/fuse/dir.c | 2 +- fs/fuse/fuse_i.h | 8 -------- fs/fuse/inode.c | 13 +------------ 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5c569c3cb53f3..ebee7e0b1cd37 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1199,7 +1199,7 @@ static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode, if (attr->blksize != 0) blkbits = ilog2(attr->blksize); else - blkbits = fc->blkbits; + blkbits = inode->i_sb->s_blocksize_bits; stat->blksize = 1 << blkbits; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index cc428d04be3e1..1647eb7ca6fa6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -975,14 +975,6 @@ struct fuse_conn { /* Request timeout (in jiffies). 0 = no timeout */ unsigned int req_timeout; } timeout; - - /* - * This is a workaround until fuse uses iomap for reads. - * For fuseblk servers, this represents the blocksize passed in at - * mount time and for regular fuse servers, this is equivalent to - * inode->i_blkbits. - */ - u8 blkbits; }; /* diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7ddfd2b3cc9c4..3bfd83469d9f9 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -292,7 +292,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, if (attr->blksize) fi->cached_i_blkbits = ilog2(attr->blksize); else - fi->cached_i_blkbits = fc->blkbits; + fi->cached_i_blkbits = inode->i_sb->s_blocksize_bits; /* * Don't set the sticky bit in i_mode, unless we want the VFS @@ -1810,21 +1810,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) err = -EINVAL; if (!sb_set_blocksize(sb, ctx->blksize)) goto err; - /* - * This is a workaround until fuse hooks into iomap for reads. - * Use PAGE_SIZE for the blocksize else if the writeback cache - * is enabled, buffered writes go through iomap and a read may - * overwrite partially written data if blocksize < PAGE_SIZE - */ - fc->blkbits = sb->s_blocksize_bits; - if (ctx->blksize != PAGE_SIZE && - !sb_set_blocksize(sb, PAGE_SIZE)) - goto err; #endif } else { sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; - fc->blkbits = sb->s_blocksize_bits; } sb->s_subtype = ctx->subtype;