diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index 5f5e0520e7eb6..f92833e9e1c0a 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -1668,6 +1668,30 @@ void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old loop->record_for_igvn(); } +Node* PhaseIdealLoop::find_last_store_in_outer_loop(Node* store, const IdealLoopTree* outer_loop) { + assert(store != nullptr && store->is_Store(), "starting point should be a store node"); + // Follow the memory uses until we get out of the loop. + // Store nodes in the outer loop body were moved by PhaseIdealLoop::try_move_store_after_loop. + // Because of the conditions in try_move_store_after_loop (no other usage in the loop body + // except for the phi node associated with the loop head), we have the guarantee of a + // linear memory subgraph within the outer loop body. + Node* last = store; + Node* unique_next = store; + do { + last = unique_next; + for (DUIterator_Fast imax, l = last->fast_outs(imax); l < imax; l++) { + Node* use = last->fast_out(l); + if (use->is_Store() && use->in(MemNode::Memory) == last) { + if (is_member(outer_loop, get_ctrl(use))) { + assert(unique_next == last, "memory node should only have one usage in the loop body"); + unique_next = use; + } + } + } + } while (last != unique_next); + return last; +} + //------------------------------insert_post_loop------------------------------- // Insert post loops. Add a post loop to the given loop passed. Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree* loop, Node_List& old_new, @@ -1758,6 +1782,26 @@ Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree* loop, Node_List& old_new, cur_phi->set_req(LoopNode::EntryControl, fallnew); } } + // Store nodes that were moved to the outer loop by PhaseIdealLoop::try_move_store_after_loop + // do not have an associated Phi node. Such nodes are attached to the false projection of the CountedLoopEnd node, + // right after the execution of the inner CountedLoop. + // We have to make sure that such stores in the post loop have the right memory inputs from the main loop + // The moved store node is always attached right after the inner loop exit, and just before the safepoint + const Node* if_false = main_end->proj_out(false); + for (DUIterator j = if_false->outs(); if_false->has_out(j); j++) { + Node* store = if_false->out(j); + if (store->is_Store()) { + // We only make changes if the memory input of the store is outside the outer loop body, + // as this is when we would normally expect a Phi as input. If the memory input + // is in the loop body as well, then we can safely assume it is still correct as the entire + // body was cloned as a unit + if (!is_member(outer_loop, get_ctrl(store->in(MemNode::Memory)))) { + Node* mem_out = find_last_store_in_outer_loop(store, outer_loop); + Node* store_new = old_new[store->_idx]; + store_new->set_req(MemNode::Memory, mem_out); + } + } + } DEBUG_ONLY(ensure_zero_trip_guard_proj(post_head->in(LoopNode::EntryControl), false);) initialize_assertion_predicates_for_post_loop(main_head, post_head, first_node_index_in_cloned_loop_body); diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index 8863f37699a4a..df7f935da1440 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -1380,6 +1380,9 @@ class PhaseIdealLoop : public PhaseTransform { // during RCE, unrolling and aligning loops. void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ); + // Find the last store in the body of an OuterStripMinedLoop when following memory uses + Node *find_last_store_in_outer_loop(Node* store, const IdealLoopTree* outer_loop); + // Add post loop after the given loop. Node *insert_post_loop(IdealLoopTree* loop, Node_List& old_new, CountedLoopNode* main_head, CountedLoopEndNode* main_end, diff --git a/test/hotspot/jtreg/compiler/loopstripmining/MissingStoreAfterOuterStripMinedLoop.java b/test/hotspot/jtreg/compiler/loopstripmining/MissingStoreAfterOuterStripMinedLoop.java new file mode 100644 index 0000000000000..8fa6cae12e6a0 --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopstripmining/MissingStoreAfterOuterStripMinedLoop.java @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8364757 + * @summary Moving Store nodes from the main CountedLoop to the OuterStripMinedLoop causes + * subsequent Store nodes to be eventually removed because of missing Phi nodes, + * leading to wrong results. + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation + * -Xcomp -XX:-UseLoopPredicate -XX:-UseAutoVectorizationPredicate + * -XX:CompileCommand=compileonly,compiler.loopstripmining.MissingStoreAfterOuterStripMinedLoop::test* + * compiler.loopstripmining.MissingStoreAfterOuterStripMinedLoop + * @run main compiler.loopstripmining.MissingStoreAfterOuterStripMinedLoop + * + */ + +package compiler.loopstripmining; + +public class MissingStoreAfterOuterStripMinedLoop { + public static int x = 0; + public static int y = 0; + + static class A { + int field; + } + + // The store node in the loop body is moved to the OuterStripLoop. + // When making the post loop the new store node + // should have the moved store node as memory input, and not the + // initial x = 0 store. + // + // store (x = 0) + // | + // store (x += 1, exit of CountedLoop main) + // | <-- additional rewiring due to absence of phi node + // store (x += 1, exit of CountedLoop post) + // | + // store (x = 0) + static public void test1() { + x = 0; + for (int i = 0; i < 20000; i++) { + x += i; + } + x = 0; + } + + // Two independent stores + // They should be wired independently in the post loop, no aliasing + static public void test2() { + x = 0; + y = 0; + for (int i = 0; i < 20000; i++) { + x += i; + y += i; + } + x = 0; + y = 0; + } + + // Chain of stores with potential aliasing. + // The entire chain is moved to the OuterStripLoop, between the + // inner loop exit and the safepoint. + // The chain should be preserved when cloning the main loop body + // to create the post loop. Only the first store of the post loop + // should be rewired to have the last store of the main loop + // as memory input. + // + // ... + // | + // store (a1.field = v, exit of CountedLoop main) + // | + // store (a2.field = v, exit of CountedLoop main) + // | + // store (a3.field = v, exit of CountedLoop main) + // | <-- only additional rewiring needed + // store (a1.field = v, exit of CountedLoop post) + // | + // store (a2.field = v, exit of CountedLoop post) + // | + // store (a3.field = v, exit of CountedLoop post) + static public void test3(A a1, A a2, A a3) { + a1.field = 0; + a2.field = 0; + a3.field = 0; + int v = 0; + for (int i = 0; i < 20000; i++) { + v++; + a1.field = v; + a2.field = v; + a3.field = v; + } + } + + public static void main(String[] strArr) { + A a1 = new A(); + A a2 = new A(); + A a3 = new A(); + + test1(); + if (x != 0) { + throw new RuntimeException("unexpected value: " + x); + } + + test2(); + if (x != 0 || y != 0) { + throw new RuntimeException("unexpected value: " + x + " " + y); + } + + test3(a1, a2, a3); + if (a1.field != 20000 || a2.field != 20000 || a3.field != 20000) { + throw new RuntimeException("unexpected value: " + a1.field + " " + a2.field + " " + a3.field); + } + } +} \ No newline at end of file