diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
index c094b66338512..7a00bfae75ed1 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
@@ -778,9 +778,11 @@ mlir::LogicalResult hlfir::MatmulOp::verify() {
   }
   if (resultShape.size() != expectedResultShape.size())
     return emitOpError("incorrect result shape");
-  if (resultShape[0] != expectedResultShape[0])
+  if (resultShape[0] != expectedResultShape[0] &&
+      expectedResultShape[0] != unknownExtent)
     return emitOpError("incorrect result shape");
-  if (resultShape.size() == 2 && resultShape[1] != expectedResultShape[1])
+  if (resultShape.size() == 2 && resultShape[1] != expectedResultShape[1] &&
+      expectedResultShape[1] != unknownExtent)
     return emitOpError("incorrect result shape");
 
   return mlir::success();
@@ -852,7 +854,10 @@ mlir::LogicalResult hlfir::TransposeOp::verify() {
   if (rank != 2 || resultRank != 2)
     return emitOpError("input and output arrays should have rank 2");
 
-  if (inShape[0] != resultShape[1] || inShape[1] != resultShape[0])
+  constexpr int64_t unknownExtent = fir::SequenceType::getUnknownExtent();
+  if ((inShape[0] != resultShape[1]) && (inShape[0] != unknownExtent))
+    return emitOpError("output shape does not match input array");
+  if ((inShape[1] != resultShape[0]) && (inShape[1] != unknownExtent))
     return emitOpError("output shape does not match input array");
 
   if (eleTy != resultEleTy)
diff --git a/flang/test/Lower/HLFIR/matmul.f90 b/flang/test/Lower/HLFIR/matmul.f90
index 93cb700820646..6e09c18c20657 100644
--- a/flang/test/Lower/HLFIR/matmul.f90
+++ b/flang/test/Lower/HLFIR/matmul.f90
@@ -17,3 +17,41 @@ subroutine matmul1(lhs, rhs, res)
 ! CHECK-NEXT:    hlfir.destroy %[[EXPR]]
 ! CHECK-NEXT:    return
 ! CHECK-NEXT:   }
+
+! Regression test for a case where the AST and FIR disagree on how much of
+! the shape is known: the result type has a constant extent the operands lack.
+subroutine matmul2(c)  ! lowered to func.func @_QPmatmul2
+  integer, parameter :: N = 4
+  integer, dimension(:,:), allocatable :: a, b, c  ! deferred shape: lowered as ?x? arrays
+  integer, dimension(N,N) :: x  ! constant shape 4x4
+
+  allocate(a(3*N, N), b(N, N), c(3*N, N))
+
+  call fill(a)
+  call fill(b)
+  call fill(x)
+
+  c = matmul(a, b - x)  ! operands are typed ?x? but the matmul result is typed ?x4
+endsubroutine
+! CHECK-LABEL: func.func @_QPmatmul2
+! CHECK:           %[[C_ARG:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+! CHECK:         %[[B_BOX_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "b"
+! CHECK:         %[[B_BOX_DECL:.*]]:2 = hlfir.declare %[[B_BOX_ALLOC]] {{.*}} uniq_name = "_QFmatmul2Eb"
+
+
+! CHECK:         fir.call @_QPfill
+! CHECK:         fir.call @_QPfill
+! CHECK:         fir.call @_QPfill
+! CHECK-NEXT:    %[[B_BOX:.*]] = fir.load %[[B_BOX_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+! CHECK-NEXT:    %[[C0:.*]] = arith.constant 0 : index
+! CHECK-NEXT:    %[[B_DIMS_0:.*]]:3 = fir.box_dims %[[B_BOX]], %[[C0]]
+! CHECK-NEXT:    %[[C1:.*]] = arith.constant 1 : index
+! CHECK-NEXT:    %[[B_DIMS_1:.*]]:3 = fir.box_dims %[[B_BOX]], %[[C1]]
+! CHECK-NEXT:    %[[B_SHAPE:.*]] = fir.shape %[[B_DIMS_0]]#1, %[[B_DIMS_1]]#1
+! CHECK-NEXT:    %[[ELEMENTAL:.*]] = hlfir.elemental %[[B_SHAPE]] : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+
+! CHECK:         }
+! CHECK-NEXT:    %[[A_BOX:.*]] = fir.load %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
+
+! The shapes in these types are what is being tested:
+! CHECK-NEXT:    %[[MATMUL:.*]] = hlfir.matmul %[[A_BOX]] %[[ELEMENTAL]] {{.*}} : (!fir.box<!fir.heap<!fir.array<?x?xi32>>>, !hlfir.expr<?x?xi32>) -> !hlfir.expr<?x4xi32>
diff --git a/flang/test/Lower/HLFIR/transpose.f90 b/flang/test/Lower/HLFIR/transpose.f90
index 05a57e072ccbe..56a4c83b4947e 100644
--- a/flang/test/Lower/HLFIR/transpose.f90
+++ b/flang/test/Lower/HLFIR/transpose.f90
@@ -15,3 +15,16 @@ subroutine transpose1(m, res)
 ! CHECK-NEXT:    hlfir.destroy %[[EXPR]]
 ! CHECK-NEXT:    return
 ! CHECK-NEXT:  }
+
+! test the case where lowering has more exact information about the output
+! shape than is available from the argument
+subroutine transpose2(a, out)  ! lowered to func.func @_QPtranspose2
+  real, allocatable, dimension(:) :: a  ! rank-1 allocatable, deferred shape
+  real, dimension(:,:) :: out  ! assumed-shape rank-2 result
+  integer, parameter :: N = 3
+  integer, parameter :: M = 4
+
+  allocate(a(N*M))
+  out = transpose(reshape(a, (/N, M/)))  ! reshape to the constant shape (/3, 4/), then transpose
+end subroutine
+! CHECK-LABEL: func.func @_QPtranspose2(