diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index f9bd233cf8ecf..351ba623e2b6d 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -47,6 +47,7 @@ set(sources X86FixupVectorConstants.cpp X86AvoidStoreForwardingBlocks.cpp X86DynAllocaExpander.cpp + X86EliminateRedundantZeroExtend.cpp X86FixupSetCC.cpp X86FlagsCopyLowering.cpp X86FloatingPoint.cpp diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 6261fadf10a7a..cd59eb5c80149 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -127,6 +127,10 @@ FunctionPass *createX86CmovConverterPass(); /// the upper portions of registers, and to save code size. FunctionPass *createX86FixupBWInsts(); +/// Return a Machine IR pass that eliminates redundant zero-extension +/// instructions where the upper bits are already known to be zero. +FunctionPass *createX86EliminateRedundantZeroExtend(); + /// Return a Machine IR pass that reassigns instruction chains from one domain /// to another, when profitable. FunctionPass *createX86DomainReassignmentPass(); diff --git a/llvm/lib/Target/X86/X86EliminateRedundantZeroExtend.cpp b/llvm/lib/Target/X86/X86EliminateRedundantZeroExtend.cpp new file mode 100644 index 0000000000000..72717b1c64794 --- /dev/null +++ b/llvm/lib/Target/X86/X86EliminateRedundantZeroExtend.cpp @@ -0,0 +1,292 @@ +//===-- X86EliminateRedundantZeroExtend.cpp - Eliminate Redundant ZExt ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This pass eliminates redundant zero-extension instructions where the source +/// register is a sub-register of the destination and the destination's upper +/// bits are known to be zero. +/// +/// For example: +/// movzbl (%rdi), %ecx ; ECX = zero-extend byte, upper 24 bits are zero +/// ... +/// movzbl %cl, %ecx ; Redundant! CL is part of ECX, upper bits already 0 +/// +/// This pattern commonly occurs in loops processing byte values. +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-eliminate-zext" +#define PASS_NAME "X86 Eliminate Redundant Zero Extension" + +namespace { +class EliminateRedundantZeroExtend : public MachineFunctionPass { +public: + static char ID; + EliminateRedundantZeroExtend() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { return PASS_NAME; } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().setNoVRegs(); + } + +private: + const X86InstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; + + /// Check if the register's upper bits are known to be zero at this point. + /// This checks backward from MI to find the most recent definition of Reg. 
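+  /// The scan is local: it inspects this block and, failing that, a single
+  /// level of predecessors, so a false result means "not provably zero"
+  /// rather than "known nonzero".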
+  bool hasZeroUpperBits(Register Reg, const MachineInstr &MI,
+                        const MachineBasicBlock &MBB) const;
+
+  /// Try to eliminate a redundant MOVZX instruction.
+  bool tryEliminateRedundantZeroExtend(MachineInstr &MI,
+                                       MachineBasicBlock &MBB) const;
+};
+
+char EliminateRedundantZeroExtend::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createX86EliminateRedundantZeroExtend() {
+  return new EliminateRedundantZeroExtend();
+}
+
+/// Return true if \p Inst writes its 32-bit destination with bits 8-31
+/// guaranteed zero. Note that MOVZX32rr16/MOVZX32rm16 do not qualify: they
+/// zero bits 16-31 but copy bits 8-15 from the source. Likewise, 32-bit
+/// ADD/SUB/LEA only zero bits 32-63 of the 64-bit super-register; they may
+/// leave bits 8-31 set.
+static bool isByteZeroExtendingDef(const MachineInstr &Inst) {
+  switch (Inst.getOpcode()) {
+  // These zero-extend a byte to all 32 bits.
+  case X86::MOVZX32rm8:
+  case X86::MOVZX32rr8:
+  // MOV32r0 explicitly zeros the register.
+  case X86::MOV32r0:
+    return true;
+  // XOR with self zeros the register.
+  case X86::XOR32rr:
+    return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
+  default:
+    // Any other definition might set bits 8-31, so it is not safe.
+    return false;
+  }
+}
+
+bool EliminateRedundantZeroExtend::hasZeroUpperBits(
+    Register Reg, const MachineInstr &MI, const MachineBasicBlock &MBB) const {
+  // Walk backward from MI to find the most recent definition of Reg.
+  MachineBasicBlock::const_reverse_iterator I = ++MI.getReverseIterator();
+  MachineBasicBlock::const_reverse_iterator E = MBB.rend();
+  for (; I != E; ++I) {
+    const MachineInstr &Inst = *I;
+
+    // Check whether this instruction defines Reg. Note that operands()
+    // already includes the implicit operands, so implicit defs are covered
+    // here as well.
+    for (const MachineOperand &MO : Inst.operands()) {
+      // A register mask (e.g. on a call) may clobber Reg.
+      if (MO.isRegMask() && MO.clobbersPhysReg(Reg.asMCReg()))
+        return false;
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+
+      Register DefReg = MO.getReg();
+      // Found the most recent definition: safe iff it zeros bits 8-31.
+      if (DefReg == Reg || TRI->isSuperRegister(Reg, DefReg))
+        return isByteZeroExtendingDef(Inst);
+
+      // Any other overlapping write is a partial register update - the
+      // upper bits are unknown.
+      if (TRI->regsOverlap(DefReg, Reg))
+        return false;
+    }
+  }
+
+  // Didn't find a definition in this block - check predecessors. The upper
+  // bits are zero only if every predecessor establishes that.
+  if (MBB.pred_empty())
+    return false;
+
+  for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+    bool FoundZeroExtend = false;
+
+    // SAFETY CHECK: If only the low byte is live-in to the predecessor, we
+    // assume the parent register was zero-extended in an earlier block, and
+    // then verify that assumption below. The reasoning:
+    // 1. After register allocation, if $cl is live-in but $ecx is not, only
+    //    the low 8 bits carry a meaningful value.
+    // 2. The register allocator ensures no other code modifies $ecx between
+    //    the zero-extension and this point (otherwise $ecx would be live).
+    // 3. Any write to $ch or to the upper bits appears as a def overlapping
+    //    $ecx, which the verification scan below rejects.
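+    //
+    // The shape this is intended to catch is a cmpxchg-style loop (see
+    // atomic-rm-bit-test.ll in this patch), sketched here:
+    //
+    //   bb.0: movzbl (%esi), %eax        ; EAX zero-extended
+    //   bb.1: lock cmpxchgb %cl, (%esi)  ; writes only AL; AL is live-in
+    //   bb.2: movzbl %al, %eax           ; bits 8-31 of EAX are still zero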
+    // Only the 8-bit sub-register justifies the assumption: a live-in 16-bit
+    // sub-register would say nothing about bits 8-15.
+    Register SubReg8 = TRI->getSubReg(Reg, X86::sub_8bit);
+    bool SubRegLiveIn = SubReg8 && Pred->isLiveIn(SubReg8);
+
+    if (SubRegLiveIn) {
+      // Verify the assumption: no instruction in this predecessor may write
+      // Reg, or anything overlapping it, other than the low-byte
+      // sub-register itself. If one does, conservatively bail out.
+      bool SafeToAssume = true;
+      for (const MachineInstr &Inst : *Pred) {
+        for (const MachineOperand &MO : Inst.operands()) {
+          if (MO.isRegMask() && MO.clobbersPhysReg(Reg.asMCReg())) {
+            SafeToAssume = false;
+            break;
+          }
+          if (MO.isReg() && MO.isDef()) {
+            Register DefReg = MO.getReg();
+            // A write to anything overlapping Reg other than the low-byte
+            // sub-register may change bits 8-31.
+            if ((DefReg == Reg || TRI->regsOverlap(DefReg, Reg)) &&
+                DefReg != SubReg8) {
+              SafeToAssume = false;
+              break;
+            }
+          }
+        }
+        if (!SafeToAssume)
+          break;
+      }
+
+      if (SafeToAssume) {
+        FoundZeroExtend = true;
+        goto next_predecessor;
+      }
+    }
+
+    // Walk backward through the predecessor to find its last definition of
+    // Reg.
+    for (const MachineInstr &Inst : llvm::reverse(*Pred)) {
+      for (const MachineOperand &MO : Inst.operands()) {
+        if (MO.isRegMask() && MO.clobbersPhysReg(Reg.asMCReg()))
+          return false;
+        if (!MO.isReg() || !MO.isDef())
+          continue;
+
+        Register DefReg = MO.getReg();
+        if (DefReg == Reg || TRI->isSuperRegister(Reg, DefReg)) {
+          // Found the definition in this predecessor.
+          if (!isByteZeroExtendingDef(Inst))
+            return false;
+          FoundZeroExtend = true;
+          goto next_predecessor;
+        }
+
+        // Partial register update - bits 8-31 are unknown.
+        if (TRI->regsOverlap(DefReg, Reg))
+          return false;
+      }
+    }
+
+  next_predecessor:
+    // If we didn't find a zero-extending definition in this predecessor,
+    // fail.
+    if (!FoundZeroExtend)
+      return false;
+  }
+
+  // All predecessors establish zero upper bits.
+  return true;
+}
+
+bool EliminateRedundantZeroExtend::tryEliminateRedundantZeroExtend(
+    MachineInstr &MI, MachineBasicBlock &MBB) const {
+  // Only handle MOVZX32rr8 for now. Extending this to MOVZX32rr16 would
+  // require a variant of hasZeroUpperBits that reasons about bits 16-31.
+  if (MI.getOpcode() != X86::MOVZX32rr8)
+    return false;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+
+  // The source must be the *low* 8-bit sub-register of the destination:
+  // movzbl %cl, %ecx is a no-op when bits 8-31 of ECX are zero, but
+  // movzbl %ch, %ecx never is.
+  if (TRI->getSubReg(DstReg, X86::sub_8bit) != SrcReg)
+    return false;
+
+  // Check if the destination's upper bits are already zero.
+  if (!hasZeroUpperBits(DstReg, MI, MBB))
+    return false;
+
+  // The MOVZX is redundant: SrcReg is the low byte of DstReg and DstReg's
+  // upper bits are already zero, so this instruction does nothing.
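+  //
+  // For example, in a byte-processing loop:
+  //   movzbl (%rdi), %ecx  ; ECX = zero-extended byte, bits 8-31 are zero
+  //   ...
+  //   movzbl %cl, %ecx     ; no-op on this path, so erase it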
+  LLVM_DEBUG(dbgs() << "Eliminating redundant zero-extend: " << MI);
+  MI.eraseFromParent();
+  return true;
+}
+
+bool EliminateRedundantZeroExtend::runOnMachineFunction(MachineFunction &MF) {
+  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+  TRI = MF.getSubtarget().getRegisterInfo();
+
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    // Use an early-increment range so that erasing MI inside the callee
+    // does not invalidate the iteration.
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+      if (tryEliminateRedundantZeroExtend(MI, MBB))
+        Changed = true;
+  }
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 8dd6f3d97ccea..72835150e8277 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -558,6 +558,7 @@ void X86PassConfig::addPreEmitPass() {
   if (getOptLevel() != CodeGenOptLevel::None) {
     addPass(createX86FixupBWInsts());
+    addPass(createX86EliminateRedundantZeroExtend());
     addPass(createX86PadShortFunctions());
     addPass(createX86FixupLEAs());
     addPass(createX86FixupInstTuning());
diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
index b4d40fee01e41..a283a002d9818 100644
--- a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
@@ -177,7 +177,6 @@ define zeroext i8 @atomic_shl1_xor_8_gpr_valz(ptr %v, i8 zeroext %c) nounwind {
 ; X86-NEXT:    lock cmpxchgb %cl, (%esi)
 ; X86-NEXT:    jne .LBB3_1
 ; X86-NEXT:    # %bb.2: # %atomicrmw.end
-; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    testl %eax, %edx
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    popl %esi
@@ -198,7 +197,6 @@ define zeroext i8 @atomic_shl1_xor_8_gpr_valz(ptr %v, i8 zeroext %c) nounwind {
 ; X64-NEXT:    lock cmpxchgb %cl, (%rdi)
 ; X64-NEXT:    jne .LBB3_1
 ; X64-NEXT:    # %bb.2: # %atomicrmw.end
-; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    testl %eax, %edx
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
@@ -233,7 +231,6 @@ define zeroext i8 @atomic_shl1_mask0_xor_8_gpr_valz(ptr %v, i8 zeroext %c) nounw
 ; X86-NEXT:    lock cmpxchgb %cl, (%esi)
 ; X86-NEXT:    jne .LBB4_1
 ; X86-NEXT:    # %bb.2: # %atomicrmw.end
-; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    movzbl %dl, %ecx
 ; X86-NEXT:    btl %ecx, %eax
 ; X86-NEXT:    setae %al
@@ -255,7 +252,6 @@ define zeroext i8 @atomic_shl1_mask0_xor_8_gpr_valz(ptr %v, i8 zeroext %c) nounw
 ; X64-NEXT:    lock cmpxchgb %cl, (%rdi)
 ; X64-NEXT:    jne .LBB4_1
 ; X64-NEXT:    # %bb.2: # %atomicrmw.end
-; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    movzbl %sil, %ecx
 ; X64-NEXT:    btl %ecx, %eax
 ; X64-NEXT:    setae %al
@@ -291,7 +287,6 @@ define zeroext i8 @atomic_shl1_mask01_xor_8_gpr_valz(ptr %v, i8 zeroext %c) noun
 ; X86-NEXT:    lock cmpxchgb %cl, (%edx)
 ; X86-NEXT:    jne .LBB5_1
 ; X86-NEXT:    # %bb.2: # %atomicrmw.end
-; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    testl %eax, %ebx
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    popl %ebx
@@ -313,7 +308,6 @@ define zeroext i8 @atomic_shl1_mask01_xor_8_gpr_valz(ptr %v, i8 zeroext %c) noun
 ; X64-NEXT:    lock cmpxchgb %cl, (%rdi)
 ; X64-NEXT:    jne .LBB5_1
 ; X64-NEXT:    # %bb.2: # %atomicrmw.end
-; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    testl %eax, %edx
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
@@ -349,7 +343,6 @@ define zeroext i8 @atomic_shl1_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounwind {
 ; X86-NEXT:    lock cmpxchgb %ch, (%edx)
 ; X86-NEXT:    jne .LBB6_1
 ; X86-NEXT:    # %bb.2: # %atomicrmw.end
-; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    testl %eax, %ebx
 ; X86-NEXT:    je .LBB6_3
 ; X86-NEXT:    # %bb.4: # %if.then
@@ -378,7 +371,6 @@ define zeroext i8 @atomic_shl1_and_8_gpr_brnz(ptr %v, i8 zeroext %c) nounwind {
 ; X64-NEXT:    lock cmpxchgb %r8b, (%rdi)
 ; X64-NEXT:    jne .LBB6_1
 ; X64-NEXT:    # %bb.2: # %atomicrmw.end
-; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    testl %eax, %edx
 ; X64-NEXT:    je .LBB6_3
 ; X64-NEXT:    # %bb.4: # %if.then
@@ -538,7 +530,6 @@ define zeroext i8 @atomic_shl1_mask01_and_8_gpr_brnz(ptr %v, i8 zeroext %c) noun
 ; X64-NEXT:    lock cmpxchgb %r8b, (%rdi)
 ; X64-NEXT:    jne .LBB8_1
 ; X64-NEXT:    # %bb.2: # %atomicrmw.end
-; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    testl %eax, %edx
 ; X64-NEXT:    je .LBB8_3
 ; X64-NEXT:    # %bb.4: # %if.then
diff --git a/llvm/test/CodeGen/X86/ctlz.ll b/llvm/test/CodeGen/X86/ctlz.ll
index 1267fe9033454..a3d28a7fcba24 100644
--- a/llvm/test/CodeGen/X86/ctlz.ll
+++ b/llvm/test/CodeGen/X86/ctlz.ll
@@ -224,7 +224,6 @@ define i8 @ctlz_i8_zero_test(i8 %n) {
 ; X86-NOCMOV-NEXT:    testb %al, %al
 ; X86-NOCMOV-NEXT:    je .LBB4_1
 ; X86-NOCMOV-NEXT:    # %bb.2: # %cond.false
-; X86-NOCMOV-NEXT:    movzbl %al, %eax
 ; X86-NOCMOV-NEXT:    bsrl %eax, %eax
 ; X86-NOCMOV-NEXT:    xorl $7, %eax
 ; X86-NOCMOV-NEXT:    # kill: def $al killed $al killed $eax
@@ -961,7 +960,6 @@ define i8 @ctlz_xor7_i8_false(i8 %x) {
 ; X86-NOCMOV-NEXT:    testb %al, %al
 ; X86-NOCMOV-NEXT:    je .LBB16_1
 ; X86-NOCMOV-NEXT:    # %bb.2: # %cond.false
-; X86-NOCMOV-NEXT:    movzbl %al, %eax
 ; X86-NOCMOV-NEXT:    bsrl %eax, %eax
 ; X86-NOCMOV-NEXT:    xorl $7, %eax
 ; X86-NOCMOV-NEXT:    xorb $7, %al
diff --git a/llvm/test/CodeGen/X86/eliminate-redundant-zext.ll b/llvm/test/CodeGen/X86/eliminate-redundant-zext.ll
new file mode 100644
index 0000000000000..294a6e7f780e3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/eliminate-redundant-zext.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s
+
+; Test that redundant MOVZX instructions are eliminated when the source
+; register is the low sub-register of the destination and the destination's
+; upper bits are already known to be zero.
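+; The interesting check below is the loop body (.LBB0_2): it reloads the
+; next byte with movzbl but no longer re-extends %cl into %ecx before the
+; table load.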
+ +; This is the original countholes test case from GitHub issue #160710 that demonstrates +; the redundant movzbl %cl, %ecx in the loop +define i32 @countholes(ptr %s) { +; CHECK-LABEL: countholes: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movzbl (%rdi), %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpb $48, %cl +; CHECK-NEXT: jb .LBB0_3 +; CHECK-NEXT: # %bb.1: # %while.body.preheader +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: # %while.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: addl pre_table-192(,%rcx,4), %eax +; CHECK-NEXT: movzbl (%rdi), %ecx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: cmpb $47, %cl +; CHECK-NEXT: ja .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %cleanup +; CHECK-NEXT: retq +entry: + %c.0 = load i8, ptr %s, align 1 + %conv = zext i8 %c.0 to i32 + %cmp = icmp ult i8 %c.0, 48 + br i1 %cmp, label %cleanup, label %while.body.preheader + +while.body.preheader: + br label %while.body + +while.body: + %s.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %s, %while.body.preheader ] + %c.010 = phi i8 [ %c.1, %while.body ], [ %c.0, %while.body.preheader ] + %tot.09 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] + %conv3 = zext i8 %c.010 to i64 + %sub = add nsw i64 %conv3, -48 + %arrayidx = getelementptr inbounds [10 x i32], ptr @pre_table, i64 0, i64 %sub + %0 = load i32, ptr %arrayidx, align 4 + %add = add i32 %0, %tot.09 + %incdec.ptr = getelementptr inbounds i8, ptr %s.addr.011, i64 1 + %c.1 = load i8, ptr %incdec.ptr, align 1 + %cmp1 = icmp ult i8 %c.1, 48 + br i1 %cmp1, label %cleanup.loopexit, label %while.body + +cleanup.loopexit: + br label %cleanup + +cleanup: + %retval.0 = phi i32 [ 0, %entry ], [ %add, %cleanup.loopexit ] + ret i32 %retval.0 +} + +@pre_table = internal constant [10 x i32] [i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 2, i32 1], align 4 diff --git a/llvm/test/CodeGen/X86/isel-select-cmov.ll b/llvm/test/CodeGen/X86/isel-select-cmov.ll index d013ad2c7fbff..783db3487e2bd 100644 --- a/llvm/test/CodeGen/X86/isel-select-cmov.ll +++ b/llvm/test/CodeGen/X86/isel-select-cmov.ll @@ -73,11 +73,9 @@ define zeroext i8 @select_cmov_i8(i1 zeroext %cond, i8 zeroext %a, i8 zeroext %b ; FAST-X86-NEXT: jne LBB0_1 ; FAST-X86-NEXT: ## %bb.2: ; FAST-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; FAST-X86-NEXT: movzbl %al, %eax ; FAST-X86-NEXT: retl ; FAST-X86-NEXT: LBB0_1: ; FAST-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; FAST-X86-NEXT: movzbl %al, %eax ; FAST-X86-NEXT: retl ; ; FAST-X86-CMOV-LABEL: select_cmov_i8: @@ -86,11 +84,9 @@ define zeroext i8 @select_cmov_i8(i1 zeroext %cond, i8 zeroext %a, i8 zeroext %b ; FAST-X86-CMOV-NEXT: jne LBB0_1 ; FAST-X86-CMOV-NEXT: ## %bb.2: ; FAST-X86-CMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; FAST-X86-CMOV-NEXT: movzbl %al, %eax ; FAST-X86-CMOV-NEXT: retl ; FAST-X86-CMOV-NEXT: LBB0_1: ; FAST-X86-CMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; FAST-X86-CMOV-NEXT: movzbl %al, %eax ; FAST-X86-CMOV-NEXT: retl ; ; GISEL-X86-LABEL: select_cmov_i8: diff --git a/llvm/test/CodeGen/X86/isel-udiv.ll b/llvm/test/CodeGen/X86/isel-udiv.ll index b123b3c7780fa..f96a12c2fafd0 100644 --- a/llvm/test/CodeGen/X86/isel-udiv.ll +++ b/llvm/test/CodeGen/X86/isel-udiv.ll @@ -22,7 +22,6 @@ define i8 @test_udiv_i8(i8 %arg1, i8 %arg2) nounwind { ; GISEL-X86-LABEL: test_udiv_i8: ; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; GISEL-X86-NEXT: movzbl %al, %eax ; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; GISEL-X86-NEXT: divb %cl ; 
GISEL-X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/isel-urem.ll b/llvm/test/CodeGen/X86/isel-urem.ll
index 386f08151ad9c..5dd901fe8daa6 100644
--- a/llvm/test/CodeGen/X86/isel-urem.ll
+++ b/llvm/test/CodeGen/X86/isel-urem.ll
@@ -49,7 +49,6 @@ define i8 @test_urem_i8(i8 %arg1, i8 %arg2) nounwind {
 ; GISEL-X86-LABEL: test_urem_i8:
 ; GISEL-X86:       # %bb.0:
 ; GISEL-X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; GISEL-X86-NEXT:    movzbl %al, %eax
 ; GISEL-X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; GISEL-X86-NEXT:    divb %cl
 ; GISEL-X86-NEXT:    movb %ah, %al
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 81390e59d0d0a..01385fb63d6e1 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -202,6 +202,7 @@
 ; CHECK-NEXT:       X86 vzeroupper inserter
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       X86 Byte/Word Instruction Fixup
+; CHECK-NEXT:       X86 Eliminate Redundant Zero Extension
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       X86 Atom pad short functions
 ; CHECK-NEXT:       X86 LEA Fixup
diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll
index 412455384e937..147abcdbff0b9 100644
--- a/llvm/test/CodeGen/X86/pr38539.ll
+++ b/llvm/test/CodeGen/X86/pr38539.ll
@@ -28,7 +28,6 @@ define void @f() nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzbl (%eax), %eax
 ; X86-NEXT:    movzbl (%eax), %ecx
-; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    divb %cl
 ; X86-NEXT:    movl %edi, %eax
diff --git a/llvm/test/CodeGen/X86/sttni.ll b/llvm/test/CodeGen/X86/sttni.ll
index 39cbee54737c3..b0c92831124bf 100644
--- a/llvm/test/CodeGen/X86/sttni.ll
+++ b/llvm/test/CodeGen/X86/sttni.ll
@@ -89,7 +89,6 @@ define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs,
 ; X64-NEXT:    jne .LBB2_2
 ; X64-NEXT:    # %bb.1:
 ; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB2_2: # %compare
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
@@ -222,7 +221,6 @@ define i32 @pcmpestri_mem_diff_i8(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32
 ; X64-NEXT:    jne .LBB5_2
 ; X64-NEXT:    # %bb.1:
 ; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB5_2: # %compare
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
@@ -552,7 +550,6 @@ define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
 ; X86-NEXT:    jne .LBB14_2
 ; X86-NEXT:    # %bb.1:
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB14_2: # %compare
 ; X86-NEXT:    pushl %ebp
@@ -577,7 +574,6 @@ define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
 ; X64-NEXT:    jne .LBB14_2
 ; X64-NEXT:    # %bb.1:
 ; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    retq
 ; 
X64-NEXT: .LBB14_2: # %compare ; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) @@ -690,7 +686,6 @@ define i32 @pcmpistri_mem_diff_i8(ptr %lhs_ptr, ptr %rhs_ptr) nounwind { ; X64-NEXT: jne .LBB17_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB17_2: # %compare ; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll index ac932d51017ae..53e6e49268789 100644 --- a/llvm/test/CodeGen/X86/vector-compress.ll +++ b/llvm/test/CodeGen/X86/vector-compress.ll @@ -2012,7 +2012,6 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8> ; AVX2-NEXT: andl $63, %ecx ; AVX2-NEXT: vpextrb $10, %xmm0, (%rsp,%rcx) ; AVX2-NEXT: movzbl 56(%rbp), %ecx -; AVX2-NEXT: movzbl %cl, %ecx ; AVX2-NEXT: andl $1, %ecx ; AVX2-NEXT: addq %rax, %rcx ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax @@ -3348,7 +3347,6 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i ; AVX2-NEXT: addl %r8d, %r9d ; AVX2-NEXT: movzbl 16(%rbp), %ecx ; AVX2-NEXT: vextractps $1, %xmm0, (%rsp,%r9,4) -; AVX2-NEXT: movzbl %cl, %ecx ; AVX2-NEXT: andl $1, %ecx ; AVX2-NEXT: addl %r9d, %ecx ; AVX2-NEXT: movzbl 24(%rbp), %edx