diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h
index d2bdd6e8..d25fd457 100644
--- a/inc/zoo/swar/SWAR.h
+++ b/inc/zoo/swar/SWAR.h
@@ -56,7 +56,9 @@ struct SWAR {
         NSlots = Lanes,
         PaddingBitsCount = BitWidth % NBits,
         SignificantBitsCount = BitWidth - PaddingBitsCount,
-        AllOnes = ~std::make_unsigned_t<T>{0} >> PaddingBitsCount;
+        AllOnes = ~std::make_unsigned_t<T>{0} >> PaddingBitsCount,
+        LeastSignificantBit = meta::BitmaskMaker<T, std::make_unsigned_t<T>{1}, NBits>::value,
+        MostSignificantBit = LeastSignificantBit << (NBits - 1);
 
     SWAR() = default;
     constexpr explicit SWAR(T v): m_v(v) {}
@@ -68,7 +70,7 @@ struct SWAR {
         X(SWAR, ~)
     //constexpr SWAR operator~() const noexcept { return SWAR{~m_v}; }
     #define SWAR_BINARY_OPERATORS_X_LIST \
-        X(SWAR, &) X(SWAR, ^) X(SWAR, |) X(SWAR, -) X(SWAR, +) X(SWAR, *)
+        X(SWAR, &) X(SWAR, ^) X(SWAR, |) X(SWAR, -) X(SWAR, +)
 
     #define X(rt, op) constexpr rt operator op() const noexcept { return rt(op m_v); }
     SWAR_UNARY_OPERATORS_X_LIST
@@ -106,6 +108,17 @@ struct SWAR {
         return SWAR(m_v | (T(1) << (index * NBits + bit)));
     }
 
+    constexpr auto blitElement(int index, T value) const noexcept {
+        auto elementMask = ((T(1) << NBits) - 1) << (index * NBits);
+        return SWAR((m_v & ~elementMask) | (value << (index * NBits)));
+    }
+
+    constexpr SWAR blitElement(int index, SWAR other) const noexcept {
+        constexpr auto OneElementMask = SWAR(~(~T(0) << NBits));
+        auto IsolationMask = OneElementMask.shiftLanesLeft(index);
+        return (*this & ~IsolationMask) | (other & IsolationMask);
+    }
+
     constexpr SWAR shiftLanesLeft(int laneCount) const noexcept {
         return SWAR(value() << (NBits * laneCount));
     }
@@ -114,15 +127,21 @@ struct SWAR {
         return SWAR(value() >> (NBits * laneCount));
     }
 
-    constexpr auto blitElement(int index, T value) const noexcept {
-        auto elementMask = ((T(1) << NBits) - 1) << (index * NBits);
-        return SWAR((m_v & ~elementMask) | (value << (index * NBits)));
+    /// \brief Shifts all lanes left by bitCount bits, after masking this SWAR with protectiveMask
+    /// \param protectiveMask should clear the bits that would cross the lane.
+    /// The bits that will be cleared are directly related to the count of shifts, so it is natural for the caller
+    /// to maintain the protective mask; otherwise, the mask would have to be computed on every invocation.
+    /// We are not sure the optimizer would keep this mask around; if it were to recalculate it, it would be disastrous for performance.
+    constexpr SWAR
+    shiftIntraLaneLeft(int bitCount, SWAR protectiveMask) const noexcept {
+        return SWAR{(*this & protectiveMask).value() << bitCount};
     }
 
-    constexpr SWAR blitElement(int index, SWAR other) const noexcept {
-        constexpr auto OneElementMask = SWAR(~(~T(0) << NBits));
-        auto IsolationMask = OneElementMask.shiftLanesLeft(index);
-        return (*this & ~IsolationMask) | (other & IsolationMask);
+    /// \param protectiveMask should clear the bits that would cross the lane
+    /// \sa shiftIntraLaneLeft
+    constexpr SWAR
+    shiftIntraLaneRight(int bitCount, SWAR protectiveMask) const noexcept {
+        return SWAR{(*this & protectiveMask).value() >> bitCount};
     }
 
     T m_v;
@@ -299,7 +318,7 @@ constexpr auto broadcast(SWAR<NBits, T> v) {
 /// BooleanSWAR treats the MSB of each SWAR lane as the boolean associated with that lane.
 template<int NBits, typename T>
 struct BooleanSWAR: SWAR<NBits, T> {
-    // Booleanness is stored in MSB of a given swar.
+    // Booleanness is stored in the MSBs
     static constexpr auto MaskLaneMSB =
         broadcast<NBits, T>(SWAR<NBits, T>(T(1) << (NBits -1)));
     constexpr explicit BooleanSWAR(T v): SWAR<NBits, T>(v) {}
diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp
index b4480e54..db826eda 100644
--- a/test/swar/BasicOperations.cpp
+++ b/test/swar/BasicOperations.cpp
@@ -4,9 +4,334 @@
 
 #include <type_traits>
 
+namespace zoo::swar {
+
+/// \note This code should be substituted by an application of "progressive" algebraic iteration
+/// \note There is also parallelPrefix (to be implemented)
+template<int NB, typename B>
+constexpr SWAR<NB, B> parallelSuffix(SWAR<NB, B> input) {
+    using S = SWAR<NB, B>;
+    auto
+        shiftClearingMask = S{~S::MostSignificantBit},
+        doubling = input,
+        result = S{0};
+    auto
+        bitsToXOR = NB,
+        power = 1;
+    for(;;) {
+        if(1 & bitsToXOR) {
+            result = result ^ doubling;
+            doubling = doubling.shiftIntraLaneLeft(power, shiftClearingMask);
+        }
+        bitsToXOR >>= 1;
+        if(!bitsToXOR) { break; }
+        auto shifted = doubling.shiftIntraLaneLeft(power, shiftClearingMask);
+        doubling = doubling ^ shifted;
+        // 01...1
+        // 001...1
+        // 00001...1
+        // 000000001...1
+        shiftClearingMask =
+            shiftClearingMask & S{shiftClearingMask.value() >> power};
+        power <<= 1;
+    }
+    return S{result};
+}
+
+/// \todo because of the desirability of "accumulating" the XORs at the MSB,
+/// the parallel suffix operation is more suitable.
+template<int NB, typename B>
+constexpr SWAR<NB, B> parity(SWAR<NB, B> input) {
+    using S = SWAR<NB, B>;
+    auto preResult = parallelSuffix(input);
+    auto onlyMSB = preResult.value() & S::MostSignificantBit;
+    return S{onlyMSB};
+}
+
+
+/*
+Execution trace at two points:
+1. when checking `if(1 & count)`
+2. when checking `if(!count)`
+If the variable did not change from the last value, it may be omitted
+input       Count       x       d       power   mask
+1           1           0       x0      1       01111...
+            0           x0
+2           2           0       x0      1       01111...
+            1           0       x0      1       01111...
+            1           0       x01     2       00111...
+            0           x01
+3           3           0       x0      1       01111...
+            1           x0      x1      1       01111...
+            1           x0      x12     2       00111...
+            0           x012    x23
+4           4           0       x0      1       01111...
+            2           0       x0      1       01111...
+            2           0       x01     2       00111...
+            1           0       x01
+            1           0       x0123   4       00001...
+            0           x0123   x01234567
+5           5           0       x0      1       01111...
+            2           x0      x1
+            2           x0      x12     2       00111...
+            1
+            1                   x1234   4       00001...
+            0           x01234
+6           6           0       x0      1       01111...
+            3
+            3                   x01     2       00111...
+            1           x01     x23     
+            1                   x2345   4       00001...
+            0           x012345 x6789
+7           7           0       x0      1       01......
+            3           x0      x1
+            3                   x12     2       001.....
+            1           x012    x34
+            1                   x3456   4       00001...
+            0           x0-6    x789A
+25 = 16 + 8 + 1
+25          25          0       x0      1       01111...
+            12          x0      x1
+            12                  x12     2       00111...
+            6
+            6                   x1234   4       {0}4
+            3
+            3                   x1-8    8       {0}8
+            1           x0-8    x9-16
+            1                   x9-24   16      {0}16
+            0           x0-24   x25-?
+*/
+
+template<int NB, typename B>
+struct ArithmeticResultTriplet {
+    SWAR<NB, B> result;
+    BooleanSWAR<NB, B> carry, overflow;
+};
+
+namespace impl {
+template<int NB, typename B>
+constexpr auto makeLaneMaskFromMSB_and_LSB(SWAR<NB, B> msb, SWAR<NB, B> lsb) {
+    auto msbCopiedDown = msb - lsb;
+    auto msbReintroduced = msbCopiedDown | msb;
+    return msbReintroduced;
+}
+}
+
+template<int NB, typename B>
+constexpr auto makeLaneMaskFromLSB(SWAR<NB, B> input) {
+    using S = SWAR<NB, B>;
+    auto lsb = input & S{S::LeastSignificantBit};
+    auto lsbCopiedToMSB = S{lsb.value() << (NB - 1)};
+    return impl::makeLaneMaskFromMSB_and_LSB(lsbCopiedToMSB, lsb);
+}
+
+template<int NB, typename B>
+constexpr auto makeLaneMaskFromMSB(SWAR<NB, B> input) {
+    using S = SWAR<NB, B>;
+    auto msb = input & S{S::MostSignificantBit};
+    auto msbCopiedToLSB = S{msb.value() >> (NB - 1)};
+    return impl::makeLaneMaskFromMSB_and_LSB(msb, msbCopiedToLSB);
+}
+
+/// \brief Lane-wise addition; also yields the carry-out and the signed overflow, flagged at each lane's MSB
+template<int NB, typename B>
+constexpr auto fullAddition(SWAR<NB, B> s1, SWAR<NB, B> s2) {
+    using S = SWAR<NB, B>;
+    using BS = BooleanSWAR<NB, B>;
+    constexpr auto
+        SignBit = S{S::MostSignificantBit},
+        LowerBits = SignBit - S{S::LeastSignificantBit};
+    // clearing the most significant bits before adding prevents carries from crossing lanes
+    auto
+        s1prime = LowerBits & s1,
+        s2prime = LowerBits & s2,
+        resultPrime = s1prime + s2prime,
+        s1Sign = SignBit & s1,
+        s2Sign = SignBit & s2,
+        signPrime = SignBit & resultPrime,
+        result = resultPrime ^ s1Sign ^ s2Sign,
+        // carry: at least two of the sign bits of s1, s2, signPrime are set
+        carry = (s1Sign & s2Sign) | (s1Sign & signPrime) | (s2Sign & signPrime),
+        // overflow: the inputs have the same sign (s1Sign ^ s2Sign ^ SignBit) and it differs from the result's
+        overflow = (s1Sign ^ s2Sign ^ SignBit) & (s1Sign ^ result);
+    return ArithmeticResultTriplet<NB, B>{result, BS{carry.value()}, BS{overflow.value()}};
+}
+
+/// \brief Performs a generalized iterated application of an associative operator to a base
+///
+/// In algebra, the repeated application of an operator to a "base" has different names depending on the
+/// operator, for example "a + a + a + ... + a" n-times would be called "repeated addition",
+/// if * is numeric multiplication, "a * a * a * ... * a" n-times would be called "exponentiation of a to the n power"
+/// the generic term we use is "iteration" for naming this function.
+/// Since * and "product" are frequently used in Algebra to denote the application of a general operator, we
+/// keep the option to use the imprecise language of "product, base and exponent".  "Iteration" has a very
+/// different meaning in programming and especially different in C++.
+/// There may be iteration over an operator that is not associative (such as quaternion multiplication), this
+/// function leverages the associative property of the operator to "halve" the count of iterations at each step.
+/// \note There is a symmetrical operation to be implemented of associative iteration in the
+/// "progressive" direction: instead of starting with the most significant bit of the count, down to the lsb,
+/// and doing "op(result, base, count)"; going from lsb to msb doing "op(result, square, exponent)"
+/// \tparam Operator a callable with three arguments: the left and right arguments to the operation
+/// and the count to be used, the "count" is an artifact of this generalization
+/// \tparam IterationCount loosely models the "exponent" in "exponentiation", however, it may not
+/// be a number, the iteration count is part of the execution context to apply the operator
+/// \param forSquaring is an artifact of this generalization
+/// \param log2Count is to potentially reduce the number of iterations if the caller a-priori knows
+/// there are fewer iterations than what the type of exponent would allow
+template<
+    typename Base, typename IterationCount, typename Operator,
+    // the critical use of associativity is that it allows halving the
+    // iteration count
+    typename CountHalver
+>
+constexpr auto associativeOperatorIterated_regressive(
+    Base base, Base neutral, IterationCount count, IterationCount forSquaring,
+    Operator op, unsigned log2Count, CountHalver ch
+) {
+    auto result = neutral;
+    if(!log2Count) { return result; }
+    for(;;) {
+        result = op(result, base, count);
+        if(!--log2Count) { break; }
+        result = op(result, result, forSquaring);
+        count = ch(count);
+    }
+    return result;
+}
+
+/// \brief Lane-wise product that consumes only the ActualBits least significant bits of each multiplier lane
+template<int ActualBits, int NB, typename T>
+constexpr auto multiplication_OverflowUnsafe_SpecificBitCount(
+    SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier
+) {
+    using S = SWAR<NB, T>;
+    // adds `right` to `left` only in the lanes where the MSB of `counts` is set
+    auto operation = [](auto left, auto right, auto counts) {
+        auto addendums = makeLaneMaskFromMSB(counts);
+        return left + (addendums & right);
+    };
+    // moves the next lower bit of each lane into its MSB, clearing the MSB first so it does not spill over
+    auto halver = [](auto counts) {
+        auto msbCleared = counts & ~S{S::MostSignificantBit};
+        return S{msbCleared.value() << 1};
+    };
+    // align the ActualBits low bits of each multiplier lane with the lane's MSB, where `operation` reads them
+    multiplier = S{multiplier.value() << (NB - ActualBits)};
+    return associativeOperatorIterated_regressive(
+        multiplicand, S{0}, multiplier, S{S::MostSignificantBit}, operation, ActualBits, halver
+    );
+}
+
+/// \note Not removed yet because it is an example of "progressive" associative exponentiation
+template<int ActualBits, int NB, typename T>
+constexpr auto multiplication_OverflowUnsafe_SpecificBitCount_deprecated(
+    SWAR<NB, T> multiplicand,
+    SWAR<NB, T> multiplier
+) {
+    using S = SWAR<NB, T>;
+    constexpr auto LeastBit = S::LeastSignificantBit;
+    auto multiplicandDoubling = multiplicand.value();
+    auto mplier = multiplier.value();
+    auto product = S{0};
+    for(auto count = ActualBits;;) {
+        auto multiplicandDoublingMask = makeLaneMaskFromLSB(S{mplier});
+        product = product + (multiplicandDoublingMask & S{multiplicandDoubling});
+        if(!--count) { break; }
+        multiplicandDoubling <<= 1;
+        auto leastBitCleared = mplier & ~LeastBit;
+        mplier = leastBitCleared >> 1;
+    }
+    return product;
+}
+
+template<int NB, typename T>
+constexpr auto multiplication_OverflowUnsafe(
+    SWAR<NB, T> multiplicand,
+    SWAR<NB, T> multiplier
+) {
+    return
+        multiplication_OverflowUnsafe_SpecificBitCount<NB>(
+            multiplicand, multiplier
+        );
+}
+
+template<int NB, typename T>
+struct SWAR_Pair{
+    SWAR<NB, T> even, odd;
+};
+
+template<int NB, typename T>
+constexpr SWAR<NB, T> doublingMask() {
+    using S = SWAR<NB, T>;
+    static_assert(0 == S::Lanes % 2, "Only even number of elements supported");
+    using D = SWAR<NB * 2, T>;
+    return S{(D::LeastSignificantBit << NB) - D::LeastSignificantBit};
+}
+
+template<int NB, typename T>
+constexpr auto doublePrecision(SWAR<NB, T> input) {
+    using S = SWAR<NB, T>;
+    static_assert(
+        0 == S::NSlots % 2,
+        "Precision can only be doubled for SWARs of even element count"
+    );
+    using RV = SWAR<NB * 2, T>;
+    constexpr auto DM = doublingMask<NB, T>();
+    return SWAR_Pair<NB * 2, T>{
+        RV{(input & DM).value()},
+        RV{(input.value() >> NB) & DM.value()}
+    };
+}
+
+template<int NB, typename T>
+constexpr auto halvePrecision(SWAR<NB, T> even, SWAR<NB, T> odd) {
+    using S = SWAR<NB, T>;
+    static_assert(0 == NB % 2, "Only even lane-bitcounts supported");
+    using RV = SWAR<NB/2, T>;
+    constexpr auto HalvingMask = doublingMask<NB/2, T>();
+    auto
+        evenHalf = RV{even.value()} & HalvingMask,
+        oddHalf = RV{(RV{odd.value()} & HalvingMask).value() << NB/2};
+    return evenHalf | oddHalf;
+}
+
+}
+
 using namespace zoo;
 using namespace zoo::swar;
 
+namespace Multiplication {
+
+static_assert(0x0F0F0F0F == doublingMask<4, uint32_t>().value());
+
+constexpr auto PrecisionFixtureTest = 0x89ABCDEF;
+constexpr auto Doubled =
+    doublePrecision(SWAR<4, uint32_t>{PrecisionFixtureTest});
+
+static_assert(0x090B0D0F == Doubled.even.value());
+static_assert(0x080A0C0E == Doubled.odd.value());
+static_assert(PrecisionFixtureTest == halvePrecision(Doubled.even, Doubled.odd).value());
+
+constexpr SWAR<8, u32> Micand{0x5030201};
+constexpr SWAR<8, u32> Mplier{0xA050301};
+
+// expected:
+// 5*0xA = 5*10 = 50 = 0x32,
+// 3*5 = 15 = 0xF,
+// 3*2 = 6,
+// 1*1 = 1
+constexpr auto Expected = 0x320F0601;
+
+static_assert(
+    Expected == multiplication_OverflowUnsafe(Micand, Mplier).value()
+);
+static_assert(
+    0x320F0601 != // intentionally use a too-small bit count
+    multiplication_OverflowUnsafe_SpecificBitCount<3>(Micand, Mplier).value()
+);
+
+}
+
 #define HE(nbits, t, v0, v1) \
     static_assert(horizontalEquality<nbits, t>(\
         SWAR<nbits, t>(v0),\
@@ -21,6 +346,29 @@ HE(3, u8, 0xFF, 0x7);
 HE(2, u8, 0xAA, 0x2);
 #undef HE
 
+TEST_CASE("Old version", "[deprecated][swar]") {
+    SWAR<8, u32> Micand{0x5030201};
+    SWAR<8, u32> Mplier{0xA050301};
+    auto Expected = 0x320F0601;
+    auto result =
+        multiplication_OverflowUnsafe_SpecificBitCount_deprecated<4>(
+            Micand, Mplier
+        );
+    CHECK(Expected == result.value());
+}
+
+TEST_CASE("Parity", "[swar]") {
+    // For each nibble, E indicates (E)ven and O (O)dd parities
+    //                EEOEEOOO
+    auto Examples = 0xFF13A7E4;
+    SWAR<4, u32> casesBy4{Examples};
+    SWAR<8, u32> casesBy8{Examples};
+    auto by4 = parity(casesBy4);
+    auto by8 = parity(casesBy8);
+    CHECK(by4.value() == 0x00800888);
+    CHECK(by8.value() == 0x00808000);
+}
+
 TEST_CASE(
     "Isolate",
     "[swar]"