# 4.2 Strassen's algorithm for matrix multiplication

## Implementation of Strassen's algorithm

Let us implement Strassen's algorithm. Notice that the pseudocode is available in [Exercise 4.2-2](exercise4.2-2).

In [1]:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Standard matrix multiplication for comparison
// Only square matrices, just as in Strassen's algorithm

unsigned long long matrix_multiply ( int ** A , int ** B , int ** C , size_t n ) {
    /*
        Schoolbook product of two square matrices of order n.
        The product is accumulated into C (C += A*B); C is not cleared
        here, so the caller decides what it starts from.
        Returns the number of scalar operations counted: one
        multiplication and one addition per innermost step.
    */

    unsigned long long count = 0; // running operation counter

    for (size_t row = 0; row < n; row++) {
        int * out = C[row]; // row of the result being accumulated
        for (size_t col = 0; col < n; col++) {
            for (size_t t = 0; t < n; t++) {
                out[col] += A[row][t] * B[t][col];
                count += 2;
            }
        }
    }

    return count;
}

// Matrix addition is necessary for Strassen's algorithm

unsigned long long matrix_add ( int ** A , size_t iA , size_t jA ,
                 int ** B , size_t iB , size_t jB ,
                 int ** C , size_t iC , size_t jC ,
                 size_t n ) {
    /*
        Adds two square submatrices of order n and accumulates the sum
        into a third one: C_sub += A_sub + B_sub.
        Each submatrix is given by the row/column offset of its top-left
        entry (i*, j*), and covers rows i* .. i*+(n-1) and columns
        j* .. j*+(n-1).
        Returns the number of scalar operations counted (two per entry).
    */

    unsigned long long count = 0; // running operation counter

    for (size_t r = 0; r < n; r++) {
        for (size_t c = 0; c < n; c++) {
            int sum = A[iA + r][jA + c] + B[iB + r][jB + c];
            C[iC + r][jC + c] += sum;
            count += 2;
        }
    }

    return count;
}

unsigned long long matrix_subtract ( int ** A , size_t iA , size_t jA ,
                      int ** B , size_t iB , size_t jB ,
                      int ** C , size_t iC , size_t jC ,
                      size_t n ) {
    /*
        Subtracts two square submatrices of order n and accumulates the
        difference into a third one: C_sub += A_sub - B_sub.
        Each submatrix is given by the row/column offset of its top-left
        entry (i*, j*), and covers rows i* .. i*+(n-1) and columns
        j* .. j*+(n-1).
        Returns the number of scalar operations counted (two per entry).
    */

    unsigned long long count = 0; // running operation counter

    for (size_t r = 0; r < n; r++) {
        for (size_t c = 0; c < n; c++) {
            int diff = A[iA + r][jA + c] - B[iB + r][jB + c];
            C[iC + r][jC + c] += diff;
            count += 2;
        }
    }

    return count;
}

// print matrix nicely
void print_matrix (int ** A , size_t m , size_t n ) {
    /*
        Prints the m-by-n matrix A to stdout in a nested-list,
        python-compatible layout: one bracketed row per line, entries
        right-aligned in 6 columns and separated by " ,", rows separated
        by ",". No newline is written after the closing "]".

        Empty shapes are handled explicitly: m == 0 prints "[" and "]"
        with no rows, and n == 0 prints empty rows "[]". The bounds are
        written as i+1 < m rather than i < m-1 because m and n are
        unsigned, so m-1 would wrap around for m == 0.
    */
    size_t i,j; // indices

    fputs("[\n", stdout);
    for (i=0;i<m;i++) {
        fputs("  [", stdout);
        for (j=0;j<n;j++) {
            printf(" %6d" , A[i][j]);
            if (j+1<n) {
                fputs(" ,", stdout); // separator between entries only
            }
        }
        // every row but the last is followed by a comma
        fputs( (i+1<m) ? "],\n" : "]\n" , stdout);
    }
    fputs("]", stdout);

}

/* Reports an allocation failure inside strassen and terminates the program. */
static void strassen_out_of_memory ( void ) {
    fprintf(stderr, "strassen: out of memory\n");
    abort();
}

/* Allocates `count` zero-filled square matrices of order m (expects m >= 1). */
static int *** strassen_alloc_zeroed ( size_t count , size_t m ) {
    int *** mats = malloc( count * sizeof *mats );
    if (mats == NULL) {
        strassen_out_of_memory();
    }
    for (size_t k=0;k<count;k++) {
        mats[k] = malloc( m * sizeof *mats[k] );
        if (mats[k] == NULL) {
            strassen_out_of_memory();
        }
        for (size_t r=0;r<m;r++) {
            mats[k][r] = calloc( m , sizeof *mats[k][r] ); // row starts at zero
            if (mats[k][r] == NULL) {
                strassen_out_of_memory();
            }
        }
    }
    return mats;
}

/* Releases `count` order-m matrices obtained from strassen_alloc_zeroed. */
static void strassen_free ( int *** mats , size_t count , size_t m ) {
    for (size_t k=0;k<count;k++) {
        for (size_t r=0;r<m;r++) {
            free(mats[k][r]);
        }
        free(mats[k]);
    }
    free(mats);
}

/* Schoolbook C += A*B on order-n submatrices; used when n cannot be halved. */
static unsigned long long strassen_schoolbook ( int ** A , size_t iA , size_t jA ,
                                                int ** B , size_t iB , size_t jB ,
                                                int ** C , size_t iC , size_t jC ,
                                                size_t n ) {
    unsigned long long ops = 0;
    for (size_t i=0;i<n;i++) {
        for (size_t j=0;j<n;j++) {
            for (size_t k=0;k<n;k++) {
                C[iC+i][jC+j] += A[iA+i][jA+k]*B[iB+k][jB+j];
                ops+=2;
            }
        }
    }
    return ops;
}

unsigned long long strassen ( int ** A , size_t iA , size_t jA ,
               int ** B , size_t iB , size_t jB ,
               int ** C , size_t iC , size_t jC ,
               size_t n ) {

    /*
        Strassen's algorithm on square submatrices of order n.

        Each operand is given as a matrix plus the row/column offset
        (i*, j*) of the top-left entry of the submatrix to use. The
        product is accumulated into the target: C_sub += A_sub * B_sub,
        so C must be zeroed by the caller to obtain the plain product.

        Returns the number of scalar operations counted: 2 for a 1x1
        product, and for even n, 10 per entry for the S_i, 12 per entry
        for the final combination, plus the 7 recursive calls.

        Orders that cannot be split into four equal blocks (n odd,
        including the base case n == 1) are multiplied with the
        schoolbook method, and n == 0 is a no-op. For n a power of two
        this only ever triggers at n == 1, the classical base case.
        If an auxiliary allocation fails, an error is printed to stderr
        and the program is aborted.
    */

    ///////////////////////////
    // Degenerate and base cases
    if (n==0) {
        return 0; // nothing to multiply; also stops a 0 -> 0 recursion
    }
    if (n%2!=0) {
        // n == 1: C += A*B on scalars (2 operations).
        // odd n > 1: halving would drop the last row and column.
        return strassen_schoolbook ( A , iA , jA ,
                                     B , iB , jB ,
                                     C , iC , jC ,
                                     n );
    }

    ///////////////////////////
    // Divide
    size_t m = n/2; // half of order

    ///////////////////////////
    // Conquer

    size_t i,j; // indices

    unsigned long long ops = 0; // number of operations

    // Auxiliary matrices: S[0..9] hold S1..S10, P[0..6] hold P1..P7.
    // P must start at zero because the recursive calls accumulate into it.
    int *** S = strassen_alloc_zeroed ( 10 , m );
    int *** P = strassen_alloc_zeroed ( 7 , m );

    // Compute auxiliary matrices

    for (i=0;i<m;i++) {
        for (j=0;j<m;j++) {
            // S1 = B12 - B22
            S[0][i][j] = B[iB+i][jB+m+j] - B[iB+m+i][jB+m+j];
            // S2 = A11 + A12
            S[1][i][j] = A[iA+i][jA+j] + A[iA+i][jA+m+j];
            // S3 = A21 + A22
            S[2][i][j] = A[iA+m+i][jA+j] + A[iA+m+i][jA+m+j];
            // S4 = B21 - B11
            S[3][i][j] = B[iB+m+i][jB+j] - B[iB+i][jB+j];
            // S5 = A11 + A22
            S[4][i][j] = A[iA+i][jA+j] + A[iA+m+i][jA+m+j];
            // S6 = B11 + B22
            S[5][i][j] = B[iB+i][jB+j] + B[iB+m+i][jB+m+j];
            // S7 = A12 - A22
            S[6][i][j] = A[iA+i][jA+m+j]-A[iA+m+i][jA+m+j];
            // S8 = B21 + B22
            S[7][i][j] = B[iB+m+i][jB+j] + B[iB+m+i][jB+m+j];
            // S9 = A11 - A21
            S[8][i][j] = A[iA+i][jA+j] - A[iA+m+i][jA+j];
            // S10 = B11 + B12
            S[9][i][j] = B[iB+i][jB+j] + B[iB+i][jB+m+j];
            ops+=10; // one addition/subtraction per S_k entry
        }
    }

    //P1 = A11 * S1
    ops += strassen ( A    , iA     , jA     ,
                      S[0] , 0      , 0      ,
                      P[0] , 0      , 0      ,
                      m );
    //P2 = S2*B22
    ops += strassen ( S[1] , 0      , 0      ,
                      B    , iB + m , jB + m ,
                      P[1] , 0      , 0      ,
                      m );
    //P3 = S3*B11
    ops += strassen ( S[2] , 0      , 0      ,
                      B    , iB     , jB     ,
                      P[2] , 0      , 0      ,
                      m );
    //P4 = A22*S4
    ops += strassen ( A    , iA + m , jA + m ,
                      S[3] , 0      , 0      ,
                      P[3] , 0      , 0      ,
                      m );
    //P5 = S5*S6
    ops += strassen ( S[4] , 0      , 0      ,
                      S[5] , 0      , 0      ,
                      P[4] , 0      , 0      ,
                      m );
    //P6 = S7*S8
    ops += strassen ( S[6] , 0      , 0      ,
                      S[7] , 0      , 0      ,
                      P[5] , 0      , 0      ,
                      m );
    //P7 = S9*S10
    ops += strassen ( S[8] , 0      , 0      ,
                      S[9] , 0      , 0      ,
                      P[6] , 0      , 0      ,
                      m );

    // Finish by updating product entries
    for (i=0;i<m;i++) {
        for (j=0;j<m;j++) {
            // C11 += P5 + P4 - P2 + P6
            C[iC+i][jC+j] += P[4][i][j] + P[3][i][j] - P[1][i][j] + P[5][i][j];
            // C12 += P1 + P2
            C[iC+i][jC+m+j] += P[0][i][j] + P[1][i][j];
            // C21 += P3 + P4
            C[iC+m+i][jC+j] += P[2][i][j] + P[3][i][j];
            // C22 += P5 + P1 - P3 - P7
            C[iC+m+i][jC+m+j] += P[4][i][j] + P[0][i][j] - P[2][i][j] - P[6][i][j];
            ops+=12; // 4 + 2 + 2 + 4 additions/subtractions
        }
    }

    // Free auxiliary matrices
    strassen_free ( S , 10 , m );
    strassen_free ( P , 7 , m );

    return ops;
}

/* Prints the 30-character '=' rule used between report sections, then a newline. */
static void print_rule ( void ) {
    for (int k=0;k<30;k++) {
        putchar('=');
    }
    putchar('\n');
}

/*
    Demo driver:
      1. builds two random n-by-n matrices A and B (n = 8);
      2. computes A*B with standard multiplication and with Strassen's
         algorithm, printing both results and their operation counts;
      3. checks that both products agree entry by entry;
      4. prints a table of operation counts for n = 1, 2, 4, ...,
         stopping once Strassen's count drops below the standard count
         (or n exceeds 16384).
    Returns 0 on success and EXIT_FAILURE if a matrix cannot be allocated.
*/
int main() {
    srand((unsigned)time(NULL)); // random seed

    // Order of matrices; 8 is large-ish and readable
    size_t n = 8;

    size_t i,j; // indices

    // Create random matrices with entries between -99 and 99.
    // C and D start at zero because both multiplication routines accumulate.
    int ** A , ** B , ** C , ** D;

    A = malloc(n * sizeof *A);
    B = malloc(n * sizeof *B);
    C = malloc(n * sizeof *C);
    D = malloc(n * sizeof *D);
    if (A == NULL || B == NULL || C == NULL || D == NULL) {
        fprintf(stderr, "main: out of memory\n");
        return EXIT_FAILURE;
    }

    for (i=0;i<n;i++) {
        A[i] = malloc(n * sizeof *A[i]);
        B[i] = malloc(n * sizeof *B[i]);
        C[i] = malloc(n * sizeof *C[i]);
        D[i] = malloc(n * sizeof *D[i]);
        if (A[i] == NULL || B[i] == NULL || C[i] == NULL || D[i] == NULL) {
            fprintf(stderr, "main: out of memory\n");
            return EXIT_FAILURE;
        }

        for (j=0;j<n;j++) {
            // between -99 and 99
            A[i][j] = rand()%199 - 99;
            B[i][j] = rand()%199 - 99;
            C[i][j] = 0;
            D[i][j] = 0;
        }
    }

    /*
        To reproduce Exercise 4.2-1 instead, set n = 2 above and use
            A = [[1, 3], [7, 5]],  B = [[6, 8], [4, 2]].
    */

    // Start calculating products and number of operations
    unsigned long long number_of_operations;

    print_rule();

    printf("Random matrices:\n");
    printf("A =\n");
    print_matrix(A,n,n);

    printf("\n");

    printf("B =\n");
    print_matrix(B,n,n);

    // ==============================
    printf("\n");
    print_rule();

    printf("Matrix product calculated through standard matrix multiplication:\n");

    number_of_operations = matrix_multiply(A,B,C,n);

    printf("A*B =\n");
    print_matrix(C,n,n);

    printf("\n");

    // %llu matches unsigned long long; %d here would be undefined behaviour
    printf("Number of operations: %llu" , number_of_operations);

    // ==============================
    printf("\n");
    print_rule();
    printf("Matrix product calculated through Strassen's algorithm:\n");

    number_of_operations = strassen ( A , 0 , 0 ,
                                      B , 0 , 0 ,
                                      D , 0 , 0 ,
                                      n );

    printf("A*B =\n");
    print_matrix(D,n,n);

    printf("\n");

    printf("Number of operations: %llu" , number_of_operations);

    // ==============================
    printf("\n");
    print_rule();

    // Compare both products; report only the first mismatch found
    int same = 1;
    for (i=0;i<n && same;i++) {
        for (j=0;j<n;j++) {
            if (C[i][j]!=D[i][j]) {
                printf("Entries (i,j)=(%zu,%zu) were calculated differently!\n",i,j);
                same = 0;
                break;
            }
        }
    }
    if (same) {
        printf("The products were calculated the same.");
    }

    for (i=0;i<n;i++) {
        free(A[i]);
        free(B[i]);
        free(C[i]);
        free(D[i]);
    }

    free(A);
    free(B);
    free(C);
    free(D);


    // Let us print the number of operations for small values of n

    n=1;

    unsigned long long operations_standard = 2, operations_strassen = 2;
    // ==============================
    printf("\n");
    print_rule();

    printf("\nThe following table shows the number of operations (additions and scalar multiplications)\n"
           "performed to calculate C+=A*B with both standard multiplication and Strassen's algorithm.\n\n");
    printf("| %15s | %15s | %15s |\n"
           "|-----------------|-----------------|-----------------|\n",
          "n",
          "standard",
          "Strassen");
    do {
        // n is a size_t, hence %zu
        printf("| %15zu | %15llu | %15llu |\n",
               n , operations_standard , operations_strassen);

        n*=2;
        operations_standard = 2ULL*n*n*n;
        operations_strassen *= 7;
        operations_strassen += (22ULL*n*n)/4;
            /*
                T(n) requires
                    7*T(n/2) (computing P_i);
                    10*(n/2)^2 (computing S_i);
                    12*(n/2)^2 (updating C_ij).
            */
    } while ( (n<=16384) && (operations_standard<=operations_strassen) );
    return 0;
}

Random matrices:
A =
[
  [    -74 ,    -29 ,     37 ,    -39 ,     49 ,    -74 ,     49 ,    -18],
  [      5 ,    -45 ,    -92 ,     60 ,    -62 ,     68 ,     33 ,    -71],
  [     95 ,    -18 ,     46 ,     78 ,    -63 ,     66 ,    -93 ,    -91],
  [     19 ,     93 ,     42 ,    -71 ,     96 ,    -75 ,    -60 ,    -57],
  [     29 ,     43 ,    -56 ,     70 ,      2 ,    -80 ,     39 ,     72],
  [    -79 ,     38 ,     -3 ,     -4 ,     63 ,     76 ,     27 ,    -86],
  [     43 ,      8 ,    -68 ,     96 ,     68 ,     95 ,     74 ,     42],
  [    -12 ,     35 ,    -77 ,    -69 ,    -18 ,     41 ,    -74 ,    -81]
]
B =
[
  [     52 ,     46 ,    -52 ,    -30 ,     18 ,    -29 ,    -74 ,    -54],
  [     -6 ,     29 ,      3 ,     49 ,    -57 ,    -31 ,    -34 ,     59],
  [     -1 ,      9 ,     19 ,     -4 ,    -96 ,     63 ,    -74 ,    -12],
  [    -60 ,      3 ,     54 ,     56 ,     96 ,      7 ,    -70 ,    -65],
  [     24 ,    -25 ,     -1 ,    -43 ,     14 ,    -15 , 

(exercise4.2-1)=
## 4.2-1

> Use Strassen's algorithm to compute the matrix product
> \begin{equation*}\begin{bmatrix}1&3\\7&5\end{bmatrix} \begin{bmatrix}6&8\\4&2\end{bmatrix}.\end{equation*}
> Show your work.

Let $A=\begin{bmatrix}1&3\\7&5\end{bmatrix}$, and $B=\begin{bmatrix}6&8\\4&2\end{bmatrix}$. In this case, the order of the matrices is $n=2$.

1. Since $n>1$, we need to partition our matrices into $n/2\times n/2$ submatrices:
    - $A_{11}=1$, $A_{12}=3$, $A_{21}=7$, $A_{22}=5$;
    - $B_{11}=6$, $B_{12}=8$, $B_{21}=4$, $B_{22}=2$;
    - $C_{11}=C_{12}=C_{21}=C_{22}=0$.
2.  - $S_1=B_{12}-B_{22} = 8 - 2 = 6$.
    - $S_2=A_{11}+A_{12} = 1 + 3 = 4$.
    - $S_3=A_{21}+A_{22} = 7 + 5 = 12$.
    - $S_4=B_{21}-B_{11} = 4 - 6 = -2$.
    - $S_5=A_{11}+A_{22} = 1 + 5 = 6$.
    - $S_6=B_{11}+B_{22} = 6 + 2 = 8$.
    - $S_7=A_{12}-A_{22} = 3 - 5 = -2$.
    - $S_8=B_{21}+B_{22} = 4 + 2 = 6$.
    - $S_9=A_{11}-A_{21} = 1 - 7 = -6$.
    - $S_{10}=B_{11}+B_{12} = 6 + 8 = 14$.
3.  - $P_1=A_{11}\cdot S_1 = 1\cdot6 = 6$.
    - $P_2=S_2\cdot B_{22} = 4 \cdot 2 = 8$.
    - $P_3=S_3\cdot B_{11} = 12\cdot 6 = 72$.
    - $P_4=A_{22}\cdot S_4 = 5\cdot(-2)=-10$.
    - $P_5=S_5\cdot S_6 = 6\cdot 8 = 48$.
    - $P_6=S_7\cdot S_8 = (-2)\cdot 6 = -12$.
    - $P_7=S_9\cdot S_{10} = (-6)\cdot 14 = -84$.
4.  - $C_{11} = P_5+P_4-P_2+P_6 = 48 + (-10)-8-12 = 18$.
    - $C_{12} = P_1+P_2 = 6 + 8 = 14$.
    - $C_{21} = P_3+P_4 = 72 + (-10) = 62$.
    - $C_{22} = P_5+P_1-P_3-P_7 = 48 + 6 - 72 - (-84) = 66$.
    
Therefore,
\begin{equation*}
\begin{bmatrix}1&3\\7&5\end{bmatrix}\cdot\begin{bmatrix}6&8\\4&2\end{bmatrix} = \begin{bmatrix}18 & 14\\62& 66\end{bmatrix}.\end{equation*}

(exercise4.2-2)=
## 4.2-2

> Write pseudocode for Strassen's algorithm.

    STRASSEN(A,B,C,n)
    1   if n==1
    2   // Base case.
    3       C[1,1] = C[1,1]+A[1,1]*B[1,1]
    4       return
    5   // Divide.
    6   partition A, B, and C into n/2 x n/2 submatrices
            A_11, A_12, A_21, A_22; B_11, B_12, B_21, B_22;
            and C_11, C_12, C_21, C_22; respectively
    7   // Conquer
    8   Let S_1  = B_12 - B_22,
            S_2  = A_11 + A_12,
            S_3  = A_21 + A_22,
            S_4  = B_21 - B_11,
            S_5  = A_11 + A_22,
            S_6  = B_11 + B_22,
            S_7  = A_12 - A_22,
            S_8  = B_21 + B_22,
            S_9  = A_11 - A_21,
            S_10 = B_11 + B_12.
    9   Initialize n/2 x n/2 zero matrices
            P_1, P_2, P_3, P_4, P_5, P_6, P_7
    10  STRASSEN( A_11 , S_1  , P_1 , n/2)
        STRASSEN( S_2  , B_22 , P_2 , n/2)
        STRASSEN( S_3  , B_11 , P_3 , n/2)
        STRASSEN( A_22 , S_4  , P_4 , n/2)
        STRASSEN( S_5  , S_6  , P_5 , n/2)
        STRASSEN( S_7  , S_8  , P_6 , n/2)
        STRASSEN( S_9  , S_10 , P_7 , n/2)
    11 Let C_11 = C_11 + P_5 + P_4 - P_2 + P_6
           C_12 = C_12 + P_1 + P_2
           C_21 = C_21 + P_3 + P_4
           C_22 = C_22 + P_5 + P_1 - P_3 - P_7
    12 return

(exercise4.2-3)=
## 4.2-3

> What is the largest $k$ such that if you can multiply $3\times 3$ matrices using $k$ multiplications (not assuming commutativity of multiplication), then you can multiply $n\times n$ matrices in $o(n^{\lg 7})$ time? What is the running time of this algorithm?

```{note}
We should regard saying that we "can multiply $3\times 3$ matrices using $k$ multiplications (not assuming commutativity of multiplication)" as stating that we can perform a certain specific sequence of symbolic manipulations on the entries of $3\times 3$ matrices using only addition and multiplications and its standard properties (associativity, properties of additive/multiplicative identities, etc.) **except** commutativity of the product, with multiplications being used at most $k$ times, in order to construct the standard product of $3\times 3$ matrices.

In particular, this "sequence of symbolic manipulations" can also be applied with matrices in places of the formal symbols, which allows us to multiply $3\times 3$ block matrices in terms of $k$ products of their blocks, yielding a recursive formula for products of general square matrices with order a power of $3$.

If we want to be even more formal, a "sequence of symbolic manipulations" could be regarded as an algorithm on the space of $3\times 3$ matrices over the [free (non-commutative) $R$-algebra](https://en.wikipedia.org/wiki/Free_algebra) generated by an infinite set, so in particular it also applies to any $R$-algebra, where $R$ is an appropriate ring (such as $\mathbb{R}$ or $\mathbb{Z}$).

Denoting by $M_n(R)$ the space of $n\times n$ matrices over $R$, we can apply this algorithm on $M_{3n}(R)\cong M_3(M_n(R))$ to obtain the product of $3n\times 3n$ matrices with at most $k$ products of $n\times n$ matrices and a finite (and bounded) sequence of sums, assignments, etc., all of which are $O(n^2)$.
```

Suppose that we can multiply $3\times 3$ matrices using $k$ multiplications and without assuming commutativity of multiplication. Consider the following algorithm that multiplies two $n\times n$ matrices $A$ and $B$, where $n$ is a power of $3$:

    MATRIX-MULTIPLY-3(A,B,n)
    1   if n==1
    2       return A[1,1]*B[1,1]
    3   partition A and B into n/3 x n/3 matrices
    4   Perform the multiplication of A and B with the given procedure
            for 3x3 matrices, as if A and B were 3x3 matrices with each
            n/3 x n/3 submatrix being an entry. Implement multiplication
            of n/3 x n/3 (sub)matrices by MATRIX-MULTIPLY-3, and other
            procedures (additions, reassignments, etc.) in the standard
            manner.

Let $T(n)$ be the (worst-case) running time of `MATRIX-MULTIPLY-3`. Line 3 above takes time $\Theta(n^2)$, whereas line 4 performs `MATRIX-MULTIPLY-3` $k$ times on $n/3 \times n/3$ matrices, which in total takes time $kT(n/3)$, and also has an $O(n^2)$ part (regarding the "other procedures"). Thus, $T(n)$ satisfies the recursion
\begin{equation*}
T(n) = kT(n/3)+O(n^2).
\end{equation*}
By the Master Theorem (assuming $k>9$, so that $\log_3 k>2$), we have
$$T(n) = \Theta(n^{\log_3 k}).$$
In order for us to have $T(n)=o(n^{\lg 7})$, we need that
\begin{equation*}\log_3 k < \lg 7,\end{equation*}
that is,
\begin{equation*}k<3^{\lg 7}\approx 21.8.\end{equation*}
Therefore, the largest $k$ which the exercise asks for is $k=21$, and the resulting algorithm runs in time $\Theta(n^{\log_3 21})$, where $\log_3 21\approx 2.77$.

Currently, the best algorithms to multiply $3\times 3$ matrices directly take $23$ multiplications, so their recursive versions for large matrices are still asymptotically worse than Strassen's algorithm (although better than standard matrix multiplication, as $\log_3 23 \approx 2.85$). See [Courtois-Bard-Hulme, *A New General-Purpose Method to Multiply 3x3 Matrices Using Only 23 Multiplications*](https://arxiv.org/abs/1108.2830).

(exercise4.2-4)=
## 4.2-4

> V. Pan discovered a way of multiplying $68\times 68$ matrices using $132,464$ multiplications, a way of multiplying $70\times 70$ matrices using $143,640$ multiplications, and a way of multiplying $72\times 72$ matrices using $155,424$ multiplications. Which method yields the best asymptotic running time when used in a divide-and-conquer matrix-multiplication algorithm? How does it compare with Strassen's algorithm?

Analogously to Strassen's algorithm and the [previous exercise](exercise4.2-3), if we have a general method for multiplying $k\times k$ matrices with $p$ multiplications, then we can apply it recursively to multiply $n\times n$ matrices, with $n$ a power of $k$, by applying this method to $k\times k$ block matrices. This yields a recursion for the running time $T(n)$ of such a recursive algorithm satisfying
\begin{equation*}T(n) = pT(n/k) + O(n^2).\end{equation*}
By the Master Theorem,
\begin{equation*}T(n)=\Theta(n^{\log_k p})\end{equation*}
(as long as $2<\log_k p$, i.e., $k^2<p$, which is satisfied below).

For $k\in\left\{68,70,72\right\}$, let $T_k(n)$ be the running time of the recursive algorithm obtained by applying the previous paragraph to V. Pan's algorithm for multiplying $k\times k$ matrices. Then
- $T_{68}(n)=\Theta(n^{\log_{68} 132464})$, where $\log_{68} 132464 \approx 2.795128$;
- $T_{70}(n)=\Theta(n^{\log_{70} 143640})$, where $\log_{70} 143640 \approx 2.795123$;
- $T_{72}(n)=\Theta(n^{\log_{72} 155424})$, where $\log_{72} 155424 \approx 2.795147$.

Therefore, the divide-and-conquer method applied to V. Pan's algorithm for multiplying $70\times 70$ matrices is the fastest one, and it is asymptotically faster than Strassen's algorithm (in fact, all three algorithms thus obtained are faster than Strassen's).

(exercise4.2-5)=
## 4.2-5

> Show how to multiply the complex numbers $a + bi$ and $c+ d i$ using only three multiplications of real numbers. The algorithm should take $a$, $b$, $c$, and $d$ as input and produce the real component $ac - bd$ and the imaginary component $ad+ bc$ separately.

Basically, we want expressions involving "mixed products" between the pairs of numbers $(a,b)$ and $(c,d)$. A simple way to obtain these is with expressions of the form
\begin{equation*}(a\pm b)(c\pm d)=ac\pm ad\pm bc\pm bd,\end{equation*}
which is computed with only one multiplication. Then we need to get rid of some of the mixed products in the expression above to get to the real and imaginary parts of $(a+bi)(c+di)$. One possibility follows,

    COMPLEX-MULTIPLY (a,b,c,d)
    1   Let s = a-b and t = c+d
    2   Compute x = s*t,
                y = a*d,
            and z = b*c
    3   return x-y+z (real component)
           and y+z (imaginary component)

```{note}
The mathematically-minded reader would probably start to tackle this exercise by using the standard mapping from complex numbers to real $2\times 2$ matrices,
\begin{equation*}x+yi\longmapsto\begin{bmatrix}x&-y\\y&x\end{bmatrix},\end{equation*}
which respects all necessary structure (addition, multiplication and scalar multiplication; that is, it is an $\mathbb{R}$-algebra homomorphism). Then we could apply Strassen's algorithm to compute the matrix product
\begin{equation*}
\begin{bmatrix}a&-b\\b&a\end{bmatrix}\begin{bmatrix}c&-d\\d&c\end{bmatrix}=\begin{bmatrix}(ac-bd)&-(ad+bc)\\(ad+bc)&(ac-bd)\end{bmatrix}.\end{equation*}
and try to get rid of a few extra products as the matrices are so well-structured (their diagonal entries are equal and their off-diagonal entries are opposite).

However, this effort is to no avail. In this case, using the same notation as in Strassen's algorithm,

- $P_1 = -ac - ad$
- $P_2 = ac - bc$
- $P_3 = ac + bc$
- $P_4 = -ac + ad$
- $P_5 = 4ac$
- $P_6 = -ac - ad - bc - bd$
- $P_7 = ac - ad - bc + bd$

The real part of the product of $a+bi$ and $c+di$ is $C_{11}$ or $C_{22}$, so computing it requires in any case computing $P_5$, which is equivalent to computing $ac$.

The imaginary part of the product of $a+bi$ and $c+di$ is $C_{12}$ (up to sign) or $C_{21}$, so necessarily we would need to compute $P_1$ or $P_3$. Since $ac$ needs to be computed, in any case this would be equivalent to computing $ad$ as well. Similarly, we would also need to compute $bc$.

Going back to the real part, we would need to compute $P_6$ or $P_7$, so using the computations above this would be the same as computing $bd$.

Thus, computing the product of $a+bi$ and $c+di$ via Strassen's algorithm (in a symbolic manner) would entail computing some $P_i$ which generate a vector subspace (of the free commutative $\mathbb{R}$-algebra generated by $a,b,c,d$) containing $ac,ad,bc$ and $bd$, which are linearly independent and thus this vector subspace has dimension at least $4$. Therefore, at least $4$ of the products among the $P_i$ would need to be computed, which does not solve the exercise.
```

## 4.2-6

> Suppose that you have a $\Theta(n^\alpha)$-time algorithm for squaring $n\times n$ matrices, where $\alpha\geq 2$. Show how to use that algorithm to multiply two different $n\times n$ matrices in $\Theta(n^\alpha)$ time.

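Suppose the algorithm `MATRIX-SQUARE(A,n)` returns the square of the $n\times n$ matrix $A$ in time $\Theta(n^\alpha)$. Since matrix multiplication is not commutative, we have $(A+B)^2=A^2+AB+BA+B^2$, so squaring sums or differences of $A$ and $B$ only yields $AB+BA$, and not $AB$ itself. Instead, we use the identity for $2n\times 2n$ block matrices
\begin{equation*}\begin{bmatrix}0&A\\B&0\end{bmatrix}^2 = \begin{bmatrix}AB&0\\0&BA\end{bmatrix},\end{equation*}
so the following algorithm returns the product of two $n\times n$ matrices $A$ and $B$:

    MATRIX-MULTIPLY_(A,B,n)
    1   Let M be the 2n x 2n block matrix with blocks
            M_11 = 0, M_12 = A, M_21 = B, M_22 = 0
    2   Let N = MATRIX-SQUARE(M,2n)
    3   return the upper-left n x n block N_11 of N
    
Lines 1 and 3 run in time $\Theta(n^2)$, whereas line 2 runs in time $\Theta((2n)^\alpha)=\Theta(2^\alpha n^\alpha)=\Theta(n^\alpha)$, since $2^\alpha$ is a constant. Therefore, the running time of `MATRIX-MULTIPLY_(A,B,n)` is
\begin{equation*}\Theta(n^2)+\Theta(n^\alpha)=\Theta(n^\alpha),\end{equation*}
since $\alpha\geq 2$.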