# Bini Fast Matrix Multiplication O(2.7799)

In [2]:
import numpy as np

In [3]:
from numpy import linalg as la

In [4]:
np.set_printoptions(precision=2)

In [5]:
#without dynamic peeling:
# row of A need to be divisible by 3 (power of is preferable)
# columns of A need do be divisble by 2

# rows and cols of B need to be divisible by two

In [18]:
A = np.random.rand(243, 256)
B = np.random.rand(256, 256)

In [19]:
def bini(A, B, steps, e=1):
    
    #Check Dimensions
    (m, n) = A.shape
    (nn, p) = B.shape
    if n != nn: raise ValueError("incompatible dimensions")
    
    #pre-allocate output matrix
    C = np.zeros((m,p))
    
    #Base case
    if steps == 0 or m == 1 or n == 1 or p == 1:
        C = np.dot(A,B)
        return C
    
    #Dynamic peling
    #**************
    # right now only input matricies that fit the power of 3 and 2 dimension rules
 
    """
    This is the notation I use from Bini's 1980 paper

    |A1, A4|  |B1, B2|  =  |C1, C2|
    |A2, A5|  |B3, B4|     |C3, C4|
    |A3, A6|               |C5, C6|
    """

    # split up the matricies once rows of A are divisible by 3
    # and cols of A and rows and cols of are divisible by 2
    m2 = int(m/3) #first third of the rows of A
    m3 = m2*2     #second third of the rows of A
    n2 = int(n/2) #half of the cols of A
    
    # Do I need to do this below for B? Could I only do just one because it should be
    # square? and since its just the 
    p2 = int(p/2) #half of the cols of B
    nn2 = int(nn/2) # half of the rows of B
    
    A1 = A[:m2, :n2]
    A2 = A[m2:m3, :n2]
    A3 = A[m3:, :n2]
    A4 = A[:m2, n2:]
    A5 = A[m2:m3, n2:]
    A6 = A[m3:, n2:]
    
    #being really explicit with rows and cols of B but cold replace with just cols of 
    # A
    B1 = B[:nn2, :p2]
    B2 = B[:nn2, p2:]
    B3 = B[nn2:, :p2]
    B4 = B[nn2:, p2:]
    
    #bini(A, B, steps, e=0.1)
    # conquer
    M1 = bini(A1 + A5, e*B1 + B4, steps-1, e)
    M2 = bini(A5, -B3-B4, steps-1, e)
    M3 = bini(A1, B4, steps-1, e)
    M4 = bini(e*A4+A5, -e*B1 + B3, steps-1, e)
    M5 = bini(A1 + e*A4, e*B2 + B4, steps-1, e)
    M6 = bini(A2 + A6, B1 + e*B4, steps-1, e)
    M7 = bini(A2, -B1 - B2, steps-1, e)
    M8 = bini(A6, B1, steps-1, e)
    M9 = bini(A2 + e*A3, B2 - e*B4, steps-1, e)
    M10 = bini(e*A3 + A6, B1 + e*B3, steps-1, e)
    
    # put C together
    C[:m2, :n2] = (1/e)*(M1+M2-M3+M4) #C1
    C[:m2, n2:] = (1/e)*(-M3+M5)      #C2
    C[m2:m3, :n2] = M4+M6+M10         #C3
    C[m2:m3, n2:] = M1+M5+M9          #C4
    C[m3:, :n2] = (1/e)*(M8+M10)      #C5
    C[m3:, n2:] = (1/e)*(M6+M7-M8+M9) #C6
    
    return C

In [20]:
C = bini(A,B, 1)

In [21]:
la.norm(C-A@B, 'fro')/la.norm(C)

0.7154607879986333