In [1]:
from pathlib import Path
import os
import time
# Work from the parent of the notebook's directory: later cells use paths
# relative to it (bin/, src/, scripts/, data/).
# The starting directory is remembered the first time this cell runs, so that
# re-running the cell does not climb one more level up on every execution.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = Path.cwd()
os.chdir(NOTEBOOK_DIR.parent)

In [2]:
# Optional: uncomment to install the Python dependencies into the kernel's environment.
# %pip install numpy typer sympy

## Compilation [REQUIRED]

In [3]:
# compile search2, lift2, select2, search3, lift3, select3, it takes ~ 100s

t1 = time.time()
bin_dir = Path("bin")
bin_dir.mkdir(exist_ok=True)
for mod_flag, suffix in [("", "2"), ("-DMOD3", "3")]:
    for src in ["search", "lift", "select"]:
        name = f"{src}{suffix}"
        exe = bin_dir / name
        !g++ {mod_flag} -Ofast -std=c++20 -march=native -Ithird_party -pthread src/{src}.cpp -o {exe}
        globals()[name] = exe
t2 = time.time()
print(f"Compiled in {t2-t1:.1f}s.")

Compiled in 108.3s.


In [4]:
# Pass -h to a compiled binary to list its command-line parameters (shown here for search2)
!{search2} -h

Pool-based Tensor Flip Graph Search 


bin\search2 [OPTIONS] name


POSITIONALS:
  name TEXT REQUIRED          Tensor name (e.g., gt-333) 

OPTIONS:
  -h,     --help              Print this help message and exit 
          --id TEXT           Output identifier (affects scheme names only) 
  -f,     --path-limit INT [1000000]  
                              Path length limit 
  -s,     --pool-size INT [200]  
                              Pool size limit 
  -r,     --target-rank INT [0]  
                              Target rank 
  -p,     --plus-lim INT [50000]  
                              Flips before plus transition 
  -t,     --threads INT:POSITIVE [4]  
                              Number of worker threads 
  -m,     --max-attempts INT:POSITIVE [1000]  
                              Max attempts per rank level 
          --stop INT:POSITIVE [20000]  
                              Stop if nothing found after this many attempts 
          --plus              Enable plus transiti

## General matrix multiplication (𝔽₂-search)

Let's reproduce Laderman's result for 3×3 matrices: a multiplication scheme of rank 23.

In [5]:
# 0. Generate the corresponding tensor (gg, 3x3x3); it is saved under data/tensors/
%run scripts/generator.py gg 3 3 3

Saved: data\tensors\gg-333.npy and data\tensors\gg-333.meta.json  shape=(27, 4)  dtype=int8


In [6]:
# 1. Run the flip graph search over 𝔽₂ (--plus enables plus transitions)
!{search2} gg-333 --plus --save

=== Pool-based Flip Graph Search ===
Tensor name: gg-333
Operation: gg
Dimensions: 3x3x3
Output ID: (none)
Tensor: data/tensors\gg-333.npy
Field: mod2
Dims: U=9, V=9, W=9  (nnz=27)
Path limit: 1000000, Pool size: 200, Target rank: 0, Stop attempts: 20000, Threads: 4, Plus transitions: enabled (limit: 50000)

Starting from rank 27

=== Searching for rank 26 ===
Completed 210 attempts in 2.6s
Found 210 candidate schemes of rank 26
Verified: 210/210

=== Searching for rank 25 ===
Completed 205 attempts in 2.4s
Found 205 candidate schemes of rank 25
Verified: 205/205

=== Searching for rank 24 ===
Completed 210 attempts in 2.9s
Found 210 candidate schemes of rank 24
Verified: 210/210

=== Searching for rank 23 ===
Completed 214 attempts in 3.0s
Found 213 candidate schemes of rank 23
Verified: 213/213

=== Searching for rank 22 ===
Completed 1002 attempts in 43.0s
Found 0 candidate schemes of rank 22
Failed to find schemes of rank 22 after 1002 attempts

=== Final Results ===
Best rank achi

In [7]:
# 2. Run Hensel lifting for 10 steps to lift the rank-23 schemes from mod 2 to mod 2¹¹,
#    then reconstruct rational (integer) coefficients
!{lift2} gg-333 23

=== Hensel Lifting + Rational Reconstruction (mod 2) ===
Tensor name: gg-333
Operation:   gg
Shape:       (3, 3, 3)
Tensor:      data/tensors/gg-333.npy
Output ID:   (none)
Params:      nU=9, nV=9, nW=9
Rank:        23
Modulus:     2048 = 2^11
Threads:     8
Bound:       32
Verify:      no

Loaded 213 schemes (r=23)

Phase 1: Hensel lifting...
Progress: 50/213 (49 successful)
Progress: 100/213 (94 successful)
Progress: 150/213 (142 successful)
Progress: 200/213 (190 successful)
Progress: 213/213 (201 successful)


Lifting complete:
  Time:       0.061158 s
  Successful: 201/213

Saved lifted schemes to: data/schemes_lifted/gg-333/rank23-2pow11.npy
Format: (201, 621) uint64

Phase 2: Rational reconstruction...
Progress: 50/201 (50 successful)
Progress: 100/201 (100 successful)
Progress: 150/201 (150 successful)
Progress: 200/201 (200 successful)
Progress: 201/201 (201 successful)


Reconstruction complete:
  Time:       0.008703 s
  Successful: 201/201
  Integer:    183/201
  NNZ (min/a

In [8]:
# 3. Select the integer scheme with the smallest number of additions
!{select2} gg-333 23

=== Recursion Analysis ===
Tensor name: gg-333
Operation: gg (symmetric)
Shape:     (3, 3, 3)
Rank:      23
Input dir: data/schemes_rational\gg-333
Files:     1 matching file(s)
Params:    nU=9, nV=9, nW=9

Loaded 201 rational schemes from 1 file(s)

Integer schemes:  183
Rational schemes: 18

  Unique triples (integer):  1
  Unique triples (rational): 1

  Pareto front (integer):  1 non-dominated triple(s)
  Pareto front (rational): 1 non-dominated triple(s)

Merging fronts...
  No rational schemes added (all dominated by or equal to integer front)

=== Final Pareto Front: 1 scheme ===
Triple           | Count   | Field   | NNZ    | MaxDenom  
------------------------------------------------------------
( 0, 0, 0)       | 201     | Z       | 185    | 1         

Saving selected schemes...
Saved 1 selected scheme (.npy + .txt) to data/schemes_selected/


In [9]:
# Here is the rank-23 scheme for 3x3 general matrix multiplication.
# The file is read with pathlib rather than `!cat`, because `cat` is not
# available in the default Windows shell.
scheme_path = Path("data/schemes_selected/gg-333-rank23-rec-0-0-0-z.txt")
print(scheme_path.read_text(encoding="utf-8"))

m1  = (a1 - a6 + a7)(b2 + b4 + b7)
m2  = (a1 - a4 + a7)(b1 - b2 + b4)
m3  = (a3 - a6 + a8)(b6 - b8 + b9)
m4  = (a1 + a2 - a3)(b4 - b6)
m5  = (a1 - a3)(b6 + b9)
m6  = (a1 + a2 - a3)(b6)
m7  = (a3 - a6 + a9)(b4 + b7 - b8)
m8  = (a2 - a5 + a8)(b5)
m9  = (a1 - a2 + a3 - a4 + a5 - a6 + a7 - a8 + a9)(b4)
m10 = (a1 - a3)(b4 - b6 + b7 - b9)
m11 = (a2 - a3 - a5 + a6)(b2 - b3 - b6)
m12 = (a2 - a6 + a8)(b2 + b8)
m13 = (a7)(b1 + b2 - b3 + b4 + b7)
m14 = (a2 + a8)(b2 + b5 + b8)
m15 = (a1 - a2 + a3 - a4 + a5 - a6 + a7)(b2 - b3 - b4)
m16 = (a1)(b1 - b3 + b7 - b9)
m17 = (a1 - a6)(b2 + b6 + b9)
m18 = (a1 - a2 + a3 - a4 + a5 - a6)(b2 - b3)
m19 = (a4 - a6)(b2)
m20 = (a1)(b1 + b7)
m21 = (a8)(b5 - b6 + b8 - b9)
m22 = (a8 - a9)(b4 - b8 + b9)
m23 = (a7 - a9)(b4 + b7)

c1 = m4 - m5 + m6 - m10 + m20
c2 = -m3 - m5 - m12 + m14 + m17 - m21
c3 = -m5 + m6 - m16 + m20
c4 = -m1 - m2 + m4 + m6 + m13 - m15 + m18 + m19 + m20
c5 = -m8 - m12 + m14 + m19
c6 = m6 + m11 - m16 - m17 + m18 + m19 + m20
c7 = -m9 + m13 - m15 + m1

In [14]:
# Check the scheme's output: substitute the products m_i into the c_j formulas and expand them
%run scripts/print_scheme_output.py data/schemes_selected/gg-333-rank23-rec-0-0-0-z.txt

c1 = a1*b1 + a2*b4 + a3*b7
c2 = a1*b2 + a2*b5 + a3*b8
c3 = a1*b3 + a2*b6 + a3*b9
c4 = a4*b1 + a5*b4 + a6*b7
c5 = a4*b2 + a5*b5 + a6*b8
c6 = a4*b3 + a5*b6 + a6*b9
c7 = a7*b1 + a8*b4 + a9*b7
c8 = a7*b2 + a8*b5 + a9*b8
c9 = a7*b3 + a8*b6 + a9*b9
