In [1]:
import os
from math import prod

from typing import *

import pandas as pd
import numpy as np

import einops

import tensorly as tl
from tensorly.decomposition import parafac, non_negative_parafac
from tensorly.kruskal_tensor import kruskal_to_tensor

Using numpy backend.


In [2]:
# Raise the pandas row-display limit to 500.
pd.options.display.max_rows = 500

In [3]:
# Path to the benchmark results, relative to the user's home directory.
# The "~" is what os.path.expanduser actually expands; with a fully absolute
# path the call would be a no-op and the notebook would be tied to one account.
results_path = os.path.expanduser("~/projects/dctn/small_experiments/logmatmulexp_benchmark/results.json")
df = pd.read_json(results_path)
#df = df[~ (df["device"] == "cpu")] # look only at GPUs
#df = df[~ (df["dim_size"] == 64)] # small dim size execution times are too chaotic
# Index by the four benchmark parameters and sort, so rows are ordered level by level.
df = df.set_index(["dim_size", "func", "device", "dtype"]).sort_index()

In [4]:
# Display the indexed benchmark table.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,forward_backward_seconds_per_iteration,forward_seconds_per_iteration,num_iterations,num_matrices
dim_size,func,device,dtype,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
64,logmatmulexp,GeForce GTX 1070,torch.float32,0.001806,0.000763,50,6
64,logmatmulexp,GeForce GTX 1070,torch.float64,0.001583,0.000823,50,6
64,logmatmulexp,Graphics Device,torch.float32,0.0017,0.000779,50,6
64,logmatmulexp,Graphics Device,torch.float64,0.001644,0.000773,50,6
64,logmatmulexp,cpu,torch.float32,0.004893,0.002908,50,6
64,logmatmulexp,cpu,torch.float64,0.010602,0.005202,50,6
64,logmatmulexp_lowmem,GeForce GTX 1070,torch.float32,0.003096,0.00088,50,6
64,logmatmulexp_lowmem,GeForce GTX 1070,torch.float64,0.003193,0.000841,50,6
64,logmatmulexp_lowmem,Graphics Device,torch.float32,0.003049,0.000856,50,6
64,logmatmulexp_lowmem,Graphics Device,torch.float64,0.002803,0.000861,50,6


In [5]:
# Number of distinct values in each index level (dim_size, func, device, dtype).
shape = df.index.levshape
# Reshaping the table into a dense tensor is only valid when the sorted index is
# a complete grid, i.e. every combination of level values appears exactly once.
# The row count alone is not enough: a duplicated row plus a missing combination
# would still give the right length, so uniqueness is checked as well.
assert df.index.is_unique, "duplicate (dim_size, func, device, dtype) rows in results"
assert prod(shape) == len(df), f"expected {prod(shape)} rows for a full grid, got {len(df)}"

In [6]:
# Timing columns to analyse; they form the last axis of the tensor built from df.
what = [
    "forward_backward_seconds_per_iteration",
    "forward_seconds_per_iteration",
]
#what = ["forward_seconds_per_iteration"]

In [7]:
# Dense tensor of timings: one axis per index level plus a trailing axis for `what`.
array = df[what].values.reshape(*shape, len(what))
print(array.shape) # dim_size × func × device × dtype × what
print(df.index.levels)

(6, 3, 3, 2, 2)
[[64, 128, 150, 192, 256, 280], ['logmatmulexp', 'logmatmulexp_lowmem', 'matmul'], ['GeForce GTX 1070', 'Graphics Device', 'cpu'], ['torch.float32', 'torch.float64']]


In [8]:
# Rank-1 non-negative PARAFAC fit of the timing tensor.
factors = non_negative_parafac(
    array,
    rank=1,
    tol=1e-8,
    verbose=True,
)

reconstruction error=0.09083051780288953, variation=4.0929759581587177e-13.
converged in 2 iterations.


In [9]:
# Display the fitted factors.
factors

[array([[0.02432675],
        [0.24575081],
        [0.43651074],
        [0.87500978],
        [2.18576621],
        [2.70063728]]),
 array([[0.54947385],
        [0.83549431],
        [0.00526884]]),
 array([[0.05550402],
        [0.03012612],
        [0.99800388]]),
 array([[0.50728041],
        [0.8617811 ]]),
 array([[0.91735388],
        [0.39807275]])]

In [10]:
def calc_errors(x: np.ndarray, factors) -> Tuple[np.ndarray, np.ndarray]:
    """Print and return the elementwise errors of a Kruskal reconstruction of ``x``.

    Args:
        x: Reference tensor.
        factors: Kruskal factors handed to ``kruskal_to_tensor``; the rebuilt
            tensor is subtracted from ``x`` elementwise.

    Returns:
        ``(abs_err, rel_err)`` with ``abs_err = |reconstruction - x|`` and
        ``rel_err = abs_err / |x|``. Entries where ``x`` is zero yield
        ``inf``/``nan`` in ``rel_err``.
    """
    abs_err = np.abs(kruskal_to_tensor(factors) - x)
    rel_err = abs_err / np.abs(x)
    # One-line summary: max and mean of both error arrays.
    print(f"{np.max(abs_err)=:.1e}, {np.mean(abs_err)=:.1e}, {np.max(rel_err)=:.1e}, {np.mean(rel_err)=:.1e}")
    return abs_err, rel_err

In [11]:
# Errors of the first fit against the original tensor (calc_errors also prints a summary).
abs_err, rel_err = calc_errors(array, factors)

np.max(abs_err)=1.7e-01, np.mean(abs_err)=7.8e-03, np.max(rel_err)=1.2e+00, np.mean(rel_err)=4.1e-01)


In [12]:
def array_to_df(x: np.ndarray) -> pd.DataFrame:
    """Turn a tensor into a frame with the same index as ``df`` and columns ``what``.

    ``x`` is reshaped to ``(-1, len(what))``; the resulting rows are labelled with
    ``df.index``.
    """
    n_columns = len(what)
    rows = x.reshape((-1, n_columns))
    return pd.DataFrame(rows, index=df.index, columns=what)

In [13]:
# Show the relative errors of the first fit as a table indexed like df.
array_to_df(rel_err)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,forward_backward_seconds_per_iteration,forward_seconds_per_iteration
dim_size,func,device,dtype,Unnamed: 4_level_1,Unnamed: 5_level_1
64,logmatmulexp,GeForce GTX 1070,torch.float32,0.808869,0.803532
64,logmatmulexp,GeForce GTX 1070,torch.float64,0.629395,0.690809
64,logmatmulexp,Graphics Device,torch.float32,0.889795,0.89558
64,logmatmulexp,Graphics Device,torch.float64,0.806387,0.82134
64,logmatmulexp,cpu,torch.float32,0.268678,0.073703
64,logmatmulexp,cpu,torch.float64,0.005256,0.120186
64,logmatmulexp_lowmem,GeForce GTX 1070,torch.float32,0.830441,0.741224
64,logmatmulexp_lowmem,GeForce GTX 1070,torch.float64,0.720648,0.540059
64,logmatmulexp_lowmem,Graphics Device,torch.float32,0.906548,0.855626
64,logmatmulexp_lowmem,Graphics Device,torch.float64,0.827292,0.756033


In [14]:
# Now normalize `array` by the fitted factors before factoring it once more.

In [15]:
# Work on a copy so the original tensor stays untouched.
normalized_array = array.copy()
for dim, factor in enumerate(factors):
    # Build an einops pattern that places the factor's entries on axis `dim`
    # and unit axes everywhere else, so the in-place division broadcasts.
    leading_units = " ".join(["()"] * dim)
    trailing_units = " ".join(["()"] * (array.ndim - dim - 1))
    broadcastable = einops.rearrange(factor, f"i () -> {leading_units} i {trailing_units}")
    normalized_array /= broadcastable

In [16]:
# Compare the standard deviation of the tensor before and after normalization.
print(f"{array.std()=:.1e}, {normalized_array.std()=:.1e}")

array.std()=2.3e-01, normalized_array.std()=2.1e+01


In [17]:
# Same rank-1 non-negative PARAFAC fit, this time on the normalized tensor.
normalized_factors = non_negative_parafac(
    normalized_array,
    rank=1,
    tol=1e-8,
    verbose=True,
)

reconstruction error=0.10227731704582028, variation=1.9082610491771845e-11.
converged in 2 iterations.


In [18]:
# Display the factors fitted to the normalized tensor.
normalized_factors

[array([[314.93030605],
        [ 35.83708516],
        [ 18.41960939],
        [ 12.31981299],
        [  7.21467436],
        [  7.37381266]]),
 array([[0.05186907],
        [0.05307245],
        [0.99724282]]),
 array([[0.56375102],
        [0.82557455],
        [0.02475631]]),
 array([[0.88870711],
        [0.45847568]]),
 array([[0.91593733],
        [0.40132469]])]

In [19]:
# Errors of the second fit, measured against the normalized tensor.
normalized_abs_err, normalized_rel_err = calc_errors(normalized_array, normalized_factors)

np.max(abs_err)=1.7e+01, np.mean(abs_err)=1.4e+00, np.max(rel_err)=2.1e+00, np.mean(rel_err)=6.9e-01)


In [20]:
# Combine the two fits axis by axis: each new factor is the elementwise product
# of the normalized-tensor factor and the corresponding original factor.
new_factors = []
for normalized_factor, old_factor in zip(normalized_factors, factors):
    new_factors.append(normalized_factor * old_factor)

In [21]:
# Errors of the combined factors, measured against the original tensor.
new_abs_err, new_rel_err = calc_errors(array, new_factors)

np.max(abs_err)=1.8e+00, np.mean(abs_err)=8.0e-02, np.max(rel_err)=2.1e+00, np.mean(rel_err)=6.9e-01)


In [22]:
# Relative errors of the combined fit, restricted to rows whose `func` level is "matmul".
matmul_rows = pd.IndexSlice[:, "matmul"]
array_to_df(new_rel_err).loc[matmul_rows, :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,forward_backward_seconds_per_iteration,forward_seconds_per_iteration
dim_size,func,device,dtype,Unnamed: 4_level_1,Unnamed: 5_level_1
64,matmul,GeForce GTX 1070,torch.float32,0.030365,0.35775
64,matmul,GeForce GTX 1070,torch.float64,0.097155,0.140836
64,matmul,Graphics Device,torch.float32,0.008064,0.096116
64,matmul,Graphics Device,torch.float64,0.067577,0.061141
64,matmul,cpu,torch.float32,0.393951,0.399415
64,matmul,cpu,torch.float64,0.102083,0.553429
128,matmul,GeForce GTX 1070,torch.float32,0.243126,0.352207
128,matmul,GeForce GTX 1070,torch.float64,0.035246,0.584339
128,matmul,Graphics Device,torch.float32,0.074637,0.072451
128,matmul,Graphics Device,torch.float64,0.075191,0.374241


In [26]:
# Print each axis' factor relative to its smallest entry, labelled by the axis values.
for level, factor in zip((*df.index.levels, what), new_factors):
    # Index levels carry their own name; the plain `what` list does not.
    # Testing for pd.Index (not pd.Int64Index) keeps the names of the string-valued
    # levels (func, device, dtype) and avoids a class that pandas 2.0 removed.
    name = level.name if isinstance(level, pd.Index) else "what"
    print(pd.Series(index=level, data=factor.flatten() / factor.min(), name=name))
    print()

dim_size
64     1.000000
128    1.149553
150    1.049486
192    1.407079
256    2.058363
280    2.599320
Name: dim_size, dtype: float64

func
logmatmulexp           5.424247
logmatmulexp_lowmem    8.439108
matmul                 1.000000
Name: what, dtype: float64

device
GeForce GTX 1070    1.266466
Graphics Device     1.006657
cpu                 1.000000
Name: what, dtype: float64

dtype
torch.float32    1.141021
torch.float64    1.000000
Name: what, dtype: float64

forward_backward_seconds_per_iteration    5.259498
forward_seconds_per_iteration             1.000000
Name: what, dtype: float64



In [42]:
# Mean ratio of cpu timings to "Graphics Device" timings, per timing column.
# Selecting one device drops the `device` level, so the two frames align on
# (dim_size, func, dtype) when divided.
cpu_times = df.xs("cpu", level="device")[what]
gpu_times = df.xs("Graphics Device", level="device")[what]
(cpu_times / gpu_times).mean()

forward_backward_seconds_per_iteration    23.326752
forward_seconds_per_iteration             27.936942
dtype: float64

# All right, I'll do this factor analysis by hand

In [75]:
def extract_factor(array: np.ndarray, dim: int, baseline_slice_index: int) -> np.ndarray:
    """Estimate a multiplicative factor for every index along axis ``dim``.

    For each index ``i`` along ``dim``, the slice at ``i`` is divided elementwise
    by the slice at ``baseline_slice_index`` and the ratios are averaged. The
    entry for the baseline index is therefore 1 whenever the baseline slice
    contains no zeros.

    Args:
        array: Input tensor.
        dim: Axis to analyse. Negative values count from the last axis.
        baseline_slice_index: Index along ``dim`` of the reference slice.

    Returns:
        1-D array of length ``array.shape[dim]`` holding the mean ratios.
    """
    # np.take selects along an explicit axis, so negative `dim` works too
    # (a tuple of `dim` leading slices would silently index axis 0 instead).
    baseline_slice = np.take(array, baseline_slice_index, axis=dim)
    return np.array([
        (np.take(array, index, axis=dim) / baseline_slice).mean()
        for index in range(array.shape[dim])
    ])

In [76]:
def print_factor(factor: np.ndarray, dim: int) -> pd.Series:
    """Print ``factor`` labelled with the values of tensor axis ``dim`` and return it.

    Args:
        factor: 1-D factor values for axis ``dim``.
        dim: Axis number. Axes below ``len(df.index.levels)`` are labelled with the
            matching index level of ``df``; any other axis is labelled with ``what``.

    Returns:
        The labelled Series that was printed.
    """
    levels = df.index.levels
    index = levels[dim] if dim < len(levels) else what
    series = pd.Series(index=index, data=factor)
    print(series)
    return series

In [80]:
# (axis, baseline index) pairs: each axis' factor is computed relative to the
# slice at the given baseline index along that axis.
baselines = ((0, 3), (1, 0), (2, 1), (3, 0), (4, 1))
for dim, baseline_slice_index in baselines:
    axis_factor = extract_factor(array, dim, baseline_slice_index)
    print_factor(axis_factor, dim)
    print()

dim_size
64     0.242170
128    0.388777
150    0.532644
192    1.000000
256    2.154617
280    2.615546
dtype: float64

func
logmatmulexp           1.000000
logmatmulexp_lowmem    1.298112
matmul                 0.054123
dtype: float64

device
GeForce GTX 1070     1.638247
Graphics Device      1.000000
cpu                 25.631847
dtype: float64

dtype
torch.float32    1.000000
torch.float64    2.467929
dtype: float64

forward_backward_seconds_per_iteration    2.83085
forward_seconds_per_iteration             1.00000
dtype: float64

