In [9]:
import numpy as np
from scipy.sparse import csr_array
from itertools import combinations, starmap

#### Objective 1
* We first want to represent p binary variables into 2^p sequence of 0 and 1
* Example: 
\[0,1,0,1\] $\rightarrow$ \[0,1,0,1,0,0,0,0,1,0,0,0,0,0\]

In [74]:
def prod(*args):
    return np.prod(args)

In [94]:
from tqdm.notebook import trange

In [252]:
# simple way
sample_array = np.array([0,1,0,1,0, 1,0,1,1,1,1,1, 0, 1, 0, 1], dtype = np.byte)
result = sample_array.copy()
n = sample_array.shape[0] + 1
for i in trange(2, n):
    m = starmap(prod, combinations(sample_array.tolist(), i))
    result = np.hstack((result, np.array(list(m), dtype = np.byte)))
simple_result = np.insert(result, 0, 1)

  0%|          | 0/15 [00:00<?, ?it/s]

In [246]:
# time the process
def simulation_sim():
    sample_array = np.array([0,1,0,1,0, 1,0,1,1,1,1,1, 0, 1, 0, 1], dtype = np.byte)
    result = sample_array.copy()
    n = sample_array.shape[0] + 1
    for i in trange(2, n):
        m = starmap(prod, combinations(sample_array.tolist(), i))
        result = np.hstack((result, np.array(list(m), dtype = np.byte)))
    simple_result = np.insert(result, 0, 1)
    return simple_result

In [231]:
%timeit simulation_sim

19.7 ns ± 0.0803 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [163]:
def sum_string(*args):
    string = args[0]
    for i in range(1, len(args)):
        string = '*'.join([string, args[i]])
    return string

In [247]:
# Hard way of expanding it using sparse matrix
sample_array = np.array([0,1,0,1,0, 1,0,1,1,1,1,1, 0, 1, 0, 1], dtype = np.byte)
result = sample_array.copy()
main_list = [f"X{i}" for i in range(len(sample_array))]
index_list = main_list.copy()
n = len(main_list) + 1
for i in trange(2, n):
    m = starmap(sum_string, combinations(main_list, i))
    index_list = index_list + list(m)

  0%|          | 0/15 [00:00<?, ?it/s]

In [248]:
len(sample_array)

16

In [259]:
ones_index_main = [f"X{i}" for i in np.where(sample_array == 1)[0]]
ones_index = ones_index_main.copy()
n = len(ones_index_main)+1
for i in trange(2, n):
    m = starmap(sum_string, combinations(ones_index_main, i))
    ones_index = ones_index + list(m)
col = np.array([index_list.index(index)+1 for index in ones_index])
col = np.insert(col, 0, 0)
row = np.zeros((len(col),), dtype = np.byte)
data = np.ones((len(col),), dtype = np.byte)
csr_result = csr_array((data, (row, col)), shape=(1, 2**16))

  0%|          | 0/9 [00:00<?, ?it/s]

## Point 1: Why we use the sparse matrix: MEMORY!

In [262]:
import sys

In [266]:
sys.getsizeof(simple_result), sys.getsizeof(csr_result)

(65640, 48)

In [256]:
def simulation_diff(index_list = index_list):
    ones_index_main = [f"X{i}" for i in np.where(sample_array == 1)[0]]
    ones_index = ones_index_main.copy()
    n = len(ones_index_main)+1
    for i in trange(2, n):
        m = starmap(sum_string, combinations(ones_index_main, i))
        ones_index = ones_index + list(m)
    col = np.array([index_list.index(index)+1 for index in ones_index])
    col = np.insert(col, 0, 0)
    row = np.zeros((len(col),), dtype = np.byte)
    data = np.ones((len(col),), dtype = np.byte)
    csr_result = csr_array((data, (row, col)), shape=(1, 2**16))

## Point 2: Why we use the sparse matrix: Speed!

In [257]:
%timeit simulation_sim

19.8 ns ± 0.0825 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [258]:
%timeit simulation_diff

18.8 ns ± 0.0568 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
