In [1]:
import sys
sys.path.append('../xrun')

import fractions
from fractions import Fraction

from timeit import default_timer as timer
from pathlib import Path

import numba
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from numba import jit, cfunc
from scipy import linalg
from scipy.sparse import linalg as sparse_linalg, issparse
from sklearn.utils.sparsefuncs import mean_variance_axis
from sklearn.utils.extmath import svd_flip, safe_sparse_dot

from xrun.data.loader import load_dataset
from xrun.data.run_info import RunInfo

# Apply seaborn's "whitegrid" style and a 1.2x font scale as the plotting defaults.
sns.set(style="whitegrid", font_scale=1.2)

In [2]:
def load_run_info(experiment_dir: Path):
    """Load the run metadata stored in the single JSON file of a directory.

    Parameters
    ----------
    experiment_dir: Path
        Directory that is searched (non-recursively) for ``*.json`` files.

    Returns
    -------
    The value returned by ``RunInfo.load_json`` for the JSON file, or ``None``
    (after printing a message) when the directory does not contain exactly
    one ``*.json`` file.
    """
    json_files = [path for path in experiment_dir.glob("*.json")]

    # Happy path first: exactly one run file is present.
    if len(json_files) == 1:
        return RunInfo.load_json(json_files[0])

    print(f"Expected a single run file in {experiment_dir} but found {len(json_files)} files.")
    return None

def create_block_highlighter(k: int):
    """Build a per-column styling function that colors cells in blocks of ``k``.

    The returned function takes a column (``pd.Series``) whose ``name`` is
    convertible to ``int`` and returns one ``"background-color: ..."`` string
    per row. The palette alternates with the column block (``name // k``),
    and the color within the palette cycles with the row block (``row // k``).

    Parameters
    ----------
    k: int
        The block size, used for both rows and columns.

    Returns
    -------
    callable
        A function mapping a ``pd.Series`` to a list of CSS style strings.
    """
    even_palette = ['#b3cde0', '#2ab7ca', '#fed766']
    odd_palette = ['#e6e6ea', '#f6abb6', "#dec3c3"]

    def apply_highlight(col: pd.Series):
        column_block = int(col.name) // k
        palette = odd_palette if column_block % 2 else even_palette

        styles = []
        for row in range(len(col)):
            row_block = row // k
            styles.append(f"background-color: {palette[row_block % len(palette)]}")
        return styles

    return apply_highlight

def highlight_negative(col: pd.Series):
    """Return one background-color style per cell, flagging values containing '-'.

    Each value of ``col`` is tested with ``'-' in value``, so the values must
    support that test (e.g. strings). Any value containing a minus sign
    anywhere (including an exponent such as ``"3e-4"``) gets the second color.

    Parameters
    ----------
    col: pd.Series
        The column to style.

    Returns
    -------
    list of str
        One ``"background-color: ..."`` string per value of ``col``.
    """
    plain_style = "background-color: #2ab7ca"
    minus_style = "background-color: #f6abb6"
    return [minus_style if '-' in cell else plain_style for cell in col.values]

@jit(nopython=True)  # compile in numba's "nopython" mode (same as @njit) for speed
def generate_benchmark(k: int, alpha: int, beta: float):
    """Generate the benchmark dataset as a dense matrix.

    The matrix consists of ``alpha`` column blocks of width ``k``. In the
    first block, row ``i`` holds ``(k-1)/k`` in column ``i % k`` and ``-1/k``
    in the other columns. Every later column ``j`` is filled with the
    copy-stack rule ``data[i, j] = data[i // k, j - k]``.

    Parameters
    ----------
    k: int
        The width of a column block.
    alpha: int
        The number of column blocks.
    beta: float
        The scaling base. When ``beta > 1``, column block ``b`` (0-based) is
        multiplied by ``beta ** -b``; otherwise no scaling is applied.

    Returns
    -------
    np.array
        A matrix of shape ``(k ** alpha, alpha * k)``.
    """
    num_rows = k ** alpha
    num_cols = alpha * k
    data = np.zeros((num_rows, num_cols))

    # Leftmost block: (k-1)/k where the row index modulo k hits the column, -1/k elsewhere.
    for row in range(num_rows):
        for col in range(k):
            data[row, col] = (k - 1) / k if row % k == col else -1 / k

    # Remaining columns: copy-stack from the column k positions to the left,
    # reading the source row at index row // k.
    for col in range(k, num_cols):
        source_col = col - k
        for row in range(num_rows):
            data[row, col] = data[row // k, source_col]

    # Column-block scaling: block b is multiplied by beta ** -b (only for beta > 1).
    if beta > 1:
        for block in range(alpha):
            first_col = block * k
            scale = np.power(beta, float(-block))
            data[:, first_col:first_col + k] *= scale

    return data

In [3]:
# Gzip-compressed result file of one experiment run; the path is relative to
# the notebook's working directory.
result_path = Path("../data/odin-results/hardinstanceb1/sensitivity-sampling-k10-m2000/2021-09-09-14-01-39/results.txt.gz")
# Directory holding the result file; the run's JSON metadata is looked up here.
experiment_dir = result_path.parent

In [4]:
# load_run_info returns None (after printing a message) unless the directory
# holds exactly one *.json file, so run_info may be None here.
run_info = load_run_info(experiment_dir)

In [5]:
# Display the loaded run metadata.
run_info

RunInfo(algorithm='sensitivity-sampling', dataset='hardinstanceb1', k=10, m=2000, iteration=8, randomSeed=357918331, output_dir='data/experiments/hardinstanceb1/sensitivity-sampling-k10-m2000/2021-09-09-14-01-39', command='gs/build/gs sensitivity-sampling hardinstanceb1 data/input/benchmark-k10-alpha6-beta1.00.txt.gz 10 2000 357918331 data/experiments/hardinstanceb1/sensitivity-sampling-k10-m2000/2021-09-09-14-01-39', start_time='2021-09-09T14:01:39.817304', end_time='2021-09-09T14:09:47.370186', duration_secs=487.552882, process_id=-2)

In [6]:
# NOTE(review): neither value is read by the cells that follow, and both `a`
# and `k` are reassigned in a later cell before any use — this looks like
# leftover scratch; confirm and remove.
a = 0.01
k = 1

In [7]:
# Parse the gzip-compressed result file into a float64 array. The first line
# of the file is skipped; the remaining lines are split on single spaces.
computed_coreset = np.loadtxt(result_path, dtype=np.float64, delimiter=" ", skiprows=1)

In [8]:
# Column 0 is taken as the weights, all remaining columns as the point coordinates.
coreset_weights, coreset_points = computed_coreset[:, 0], computed_coreset[:, 1:]


In [9]:
# Shape of the coreset point matrix.
coreset_points.shape

(2017, 60)

In [10]:
# Inspect the weight column of the coreset.
coreset_weights

array([ 516.162,  500.693,  496.551, ..., 6180.14 ,    0.   ,  484.069])

In [11]:
# Small toy instance: n = k ** alpha rows.
k = 2
alpha = 3
n = k ** alpha

# Exponent used in the cluster formula below. Presumably the 0-based index of
# a column block, mirroring the copy-stack rule in generate_benchmark — TODO confirm.
a = 0

# Cluster label of row i: (i // k**a) % k. The loop variable is named `i` so
# that it no longer shadows the module-level `n` that bounds the range.
induced_clusters = [(i // k**a) % k for i in range(n)]
np.array(induced_clusters)

array([0, 1, 0, 1, 0, 1, 0, 1])