# ɸ Simplify
Experimenting with auto-simplifying the coefficients of the distilled sparse linear features, based on the observation that the learned coefficients appeared to be close to integer values. However, the closeness of the coefficients to integer values was not significantly different from chance.

In [1]:
import spock_reg_model
import numpy as np
import torch

In [2]:
# Input variables to the system are scaled to have zero mean and unit variance.
# We merge these scaling parameters into the coefficients of the sparse linear features.
#
# The scaler is never fitted in this notebook: its statistics are assigned by hand
# from the hard-coded arrays below (41 entries each — presumably one per input
# feature, in the model's input order; TODO confirm against the training pipeline).
from sklearn.preprocessing import StandardScaler
import numpy as np
ssX = StandardScaler()
# Per-feature scale (standard deviation); this is the array the rescaling cell divides by.
ssX.scale_ = np.array([2.88976974e+03, 6.10019661e-02, 4.03849732e-02, 4.81638693e+01,
           6.72583662e-02, 4.17939679e-02, 8.15995339e+00, 2.26871589e+01,
           4.73612029e-03, 7.09223721e-02, 3.06455099e-02, 7.10726478e-01,
           7.03392022e-01, 7.07873597e-01, 7.06030923e-01, 7.04728204e-01,
           7.09420909e-01, 1.90740659e-01, 4.75502285e-02, 2.77188320e-02,
           7.08891412e-01, 7.05214134e-01, 7.09786887e-01, 7.04371833e-01,
           7.04371110e-01, 7.09828420e-01, 3.33589977e-01, 5.20857790e-02,
           2.84763136e-02, 7.02210626e-01, 7.11815232e-01, 7.10512240e-01,
           7.03646004e-01, 7.08017286e-01, 7.06162814e-01, 2.12569430e-05,
           2.35019125e-05, 2.04211110e-05, 7.51048890e-02, 3.94254400e-01,
           7.11351099e-02])
# Per-feature mean. Not read anywhere else in the visible notebook cells.
ssX.mean_ = np.array([ 4.95458585e+03,  5.67411891e-02,  3.83176945e-02,  2.97223474e+00,
           6.29733979e-02,  3.50074471e-02,  6.72845676e-01,  9.92794768e+00,
           9.99628430e-01,  5.39591547e-02,  2.92795061e-02,  2.12480714e-03,
          -1.01500319e-02,  1.82667162e-02,  1.00813201e-02,  5.74404197e-03,
           6.86570242e-03,  1.25316320e+00,  4.76946516e-02,  2.71326280e-02,
           7.02054326e-03,  9.83378673e-03, -5.70616748e-03,  5.50782881e-03,
          -8.44213953e-04,  2.05958338e-03,  1.57866569e+00,  4.31476211e-02,
           2.73316392e-02,  1.05505555e-02,  1.03922250e-02,  7.36865006e-03,
          -6.00523246e-04,  6.53016990e-03, -1.72038113e-03,  1.24807860e-05,
           1.60314173e-05,  1.21732696e-05,  5.67292645e-03,  1.92488263e-01,
           5.08607199e-03])
# Keep the variance attribute consistent with the scale set above (var = scale ** 2).
ssX.var_ = ssX.scale_**2

In [3]:
# Identifier passed to spock_reg_model.load to pick the model to inspect.
version = 24880
model = spock_reg_model.load(version)
feature_nn = model.feature_nn

# Masked weight matrix of the feature layer, detached from autograd and
# converted to a NumPy array (presumably the mask zeroes pruned entries — TODO confirm).
input_linear = (feature_nn.linear.weight * feature_nn.mask).detach().numpy()

# Use the layer's bias when it has one; otherwise one zero per row of input_linear.
input_bias = (
    np.zeros(input_linear.shape[0])
    if feature_nn.linear.bias is None
    else feature_nn.linear.bias.detach().numpy()
)

In [4]:
def get_nonzero(arr):
    """Return the non-zero entries of ``arr`` together with their positions.

    Parameters
    ----------
    arr : array-like exposing ``.nonzero()`` (e.g. ``numpy.ndarray``)
        Array to inspect; it is not modified.

    Returns
    -------
    tuple
        ``(values, indices)`` where ``indices`` is exactly what
        ``arr.nonzero()`` returns and ``values`` is ``arr[indices]``.
    """
    # Locate the non-zero positions once and reuse them for both outputs.
    indices = arr.nonzero()
    return arr[indices], indices

def set_nonzero(arr, values, indices):
    """Assign ``values`` to ``arr`` at ``indices``, modifying ``arr`` in place.

    ``indices`` is used directly as the subscript of ``arr`` (in this notebook
    it is the index tuple produced by ``get_nonzero``). Nothing is returned.
    """
    arr[indices] = values

In [5]:
# Fold the input scaling into the weights: divide each non-zero coefficient by
# the scale of the input feature it multiplies.
input_linear_scaled = input_linear.copy()
for row in input_linear_scaled:
    # `row` is a view into input_linear_scaled, so the write below updates it in place.
    row_values, row_indices = get_nonzero(row)
    rescaled = row_values / ssX.scale_[row_indices]
    print(rescaled)
    set_nonzero(row, rescaled, row_indices)

[ 14.04819614 -34.99083446]
[  209.78123596 85414.8542688 ]
[-4.01863051e+00  2.70515748e+04]
[-1.10101459  0.76371067]
[-27.54581049  13.96021603]
[ 1.46652031 29.11841953]
[37.52963759 13.10458969]
[-4.91574017e+00  5.84305225e+04]
[21.14366905 -4.47539705]
[9.57226289e-01 6.46782448e+04]


In [6]:
def simplify(x, y, epsilon=0.1, zeroing_allowed=True, max_int=10):
    """Approximate the pair ``(x, y)`` by a positive multiple of a small integer pair.

    Searches all integer pairs ``(a, b)`` with entries in ``[-max_int, max_int]``
    for one such that ``(k * a, k * b)`` with ``k > 0`` lies within ``epsilon``
    (L1 distance) of ``(x, y)``. Among the admissible pairs the one with the
    smallest ``|a| + |b|`` wins; ties are broken by the smaller error.

    Parameters
    ----------
    x, y : float
        The two coefficients to simplify.
    epsilon : float
        Exclusive upper bound on ``|x - k*a| + |y - k*b|``. The error is
        measured on the values as passed in, so the tolerance is only
        scale-independent if the caller normalizes ``x`` and ``y`` first.
    zeroing_allowed : bool
        When False, candidate pairs containing a zero are skipped. Inputs that
        are already exactly zero are still reported as-is (see below).
    max_int : int
        Largest absolute integer tried for ``a`` and ``b``.

    Returns
    -------
    tuple
        ``(simplification, error)``. ``simplification`` is
        ``(a, b, x2, y2)`` with ``x2 = k*a`` and ``y2 = k*b``, or ``None``
        (with ``error`` also ``None``) when no candidate is within ``epsilon``.
    """
    # Exact zeros are already in simplest form. Return them in the same
    # (a, b, x2, y2) layout as the search below so callers can always unpack
    # four values; the sign of the non-zero input goes into the integer so the
    # implied scale k stays positive, matching the k > 0 rule of the search.
    if x == 0 and y == 0:
        return (0, 0, x, y), 0
    if x == 0:
        return (0, 1 if y > 0 else -1, x, y), 0
    if y == 0:
        return (1 if x > 0 else -1, 0, x, y), 0

    best_simplification, best_magnitude, best_error = None, None, None
    possible_values = range(-max_int, max_int + 1)

    for a in possible_values:
        for b in possible_values:
            if not zeroing_allowed and (a == 0 or b == 0):
                continue

            # Pick the scale k that reproduces one input exactly: the one with
            # the larger magnitude when both integers are non-zero, otherwise
            # whichever integer is non-zero. (0, 0) keeps k = 1.
            k = 1
            if a != 0:
                k = x / a
            if b != 0 and (a == 0 or abs(y) > abs(x)):
                k = y / b

            # A negative scale would flip the signs of the coefficients.
            if k < 0:
                continue
            x2, y2 = k * a, k * b

            error = abs(x - x2) + abs(y - y2)
            if error >= epsilon:
                continue

            magnitude = abs(a) + abs(b)

            is_better = (
                best_error is None
                or magnitude < best_magnitude
                or (magnitude == best_magnitude and error < best_error)
            )
            if is_better:
                best_simplification, best_error, best_magnitude = (a, b, x2, y2), error, magnitude

    return best_simplification, best_error

In [7]:
def simplify_stuff(input_linear, epsilon=0.1, normalize=True, zeroing_allowed=True):
    """Simplify every row of ``input_linear`` and save the resulting feature network.

    Each row must contain exactly two non-zero coefficients. They are
    (optionally) L1-normalized, passed to ``simplify``, and the result is
    written into a copy of ``input_linear``. Rows for which no simplification
    is found keep their (possibly normalized) values.

    Side effects
    ------------
    * Prints per-row diagnostics.
    * Replaces ``feature_nn.linear.weight`` on the module-level ``feature_nn``
      with the new matrix.
    * Saves ``feature_nn`` with ``torch.save`` to
      ``24880_feature_nn_simplified_v4_eps=<epsilon>.pt`` in the working
      directory. Only ``epsilon`` is encoded in the name, so runs that differ
      only in ``normalize`` / ``zeroing_allowed`` overwrite each other.

    Parameters
    ----------
    input_linear : numpy.ndarray
        2-D coefficient matrix; it is not modified.
    epsilon : float
        Tolerance forwarded to ``simplify``.
    normalize : bool
        Rescale each pair to unit L1 norm before and after simplification.
    zeroing_allowed : bool
        Forwarded to ``simplify``.

    Raises
    ------
    ValueError
        If a row does not have exactly two non-zero coefficients.
    """
    input_linear2 = input_linear.copy()
    for i in range(input_linear.shape[0]):
        nonzero, indices = get_nonzero(input_linear[i])
        if len(nonzero) != 2:
            raise ValueError(
                f"row {i} has {len(nonzero)} non-zero coefficients; exactly 2 are required"
            )
        x, y = nonzero
        print('original:\t', f'{x:.3f} {y:.3f}')

        if normalize:
            # Both values are non-zero here, so the L1 norm cannot be zero.
            l1 = abs(x) + abs(y)
            x, y = x / l1, y / l1
            print('normalized:\t', f'{x:.3f} {y:.3f}')

        simplification, error = simplify(x, y, epsilon=epsilon, zeroing_allowed=zeroing_allowed)
        if simplification is None:
            print("no simplification found")
            nonzero = [x, y]
        else:
            a, b, x2, y2 = simplification
            print("new values:\t", f"{x2:.3f} {y2:.3f}", "with error", f"{error:.3f}")
            print("ratio:\t\t", f"{a} {b}")
            nonzero = [x2, y2]

            if normalize:
                l1 = abs(x2) + abs(y2)
                # The (0, 0) candidate can win when epsilon exceeds the pair's
                # L1 norm; there is nothing to renormalize in that case.
                if l1 != 0:
                    x2, y2 = x2 / l1, y2 / l1
                print("final normed:\t", f"{x2:.3f} {y2:.3f}")
                nonzero = [x2, y2]

        print()
        set_nonzero(input_linear2[i], nonzero, indices)

    input_linear2 = torch.tensor(input_linear2)
    feature_nn.linear.weight = torch.nn.Parameter(input_linear2)

    s = '24880_feature_nn_simplified_v4_'
    s += f'eps={epsilon}.pt'

    torch.save(feature_nn, s)
    print('saved to', s)

In [14]:
# Simplify the rescaled coefficients at the strict tolerance (epsilon=0.001),
# with L1 normalization and without allowing zeroed coefficients.
# Prints per-row diagnostics and writes the resulting feature_nn to a .pt file.
simplify_stuff(input_linear_scaled, epsilon=0.001, normalize=True, zeroing_allowed=False)

original:	 14.048 -34.991
normalized:	 0.286 -0.714
no simplification found

original:	 209.781 85414.852
normalized:	 0.002 0.998
no simplification found

original:	 -4.019 27051.574
normalized:	 -0.000 1.000
no simplification found

original:	 -1.101 0.764
normalized:	 -0.590 0.410
no simplification found

original:	 -27.546 13.960
normalized:	 -0.664 0.336
no simplification found

original:	 1.467 29.118
normalized:	 0.048 0.952
no simplification found

original:	 37.530 13.105
normalized:	 0.741 0.259
no simplification found

original:	 -4.916 58430.523
normalized:	 -0.000 1.000
no simplification found

original:	 21.144 -4.475
normalized:	 0.825 -0.175
no simplification found

original:	 0.957 64678.246
normalized:	 0.000 1.000
no simplification found

saved to 24880_feature_nn_simplified_v4_eps=0.001.pt


In [9]:
# test whether matching to numbers is just luck
# Evenly spaced grid of 1001 ratios from 0 to 1 inclusive (step 0.001).
# NOTE: `ratios` is reassigned to random samples in the experiment cell further
# down, so this grid is only inspected, not used for the reported fractions.
ratios = np.linspace(0, 1, 1001)

In [10]:
# Show the first ten values of `ratios`.
ratios[0:10]

array([0.   , 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008,
       0.009])

In [11]:
# Show the last ten values of `ratios`.
ratios[-10:]

array([0.991, 0.992, 0.993, 0.994, 0.995, 0.996, 0.997, 0.998, 0.999,
       1.   ])

In [12]:
# Chance baseline: for each tolerance, what fraction of uniformly random ratios
# y/x in [0, 1) does simplify() accept (zeroed coefficients not allowed)?
N = 10000
SEED = 0
# Fixed seed so that Restart & Run All reproduces the same table. Output
# recorded from an earlier unseeded run will differ slightly.
rng = np.random.default_rng(SEED)
ratios = rng.random(N)

for epsilon in [0.1, 0.01, 0.005, 0.001, 0.0001]:
    total = len(ratios)

    # x is fixed to 1, so y equals the sampled ratio itself.
    simplifiable = sum(
        simplify(1, r, epsilon=epsilon, zeroing_allowed=False)[0] is not None
        for r in ratios
    )

    # This is a fraction in [0, 1], not a percentage.
    fraction_simplifiable = simplifiable / total
    print(epsilon, fraction_simplifiable)

0.1 1.0
0.01 0.579
0.005 0.3197
0.001 0.0636
0.0001 0.006


In [None]:
# Conclusion: epsilon needs to be about 0.001 for a match to be significant (only ~6% of random ratios simplify at that tolerance), but at that level none of the learned coefficients simplify.