In [1]:
import torch
import pickle
from matplotlib import pyplot as plt
import numpy as np
import pysr

In [2]:
# Names of the 41 input columns, in the order the model receives them.
labels = [
    'time',
    # short-integration summary statistics
    'e+_near', 'e-_near', 'max_strength_mmr_near',
    'e+_far', 'e-_far', 'max_strength_mmr_far',
    'megno',
    # orbital elements, suffix 1
    'a1', 'e1', 'i1',
    'cos_Omega1', 'sin_Omega1', 'cos_pomega1', 'sin_pomega1', 'cos_theta1', 'sin_theta1',
    # orbital elements, suffix 2
    'a2', 'e2', 'i2',
    'cos_Omega2', 'sin_Omega2', 'cos_pomega2', 'sin_pomega2', 'cos_theta2', 'sin_theta2',
    # orbital elements, suffix 3
    'a3', 'e3', 'i3',
    'cos_Omega3', 'sin_Omega3', 'cos_pomega3', 'sin_pomega3', 'cos_theta3', 'sin_theta3',
    # masses
    'm1', 'm2', 'm3',
    # missing-value indicator columns
    'nan_mmr_near', 'nan_mmr_far', 'nan_megno',
]

# Not all of these labels are actually used. For training, these inputs are zeroed out
# but still passed in as zeroes. Ideally the linear layer ignores them, which does
# happen when L1 regularization is applied to it.
skipped = [
    'nan_mmr_near', 'nan_mmr_far', 'nan_megno',
    'e+_near', 'e-_near', 'max_strength_mmr_near',
    'e+_far', 'e-_far', 'max_strength_mmr_far',
    'megno',
]

In [6]:
# l1 reg = 2: 95944
# feature_nn = torch.load('models/95944_feature_nn.pt')

# l1 reg = 0.2: 92122
# feature_nn = torch.load('models/92122_feature_nn.pt')

# l1 nonabs reg: 63524
# feature_nn = torch.load('models/63524_feature_nn.pt')

# topk 2 pruned masked linear
# NOTE: this checkpoint is a fully pickled module, so unpickling requires the
# `MaskedLinear` class to be importable from `modules`; otherwise torch.load raises
# "AttributeError: Can't get attribute 'MaskedLinear'".
# Unpickling can execute arbitrary code -- only load checkpoints you trust.
# map_location='cpu' places the weights on the CPU, so the checkpoint loads on
# machines without a GPU and the later `.detach().numpy()` calls work.
feature_nn = torch.load('models/52410_feature_nn.pt', map_location='cpu')

  from .autonotebook import tqdm as notebook_tqdm


AttributeError: Can't get attribute 'MaskedLinear' on <module 'modules' from '/Users/simon/code/bnn_chaos_model/modules.py'>

In [17]:
# This gives the (n_features, n_inputs) matrix of the linear transformation used as f1.
# Row i holds the weights that produce feature i; column j multiplies the input labels[j].
# .detach() returns a tensor outside the autograd graph so it can be converted to NumPy.
input_linear = feature_nn.weight.detach().numpy()
# Display the matrix shape as the cell output.
input_linear.shape

(20, 41)

In [18]:

# m_i is the mean of the i'th feature, s_i is the standard deviation
# get the linear transformation that creates feature i
def linear_transformation(i):
    """Return row ``i`` of ``input_linear``, i.e. the input weights that create feature ``i``."""
    feature_weights = input_linear[i]
    return feature_weights

In [19]:

# let's make the linear transformation a bit easier to read
def format_num(x):
    """Format ``x`` compactly, using more decimal places the smaller its magnitude.

    |x| > 0.1    -> 2 decimals
    |x| > 0.01   -> 3 decimals
    |x| > 0.001  -> 4 decimals
    otherwise    -> scientific notation with 2 decimals
    """
    magnitude = abs(x)
    # (exclusive lower bound on |x|, format spec), checked from largest to smallest
    fixed_point_specs = ((0.1, '.2f'), (0.01, '.3f'), (0.001, '.4f'))
    for lower_bound, spec in fixed_point_specs:
        if magnitude > lower_bound:
            return format(x, spec)
    return format(x, '.2e')


# element-wise version of format_num for arrays
format_vec = np.vectorize(format_num)

In [24]:

# now we can write it as a combination of the input features
# we'll sort the features by their absolute value to make it a bit easier to read
def feature_equation(i):
    """Describe feature ``i`` as a list of strings: the bias, then its weighted inputs.

    The first element is the formatted bias of feature ``i``. It is followed by one
    ``'<weight> * <label>'`` entry per input with a nonzero weight, ordered from the
    largest to the smallest absolute weight.
    """
    weights = linear_transformation(i)
    offset = feature_nn.bias.detach().numpy()[i]
    # argsort is ascending, so reverse it to put the largest |weight| first
    by_magnitude = np.argsort(np.abs(weights))[::-1]

    terms = [format_num(offset)]
    for input_ix in by_magnitude:
        coefficient = weights[input_ix]
        if coefficient != 0:
            terms.append(format_num(coefficient) + ' * ' + labels[input_ix])
    return terms

In [25]:

# Print every learned feature as a readable linear combination of the inputs.
# Each listing is capped at MAX_TERMS entries: the bias plus the largest-|weight| terms.
MAX_TERMS = 10
for i in range(input_linear.shape[0]):
    terms = feature_equation(i)
    print(f'feature {i}:')
    print(' +\n'.join(terms[:MAX_TERMS]))
    # Only claim that terms were omitted when the listing was actually truncated
    # (a pruned / sparse layer can have fewer than MAX_TERMS nonzero terms).
    if len(terms) > MAX_TERMS:
        print('+ ... (smaller terms omitted)')
    print()

feature 0:
-0.0034 +
-2.63 * m1 +
0.47 * a3 +
-0.016 * i3 +
-0.011 * sin_Omega2 +
-0.0096 * i2 +
0.0095 * a2 +
-0.0095 * sin_Omega3 +
-0.0086 * e1 +
0.0067 * sin_Omega1
+ ... (smaller terms omitted)

feature 1:
0.0064 +
1.33 * e3 +
-0.45 * e2 +
0.041 * i2 +
-0.036 * time +
0.023 * cos_Omega1 +
-0.021 * cos_Omega3 +
0.0075 * sin_Omega3 +
0.0073 * cos_pomega1 +
0.0073 * sin_pomega2
+ ... (smaller terms omitted)

feature 2:
0.0082 +
1.31 * e2 +
0.035 * i1 +
0.033 * e1 +
0.022 * e3 +
-0.015 * sin_Omega3 +
-0.014 * cos_pomega3 +
0.012 * time +
0.010 * cos_Omega1 +
0.0071 * sin_pomega3
+ ... (smaller terms omitted)

feature 3:
0.0015 +
-0.37 * sin_Omega2 +
0.17 * sin_Omega1 +
-0.042 * cos_Omega3 +
-0.023 * cos_pomega1 +
0.022 * sin_pomega1 +
0.022 * sin_pomega2 +
-0.019 * i3 +
0.016 * cos_Omega1 +
0.015 * cos_Omega2
+ ... (smaller terms omitted)

feature 4:
5.46e-04 +
3.51 * a1 +
-0.040 * e1 +
-0.013 * i2 +
0.013 * cos_Omega3 +
-0.012 * e3 +
-0.0086 * cos_pomega3 +
-0.0071 * sin_Omega3 +
-0.