# Simple Example

First, let's make some basic demo data.

In [1]:
import numpy as np
from tqdm.notebook import tqdm
# Seed NumPy's global RNG: create_fake_data draws from np.random.rand / np.random.randn,
# so fixing the seed here makes the generated demo data repeatable.
np.random.seed(0)

def create_fake_data(n_timepoints_train, n_timepoints_test, n_features, n_targets, noise_amounts):
    """Generate a synthetic linear-regression dataset with per-target noise.

    Features are sampled uniformly from [0, 1) and mapped to targets through a
    single weight matrix shared by the train and test splits. Gaussian noise is
    then added to every target column, scaled by that column's entry in
    ``noise_amounts``.

    All sampling goes through NumPy's global ``np.random`` state, so call
    ``np.random.seed`` beforehand for repeatable output.

    Parameters
    ----------
    n_timepoints_train : int
        Number of rows in the training split.
    n_timepoints_test : int
        Number of rows in the test split.
    n_features : int
        Number of feature columns.
    n_targets : int
        Number of target columns.
    noise_amounts : sequence of float
        Noise scale for each target, indexed by target column. Only the first
        ``n_targets`` entries are read; fewer entries raise ``IndexError``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, X_test, Y_train, Y_test)`` with shapes
        ``(n_timepoints_train, n_features)``, ``(n_timepoints_test, n_features)``,
        ``(n_timepoints_train, n_targets)`` and ``(n_timepoints_test, n_targets)``.
    """
    # Sampling order (train features, test features, weights, then per-target
    # train/test noise) determines the output for a given seed.
    features_train = np.random.rand(n_timepoints_train, n_features)
    features_test = np.random.rand(n_timepoints_test, n_features)
    weights = np.random.randn(n_features, n_targets)

    # Noise-free targets from the shared linear map.
    targets_train = features_train @ weights
    targets_test = features_test @ weights

    # Corrupt each target column with its own noise level.
    for column in range(n_targets):
        scale = noise_amounts[column]
        train_noise = np.random.randn(n_timepoints_train)
        targets_train[:, column] = targets_train[:, column] + scale * train_noise
        test_noise = np.random.randn(n_timepoints_test)
        targets_test[:, column] = targets_test[:, column] + scale * test_noise

    return features_train, features_test, targets_train, targets_test


# Dataset dimensions for the demo.
n_timepoints_train = 500
n_timepoints_test = 100
n_features = 10
n_targets = 3

# One noise scale per target column, in increasing order of noisiness.
noise_amounts = [1, 5, 15]

# Arguments are passed positionally, in the order of create_fake_data's signature.
X_train, X_test, Y_train, Y_test = create_fake_data(
    n_timepoints_train,
    n_timepoints_test,
    n_features,
    n_targets,
    noise_amounts,
)

COPTeRR computes permutation tests on already-fit ridge regression models. You can perform this initial fit using a variety of packages, but we recommend Himalaya.

In [2]:
from himalaya.ridge import RidgeCV

# Fit a RidgeCV model on the training split; the candidate alphas are
# 8 log-spaced values from 1e-2 to 1e5.
model = RidgeCV(alphas=np.logspace(-2, 5, 8))
model.fit(X_train, Y_train)
# Score on the held-out test split. The recorded output shows one value per target.
himalaya_scores = model.score(X_test, Y_test)
print("R2 Score Per Target: ", himalaya_scores)

R2 Score Per Target:  [ 0.50214792  0.0137505  -0.0056149 ]


Performance on the first (least noisy) target seems pretty good, but how can we be sure that any of these scores are significant? We can determine this by generating a null distribution of chance values using COPTeRR!

In [3]:
from copterr import PermuteWeights

# Build the permuter from the training data and the alphas selected by the
# fitted RidgeCV model (model.best_alphas_).
permuter = PermuteWeights(X_train, Y_train, model.best_alphas_)
# NOTE(review): the recorded output of this cell shows a "Computing Initial SVDs"
# progress bar, presumably emitted by prepare() — confirm against the copterr docs.
permuter.prepare()

# Sanity check: the weights COPTeRR fits on the unpermuted data should match
# Himalaya's coefficients to within atol=1e-5.
himalaya_weights = model.coef_
copterr_weights = permuter.fit_true_weights()
print('Weights Equivalent:', np.allclose(himalaya_weights, copterr_weights, atol=1e-5))

# Same check for the test-set scores. score() returns an object exposing .numpy();
# the converted array is reused by the permutation-test cell.
copterr_scores = permuter.score(X_test, Y_test).numpy()
print('R2 Scores Equivalent:', np.allclose(himalaya_scores, copterr_scores, atol=1e-5))

Computing Initial SVDs: 100%|██████████| 3/3 [00:00<00:00, 2101.00it/s]

Weights Equivalent: True
R2 Scores Equivalent: True





Now, let's perform a bunch of permutations, and compute p-values for the model's performance.

In [4]:
from copterr.utils import compute_p_values

# Collect one array of test-set scores per permutation; together these form
# the null distribution passed to compute_p_values.
perm_performance = []
for permutation in tqdm(range(10000), dynamic_ncols=True):
    # NOTE(review): score(..., permutation=True) presumably evaluates the weights
    # produced by the fit_permutation() call just above, so keep the two calls in
    # this order — confirm against the copterr docs.
    # perm_weights is not used again in this cell.
    perm_weights = permuter.fit_permutation(permutation=True)
    perm_r2 = permuter.score(X_test, Y_test, permutation=True)
    perm_performance.append(perm_r2.numpy())

# Compare the true scores against the permutation scores.
p_values = compute_p_values(copterr_scores, perm_performance)

# Report one line per target. P-values are printed with two decimals, so any
# value below 0.005 is displayed as 0.00.
for target_i in range(n_targets):
    print(f"Target {target_i} R2:", "%0.3f" % copterr_scores[target_i], "    ", "P-Value:", "%0.2f" % p_values[target_i])

  0%|          | 0/10000 [00:00<?, ?it/s]

Target 0 R2: 0.502      P-Value: 0.00
Target 1 R2: 0.014      P-Value: 0.01
Target 2 R2: -0.006      P-Value: 0.11


Feel free to vary the `noise_amounts` variable and check out the effects—lower amounts of noise should yield smaller p-values for our model's performance.

Also note that model R2 is just a single example--anything you compute from your model's weights can be tested in this manner!