In [75]:
%load_ext autoreload
%autoreload
import inlp_dataset_handler
import inlp
import inlp_linear_model
import numpy as np
import sklearn
from sklearn.linear_model import LinearRegression, Ridge, SGDClassifier
from sklearn.svm import LinearSVC, SVR

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Classification

In [8]:
# Synthetic binary task: a sample is positive when its coordinates sum to a
# value above zero, so the label is a linear function of the input.
# A seeded local generator keeps the data identical across "Restart & Run All".
rng = np.random.default_rng(0)
x_train, x_dev = rng.random((1000, 100)) - 0.5, rng.random((1000, 100)) - 0.5
y_train, y_dev = np.sum(x_train, axis=1) > 0, np.sum(x_dev, axis=1) > 0

inlp_dataset = inlp_dataset_handler.ClassificationDatasetHandler(
    x_train,
    y_train,
    x_dev,
    y_dev,
    dropout_rate=0,
    Y_train_main=None,
    Y_dev_main=None,
    by_class=False,
    equal_chance_for_main_task_labels=False,
)

# The classifier is passed as a class plus a dict that is presumably forwarded
# to its constructor (``dual`` is a LinearSVC option) - confirm in inlp_linear_model.
inlp_model_handler = inlp_linear_model.SKlearnClassifier(LinearSVC, {"dual": False})

In [9]:
# Run INLP on the classification dataset with a budget of 5 classifiers over
# the 100-dimensional inputs; the result is unpacked into the projection P,
# the list of row-space projections and the list of classifier weights Ws.
P, rowspace_projections, Ws = inlp.run_INLP(
    num_classifiers=5,
    input_dim=100,
    is_autoregressive=True,
    min_accuracy=0,
    dataset_handler=inlp_dataset,
    model=inlp_model_handler,
)

iteration: 4, accuracy: 0.506: 100%|██████████| 5/5 [00:00<00:00, 23.40it/s]


## Sanity checks

In [10]:
def do_sanity_check(P, Ws, x_train, atol=1e-8):
    """Assert the algebraic invariants expected from an INLP run.

    Three properties are verified:

    1. ``P`` is idempotent (``P.dot(P) == P``), i.e. it acts as a projection.
    2. Every classifier in ``Ws`` maps every projected sample ``P.dot(x)`` to
       zero (all classifiers and all rows of ``x_train`` are checked, not only
       the first of each).
    3. Different classifiers in ``Ws`` are mutually orthogonal. The whole
       product ``w_i.dot(w_j.T)`` is compared against zero, so classifiers
       with more than one row are supported.

    Args:
        P: square ``(d, d)`` matrix to be validated as a projection.
        Ws: sequence of classifier weights, each of shape ``(k_i, d)`` or ``(d,)``.
            An empty sequence only triggers the idempotence check.
        x_train: a single ``(d,)`` sample or an ``(n, d)`` array of samples.
        atol: absolute tolerance for every comparison. The default equals
            ``np.allclose``'s own default; loosen it for float32 weights.

    Raises:
        AssertionError: if any invariant is violated. The message names the
            offending classifier index (or pair of indices).
    """
    assert np.allclose(P.dot(P), P, atol=atol), "P is not idempotent: P.dot(P) != P"

    # Project all samples at once; columns of `projected` are P.dot(x_k).
    projected = P.dot(np.atleast_2d(x_train).T)
    for idx, w in enumerate(Ws):
        assert np.allclose(w.dot(projected), 0.0, atol=atol), (
            "classifier %d still responds to projected inputs" % idx
        )

    # w_i.dot(w_j.T) is the transpose of w_j.dot(w_i.T), so each unordered pair
    # only needs to be examined once.
    for i in range(len(Ws)):
        for j in range(i + 1, len(Ws)):
            if Ws[i] is Ws[j]:
                # Same object listed twice: skipped, as a self-product is not
                # an orthogonality statement.
                continue
            assert np.allclose(Ws[i].dot(Ws[j].T), 0.0, atol=atol), (
                "classifiers %d and %d are not orthogonal" % (i, j)
            )
            
# Validate the projection and classifier weights currently held in P and Ws.
do_sanity_check(P=P, Ws=Ws, x_train=x_train)

## Siamese

In [76]:
# Synthetic Siamese task: a pair is labelled 1 when the coordinate sums of its
# two members have the same sign, and 0 otherwise.
# A seeded local generator makes the generated data reproducible (this does
# not seed whatever randomness the model training itself uses).
rng = np.random.default_rng(1)
x_train1, x_train2 = rng.random((1000, 100)) - 0.5, rng.random((1000, 100)) - 0.5
x_dev1, x_dev2 = rng.random((1000, 100)) - 0.5, rng.random((1000, 100)) - 0.5

y_train = (np.sign(np.sum(x_train1, axis=1)) == np.sign(np.sum(x_train2, axis=1))).astype(int)
y_dev = (np.sign(np.sum(x_dev1, axis=1)) == np.sign(np.sum(x_dev2, axis=1))).astype(int)

inlp_dataset = inlp_dataset_handler.SiameseDatasetHandler(
    (x_train1, x_train2),
    y_train,
    (x_dev1, x_dev2),
    y_dev,
    dropout_rate=0,
    Y_train_main=None,
    Y_dev_main=None,
    by_class=False,
    equal_chance_for_main_task_labels=False,
)

# NOTE(review): "device" is hard-coded to "cuda"; presumably this requires a
# GPU - confirm how SiameseLinearClassifier handles CPU-only machines.
params = {
    "num_iter": 25,
    "input_dim": 100,
    "hidden_dim": 32,
    "batch_size": 64,
    "verbose": False,
    "device": "cuda",
    "compare_by": "cosine",
    "same_weights": True,
}
inlp_model_handler = inlp_linear_model.SiameseLinearClassifier(model_params=params, concat_weights=True)

In [77]:
# Disabled direct training call, kept for reference:
#inlp_model_handler.train_model(inlp_dataset)

# Run INLP on the Siamese dataset with a budget of 3 classifiers; the result
# is unpacked into P, rowspace_projections and Ws (replacing earlier values).
P, rowspace_projections, Ws = inlp.run_INLP(
    num_classifiers=3,
    input_dim=100,
    is_autoregressive=True,
    min_accuracy=0,
    dataset_handler=inlp_dataset,
    model=inlp_model_handler,
)


  0%|          | 0/3 [00:00<?, ?it/s][AINFO:root:         Name               Type Params
0          l1             Linear    3 K
1  cosine_sim   CosineSimilarity    0  
2     loss_fn  BCEWithLogitsLoss    0  

iteration: 0, accuracy: 0.866992175579071:   0%|          | 0/3 [00:05<?, ?it/s][A
iteration: 0, accuracy: 0.866992175579071:  33%|███▎      | 1/3 [00:05<00:10,  5.45s/it][AINFO:root:         Name               Type Params
0          l1             Linear    3 K
1  cosine_sim   CosineSimilarity    0  
2     loss_fn  BCEWithLogitsLoss    0  

iteration: 1, accuracy: 0.4925781190395355:  33%|███▎      | 1/3 [00:11<00:10,  5.45s/it][A
iteration: 1, accuracy: 0.4925781190395355:  67%|██████▋   | 2/3 [00:11<00:05,  5.51s/it][AINFO:root:         Name               Type Params
0          l1             Linear    3 K
1  cosine_sim   CosineSimilarity    0  
2     loss_fn  BCEWithLogitsLoss    0  

iteration: 2, accuracy: 0.501171886920929:  67%|██████▋   | 2/3 [00:16<00:05,  5.51s/i

## Note: the cosine/L2 distance loss is not convex, so the learned weights are no longer guaranteed to be orthogonal; in general `w_i.dot(w_j) != 0`

In [37]:
# Pairwise dot products between the rows of the first two weight matrices;
# entries away from zero show that the two are not orthogonal.
np.dot(Ws[0], Ws[1].T)

array([[ 0.02186606,  0.0346785 , -0.02964997, ...,  0.02251494,
         0.06070557,  0.03205736],
       [-0.00448008, -0.02645985,  0.03417505, ..., -0.00667388,
        -0.01322363, -0.00543375],
       [-0.00049647,  0.01373247, -0.02147561, ...,  0.00711308,
         0.08852425,  0.019908  ],
       ...,
       [ 0.01660385,  0.02621097, -0.01645809, ..., -0.02676257,
        -0.05707281, -0.02487635],
       [-0.00046501,  0.00114721,  0.09579111, ..., -0.02665293,
        -0.15518732, -0.00907945],
       [-0.00971164,  0.05872897, -0.03290517, ...,  0.00484753,
         0.0505405 ,  0.00218303]], dtype=float32)

In [52]:
# Distance of P from being idempotent (0 would mean P.dot(P) == P exactly).
print(np.linalg.norm(np.dot(P, P) - P))
# Response of the first 32 rows of the last weight matrix to a projected
# sample; not exactly 0 because of pytorch floating point precision.
print(np.linalg.norm(np.dot(Ws[-1][:32, :], np.dot(P, x_train1[0]))))

7.69745433042437e-06
0.000550595020200669
