In [130]:
%load_ext autoreload
%autoreload
import inlp_dataset_handler
import inlp
import inlp_linear_model
import numpy as np
import sklearn
from sklearn.linear_model import LinearRegression, Ridge, SGDClassifier
from sklearn.svm import LinearSVC, SVR

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Classification

In [86]:
# Synthetic binary task: features are uniform in [-0.5, 0.5) and the label is
# True exactly when an example's features sum to a positive value.
# A local, seeded generator keeps the data reproducible across kernel restarts
# without touching NumPy's global random state.
data_rng = np.random.RandomState(0)
x_train = data_rng.rand(1000, 100) - 0.5
x_dev = data_rng.rand(1000, 100) - 0.5
y_train, y_dev = np.sum(x_train, axis=1) > 0, np.sum(x_dev, axis=1) > 0

inlp_dataset = inlp_dataset_handler.ClassificationDatasetHandler(
    x_train,
    y_train,
    x_dev,
    y_dev,
    dropout_rate=0,
    Y_train_main=None,
    Y_dev_main=None,
    by_class=False,
    equal_chance_for_main_task_labels=False,
)

# NOTE(review): the dict is presumably forwarded to LinearSVC as constructor
# keyword arguments -- confirm in inlp_linear_model.SKlearnClassifier.
inlp_model_handler = inlp_linear_model.SKlearnClassifier(LinearSVC, {"dual": False})

In [87]:
# Run INLP on the classification dataset; the returned triple is unpacked as
# (P, rowspace_projections, Ws) and reused by the sanity checks further down.
P, rowspace_projections, Ws = inlp.run_INLP(
    num_classifiers=5,
    input_dim=100,
    is_autoregressive=True,
    min_accuracy=0,
    dataset_handler=inlp_dataset,
    model=inlp_model_handler,
)


  0%|          | 0/5 [00:00<?, ?it/s][A
iteration: 0, accuracy: 0.944:   0%|          | 0/5 [00:00<?, ?it/s][A
iteration: 1, accuracy: 0.49:   0%|          | 0/5 [00:00<?, ?it/s] [A
iteration: 1, accuracy: 0.49:  40%|████      | 2/5 [00:00<00:00, 13.34it/s][A
iteration: 2, accuracy: 0.507:  40%|████      | 2/5 [00:00<00:00, 13.34it/s][A
iteration: 3, accuracy: 0.518:  40%|████      | 2/5 [00:00<00:00, 13.34it/s][A
iteration: 3, accuracy: 0.518:  80%|████████  | 4/5 [00:00<00:00, 14.20it/s][A
iteration: 4, accuracy: 0.541: 100%|██████████| 5/5 [00:00<00:00, 13.68it/s][A


## Sanity checks

In [88]:
def do_sanity_check(P, Ws, x_train):
    """Assert basic invariants of an INLP result.

    Checks performed:
      1. ``P`` is idempotent: ``P.dot(P)`` is close to ``P``.
      2. The first entry of ``Ws`` maps the projected first training example
         to (approximately) zero.
      3. Entries of ``Ws`` at distinct positions are mutually orthogonal:
         every element of ``w.dot(w2.T)`` is close to zero.

    Args:
        P: array supporting ``.dot``; the projection matrix to validate.
        Ws: sequence of weight arrays. Each entry may have one or several
            rows. An empty sequence skips checks 2 and 3.
        x_train: indexable collection of examples; only ``x_train[0]`` is used.

    Raises:
        AssertionError: if any of the checks fails.
    """
    assert np.allclose(P.dot(P), P), "P is not idempotent: P.dot(P) != P"

    # Without any classifier there is nothing left to verify.
    if len(Ws) == 0:
        return

    assert np.allclose(Ws[0].dot(P.dot(x_train[0])), 0.0), (
        "Ws[0] does not vanish on the projected example P.dot(x_train[0])"
    )

    # Orthogonality is symmetric, so each unordered pair is tested once.
    # np.allclose is applied to the whole product (no .item()), so weight
    # matrices with more than one row are checked instead of raising.
    for i, w in enumerate(Ws):
        for j in range(i + 1, len(Ws)):
            assert np.allclose(w.dot(Ws[j].T), 0.0), (
                "Ws[%d] and Ws[%d] are not orthogonal" % (i, j)
            )
            
# Validate the projection and weights produced by the classification run.
do_sanity_check(P=P, Ws=Ws, x_train=x_train)

## Siamese

In [131]:
# Synthetic Siamese task: every example is a pair of vectors with features
# uniform in [-0.5, 0.5).
# A local, seeded generator keeps the pairs reproducible across kernel
# restarts without touching NumPy's global random state.
pair_rng = np.random.RandomState(1)
x_train1, x_train2 = pair_rng.rand(1000, 100) - 0.5, pair_rng.rand(1000, 100) - 0.5
x_dev1, x_dev2 = pair_rng.rand(1000, 100) - 0.5, pair_rng.rand(1000, 100) - 0.5

# Label is 1 when the feature sums of both pair members share a sign, else 0.
y_train = (np.sign(np.sum(x_train1, axis=1)) == np.sign(np.sum(x_train2, axis=1))).astype(int)
y_dev = (np.sign(np.sum(x_dev1, axis=1)) == np.sign(np.sum(x_dev2, axis=1))).astype(int)

inlp_dataset = inlp_dataset_handler.SiameseDatasetHandler(
    (x_train1, x_train2),
    y_train,
    (x_dev1, x_dev2),
    y_dev,
    dropout_rate=0,
    Y_train_main=None,
    Y_dev_main=None,
    by_class=False,
    equal_chance_for_main_task_labels=False,
)
# NOTE(review): the meaning of each key is defined by SiameseLinearClassifier;
# "input_dim" matches the 100 feature columns generated above.
params = {"num_iter": 25, "input_dim": 100, "hidden_dim": 32, "batch_size": 64}
inlp_model_handler = inlp_linear_model.SiameseLinearClassifier(model_params=params)

In [132]:
# Run INLP with the Siamese model on the paired dataset.
# (Debugging aid, presumably: a single model can be trained on its own via
# inlp_model_handler.train_model(inlp_dataset).)
P, rowspace_projections, Ws = inlp.run_INLP(
    num_classifiers=3,
    input_dim=100,
    is_autoregressive=True,
    min_accuracy=0,
    dataset_handler=inlp_dataset,
    model=inlp_model_handler,
)



  0%|          | 0/3 [00:00<?, ?it/s][A[AINFO:root:         Name               Type Params
0          l1             Linear    3 K
1          l2             Linear    3 K
2  cosine_sim   CosineSimilarity    0  
3     loss_fn  BCEWithLogitsLoss    0  
Validation sanity check:   0%|          | 0/5 [00:00<?, ?batch/s]

[A[A                               
Epoch 1:  50%|█████     | 16/32 [00:00<00:00, 99.54batch/s, batch_nb=15, loss=0.698, v_nb=70]
Epoch 1:  72%|███████▏  | 23/32 [00:00<00:00, 106.81batch/s, batch_nb=15, loss=0.698, v_nb=70]
Epoch 1: 100%|██████████| 32/32 [00:00<00:00, 106.81batch/s, batch_nb=15, loss=0.698, v_nb=70]
Epoch 2:  50%|█████     | 16/32 [00:00<00:00, 96.56batch/s, batch_nb=15, loss=0.686, v_nb=70] 
Epoch 2:  62%|██████▎   | 20/32 [00:00<00:00, 102.22batch/s, batch_nb=15, loss=0.686, v_nb=70]
Epoch 2: 100%|██████████| 32/32 [00:00<00:00, 102.22batch/s, batch_nb=15, loss=0.686, v_nb=70]
Epoch 3:  50%|█████     | 16/32 [00:00<00:00, 101.26batch/s, batch_nb=1

Epoch 2:  62%|██████▎   | 20/32 [00:00<00:00, 94.50batch/s, batch_nb=15, loss=0.689, v_nb=71]
Epoch 2: 100%|██████████| 32/32 [00:00<00:00, 94.50batch/s, batch_nb=15, loss=0.689, v_nb=71]
Epoch 3:  50%|█████     | 16/32 [00:00<00:00, 86.04batch/s, batch_nb=15, loss=0.685, v_nb=71]
Epoch 3:  59%|█████▉    | 19/32 [00:00<00:00, 88.57batch/s, batch_nb=15, loss=0.685, v_nb=71]
Epoch 3: 100%|██████████| 32/32 [00:00<00:00, 88.57batch/s, batch_nb=15, loss=0.685, v_nb=71]
Epoch 4:  50%|█████     | 16/32 [00:00<00:00, 90.62batch/s, batch_nb=15, loss=0.682, v_nb=71]
Epoch 4: 100%|██████████| 32/32 [00:00<00:00, 101.68batch/s, batch_nb=15, loss=0.682, v_nb=71]
Epoch 5:  50%|█████     | 16/32 [00:00<00:00, 102.86batch/s, batch_nb=15, loss=0.679, v_nb=71]
Epoch 5:  75%|███████▌  | 24/32 [00:00<00:00, 108.48batch/s, batch_nb=15, loss=0.679, v_nb=71]
Epoch 5: 100%|██████████| 32/32 [00:00<00:00, 108.48batch/s, batch_nb=15, loss=0.679, v_nb=71]
Epoch 6:  50%|█████     | 16/32 [00:00<00:00, 106.44batc

Epoch 7:  72%|███████▏  | 23/32 [00:00<00:00, 103.61batch/s, batch_nb=15, loss=0.641, v_nb=72]
Epoch 7: 100%|██████████| 32/32 [00:00<00:00, 103.61batch/s, batch_nb=15, loss=0.641, v_nb=72]
Epoch 8:  50%|█████     | 16/32 [00:00<00:00, 101.76batch/s, batch_nb=15, loss=0.628, v_nb=72]
Epoch 8:  72%|███████▏  | 23/32 [00:00<00:00, 106.47batch/s, batch_nb=15, loss=0.628, v_nb=72]
Epoch 8: 100%|██████████| 32/32 [00:00<00:00, 106.47batch/s, batch_nb=15, loss=0.628, v_nb=72]
Epoch 9:  50%|█████     | 16/32 [00:00<00:00, 107.31batch/s, batch_nb=15, loss=0.616, v_nb=72]
Epoch 9: 100%|██████████| 32/32 [00:00<00:00, 116.22batch/s, batch_nb=15, loss=0.616, v_nb=72]
Epoch 10:  50%|█████     | 16/32 [00:00<00:00, 112.34batch/s, batch_nb=15, loss=0.604, v_nb=72]
Epoch 10: 100%|██████████| 32/32 [00:00<00:00, 121.82batch/s, batch_nb=15, loss=0.604, v_nb=72]
Epoch 11:  50%|█████     | 16/32 [00:00<00:00, 116.00batch/s, batch_nb=15, loss=0.594, v_nb=72]
Epoch 11: 100%|██████████| 32/32 [00:00<00:00, 

## Note: the cosine/L2-distance loss is no longer convex, so the learned weights are not guaranteed to be orthogonal (in general, w_i.dot(w_j) != 0)

In [133]:
# Product of the first two weight matrices from the Siamese run.
np.dot(Ws[0], Ws[1].T)

array([[-0.02524728, -0.0030968 ,  0.10105404, ..., -0.04164061,
        -0.04635901, -0.04721296],
       [-0.015494  , -0.0843562 , -0.04074406, ...,  0.04730713,
         0.04409238, -0.03214298],
       [-0.04279207,  0.04437556, -0.01076072, ..., -0.05580216,
        -0.05694133, -0.00659285],
       ...,
       [ 0.02467375, -0.08991509, -0.14390472, ..., -0.00712462,
         0.02172526,  0.12529522],
       [-0.01994726, -0.00422698,  0.08100867, ..., -0.03052652,
        -0.08544648, -0.03796824],
       [-0.03969179,  0.02470817, -0.0938241 , ...,  0.02075064,
         0.07615001, -0.08765589]], dtype=float32)

In [134]:
# Norm of P.P - P: how far P is from being idempotent.
np.linalg.norm(np.dot(P, P) - P)

7.442013957321746e-06