In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
sys.path.append("../")

import gurobipy
from json import dumps, loads
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression as skLogisticRegression
from sklearn.metrics import (classification_report, f1_score, precision_score, recall_score)
from tqdm import tnrange, trange
import tensorflow as tf

from mlsql.influence import InfluenceRanker
from mlsql.fixer import AutoFixer
from mlsql.manager import ModelManagerLM
from mlsql.manager_test import ModelManagerTest

from models.simple_cnn import SimpleCNN
from models.logreg import LogReg
from models.linear_comb import LinearComb
from models.linear_comb_test import LinearCombTest
from processors.diabetes import DiabetesProcessor
from processors.diabetes_corruption import DiabetesCorrProcessor


import logging
logging.getLogger("tensorflow").setLevel(logging.CRITICAL)

import time
import altair as alt
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# @tf.function
def rank_fix(ranker, fixer, n):
    rank = ranker.predict()
    fixer.fix(rank, n)
    return rank

@tf.function
def rankit(ranker):
    rank = ranker.predict()
    return rank


@tf.function
def fixit(fixer, rank, n):
    fixer.fix(rank, n)


# @tf.function
def train(manager):
    manager.fit()

In [3]:
proc = DiabetesCorrProcessor()

In [4]:
# Init
# proc = DiabetesCorrProcessor()
model = LogReg(1)
manager0 = ModelManagerLM(proc.x_train, proc.y_corr, model, 256)
start = time.time()
manager0.fit(print_value=True, tol=1e-7, max_iter=10000)
print(time.time() - start)
manager0.report(proc.x_train, proc.y_corr, proc.x_test, proc.y_test)
# manager0.report(proc.x_train, proc.y_corr, proc.x_query, proc.y_query)

KeyboardInterrupt: 

In [37]:
# K = 2600
# corrsel = tf.cast(tf.ones(proc.y_train.shape[0]), dtype='bool')

In [63]:
corrsel = proc.corrsel
K = len(list(np.where(corrsel)[0]))
print(len(list(np.where(corrsel)[0])))

51


In [68]:
from tqdm.notebook import tnrange, trange
manager = ModelManagerLM(proc.x_train, proc.y_corr, LogReg(1), 256)
manager.model.set_weights(manager0.model.get_weights())
manager.delta = tf.Variable(manager0.delta.value(), name="delta")
# ranker = InfluenceRanker(manager=manager, on=proc.complain)
from mlsql.loss_ranker import LossRanker
ranker = LossRanker(manager=manager_test)
fixer = AutoFixer(manager, corrsel, K)

AQs = []
weighted_f1 = []
rank_list = []
rank_time_rain = 0
model_time_rain = 0
# AQ = proc.complain(manager).AQ
f1 = f1_score(proc.y_test.numpy(), manager.model.predict(proc.x_test).numpy(), average='weighted')
# f1 = f1_score(proc.y_query.numpy(), manager.model.predict(proc.x_query).numpy(), average='weighted')
# AQs.append(float(AQ))
weighted_f1.append(f1)

step_size = 10
rain_k = int(np.ceil(K / step_size))
for k in trange(0, rain_k):
    nfix = min(step_size, K - step_size * k)
    assert nfix > 0

    start = time.time()
    rank = rank_fix(ranker, fixer, nfix)
    middle = time.time()
    manager.fit(max_iter=5000, tol=1e-7, print_value=True)
    end = time.time()
    
    rank_list.append(rank.numpy())
    rank_time_rain += middle - start
    model_time_rain += end - middle

#     AQ = proc.complain(manager).AQ
    f1 = f1_score(proc.y_test.numpy(), manager.model.predict(proc.x_test).numpy(), average='weighted')
#     f1 = f1_score(proc.y_query.numpy(), manager.model.predict(proc.x_query).numpy(), average='weighted')
#     AQs.append(float(AQ))
    weighted_f1.append(f1)

print("Rank_time:", rank_time_rain)
print("Model_time:", model_time_rain)
AC = proc.complain(manager).AC

df_rain_test = pd.DataFrame({
#     "Complain": np.array(AQs) - AC,
    "F1": np.array(weighted_f1),
    "K": list(range(0, K, step_size)) + [K],
    "Method": np.repeat("Rain", len(weighted_f1)),
})
alt.Chart(pd.concat([df_rain])).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_rain.shape[0], grid=False)),
    alt.Y("Complain:Q"),
    color = "Method"
)

  0%|          | 0/6 [00:00<?, ?it/s]

SGD loss: tf.Tensor(0.5310756, shape=(), dtype=float32)
SGD steps: 2215
SGD loss: tf.Tensor(0.5266568, shape=(), dtype=float32)
SGD steps: 1558
SGD loss: tf.Tensor(0.5287845, shape=(), dtype=float32)
SGD steps: 1918
SGD loss: tf.Tensor(0.5258005, shape=(), dtype=float32)
SGD steps: 1104
SGD loss: tf.Tensor(0.5253502, shape=(), dtype=float32)
SGD steps: 1533
SGD loss: tf.Tensor(0.5267223, shape=(), dtype=float32)
SGD steps: 1
Rank_time: 0.05564761161804199
Model_time: 47.7028694152832


NameError: name 'df_rain' is not defined

In [72]:
alt.Chart(pd.concat([df_rain_test])).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_rain_test.shape[0], grid=False)),
    alt.Y("F1:Q"),
    color = "Method"
)

In [76]:
weighted_f1

[0.6408968390100466,
 0.6408968390100466,
 0.6408968390100466,
 0.6408968390100466,
 0.6408968390100466,
 0.6610219845513963,
 0.6610219845513963]

In [73]:
rain_fiftypercent_recall = fixer.recall_k()

In [74]:
rain_fiftypercent_recall

array([0.        , 0.00840336, 0.01680672, 0.02521008, 0.03361345,
       0.04201681, 0.05042017, 0.05882353, 0.05882353, 0.06722689,
       0.07563025, 0.08403361, 0.09243697, 0.09243697, 0.10084034,
       0.1092437 , 0.11764706, 0.12605042, 0.13445378, 0.14285714,
       0.14285714, 0.1512605 , 0.15966387, 0.15966387, 0.16806723,
       0.17647059, 0.18487395, 0.19327731, 0.20168067, 0.21008403,
       0.21008403, 0.21008403, 0.21848739, 0.22689076, 0.23529412,
       0.24369748, 0.25210084, 0.2605042 , 0.26890756, 0.26890756,
       0.26890756, 0.27731092, 0.28571429, 0.29411765, 0.29411765,
       0.30252101, 0.31092437, 0.31932773, 0.32773109, 0.33613445,
       0.33613445, 0.34453782, 0.34453782, 0.35294118, 0.35294118,
       0.36134454, 0.3697479 , 0.37815126, 0.38655462, 0.39495798,
       0.39495798, 0.39495798, 0.39495798, 0.40336134, 0.41176471,
       0.42016807, 0.42857143, 0.43697479, 0.43697479, 0.43697479,
       0.43697479, 0.44537815, 0.45378151, 0.46218487, 0.47058

In [526]:
print(sorted(fixer.deletions.numpy()))
print(list(np.where(corrsel)[0]))
len(list(np.where(corrsel)[0]))

[3, 4, 6, 7, 12, 13, 15, 16, 19, 24, 26, 29, 38, 40, 41, 44, 50, 57, 61, 66, 67, 69, 80, 83, 93, 97, 100, 101, 102, 103, 105, 108, 111, 112, 114, 115, 117, 122, 126, 129, 136, 146, 152, 153, 155, 156, 157, 160, 162, 163, 164, 165, 166, 170, 172, 174, 175, 178, 181, 187, 188, 189, 190, 197, 199, 204, 209, 211, 214, 224, 229, 231, 233, 234, 235, 242, 244, 246, 250, 251, 260, 261, 263, 266, 267, 269, 270, 272, 275, 279, 283, 284, 289, 290, 295, 299, 303, 304, 307, 319, 320, 321, 322, 323, 324, 325, 327, 328, 330, 333, 335, 340, 344, 345, 346, 348, 350, 351, 352]
[0, 3, 4, 7, 11, 12, 13, 15, 16, 19, 23, 26, 29, 33, 40, 41, 43, 44, 50, 54, 56, 57, 61, 62, 74, 75, 80, 83, 85, 87, 92, 96, 100, 102, 105, 106, 108, 111, 116, 117, 123, 127, 129, 134, 136, 139, 146, 148, 150, 156, 157, 162, 163, 165, 170, 172, 174, 175, 177, 178, 181, 185, 187, 188, 194, 199, 204, 209, 210, 213, 214, 220, 222, 226, 227, 228, 231, 234, 241, 242, 244, 246, 251, 257, 259, 260, 261, 263, 270, 272, 279, 280, 283, 284,

119

In [42]:
proc = DiabetesCorrProcessor()

In [43]:
# LinearCombTest
model = LinearCombTest(1)
manager_test0 = ModelManagerTest(proc.x_a_train, proc.x_b_train, proc.y_corr, model, 256)
start = time.time()
manager_test0.fit(print_value=True, tol=1e-8, lr=0.1, max_iter=40000)
print(time.time() - start)
manager_test0.report(proc.x_a_train, proc.x_b_train, proc.y_corr, proc.x_a_test, proc.x_b_test, proc.y_test)
manager_test0.report(proc.x_a_train, proc.x_b_train, proc.y_corr, proc.x_a_query, proc.x_b_query, proc.y_query)

SGD loss: tf.Tensor(0.090167016, shape=(), dtype=float32)
SGD steps: 27464
221.64013290405273
Model name: LinearCombTest
On Training
               precision    recall  f1-score   support

         0.0       0.76      0.85      0.80       233
         1.0       0.62      0.47      0.54       120

    accuracy                           0.72       353
   macro avg       0.69      0.66      0.67       353
weighted avg       0.71      0.72      0.71       353

On Testing
               precision    recall  f1-score   support

         0.0       0.58      0.90      0.70        21
         1.0       0.82      0.39      0.53        23

    accuracy                           0.64        44
   macro avg       0.70      0.65      0.62        44
weighted avg       0.70      0.64      0.61        44

Model name: LinearCombTest
On Training
               precision    recall  f1-score   support

         0.0       0.76      0.85      0.80       233
         1.0       0.62      0.47      0.54       1

In [44]:
corrsel = proc.corrsel
K = len(list(np.where(corrsel)[0]))
print(len(list(np.where(corrsel)[0])))

51


In [49]:
manager_test = ModelManagerTest(proc.x_a_train, proc.x_b_train, proc.y_corr, LinearCombTest(1), 256)
manager_test.model.set_weights(manager_test0.model.get_weights())
manager_test.delta = tf.Variable(manager_test0.delta.value(), name="delta")
from mlsql.loss_ranker import LossRanker
ranker = LossRanker(manager=manager_test)
# ranker = InfluenceRanker(manager=manager_test, on=proc.test_complain)
fixer = AutoFixer(manager_test, proc.corrsel, K)

# AQs = []
weighted_f1 = []
rank_list = []
rank_time_rain = 0
model_time_rain = 0
# AQ = proc.test_complain(manager_test).AQ
f1 = f1_score(proc.y_test.numpy(), manager_test.model.predict(proc.x_a_test, proc.x_b_test).numpy(), average='weighted')
# f1 = f1_score(proc.y_query.numpy(), manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(), average='weighted')
# AQs.append(float(AQ))
weighted_f1.append(f1)

step_size = 10
rain_k = int(np.ceil(K / step_size))
for k in trange(0, rain_k):
    nfix = min(step_size, K - step_size * k)
    assert nfix > 0

    start = time.time()
    rank = rank_fix(ranker, fixer, nfix)
    middle = time.time()
    manager_test.fit(max_iter=20000, tol=1e-8, print_value=True)
    end = time.time()
    
    rank_list.append(rank.numpy())
    rank_time_rain += middle - start
    model_time_rain += end - middle

#     AQ = proc.test_complain(manager_test).AQ
    f1 = f1_score(proc.y_test.numpy(), manager_test.model.predict(proc.x_a_test, proc.x_b_test).numpy(), average='weighted')
#     f1 = f1_score(proc.y_query.numpy(), manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(), average='weighted')
#     AQs.append(float(AQ))
    weighted_f1.append(f1)

print("Rank_time:", rank_time_rain)
print("Model_time:", model_time_rain)
# AC = proc.test_complain(manager_test).AC

df_rain_test = pd.DataFrame({
#     "Complain": np.array(AQs) - AC,
    "F1": np.array(weighted_f1),
    "K": list(range(0, K, step_size)) + [K],
    "Method": np.repeat("Rain", len(weighted_f1)),
})
alt.Chart(pd.concat([df_rain_test])).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_rain_test.shape[0], grid=False)),
    alt.Y("F1:Q"),
    color = "Method"
)

 17%|█▋        | 1/6 [02:01<10:09, 121.98s/it]

SGD loss: tf.Tensor(0.08117104, shape=(), dtype=float32)
SGD steps: 15457


 33%|███▎      | 2/6 [04:00<07:59, 119.89s/it]

SGD loss: tf.Tensor(0.07295228, shape=(), dtype=float32)
SGD steps: 14692


 50%|█████     | 3/6 [05:52<05:49, 116.41s/it]

SGD loss: tf.Tensor(0.064613916, shape=(), dtype=float32)
SGD steps: 14086


 67%|██████▋   | 4/6 [08:27<04:23, 131.73s/it]

SGD loss: tf.Tensor(0.056171227, shape=(), dtype=float32)
SGD steps: 19999


 83%|████████▎ | 5/6 [11:02<02:20, 140.07s/it]

SGD loss: tf.Tensor(0.04884723, shape=(), dtype=float32)
SGD steps: 19999


100%|██████████| 6/6 [13:34<00:00, 135.73s/it]

SGD loss: tf.Tensor(0.047705185, shape=(), dtype=float32)
SGD steps: 19125
Rank_time: 0.05276131629943848
Model_time: 814.3235311508179





In [56]:
fixer.deletions

<tf.Variable 'Variable:0' shape=(51,) dtype=int32, numpy=
array([ 92,  96,   6, 134,  75, 351,  43,  93,  33, 139, 163, 319, 280,
        94, 313, 328, 318, 160, 271,  69, 260, 100, 204,  26,  76,  10,
       257, 116,  54, 259, 186, 213, 143,  71,  80, 220, 164, 332, 292,
       165, 131,   3, 123, 325,  12, 199, 339, 150, 166, 178, 331],
      dtype=int32)>

In [57]:
np.where(proc.corrsel)

(array([  0,  11,  12,  16,  29,  44,  50,  56,  62,  74,  80,  85,  87,
        100, 105, 106, 108, 127, 148, 156, 157, 163, 165, 175, 177, 178,
        185, 187, 194, 199, 210, 222, 226, 227, 241, 244, 251, 260, 279,
        283, 286, 289, 315, 316, 321, 325, 329, 334, 340, 350, 352]),)

In [58]:
fixer.recall_k()

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.01960784, 0.01960784, 0.01960784, 0.01960784, 0.01960784,
       0.01960784, 0.01960784, 0.01960784, 0.01960784, 0.01960784,
       0.03921569, 0.05882353, 0.05882353, 0.05882353, 0.05882353,
       0.05882353, 0.05882353, 0.05882353, 0.05882353, 0.05882353,
       0.05882353, 0.05882353, 0.05882353, 0.05882353, 0.07843137,
       0.07843137, 0.07843137, 0.07843137, 0.07843137, 0.09803922,
       0.09803922, 0.09803922, 0.09803922, 0.11764706, 0.1372549 ,
       0.15686275, 0.15686275, 0.15686275, 0.15686275, 0.17647059,
       0.17647059])

In [55]:
f1_score(proc.y_query.numpy(), manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(), average='weighted')

0.6630380999505195

In [78]:
lc_fifpercent_recall

array([0.00840336, 0.01680672, 0.02521008, 0.03361345, 0.04201681,
       0.05042017, 0.05042017, 0.05882353, 0.05882353, 0.06722689,
       0.07563025, 0.08403361, 0.08403361, 0.09243697, 0.10084034,
       0.1092437 , 0.11764706, 0.12605042, 0.13445378, 0.13445378,
       0.13445378, 0.14285714, 0.1512605 , 0.1512605 , 0.15966387,
       0.15966387, 0.16806723, 0.17647059, 0.18487395, 0.19327731,
       0.19327731, 0.20168067, 0.21008403, 0.21008403, 0.21848739,
       0.22689076, 0.23529412, 0.24369748, 0.25210084, 0.2605042 ,
       0.26890756, 0.27731092, 0.28571429, 0.29411765, 0.30252101,
       0.30252101, 0.30252101, 0.31092437, 0.31932773, 0.31932773,
       0.32773109, 0.33613445, 0.34453782, 0.34453782, 0.35294118,
       0.36134454, 0.36134454, 0.3697479 , 0.37815126, 0.38655462,
       0.39495798, 0.40336134, 0.41176471, 0.41176471, 0.41176471,
       0.42016807, 0.42016807, 0.42857143, 0.43697479, 0.44537815,
       0.45378151, 0.45378151, 0.45378151, 0.46218487, 0.47058

In [272]:
# from mlsql.lc_protocol import fit as lc_fit
# from mlsql.lc_protocol import fit_test as lc_fit_test
# from mlsql.lc_protocol import report as lc_report
# from models.linear_comb import LinearComb
# model0_a = LinearComb(1)
# manager0_a = ModelManagerLM(proc.x_a_train, proc.y_train, model0_a, 256)
# model0_b = LinearComb(1)
# manager0_b = ModelManagerLM(proc.x_b_train, proc.y_train, model0_b, 256)
# # enc_time, cpu_time = lc_fit_test(manager0_a, manager0_b, max_iter=2000, tol=1e-6, lr=0.5, print_value=True)
# enc_time, cpu_time = lc_fit(manager0_a, manager0_b, max_iter=1000, tol=1e-6, lr=0.5, print_value=True)
# print(enc_time, cpu_time)
# lc_report(manager0_a, manager0_b, proc.x_a_train, proc.x_b_train, proc.y_train, 
#           proc.x_a_test, proc.x_b_test, proc.y_test)


In [384]:
from mlsql.lc_protocol import fit as lc_fit
from mlsql.lc_protocol import fit_test as lc_fit_test
from mlsql.lc_protocol import report as lc_report
from models.linear_comb import LinearComb
model0_a = LinearComb(1)
manager0_a = ModelManagerLM(proc.x_a_train, proc.y_corr, model0_a, 256)
model0_b = LinearComb(1)
manager0_b = ModelManagerLM(proc.x_b_train, proc.y_corr, model0_b, 256)
# enc_time, cpu_time = lc_fit_test(manager0_a, manager0_b, max_iter=2000, tol=1e-6, lr=0.5, print_value=True)
enc_time, cpu_time = lc_fit(manager0_a, manager0_b, max_iter=20000, tol=1e-6, lr=0.5, print_value=True)
print(enc_time, cpu_time)
lc_report(manager0_a, manager0_b, proc.x_a_train, proc.x_b_train, proc.y_corr, 
          proc.x_a_test, proc.x_b_test, proc.y_test)
lc_report(manager0_a, manager0_b, proc.x_a_train, proc.x_b_train, proc.y_corr, 
          proc.x_a_query, proc.x_b_query, proc.y_query)

SGD loss: tf.Tensor(0.090409495, shape=(), dtype=float32)
SGD steps: 3010
0.5648119449615479 66.21756076812744
Model A name: LinearComb
Model B name: LinearComb
On Training
               precision    recall  f1-score   support

         0.0       0.75      0.76      0.76       199
         1.0       0.69      0.67      0.68       154

    accuracy                           0.72       353
   macro avg       0.72      0.72      0.72       353
weighted avg       0.72      0.72      0.72       353

On Testing
               precision    recall  f1-score   support

         0.0       0.75      0.86      0.80        21
         1.0       0.85      0.74      0.79        23

    accuracy                           0.80        44
   macro avg       0.80      0.80      0.80        44
weighted avg       0.80      0.80      0.80        44

Model A name: LinearComb
Model B name: LinearComb
On Training
               precision    recall  f1-score   support

         0.0       0.75      0.76      0.7

In [357]:
from mlsql.lc_protocol import f1 as lc_f1
from mlsql.lc_protocol import rank_fix as lc_rank_fix
from mlsql.lc_protocol import complain_value

model_a = LinearComb(1)
manager_a = ModelManagerLM(proc.x_a_train, proc.y_corr, model_a, 256)
manager_a.model.set_weights(model0_a.get_weights())
manager_a.delta = tf.Variable(manager0_a.delta.value(), name="delta")
model_b = LinearComb(1)
manager_b = ModelManagerLM(proc.x_b_train, proc.y_corr, model_b, 256)
manager_b.model.set_weights(model0_b.get_weights())
manager_b.delta = tf.Variable(manager0_b.delta.value(), name="delta")

fixer_a = AutoFixer(manager_a, corrsel, K)
fixer_b = AutoFixer(manager_b, corrsel, K)

AQs = []
weighted_f1 = []
rank_list = []
rank_time_rain = 0
model_time_enc, model_time_rain = 0, 0
query_data = proc.query_data()
AQ = proc.lc_complain(manager_a, manager_b).AQ
# AQ = complain_value(manager_a, manager_b, proc.fl_complain, query_data)
# f1 = lc_f1(manager_a, manager_b, proc.x_a_test, proc.x_b_test, proc.y_test)
f1 = lc_f1(manager_a, manager_b, proc.x_a_query, proc.x_b_query, proc.y_query)
AQs.append(float(AQ))
weighted_f1.append(f1)

step_size = 5
rain_k = int(np.ceil(K / step_size))
for k in trange(0, rain_k):
    nfix = min(step_size, K - step_size * k)
    assert nfix > 0

    start = time.time()
#     rank = lc_rank_fix(fixer_a, fixer_b, nfix, manager_a, manager_b, proc.lc_complain, query_data)
    rank = lc_rank_fix(fixer_a, fixer_b, nfix, manager_a, manager_b, proc.fl_complain, query_data)
    middle = time.time()
#     enc_time, cpu_time = lc_fit_test(manager_a, manager_b, max_iter=1000, tol=1e-6, lr=0.5, print_value=True)
    enc_time, cpu_time = lc_fit(manager_a, manager_b, max_iter=1000, tol=1e-6, lr=0.1, print_value=True)
    end = time.time()
    
    rank_list.append(rank.numpy())
    rank_time_rain += middle - start
    model_time_rain += cpu_time
    model_time_enc += enc_time

    AQ = proc.lc_complain(manager_a, manager_b).AQ
#     AQ = complain_value(manager_a, manager_b, proc.fl_complain, query_data)
#     f1 = lc_f1(manager_a, manager_b, proc.x_a_test, proc.x_b_test, proc.y_test)
    f1 = lc_f1(manager_a, manager_b, proc.x_a_query, proc.x_b_query, proc.y_query)
    AQs.append(float(AQ))
    weighted_f1.append(f1)

print("Rank_time:", rank_time_rain)
print("Retrain_cpu_time:", model_time_rain)
print("Retrain_enc_time:", model_time_enc)

df_rain = pd.DataFrame({
    "Complain": np.array(AQs) - AC,
    "F1": np.array(weighted_f1),
    "K": list(range(0, K, step_size)) + [K],
    "Method": np.repeat("LC_Rain", len(AQs)),
})
alt.Chart(pd.concat([df_rain])).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_rain.shape[0], grid=False)),
    alt.Y("Complain:Q"),
    color = "Method"
)

  0%|          | 0/4 [00:00<?, ?it/s]

SGD loss: tf.Tensor(0.095387205, shape=(), dtype=float32)
SGD steps: 1
SGD loss: tf.Tensor(0.09413851, shape=(), dtype=float32)
SGD steps: 236
SGD loss: tf.Tensor(0.093245216, shape=(), dtype=float32)
SGD steps: 215
SGD loss: tf.Tensor(0.0927603, shape=(), dtype=float32)
SGD steps: 196
Rank_time: 20.50556206703186
Retrain_cpu_time: 14.095456600189209
Retrain_enc_time: 0.11449408531188965


In [337]:
fixer_a.recall_k()

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.05882353, 0.05882353, 0.05882353, 0.05882353,
       0.05882353, 0.05882353, 0.11764706, 0.11764706, 0.11764706,
       0.11764706, 0.17647059])

In [338]:
print(sorted(fixer_a.deletions.numpy()))
print(list(np.where(corrsel)[0]))

[7, 12, 15, 44, 47, 61, 69, 80, 83, 166, 172, 174, 246, 260, 304, 309, 327]
[11, 16, 44, 62, 80, 106, 148, 175, 187, 210, 222, 227, 260, 289, 325, 334, 340]


In [318]:
alt.Chart(pd.concat([df_rain])).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_rain.shape[0], grid=False)),
    alt.Y("F1:Q"),
    color = "Method"
)

In [None]:
K = 210
step_size = 10
AQs = [-0.16054906056013812, -0.15944249602805974, -0.1580060542769359, -0.15719313225969206, 
       -0.15582570537932483, -0.1567545207834361, -0.15714041809657475, -0.1569623401963131, 
       -0.15633308345656335, -0.15564630866652324, -0.15353626425437816, -0.1547844818297688, 
       -0.15538986484225104, -0.15167052494689795, -0.14868695509097463, -0.14861514040232315, 
       -0.15188442283612708, -0.15251549676939935, -0.153599294588957, -0.15507196712521656, 
       -0.15592721175194144, -0.1586298157054136][:16]
F1 = [0.6959071067570164, 0.7307927546459656, 0.7410932220234546, 0.7533179132935328, 
      0.7619985710883543, 0.7645716278311145, 0.7670189931472592, 0.7661391240338609, 
      0.7661391240338609, 0.7715647347693169, 0.7724421209858103, 0.7692101643752349, 
      0.7692101643752349, 0.7692101643752349, 0.7659901074535221, 0.7659901074535221, 
      0.7659901074535221, 0.7659901074535221, 0.7659901074535221, 0.7692101643752349, 
      0.7692101643752349, 0.7692101643752349][:16]
    
df_flrain = pd.DataFrame({
    "Complain": np.array(AQs),
    "F1": np.array(F1),
    "K": list(range(0, K + step_size, step_size))[:16],
    "Method": np.repeat("LC_Rain", len(AQs)),
})
alt.Chart(pd.concat([df_flrain])).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_flrain.shape[0], grid=False)),
    alt.Y("Complain:Q", scale=alt.Scale(domain=[min(AQs),max(AQs)])),
    color = "Method"
)

In [None]:
alt.Chart(pd.concat([df_flrain])).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_flrain.shape[0], grid=False)),
    alt.Y("F1:Q", scale=alt.Scale(domain=[0.65, 0.85])),
    color = "Method"
)

In [None]:
from mlsql.lc_protocol import report, predict, hessian
predict(manager_a, manager_b, proc.x_a_test, proc.x_b_test)

In [None]:
np.where(manager_a.delta.numpy() == 0)

In [None]:
np.where(manager_b.delta.numpy() == 0)

In [None]:
np.where(manager.delta.numpy() == 0)

In [None]:
from models.linear_comb import LinearComb
from mlsql.utils.utils import pack
len(pack(manager_a.variables))

In [None]:
manager0_b.egrads().shape[1]