In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
sys.path.append("../")

import gurobipy
from json import dumps, loads
from time import time

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression as skLogisticRegression
from sklearn.metrics import (classification_report, f1_score, precision_score, recall_score)
from tqdm import tnrange, trange
import tensorflow as tf

from mlsql.influence import InfluenceRanker
from mlsql.fixer import AutoFixer
from mlsql.manager import ModelManagerLM
from mlsql.manager_test import ModelManagerTest

from models.simple_cnn import SimpleCNN
from models.logreg import LogReg
from models.linear_comb import LinearComb
from models.linear_comb_test import LinearCombTest
from processors.mnistbinary import MnistBinaryProcessor


import logging
# Only CRITICAL records from the "tensorflow" logger are let through.
logging.getLogger("tensorflow").setLevel(logging.CRITICAL)

# NOTE: this rebinds `time` from the function imported earlier in this cell
# (`from time import time`) to the module itself. The cells below call
# `time.time()`, so they rely on the module binding made here.
import time
import altair as alt
# Turn off Altair's max-rows check for the charts built later in the notebook.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# @tf.function
def rank_fix(ranker, fixer, n):
    """Compute one ranking and hand it straight to the fixer.

    Args:
        ranker: object exposing ``predict()``; called exactly once.
        fixer: object exposing ``fix(rank, n)``; receives the fresh ranking.
        n: forwarded unchanged to ``fixer.fix`` (presumably the number of
            fixes to apply in this step -- confirm against ``AutoFixer``).

    Returns:
        Whatever ``ranker.predict()`` returned, i.e. the same object that was
        passed to ``fixer.fix``.
    """
    ranking = ranker.predict()
    fixer.fix(ranking, n)
    return ranking

@tf.function
def rankit(ranker):
    """Graph-compiled wrapper that returns ``ranker.predict()``.

    Args:
        ranker: object exposing ``predict()``.

    Returns:
        The value produced by ``ranker.predict()`` as seen through
        ``tf.function``.
    """
    return ranker.predict()


@tf.function
def fixit(fixer, rank, n):
    """Forward ``rank`` and ``n`` to ``fixer.fix`` inside a ``tf.function``.

    Nothing is returned.

    NOTE(review): under ``tf.function`` the Python body runs only while
    tracing, so any plain-Python side effect of ``fixer.fix`` would not repeat
    on later calls with the same trace -- confirm that ``fix`` is expressed in
    TensorFlow ops before relying on this wrapper.
    """
    fixer.fix(rank, n)


# @tf.function
def train(manager):
    """Fit ``manager`` using the defaults of its ``fit()`` method.

    Args:
        manager: object exposing a no-argument ``fit()``.

    Returns:
        None. Any value returned by ``manager.fit()`` is discarded.
    """
    manager.fit()
    return None

In [4]:
# Shared data processor. The cells below read its train/test/query splits,
# `y_corr`, `corrsel`, and the `complain` / `test_complain` callables from it.
proc = MnistBinaryProcessor()

In [5]:
# Baseline: a LogReg model fitted on (x_train, y_corr). Later cells copy its
# weights and delta instead of refitting.
model = LogReg(1)
manager0 = ModelManagerLM(proc.x_train, proc.y_corr, model, 256)

# perf_counter() is monotonic and high-resolution, so the elapsed time cannot
# be distorted by wall-clock adjustments the way a time.time() difference can.
start = time.perf_counter()
manager0.fit(print_value=True, tol=1e-5, lr=0.1, max_iter=2000)
print(time.perf_counter() - start)  # fit duration in seconds

# Same training split in both reports; evaluated on the test split first and
# on the query split second.
manager0.report(proc.x_train, proc.y_corr, proc.x_test, proc.y_test)
manager0.report(proc.x_train, proc.y_corr, proc.x_query, proc.y_query)

SGD loss: tf.Tensor(0.45520002, shape=(), dtype=float32)
SGD steps: 888
160.40223050117493
Model name: LogReg
On Training
               precision    recall  f1-score   support

         0.0       0.82      0.82      0.82     34810
         1.0       0.68      0.68      0.68     19190

    accuracy                           0.77     54000
   macro avg       0.75      0.75      0.75     54000
weighted avg       0.77      0.77      0.77     54000

On Testing
               precision    recall  f1-score   support

         0.0       0.75      0.98      0.85      4926
         1.0       0.97      0.69      0.81      5074

    accuracy                           0.83     10000
   macro avg       0.86      0.83      0.83     10000
weighted avg       0.86      0.83      0.83     10000

Model name: LogReg
On Training
               precision    recall  f1-score   support

         0.0       0.82      0.82      0.82     34810
         1.0       0.68      0.68      0.68     19190

    accuracy   

In [6]:
# Mask of selected training rows, taken from the processor.
corrsel = proc.corrsel
# Total number of fixes handed to AutoFixer in the cells below.
K = 8223
# Count the truthy entries of the mask (size of the first index array).
print(np.where(corrsel)[0].shape[0])

8223


In [7]:
# Notebook-widget progress bars (rebinds the names imported from plain tqdm
# in the first cell).
from tqdm.notebook import tnrange, trange

# Start from the state of the trained baseline (same weights and delta)
# without refitting.
manager = ModelManagerLM(proc.x_train, proc.y_corr, LogReg(1), 256)
manager.model.set_weights(manager0.model.get_weights())
manager.delta = tf.Variable(manager0.delta.value(), name="delta")
ranker = InfluenceRanker(manager=manager, on=proc.complain)
fixer = AutoFixer(manager, corrsel, K)

AQs = []
weighted_f1 = []
rank_list = []
rank_time_rain = 0
model_time_rain = 0

# Metrics before any fix is applied (the K = 0 point of the curve).
# Weighted F1 is scored on the query split.
AQ = proc.complain(manager).AQ
f1 = f1_score(proc.y_query.numpy(), manager.model.predict(proc.x_query).numpy(), average='weighted')
AQs.append(float(AQ))
weighted_f1.append(f1)

step_size = 8223
rain_k = int(np.ceil(K / step_size))
for k in trange(0, rain_k):
    # The last step may be shorter than step_size.
    nfix = min(step_size, K - step_size * k)
    assert nfix > 0

    # perf_counter() is monotonic, which is what interval timing needs.
    start = time.perf_counter()
    rank = rank_fix(ranker, fixer, nfix)
    middle = time.perf_counter()
    # Retraining is switched off for this run, so model_time_rain only
    # accumulates timer overhead. Re-enable the line below to retrain per step:
    #     manager.fit(max_iter=1000, print_value=True, lr=0.1, tol=1e-5)
    end = time.perf_counter()

    rank_list.append(rank.numpy())
    rank_time_rain += middle - start
    model_time_rain += end - middle

    # Metrics after this batch of fixes.
    AQ = proc.complain(manager).AQ
    f1 = f1_score(proc.y_query.numpy(), manager.model.predict(proc.x_query).numpy(), average='weighted')
    AQs.append(float(AQ))
    weighted_f1.append(f1)

print("Rank_time:", rank_time_rain)
print("Model_time:", model_time_rain)
AC = proc.complain(manager).AC

# One row per recorded point: the initial state plus one per loop step.
df_rain = pd.DataFrame({
    "Complain": np.array(AQs) - AC,
    "F1": np.array(weighted_f1),
    "K": list(range(0, K, step_size)) + [K],
    "Method": np.repeat("Rain", len(AQs)),
})
# The frame is passed directly; concatenating a single frame was a no-op.
alt.Chart(df_rain).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_rain.shape[0], grid=False)),
    alt.Y("Complain:Q"),
    color = "Method"
)

  0%|          | 0/1 [00:00<?, ?it/s]

Rank_time: 551.1078689098358
Model_time: 1.6689300537109375e-06


In [8]:
# Every 1000th entry of fixer.recall_k(), followed by its last entry.
np.concatenate([fixer.recall_k()[::1000], fixer.recall_k()[-1:]])

array([1.21610118e-04, 1.12124529e-01, 2.23032956e-01, 3.28712149e-01,
       4.29526937e-01, 5.22558677e-01, 6.07320929e-01, 6.83570473e-01,
       7.42916211e-01, 7.55563663e-01])

In [24]:
# LinearCombTest baseline fitted on the two-input (x_a, x_b) training data.
# The saved run of this cell took roughly 7600 s.
model = LinearCombTest(1)
manager_test0 = ModelManagerTest(proc.x_a_train, proc.x_b_train, proc.y_corr, model, 256)

# perf_counter() is monotonic and high-resolution, so the elapsed time cannot
# be distorted by wall-clock adjustments the way a time.time() difference can.
start = time.perf_counter()
manager_test0.fit(print_value=True, tol=1e-10, lr=0.5, max_iter=100000)
print(time.perf_counter() - start)  # fit duration in seconds

# Same training split in both reports; evaluated on the test split first and
# on the query split second.
manager_test0.report(proc.x_a_train, proc.x_b_train, proc.y_corr, proc.x_a_test, proc.x_b_test, proc.y_test)
manager_test0.report(proc.x_a_train, proc.x_b_train, proc.y_corr, proc.x_a_query, proc.x_b_query, proc.y_query)

SGD loss: tf.Tensor(0.070950076, shape=(), dtype=float32)
SGD steps: 61585
7602.085188627243
Model name: LinearCombTest
On Training
               precision    recall  f1-score   support

         0.0       0.89      0.77      0.83     34810
         1.0       0.67      0.83      0.74     19190

    accuracy                           0.79     54000
   macro avg       0.78      0.80      0.79     54000
weighted avg       0.81      0.79      0.80     54000

On Testing
               precision    recall  f1-score   support

         0.0       0.84      0.96      0.90      4926
         1.0       0.95      0.83      0.89      5074

    accuracy                           0.89     10000
   macro avg       0.90      0.89      0.89     10000
weighted avg       0.90      0.89      0.89     10000

Model name: LinearCombTest
On Training
               precision    recall  f1-score   support

         0.0       0.89      0.77      0.83     34810
         1.0       0.67      0.83      0.74     1919

In [None]:
# Start from the state of the trained LinearCombTest baseline (same weights
# and delta) without refitting.
manager_test = ModelManagerTest(proc.x_a_train, proc.x_b_train, proc.y_corr, LinearCombTest(1), 256)
manager_test.model.set_weights(manager_test0.model.get_weights())
manager_test.delta = tf.Variable(manager_test0.delta.value(), name="delta")
ranker = InfluenceRanker(manager=manager_test, on=proc.test_complain)
fixer = AutoFixer(manager_test, proc.corrsel, K)

AQs = []
weighted_f1 = []
rank_list = []
rank_time_rain = 0
model_time_rain = 0

# Metrics before any fix is applied (the K = 0 point of the curve).
# Weighted F1 is scored on the query split.
AQ = proc.test_complain(manager_test).AQ
f1 = f1_score(proc.y_query.numpy(), manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(), average='weighted')
AQs.append(float(AQ))
weighted_f1.append(f1)

step_size = 1000
rain_k = int(np.ceil(K / step_size))
for k in trange(0, rain_k):
    # The last step may be shorter than step_size.
    nfix = min(step_size, K - step_size * k)
    assert nfix > 0

    # perf_counter() is monotonic, which is what interval timing needs.
    start = time.perf_counter()
    rank = rank_fix(ranker, fixer, nfix)
    middle = time.perf_counter()
    # Retrain after every batch of fixes; timed separately from the ranking.
    manager_test.fit(max_iter=5000, tol=1e-8, lr=0.1, print_value=True)
    end = time.perf_counter()

    rank_list.append(rank.numpy())
    rank_time_rain += middle - start
    model_time_rain += end - middle

    # Metrics after this batch of fixes and the retrain.
    AQ = proc.test_complain(manager_test).AQ
    f1 = f1_score(proc.y_query.numpy(), manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(), average='weighted')
    AQs.append(float(AQ))
    weighted_f1.append(f1)

print("Rank_time:", rank_time_rain)
print("Model_time:", model_time_rain)
AC = proc.test_complain(manager_test).AC

# One row per recorded point: the initial state plus one per loop step.
df_rain_test = pd.DataFrame({
    "Complain": np.array(AQs) - AC,
    "F1": np.array(weighted_f1),
    "K": list(range(0, K, step_size)) + [K],
    "Method": np.repeat("Rain", len(AQs)),
})
# The frame is passed directly; concatenating a single frame was a no-op.
alt.Chart(df_rain_test).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_rain_test.shape[0], grid=False)),
    alt.Y("Complain:Q"),
    color = "Method"
)

In [25]:
# Start from the state of the trained LinearCombTest baseline (same weights
# and delta) without refitting.
manager_test = ModelManagerTest(proc.x_a_train, proc.x_b_train, proc.y_corr, LinearCombTest(1), 256)
manager_test.model.set_weights(manager_test0.model.get_weights())
manager_test.delta = tf.Variable(manager_test0.delta.value(), name="delta")
ranker = InfluenceRanker(manager=manager_test, on=proc.test_complain)
fixer = AutoFixer(manager_test, proc.corrsel, K)

AQs = []
weighted_f1 = []
rank_list = []
rank_time_rain = 0
model_time_rain = 0

# Metrics before any fix is applied (the K = 0 point of the curve).
# Weighted F1 is scored on the query split.
AQ = proc.test_complain(manager_test).AQ
f1 = f1_score(proc.y_query.numpy(), manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(), average='weighted')
AQs.append(float(AQ))
weighted_f1.append(f1)

step_size = 8223
rain_k = int(np.ceil(K / step_size))
for k in trange(0, rain_k):
    # The last step may be shorter than step_size.
    nfix = min(step_size, K - step_size * k)
    assert nfix > 0

    # perf_counter() is monotonic, which is what interval timing needs.
    start = time.perf_counter()
    rank = rank_fix(ranker, fixer, nfix)
    middle = time.perf_counter()
    # Retraining is switched off for this run, so model_time_rain only
    # accumulates timer overhead. Re-enable the line below to retrain per step:
    #     manager_test.fit(max_iter=5000, tol=1e-8, lr=0.1, print_value=True)
    end = time.perf_counter()

    rank_list.append(rank.numpy())
    rank_time_rain += middle - start
    model_time_rain += end - middle

    # Metrics after this batch of fixes.
    AQ = proc.test_complain(manager_test).AQ
    f1 = f1_score(proc.y_query.numpy(), manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(), average='weighted')
    AQs.append(float(AQ))
    weighted_f1.append(f1)

print("Rank_time:", rank_time_rain)
print("Model_time:", model_time_rain)
AC = proc.test_complain(manager_test).AC

# One row per recorded point: the initial state plus one per loop step.
df_rain_test = pd.DataFrame({
    "Complain": np.array(AQs) - AC,
    "F1": np.array(weighted_f1),
    "K": list(range(0, K, step_size)) + [K],
    "Method": np.repeat("Rain", len(AQs)),
})
# The frame is passed directly; concatenating a single frame was a no-op.
alt.Chart(df_rain_test).mark_line().encode(
    alt.X('K:Q', axis=alt.Axis(tickCount=df_rain_test.shape[0], grid=False)),
    alt.Y("Complain:Q"),
    color = "Method"
)

  0%|          | 0/1 [00:00<?, ?it/s]

Rank_time: 505.00672936439514
Model_time: 2.1457672119140625e-06


In [26]:
# Every 1000th entry of fixer.recall_k(), followed by its last entry.
np.concatenate([fixer.recall_k()[::1000], fixer.recall_k()[-1:]])

array([0.        , 0.08707284, 0.19275204, 0.29247233, 0.39243585,
       0.48996717, 0.58336374, 0.66508574, 0.73768698, 0.75337468])