In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
sys.path.append("../")

import gurobipy
from json import dumps, loads
from time import time

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression as skLogisticRegression
from sklearn.metrics import (classification_report, f1_score, precision_score, recall_score)
from tqdm import tnrange, trange
import tensorflow as tf

from mlsql.influence import InfluenceRanker
from mlsql.fixer import AutoFixer
from mlsql.manager import ModelManagerLM
from mlsql.manager_test import ModelManagerTest

from models.simple_cnn import SimpleCNN
from models.logreg import LogReg
from models.linear_comb import LinearComb
from models.linear_comb_test import LinearCombTest
from processors.mnistbinary_5 import MnistBinaryProcessor


import logging
# Raise the "tensorflow" logger's threshold to CRITICAL so lower-severity records are dropped.
logging.getLogger("tensorflow").setLevel(logging.CRITICAL)

# NOTE: this module import rebinds the name `time`, replacing the function brought in
# earlier by `from time import time`. Later cells call `time.time()`, so they rely on
# the module form being the one in scope.
import time
import altair as alt
# Turn off Altair's maximum-row check for chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# @tf.function
def rank_fix(ranker, fixer, n):
    """Run one rank-then-fix step and hand back the ranking that was used.

    Args:
        ranker: object exposing ``predict()``; its result is passed on unchanged.
        fixer: object exposing ``fix(rank, n)``.
        n: forwarded as the second argument of ``fixer.fix`` (callers pass the
            number of fixes to apply in this step).

    Returns:
        Whatever ``ranker.predict()`` returned.
    """
    ranking = ranker.predict()
    fixer.fix(ranking, n)
    return ranking

@tf.function
def rankit(ranker):
    """Return the result of ``ranker.predict()``, wrapped as a ``tf.function``."""
    return ranker.predict()


@tf.function
def fixit(fixer, rank, n):
    """Call ``fixer.fix(rank, n)``; nothing is returned.

    Not called in the visible cells, which use the undecorated ``rank_fix`` instead.

    NOTE(review): this runs under ``tf.function``. Python-level side effects inside a
    traced function generally execute only while tracing -- confirm that
    ``fixer.fix`` is safe to trace before using this wrapper.
    """
    fixer.fix(rank, n)


# @tf.function
def train(manager):
    """Fit ``manager`` by calling its ``fit()`` method with no arguments.

    Nothing is returned. The visible cells do not call this helper; they invoke
    ``fit(...)`` on the managers directly with explicit hyper-parameters.
    """
    manager.fit()

In [3]:
# Build the data processor shared by every later cell. Those cells read its
# x_train / y_corr / x_test / y_test / x_query / y_query attributes (plus the
# x_a_* / x_b_* variants), the `corrsel` selector, and call `complain` / `test_complain`.
proc = MnistBinaryProcessor()

In [4]:
# Baseline: fit LogReg through ModelManagerLM on (x_train, y_corr) and time the fit.
model = LogReg(1)
manager0 = ModelManagerLM(proc.x_train, proc.y_corr, model, 256)

start = time.time()
manager0.fit(print_value=True, tol=1e-5, lr=0.1, max_iter=2000)
print(time.time() - start)  # elapsed wall-clock seconds spent in fit()

# Both reports share the same training arguments; only the evaluation split changes
# (test split first, then the query split).
lm_train_args = (proc.x_train, proc.y_corr)
manager0.report(*lm_train_args, proc.x_test, proc.y_test)
manager0.report(*lm_train_args, proc.x_query, proc.y_query)

SGD loss: tf.Tensor(0.44957373, shape=(), dtype=float32)
SGD steps: 587
106.88467240333557
Model name: LogReg
On Training
               precision    recall  f1-score   support

         0.0       0.78      0.91      0.84     40293
         1.0       0.50      0.27      0.35     13707

    accuracy                           0.75     54000
   macro avg       0.64      0.59      0.59     54000
weighted avg       0.71      0.75      0.72     54000

On Testing
               precision    recall  f1-score   support

         0.0       0.58      1.00      0.73      4926
         1.0       0.99      0.29      0.44      5074

    accuracy                           0.64     10000
   macro avg       0.78      0.64      0.59     10000
weighted avg       0.78      0.64      0.58     10000

Model name: LogReg
On Training
               precision    recall  f1-score   support

         0.0       0.78      0.91      0.84     40293
         1.0       0.50      0.27      0.35     13707

    accuracy   

In [5]:
# K is the budget later handed to AutoFixer and used as the total number of fixes;
# it equals the count printed below in the recorded run.
K = 13706
corrsel = proc.corrsel
# Number of non-zero entries in the selector.
print(np.count_nonzero(corrsel))

13706


In [6]:
from tqdm.notebook import tnrange, trange

# Start from a fresh manager that copies the baseline's weights and delta, so the
# experiment begins at manager0's trained state without reusing that object.
manager = ModelManagerLM(proc.x_train, proc.y_corr, LogReg(1), 256)
manager.model.set_weights(manager0.model.get_weights())
manager.delta = tf.Variable(manager0.delta.value(), name="delta")
ranker = InfluenceRanker(manager=manager, on=proc.complain)
fixer = AutoFixer(manager, corrsel, K)

# Metrics before any fix is applied: this is the K = 0 point of the curve.
AQ = proc.complain(manager).AQ
f1 = f1_score(
    proc.y_query.numpy(),
    manager.model.predict(proc.x_query).numpy(),
    average='weighted',
)
AQs = [float(AQ)]
weighted_f1 = [f1]
rank_list = []
rank_time_rain = 0
model_time_rain = 0

step_size = 13706
rain_k = int(np.ceil(K / step_size))
for k in trange(rain_k):
    # The final step may be shorter than step_size.
    nfix = min(step_size, K - k * step_size)
    assert nfix > 0

    start = time.time()
    rank = rank_fix(ranker, fixer, nfix)
    middle = time.time()
    # Retraining is disabled, so the "model" interval below brackets no work:
    #     manager.fit(max_iter=1000, print_value=True, lr=0.1, tol=1e-5)
    end = time.time()

    rank_list.append(rank.numpy())
    rank_time_rain = rank_time_rain + (middle - start)
    model_time_rain = model_time_rain + (end - middle)

    # Re-evaluate the complaint value and the query-split F1 after this step.
    AQ = proc.complain(manager).AQ
    f1 = f1_score(
        proc.y_query.numpy(),
        manager.model.predict(proc.x_query).numpy(),
        average='weighted',
    )
    AQs.append(float(AQ))
    weighted_f1.append(f1)

print("Rank_time:", rank_time_rain)
print("Model_time:", model_time_rain)
AC = proc.complain(manager).AC

# One row per recorded point: the initial state plus one per step.
df_rain = pd.DataFrame({
    "Complain": np.array(AQs) - AC,
    "F1": np.array(weighted_f1),
    "K": [*range(0, K, step_size), K],
    "Method": np.repeat("Rain", len(AQs)),
})
x_axis = alt.X('K:Q', axis=alt.Axis(tickCount=len(df_rain), grid=False))
alt.Chart(pd.concat([df_rain])).mark_line().encode(
    x_axis,
    alt.Y("Complain:Q"),
    color="Method",
)

  0%|          | 0/1 [00:00<?, ?it/s]

Rank_time: 557.5538785457611
Model_time: 2.384185791015625e-06


In [7]:
# Evaluate recall_k() once and reuse the result: take every 1000th entry, then
# append the final entry so the end of the curve is always shown.
recall_curve = fixer.recall_k()
np.concatenate((recall_curve[0::1000], recall_curve[-1:]))

array([7.29607471e-05, 7.09908069e-02, 1.40522399e-01, 2.08886619e-01,
       2.76813075e-01, 3.43791040e-01, 4.08507223e-01, 4.71837152e-01,
       5.30862396e-01, 5.87844740e-01, 6.40741281e-01, 6.88019845e-01,
       7.32088137e-01, 7.69444039e-01, 7.91551145e-01])

In [16]:
# LinearCombTest
# Baseline for the two-input model: fit through ModelManagerTest on
# (x_a_train, x_b_train, y_corr) and time the fit.
model = LinearCombTest(1)
manager_test0 = ModelManagerTest(proc.x_a_train, proc.x_b_train, proc.y_corr, model, 256)

start = time.time()
manager_test0.fit(print_value=True, tol=1e-10, lr=0.5, max_iter=100000)
print(time.time() - start)  # elapsed wall-clock seconds spent in fit()

# Both reports share the same training arguments; only the evaluation split changes
# (test split first, then the query split).
ab_train_args = (proc.x_a_train, proc.x_b_train, proc.y_corr)
manager_test0.report(*ab_train_args, proc.x_a_test, proc.x_b_test, proc.y_test)
manager_test0.report(*ab_train_args, proc.x_a_query, proc.x_b_query, proc.y_query)

SGD loss: tf.Tensor(0.072627865, shape=(), dtype=float32)
SGD steps: 46871
5778.531663894653
Model name: LinearCombTest
On Training
               precision    recall  f1-score   support

         0.0       0.79      0.90      0.84     40293
         1.0       0.50      0.31      0.38     13707

    accuracy                           0.75     54000
   macro avg       0.65      0.60      0.61     54000
weighted avg       0.72      0.75      0.73     54000

On Testing
               precision    recall  f1-score   support

         0.0       0.58      0.99      0.73      4926
         1.0       0.98      0.31      0.47      5074

    accuracy                           0.65     10000
   macro avg       0.78      0.65      0.60     10000
weighted avg       0.78      0.65      0.60     10000

Model name: LinearCombTest
On Training
               precision    recall  f1-score   support

         0.0       0.79      0.90      0.84     40293
         1.0       0.50      0.31      0.38     1370

In [17]:
# Start from a fresh manager that copies the baseline's weights and delta, so the
# experiment begins at manager_test0's trained state without reusing that object.
# This cell rebinds `ranker`, `fixer` and the metric accumulators of the LogReg run.
manager_test = ModelManagerTest(proc.x_a_train, proc.x_b_train, proc.y_corr, LinearCombTest(1), 256)
manager_test.model.set_weights(manager_test0.model.get_weights())
manager_test.delta = tf.Variable(manager_test0.delta.value(), name="delta")
ranker = InfluenceRanker(manager=manager_test, on=proc.test_complain)
fixer = AutoFixer(manager_test, proc.corrsel, K)

# Metrics before any fix is applied: this is the K = 0 point of the curve.
AQ = proc.test_complain(manager_test).AQ
f1 = f1_score(
    proc.y_query.numpy(),
    manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(),
    average='weighted',
)
AQs = [float(AQ)]
weighted_f1 = [f1]
rank_list = []
rank_time_rain = 0
model_time_rain = 0

step_size = 13706
rain_k = int(np.ceil(K / step_size))
for k in trange(rain_k):
    # The final step may be shorter than step_size.
    nfix = min(step_size, K - k * step_size)
    assert nfix > 0

    start = time.time()
    rank = rank_fix(ranker, fixer, nfix)
    middle = time.time()
    # Retraining is disabled, so the "model" interval below brackets no work:
    #     manager_test.fit(max_iter=5000, tol=1e-8, lr=0.1, print_value=True)
    end = time.time()

    rank_list.append(rank.numpy())
    rank_time_rain = rank_time_rain + (middle - start)
    model_time_rain = model_time_rain + (end - middle)

    # Re-evaluate the complaint value and the query-split F1 after this step.
    AQ = proc.test_complain(manager_test).AQ
    f1 = f1_score(
        proc.y_query.numpy(),
        manager_test.model.predict(proc.x_a_query, proc.x_b_query).numpy(),
        average='weighted',
    )
    AQs.append(float(AQ))
    weighted_f1.append(f1)

print("Rank_time:", rank_time_rain)
print("Model_time:", model_time_rain)
AC = proc.test_complain(manager_test).AC

# One row per recorded point: the initial state plus one per step.
df_rain_test = pd.DataFrame({
    "Complain": np.array(AQs) - AC,
    "F1": np.array(weighted_f1),
    "K": [*range(0, K, step_size), K],
    "Method": np.repeat("Rain", len(AQs)),
})
x_axis = alt.X('K:Q', axis=alt.Axis(tickCount=len(df_rain_test), grid=False))
alt.Chart(pd.concat([df_rain_test])).mark_line().encode(
    x_axis,
    alt.Y("Complain:Q"),
    color="Method",
)

  0%|          | 0/1 [00:00<?, ?it/s]

Rank_time: 492.27111172676086
Model_time: 3.337860107421875e-06


In [18]:
# Evaluate recall_k() once and reuse the result: take every 1000th entry, then
# append the final entry so the end of the curve is always shown.
# `fixer` here is whichever AutoFixer was bound most recently.
recall_curve_test = fixer.recall_k()
np.concatenate((recall_curve_test[0::1000], recall_curve_test[-1:]))

array([0.        , 0.05807675, 0.11739384, 0.17882679, 0.24113527,
       0.30373559, 0.3652415 , 0.42725814, 0.48890997, 0.54757041,
       0.60345834, 0.65693857, 0.70604115, 0.75229826, 0.77951262])