In [1]:
import numpy as np
import tensorflow as tf
import logging
tf.get_logger().setLevel(logging.ERROR)

In [2]:
from mil.data.datasets import musk1, musk2, protein, elephant, corel_dogs, \
                              ucsb_breast_cancer, web_recommendation_1, birds_brown_creeper, \
                              mnist_bags

In [3]:
(bags_train, y_train), (bags_test, y_test) = musk1.load()

# Train discriminative mapping

In [4]:
from mil.trainer import Trainer

In [5]:
from mil.metrics import AUC, BinaryAccuracy
from mil.validators import KFold
from mil.trainer.trainer import Trainer
from mil.models import SVC
from mil.bag_representation.mapping import DiscriminativeMapping
from mil.preprocessing import StandarizerBagsList

In [6]:
trainer = Trainer()

metrics = [AUC, BinaryAccuracy]
model = SVC(kernel='linear', C=1, class_weight='balanced')
pipeline = [('scale', StandarizerBagsList()), ('disc_mapping', DiscriminativeMapping(m=30))]

trainer.prepare(model, preprocess_pipeline=pipeline ,metrics=metrics)

In [7]:
valid = KFold(n_splits=10, shuffle=True)

history = trainer.fit(bags_train, y_train, sample_weights='balanced', validation_strategy=valid, verbose=1)



In [8]:
print(np.mean([e['binaryaccuracy'] for e in history['metrics_val']]))
history['metrics_val']

0.65694517


[{'auc': 0.5, 'binaryaccuracy': 0.625},
 {'auc': 0.5, 'binaryaccuracy': 0.625},
 {'auc': 0.6118882, 'binaryaccuracy': 0.625},
 {'auc': 0.6806723, 'binaryaccuracy': 0.68303573},
 {'auc': 0.6842105, 'binaryaccuracy': 0.68928576},
 {'auc': 0.64426875, 'binaryaccuracy': 0.6458334},
 {'auc': 0.6488095, 'binaryaccuracy': 0.6556123},
 {'auc': 0.6480186, 'binaryaccuracy': 0.66294646},
 {'auc': 0.68014705, 'binaryaccuracy': 0.6845238},
 {'auc': 0.6711712, 'binaryaccuracy': 0.6732143}]

In [9]:
# most discriminative instances
trainer.pipeline['disc_mapping'].items_

array([ 48,  28, 243,  73,  24,  54, 248, 316, 246, 314,  91, 284,   9,
        40,  10,  31,  37,  21,  20,  36,  19,  18,  92,  93,  94, 288,
       287, 286, 282, 283], dtype=int64)

In [10]:
trainer.predict_metrics(bags_test, y_test)

{'auc': 0.5, 'binaryaccuracy': 0.57894737}

# Train MILES

In [11]:
from mil.validators import LeaveOneOut
from mil.models import MILES

In [12]:
trainer = Trainer()

metrics = [AUC, BinaryAccuracy]
model = MILES()
pipeline = [('scale', StandarizerBagsList())]

trainer.prepare(model, preprocess_pipeline=pipeline, metrics=metrics)

In [13]:
history = trainer.fit(bags_train, y_train, validation_strategy=LeaveOneOut(), sample_weights='balanced', verbose=1)



In [14]:
print(np.mean([e['binaryaccuracy'] for e in history['metrics_val']]))
#history['metrics_val']

0.6725049


In [15]:
trainer.predict_metrics(bags_test, y_test)

{'auc': 0.6363636, 'binaryaccuracy': 0.57894737}

# Train MILES mapping + SVC

In [16]:
from mil.bag_representation import MILESMapping

In [17]:
trainer = Trainer()

metrics = [AUC, BinaryAccuracy]
model = SVC(kernel='linear', C=1, class_weight='balanced')
pipeline = [('scale', StandarizerBagsList()), ('disc_mapping', MILESMapping())]

trainer.prepare(model, preprocess_pipeline=pipeline ,metrics=metrics)

In [18]:
valid = KFold(n_splits=10, shuffle=True)

history = trainer.fit(bags_train, y_train, sample_weights='balanced', validation_strategy=valid, verbose=1)



In [19]:
print(np.mean([e['binaryaccuracy'] for e in history['metrics_val']]))
history['metrics_val']

0.7616539


[{'auc': 0.8, 'binaryaccuracy': 0.75},
 {'auc': 0.7777778, 'binaryaccuracy': 0.75},
 {'auc': 0.7692308, 'binaryaccuracy': 0.75},
 {'auc': 0.78125, 'binaryaccuracy': 0.77678573},
 {'auc': 0.75, 'binaryaccuracy': 0.7357143},
 {'auc': 0.73913044, 'binaryaccuracy': 0.73214287},
 {'auc': 0.76, 'binaryaccuracy': 0.77040815},
 {'auc': 0.76785713, 'binaryaccuracy': 0.78125},
 {'auc': 0.77272725, 'binaryaccuracy': 0.77380955},
 {'auc': 0.7916666, 'binaryaccuracy': 0.79642856}]

In [20]:
trainer.predict_metrics(bags_test, y_test)

{'auc': 0.72727275, 'binaryaccuracy': 0.68421054}

# Train DeepAttentionMIL

In [21]:
from mil.models.bag_level.deep_attention import AttentionDeepPoolingMil
from mil.utils.utils import get_samples_weight
from mil.utils.padding import Padding

In [22]:
trainer = Trainer()

metrics = [AUC, BinaryAccuracy]
model = AttentionDeepPoolingMil(gated=False, threshold=0.4)
pipeline = [('scale', StandarizerBagsList()), ('padding', Padding())]

trainer.prepare(model, preprocess_pipeline=pipeline ,metrics=metrics)

In [23]:
valid = KFold(n_splits=10, shuffle=True)

history = trainer.fit(bags_train, y_train, validation_strategy=valid, sample_weights='balanced',
                      verbose=1, model__epochs=10, model__batch_size=2, model__verbose=0)



In [24]:
print(np.mean([e['binaryaccuracy'] for e in history['metrics_val']]))
history['metrics_val']

0.8256321


[{'auc': 1.0, 'binaryaccuracy': 0.875},
 {'auc': 1.0, 'binaryaccuracy': 0.875},
 {'auc': 0.8916084, 'binaryaccuracy': 0.8333333},
 {'auc': 0.90208334, 'binaryaccuracy': 0.80357146},
 {'auc': 0.91596633, 'binaryaccuracy': 0.81428576},
 {'auc': 0.909, 'binaryaccuracy': 0.7976191},
 {'auc': 0.9125926, 'binaryaccuracy': 0.8061225},
 {'auc': 0.922235, 'binaryaccuracy': 0.81250006},
 {'auc': 0.9149816, 'binaryaccuracy': 0.81746036},
 {'auc': 0.91216207, 'binaryaccuracy': 0.8214286}]

In [25]:
trainer.predict_metrics(bags_test, y_test)

{'auc': 0.8863636, 'binaryaccuracy': 0.94736844}

In [26]:
# positive instances with more than 0.4 in attention weight
trainer.get_positive_instances(bags_test)

<tf.Tensor: shape=(11, 2), dtype=int64, numpy=
array([[ 0,  0],
       [ 1,  3],
       [ 2,  1],
       [ 3,  0],
       [ 3,  4],
       [ 4,  0],
       [ 5,  3],
       [11,  0],
       [12,  1],
       [14,  0],
       [16,  0]], dtype=int64)>

# Train APR

In [27]:
from mil.models import APR

In [28]:
trainer = Trainer()

metrics = [AUC, BinaryAccuracy]
model = APR(thres=0.5, epsilon=0.05, step=1, verbose=0)
pipeline = [('scale', StandarizerBagsList())]

trainer.prepare(model, preprocess_pipeline=pipeline ,metrics=metrics)

In [29]:
valid = KFold(n_splits=10, shuffle=True)

history = trainer.fit(bags_train, y_train, validation_strategy=valid, sample_weights='balanced',
                      verbose=1, model__epochs=10, model__batch_size=2, model__verbose=0)



In [30]:
print(np.mean([e['binaryaccuracy'] for e in history['metrics_val']]))
history['metrics_val']

0.84615296


[{'auc': 1.0, 'binaryaccuracy': 1.0},
 {'auc': 0.8333333, 'binaryaccuracy': 0.875},
 {'auc': 0.875, 'binaryaccuracy': 0.875},
 {'auc': 0.84375, 'binaryaccuracy': 0.83482146},
 {'auc': 0.825, 'binaryaccuracy': 0.81071436},
 {'auc': 0.8023716, 'binaryaccuracy': 0.7946429},
 {'auc': 0.822963, 'binaryaccuracy': 0.8239797},
 {'auc': 0.8248849, 'binaryaccuracy': 0.82812506},
 {'auc': 0.8030303, 'binaryaccuracy': 0.7996032},
 {'auc': 0.8235736, 'binaryaccuracy': 0.8196429}]

In [31]:
trainer.predict_metrics(bags_test, y_test)

{'auc': 0.5795455, 'binaryaccuracy': 0.6315789}

In [32]:
trainer.get_positive_instances(bags_test)

array([[ 0,  1],
       [ 1,  1],
       [ 2,  0],
       [ 2,  1],
       [ 4,  0],
       [ 5,  1],
       [ 5,  3],
       [ 6,  0],
       [ 7,  0],
       [ 8, 20],
       [ 8, 24],
       [ 8, 31],
       [ 8, 38],
       [10,  0],
       [10,  2],
       [10,  4],
       [10,  6],
       [11,  1],
       [12,  0],
       [12,  1],
       [13,  0],
       [14,  0],
       [14,  1],
       [14,  2],
       [16,  0],
       [16,  1],
       [16,  3],
       [17,  0],
       [17,  3],
       [17,  4],
       [17,  5],
       [17,  8],
       [17, 12],
       [17, 13],
       [18,  1]])

# Train arithmetic mean bag + SVM

In [33]:
from mil.bag_representation import ArithmeticMeanBagRepresentation

In [34]:
trainer = Trainer()

metrics = [AUC, BinaryAccuracy]
model = SVC(kernel='linear', C=1, class_weight='balanced')
pipeline = [('mean_bag', ArithmeticMeanBagRepresentation())]

trainer.prepare(model, preprocess_pipeline=pipeline ,metrics=metrics)

In [35]:
valid = KFold(n_splits=10, shuffle=True)

history = trainer.fit(bags_train, y_train, validation_strategy=valid, sample_weights='balanced',
                      verbose=1)



In [36]:
print(np.mean([e['binaryaccuracy'] for e in history['metrics_val']]))
history['metrics_val']

0.81275237


[{'auc': 0.85714287, 'binaryaccuracy': 0.75},
 {'auc': 0.9090909, 'binaryaccuracy': 0.875},
 {'auc': 0.8666667, 'binaryaccuracy': 0.8333333},
 {'auc': 0.8172269, 'binaryaccuracy': 0.80357146},
 {'auc': 0.8222222, 'binaryaccuracy': 0.81428576},
 {'auc': 0.83, 'binaryaccuracy': 0.8214286},
 {'auc': 0.8333333, 'binaryaccuracy': 0.82653064},
 {'auc': 0.7943548, 'binaryaccuracy': 0.7946429},
 {'auc': 0.80303025, 'binaryaccuracy': 0.80158734},
 {'auc': 0.8085585, 'binaryaccuracy': 0.8071429}]

In [37]:
trainer.predict_metrics(bags_test, y_test)

{'auc': 0.875, 'binaryaccuracy': 0.8947368}

# Train arithmetic mean bag + LogisticRegression

In [38]:
from mil.models import LogisticRegressionCV

In [39]:
trainer = Trainer()

metrics = [AUC, BinaryAccuracy]
model = LogisticRegressionCV(class_weight='balanced', max_iter=1000)
pipeline = [('mean_bag', ArithmeticMeanBagRepresentation())]

trainer.prepare(model, preprocess_pipeline=pipeline ,metrics=metrics)

In [40]:
valid = KFold(n_splits=10, shuffle=True)

history = trainer.fit(bags_train, y_train, validation_strategy=valid, sample_weights='balanced',
                      verbose=1)



In [41]:
print(np.mean([e['binaryaccuracy'] for e in history['metrics_val']]))
history['metrics_val']

0.8263613


[{'auc': 1.0, 'binaryaccuracy': 1.0},
 {'auc': 0.7833333, 'binaryaccuracy': 0.8125},
 {'auc': 0.7916667, 'binaryaccuracy': 0.7916667},
 {'auc': 0.8416667, 'binaryaccuracy': 0.84375},
 {'auc': 0.87222224, 'binaryaccuracy': 0.875},
 {'auc': 0.77766794, 'binaryaccuracy': 0.7767857},
 {'auc': 0.7888889, 'binaryaccuracy': 0.7882653},
 {'auc': 0.77988505, 'binaryaccuracy': 0.77901787},
 {'auc': 0.78768384, 'binaryaccuracy': 0.78769845},
 {'auc': 0.8081832, 'binaryaccuracy': 0.8089286}]

In [42]:
trainer.predict_metrics(bags_test, y_test)

{'auc': 0.8295455, 'binaryaccuracy': 0.84210527}

# Train median mean bag + RandomForest

In [43]:
from mil.models import RandomForestClassifier
from mil.bag_representation import MedianBagRepresentation

In [44]:
trainer = Trainer()

metrics = [AUC, BinaryAccuracy]
model = RandomForestClassifier(class_weight='balanced')
pipeline = [('median_bag', MedianBagRepresentation())]

trainer.prepare(model, preprocess_pipeline=pipeline ,metrics=metrics)

In [45]:
valid = KFold(n_splits=10, shuffle=True)

history = trainer.fit(bags_train, y_train, validation_strategy=valid, sample_weights='balanced',
                      verbose=1)



In [46]:
print(np.mean([e['binaryaccuracy'] for e in history['metrics_val']]))
history['metrics_val']

0.85787773


[{'auc': 1.0, 'binaryaccuracy': 1.0},
 {'auc': 0.95, 'binaryaccuracy': 0.9375},
 {'auc': 0.9615385, 'binaryaccuracy': 0.9583333},
 {'auc': 0.9059829, 'binaryaccuracy': 0.89732146},
 {'auc': 0.78888893, 'binaryaccuracy': 0.77500004},
 {'auc': 0.8221344, 'binaryaccuracy': 0.8125},
 {'auc': 0.80952376, 'binaryaccuracy': 0.7984694},
 {'auc': 0.7971264, 'binaryaccuracy': 0.78794646},
 {'auc': 0.8033088, 'binaryaccuracy': 0.795635},
 {'auc': 0.822072, 'binaryaccuracy': 0.8160715}]

In [47]:
trainer.predict_metrics(bags_test, y_test)

{'auc': 0.8295455, 'binaryaccuracy': 0.84210527}