## Machine Learning project
[MI-net, instance space](#section_id)
[MI-net deep supervision, instance space](#MInetdeepsuper)


In [5]:
pip install -r requirements.txt

Collecting git+https://github.com/chlorochrule/cknn (from -r requirements.txt (line 9))
  Cloning https://github.com/chlorochrule/cknn to /tmp/pip-req-build-wsl8xmq4
  Running command git clone --filter=blob:none --quiet https://github.com/chlorochrule/cknn /tmp/pip-req-build-wsl8xmq4
  Resolved https://github.com/chlorochrule/cknn to commit 7d05c5049da72a573bd486fca6647f8b0376243c
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting misvm
  Cloning https://github.com/garydoranjr/misvm.git to /tmp/pip-install-3fnb55yk/misvm_c76dfc420c444166aa160bbde23ddccf
  Running command git clone --filter=blob:none --quiet https://github.com/garydoranjr/misvm.git /tmp/pip-install-3fnb55yk/misvm_c76dfc420c444166aa160bbde23ddccf
  Resolved https://github.com/garydoranjr/misvm.git to commit b2118fe04d98c00436bdf8a0e4bbfb6082c5751c
  Preparing metadata (setup.py) ... [?25ldo



Note: you may need to restart the kernel to use updated packages.


### Embedded-Space


### MI-net
<a id='section_id'></a>

In [6]:
import numpy as np
import sys
import time
import random
from random import shuffle
import argparse

from keras.models import Model
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.layers import Input, Dense, Layer, Dropout

from mil_nets.dataset import load_dataset
from mil_nets.layer import Feature_pooling
from mil_nets.metrics import bag_accuracy
from mil_nets.objectives import bag_loss
from mil_nets.utils import convertToBatch

In [7]:
def test_eval(model, test_set):
    """Evaluate on testing set.
    Parameters
    -----------------
    model : keras.engine.training.Model object
        The training MI-Net model.
    test_set : list
        A list of testing set contains all training bags features and labels.
    Returns
    -----------------
    test_loss : float
        Mean loss of evaluating on testing set.
    test_acc : float
        Mean accuracy of evaluating on testing set.
    """
    num_test_batch = len(test_set)
    test_loss = np.zeros((num_test_batch, 1), dtype=np.float32)
    test_acc = np.zeros((num_test_batch, 1), dtype=np.float32)
    for ibatch, batch in enumerate(test_set):
        result = model.test_on_batch({'input':batch[0].astype(np.float32)}, {'fp':batch[1].astype(np.float32)})
        test_loss[ibatch] = result[0]
        test_acc[ibatch][0] = result[1]
    return np.mean(test_loss), np.mean(test_acc)

def train_eval(model, train_set):
    """Evaluate on training set.
    Parameters
    -----------------
    model : keras.engine.training.Model object
        The training MI-Net model.
    train_set : list
        A list of training set contains all training bags features and labels.
    Returns
    -----------------
    test_loss : float
        Mean loss of evaluating on traing set..astype(np.float32)
    test_acc : float
        Mean accuracy of evaluating on testing set.
    """
    num_train_batch = len(train_set)
    train_loss = np.zeros((num_train_batch, 1), dtype=np.float32)
    train_acc = np.zeros((num_train_batch, 1), dtype=np.float32)
    shuffle(train_set)
    for ibatch, batch in enumerate(train_set):
        result = model.train_on_batch({'input':batch[0].astype(np.float32)}, {'fp':batch[1].astype(np.float32)})
        train_loss[ibatch] = result[0]
        train_acc[ibatch][0] = result[1]
    return np.mean(train_loss), np.mean(train_acc)

def MI_Net(dataset):
    """Train and evaluate on MI-Net.
    Parameters
    -----------------
    dataset : dict
        A dictionary contains all dataset information. We split train/test by keys.
    Returns
    -----------------
    test_acc : float
        Testing accuracy of MI-Net.
    """
    weight_decay=0.005
    init_lr=5e-4
    pooling_mode='max'
    momentum=0.9
    max_epoch=50
    # load data and convert type
    train_bags = dataset['train']
    test_bags = dataset['test']

    # convert bag to batch
    train_set = convertToBatch(train_bags)
    test_set = convertToBatch(test_bags)
    dimension = train_set[0][0].shape[1]

    # data: instance feature, n*d, n = number of training instance
    data_input = Input(shape=(dimension,), dtype='float32', name='input')

    # fully-connected
    fc1 = Dense(256, activation='relu', kernel_regularizer=l2(weight_decay))(data_input)
    fc2 = Dense(128, activation='relu', kernel_regularizer=l2(weight_decay))(fc1)
    fc3 = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(fc2)

    # dropout
    dropout = Dropout(rate=0.5)(fc3)

    # features pooling
    fp = Feature_pooling(output_dim=1, kernel_regularizer=l2(weight_decay), pooling_mode=pooling_mode, name='fp')(dropout)

    model = Model(inputs=[data_input], outputs=[fp])
    sgd = SGD(lr=init_lr, decay=1e-4, momentum=momentum, nesterov=True)
    model.compile(loss=bag_loss, optimizer=sgd, metrics=[bag_accuracy])

    # train model
    t1 = time.time()
    num_batch = len(train_set)
    for epoch in range(max_epoch):
        train_loss, train_acc = train_eval(model, train_set)
        test_loss, test_acc = test_eval(model, test_set)
        print('epoch=', epoch, '  train_loss= {:.3f}'.format(train_loss), '  train_acc= {:.3f}'.format(train_acc), '  test_loss={:.3f}'.format(test_loss), '  test_acc= {:.3f}'.format(test_acc))
    t2 = time.time()
    print('run time:', (t2-t1) / 60, 'min')
    print('test_acc={:.3f}'.format(test_acc))

    return test_acc

In [8]:
# perform five times 10-fold cross-validation experiments
run = 5
n_folds = 10
acc = np.zeros((run, n_folds), dtype=np.float32)
for irun in range(run):
    dataset = load_dataset('musk1', n_folds)
    for ifold in range(n_folds):
        print('run=', irun, '  fold=', ifold)
        acc[irun][ifold] = MI_Net(dataset[ifold])
print('MI-Net mean accuracy = ', np.mean(acc))
print('std = ', np.std(acc))

run= 0   fold= 0
epoch= 0   train_loss= 3.015   train_acc= 0.610   test_loss=2.832   test_acc= 0.600
epoch= 1   train_loss= 2.635   train_acc= 0.841   test_loss=2.704   test_acc= 0.800
epoch= 2   train_loss= 2.479   train_acc= 0.915   test_loss=2.624   test_acc= 0.900
epoch= 3   train_loss= 2.404   train_acc= 0.963   test_loss=2.583   test_acc= 0.900
epoch= 4   train_loss= 2.368   train_acc= 0.951   test_loss=2.574   test_acc= 0.900
epoch= 5   train_loss= 2.295   train_acc= 0.988   test_loss=2.524   test_acc= 0.900
epoch= 6   train_loss= 2.256   train_acc= 1.000   test_loss=2.543   test_acc= 0.900
epoch= 7   train_loss= 2.275   train_acc= 0.963   test_loss=2.460   test_acc= 0.900
epoch= 8   train_loss= 2.230   train_acc= 0.988   test_loss=2.489   test_acc= 0.900
epoch= 9   train_loss= 2.205   train_acc= 1.000   test_loss=2.468   test_acc= 0.900
epoch= 10   train_loss= 2.190   train_acc= 0.988   test_loss=2.462   test_acc= 0.900
epoch= 11   train_loss= 2.166   train_acc= 0.988   test_lo

epoch= 46   train_loss= 1.704   train_acc= 1.000   test_loss=2.075   test_acc= 0.900
epoch= 47   train_loss= 1.695   train_acc= 1.000   test_loss=2.066   test_acc= 0.900
epoch= 48   train_loss= 1.688   train_acc= 1.000   test_loss=2.096   test_acc= 0.800
epoch= 49   train_loss= 1.674   train_acc= 1.000   test_loss=2.082   test_acc= 0.800
run time: 0.4483219861984253 min
test_acc=0.800
run= 0   fold= 2
epoch= 0   train_loss= 3.026   train_acc= 0.566   test_loss=2.823   test_acc= 0.889
epoch= 1   train_loss= 2.655   train_acc= 0.843   test_loss=2.776   test_acc= 0.778
epoch= 2   train_loss= 2.519   train_acc= 0.892   test_loss=2.669   test_acc= 0.889
epoch= 3   train_loss= 2.414   train_acc= 0.952   test_loss=2.617   test_acc= 0.778
epoch= 4   train_loss= 2.358   train_acc= 0.976   test_loss=2.598   test_acc= 0.889
epoch= 5   train_loss= 2.328   train_acc= 0.988   test_loss=2.555   test_acc= 1.000
epoch= 6   train_loss= 2.289   train_acc= 0.976   test_loss=2.531   test_acc= 1.000
epoch= 

epoch= 42   train_loss= 1.754   train_acc= 1.000   test_loss=2.036   test_acc= 0.889
epoch= 43   train_loss= 1.746   train_acc= 1.000   test_loss=2.051   test_acc= 0.889
epoch= 44   train_loss= 1.729   train_acc= 1.000   test_loss=2.003   test_acc= 0.889
epoch= 45   train_loss= 1.723   train_acc= 1.000   test_loss=1.996   test_acc= 0.889
epoch= 46   train_loss= 1.710   train_acc= 1.000   test_loss=1.992   test_acc= 0.889
epoch= 47   train_loss= 1.704   train_acc= 1.000   test_loss=1.976   test_acc= 0.889
epoch= 48   train_loss= 1.694   train_acc= 1.000   test_loss=1.961   test_acc= 0.889
epoch= 49   train_loss= 1.681   train_acc= 1.000   test_loss=1.961   test_acc= 0.889
run time: 0.4253049731254578 min
test_acc=0.889
run= 0   fold= 4
epoch= 0   train_loss= 2.938   train_acc= 0.614   test_loss=2.713   test_acc= 0.778
epoch= 1   train_loss= 2.582   train_acc= 0.904   test_loss=2.669   test_acc= 0.889
epoch= 2   train_loss= 2.508   train_acc= 0.928   test_loss=2.611   test_acc= 0.889
epo

epoch= 38   train_loss= 1.785   train_acc= 1.000   test_loss=2.068   test_acc= 0.778
epoch= 39   train_loss= 1.773   train_acc= 1.000   test_loss=2.037   test_acc= 0.889
epoch= 40   train_loss= 1.760   train_acc= 1.000   test_loss=2.042   test_acc= 0.778
epoch= 41   train_loss= 1.750   train_acc= 1.000   test_loss=2.026   test_acc= 0.778
epoch= 42   train_loss= 1.743   train_acc= 1.000   test_loss=2.020   test_acc= 0.778
epoch= 43   train_loss= 1.730   train_acc= 1.000   test_loss=1.997   test_acc= 0.889
epoch= 44   train_loss= 1.723   train_acc= 1.000   test_loss=1.984   test_acc= 0.889
epoch= 45   train_loss= 1.715   train_acc= 1.000   test_loss=2.000   test_acc= 0.778
epoch= 46   train_loss= 1.709   train_acc= 1.000   test_loss=1.952   test_acc= 0.889
epoch= 47   train_loss= 1.691   train_acc= 1.000   test_loss=1.963   test_acc= 0.889
epoch= 48   train_loss= 1.684   train_acc= 1.000   test_loss=1.952   test_acc= 0.889
epoch= 49   train_loss= 1.676   train_acc= 1.000   test_loss=1.95

epoch= 34   train_loss= 1.837   train_acc= 1.000   test_loss=1.873   test_acc= 1.000
epoch= 35   train_loss= 1.825   train_acc= 1.000   test_loss=1.855   test_acc= 1.000
epoch= 36   train_loss= 1.811   train_acc= 1.000   test_loss=1.844   test_acc= 1.000
epoch= 37   train_loss= 1.802   train_acc= 1.000   test_loss=1.834   test_acc= 1.000
epoch= 38   train_loss= 1.790   train_acc= 1.000   test_loss=1.825   test_acc= 1.000
epoch= 39   train_loss= 1.783   train_acc= 1.000   test_loss=1.809   test_acc= 1.000
epoch= 40   train_loss= 1.767   train_acc= 1.000   test_loss=1.799   test_acc= 1.000
epoch= 41   train_loss= 1.759   train_acc= 1.000   test_loss=1.791   test_acc= 1.000
epoch= 42   train_loss= 1.749   train_acc= 1.000   test_loss=1.785   test_acc= 1.000
epoch= 43   train_loss= 1.738   train_acc= 1.000   test_loss=1.774   test_acc= 1.000
epoch= 44   train_loss= 1.730   train_acc= 1.000   test_loss=1.764   test_acc= 1.000
epoch= 45   train_loss= 1.722   train_acc= 1.000   test_loss=1.75

epoch= 30   train_loss= 1.878   train_acc= 1.000   test_loss=2.133   test_acc= 0.889
epoch= 31   train_loss= 1.869   train_acc= 1.000   test_loss=2.122   test_acc= 0.889
epoch= 32   train_loss= 1.856   train_acc= 1.000   test_loss=2.111   test_acc= 0.889
epoch= 33   train_loss= 1.843   train_acc= 1.000   test_loss=2.103   test_acc= 0.889
epoch= 34   train_loss= 1.830   train_acc= 1.000   test_loss=2.087   test_acc= 0.889
epoch= 35   train_loss= 1.818   train_acc= 1.000   test_loss=2.077   test_acc= 0.889
epoch= 36   train_loss= 1.807   train_acc= 1.000   test_loss=2.068   test_acc= 0.889
epoch= 37   train_loss= 1.800   train_acc= 1.000   test_loss=2.055   test_acc= 0.889
epoch= 38   train_loss= 1.787   train_acc= 1.000   test_loss=2.040   test_acc= 0.889
epoch= 39   train_loss= 1.775   train_acc= 1.000   test_loss=2.030   test_acc= 0.889
epoch= 40   train_loss= 1.766   train_acc= 1.000   test_loss=2.028   test_acc= 0.889
epoch= 41   train_loss= 1.757   train_acc= 1.000   test_loss=2.01

epoch= 26   train_loss= 1.936   train_acc= 1.000   test_loss=2.158   test_acc= 0.900
epoch= 27   train_loss= 1.923   train_acc= 1.000   test_loss=2.143   test_acc= 0.900
epoch= 28   train_loss= 1.909   train_acc= 1.000   test_loss=2.127   test_acc= 0.900
epoch= 29   train_loss= 1.891   train_acc= 1.000   test_loss=2.127   test_acc= 0.900
epoch= 30   train_loss= 1.880   train_acc= 1.000   test_loss=2.113   test_acc= 0.900
epoch= 31   train_loss= 1.873   train_acc= 1.000   test_loss=2.088   test_acc= 0.900
epoch= 32   train_loss= 1.856   train_acc= 1.000   test_loss=2.079   test_acc= 0.900
epoch= 33   train_loss= 1.846   train_acc= 1.000   test_loss=2.071   test_acc= 0.900
epoch= 34   train_loss= 1.836   train_acc= 1.000   test_loss=2.063   test_acc= 0.900
epoch= 35   train_loss= 1.824   train_acc= 1.000   test_loss=2.048   test_acc= 0.900
epoch= 36   train_loss= 1.813   train_acc= 1.000   test_loss=2.033   test_acc= 0.900
epoch= 37   train_loss= 1.803   train_acc= 1.000   test_loss=2.02

epoch= 22   train_loss= 1.986   train_acc= 1.000   test_loss=2.070   test_acc= 1.000
epoch= 23   train_loss= 1.973   train_acc= 1.000   test_loss=2.061   test_acc= 1.000
epoch= 24   train_loss= 1.961   train_acc= 1.000   test_loss=2.036   test_acc= 1.000
epoch= 25   train_loss= 1.949   train_acc= 1.000   test_loss=2.023   test_acc= 1.000
epoch= 26   train_loss= 1.935   train_acc= 1.000   test_loss=2.005   test_acc= 1.000
epoch= 27   train_loss= 1.921   train_acc= 1.000   test_loss=1.992   test_acc= 1.000
epoch= 28   train_loss= 1.910   train_acc= 1.000   test_loss=1.984   test_acc= 1.000
epoch= 29   train_loss= 1.894   train_acc= 1.000   test_loss=1.968   test_acc= 1.000
epoch= 30   train_loss= 1.885   train_acc= 1.000   test_loss=1.960   test_acc= 1.000
epoch= 31   train_loss= 1.874   train_acc= 1.000   test_loss=1.943   test_acc= 1.000
epoch= 32   train_loss= 1.862   train_acc= 1.000   test_loss=1.931   test_acc= 1.000
epoch= 33   train_loss= 1.850   train_acc= 1.000   test_loss=1.91

epoch= 18   train_loss= 2.049   train_acc= 1.000   test_loss=2.880   test_acc= 0.667
epoch= 19   train_loss= 2.028   train_acc= 1.000   test_loss=2.834   test_acc= 0.667
epoch= 20   train_loss= 2.009   train_acc= 1.000   test_loss=2.754   test_acc= 0.667
epoch= 21   train_loss= 1.998   train_acc= 1.000   test_loss=2.823   test_acc= 0.667
epoch= 22   train_loss= 1.981   train_acc= 1.000   test_loss=2.744   test_acc= 0.667
epoch= 23   train_loss= 1.978   train_acc= 0.988   test_loss=2.742   test_acc= 0.667
epoch= 24   train_loss= 1.959   train_acc= 1.000   test_loss=2.743   test_acc= 0.667
epoch= 25   train_loss= 1.942   train_acc= 1.000   test_loss=2.758   test_acc= 0.667
epoch= 26   train_loss= 1.928   train_acc= 1.000   test_loss=2.754   test_acc= 0.667
epoch= 27   train_loss= 1.912   train_acc= 1.000   test_loss=2.705   test_acc= 0.667
epoch= 28   train_loss= 1.901   train_acc= 1.000   test_loss=2.763   test_acc= 0.667
epoch= 29   train_loss= 1.890   train_acc= 1.000   test_loss=2.74

epoch= 14   train_loss= 2.102   train_acc= 1.000   test_loss=2.467   test_acc= 0.778
epoch= 15   train_loss= 2.090   train_acc= 1.000   test_loss=2.434   test_acc= 0.778
epoch= 16   train_loss= 2.064   train_acc= 1.000   test_loss=2.426   test_acc= 0.778
epoch= 17   train_loss= 2.050   train_acc= 1.000   test_loss=2.414   test_acc= 0.778
epoch= 18   train_loss= 2.039   train_acc= 1.000   test_loss=2.347   test_acc= 0.778
epoch= 19   train_loss= 2.021   train_acc= 1.000   test_loss=2.329   test_acc= 0.778
epoch= 20   train_loss= 2.010   train_acc= 1.000   test_loss=2.356   test_acc= 0.778
epoch= 21   train_loss= 1.994   train_acc= 1.000   test_loss=2.333   test_acc= 0.778
epoch= 22   train_loss= 1.978   train_acc= 1.000   test_loss=2.335   test_acc= 0.778
epoch= 23   train_loss= 1.974   train_acc= 1.000   test_loss=2.275   test_acc= 0.778
epoch= 24   train_loss= 1.952   train_acc= 1.000   test_loss=2.318   test_acc= 0.778
epoch= 25   train_loss= 1.946   train_acc= 1.000   test_loss=2.30

epoch= 10   train_loss= 2.179   train_acc= 1.000   test_loss=2.331   test_acc= 0.889
epoch= 11   train_loss= 2.161   train_acc= 1.000   test_loss=2.303   test_acc= 0.889
epoch= 12   train_loss= 2.149   train_acc= 0.988   test_loss=2.284   test_acc= 0.889
epoch= 13   train_loss= 2.124   train_acc= 1.000   test_loss=2.271   test_acc= 0.889
epoch= 14   train_loss= 2.106   train_acc= 1.000   test_loss=2.249   test_acc= 1.000
epoch= 15   train_loss= 2.085   train_acc= 1.000   test_loss=2.229   test_acc= 1.000
epoch= 16   train_loss= 2.066   train_acc= 1.000   test_loss=2.215   test_acc= 0.889
epoch= 17   train_loss= 2.051   train_acc= 1.000   test_loss=2.198   test_acc= 1.000
epoch= 18   train_loss= 2.042   train_acc= 1.000   test_loss=2.182   test_acc= 1.000
epoch= 19   train_loss= 2.024   train_acc= 1.000   test_loss=2.171   test_acc= 1.000
epoch= 20   train_loss= 2.008   train_acc= 1.000   test_loss=2.153   test_acc= 1.000
epoch= 21   train_loss= 1.999   train_acc= 1.000   test_loss=2.13

epoch= 6   train_loss= 2.263   train_acc= 1.000   test_loss=2.845   test_acc= 0.700
epoch= 7   train_loss= 2.247   train_acc= 1.000   test_loss=2.805   test_acc= 0.700
epoch= 8   train_loss= 2.217   train_acc= 1.000   test_loss=2.835   test_acc= 0.700
epoch= 9   train_loss= 2.201   train_acc= 0.988   test_loss=2.844   test_acc= 0.700
epoch= 10   train_loss= 2.169   train_acc= 1.000   test_loss=2.849   test_acc= 0.700
epoch= 11   train_loss= 2.159   train_acc= 1.000   test_loss=2.816   test_acc= 0.700
epoch= 12   train_loss= 2.138   train_acc= 1.000   test_loss=2.790   test_acc= 0.700
epoch= 13   train_loss= 2.122   train_acc= 1.000   test_loss=2.803   test_acc= 0.700
epoch= 14   train_loss= 2.099   train_acc= 1.000   test_loss=2.779   test_acc= 0.700
epoch= 15   train_loss= 2.086   train_acc= 1.000   test_loss=2.777   test_acc= 0.700
epoch= 16   train_loss= 2.078   train_acc= 1.000   test_loss=2.727   test_acc= 0.700
epoch= 17   train_loss= 2.058   train_acc= 1.000   test_loss=2.729   

epoch= 2   train_loss= 2.523   train_acc= 0.904   test_loss=2.527   test_acc= 1.000
epoch= 3   train_loss= 2.417   train_acc= 0.976   test_loss=2.503   test_acc= 1.000
epoch= 4   train_loss= 2.366   train_acc= 0.952   test_loss=2.446   test_acc= 1.000
epoch= 5   train_loss= 2.305   train_acc= 0.988   test_loss=2.392   test_acc= 1.000
epoch= 6   train_loss= 2.295   train_acc= 0.976   test_loss=2.388   test_acc= 1.000
epoch= 7   train_loss= 2.266   train_acc= 0.988   test_loss=2.372   test_acc= 1.000
epoch= 8   train_loss= 2.204   train_acc= 1.000   test_loss=2.304   test_acc= 1.000
epoch= 9   train_loss= 2.209   train_acc= 0.988   test_loss=2.272   test_acc= 1.000
epoch= 10   train_loss= 2.176   train_acc= 1.000   test_loss=2.263   test_acc= 1.000
epoch= 11   train_loss= 2.157   train_acc= 1.000   test_loss=2.243   test_acc= 1.000
epoch= 12   train_loss= 2.139   train_acc= 1.000   test_loss=2.235   test_acc= 1.000
epoch= 13   train_loss= 2.137   train_acc= 0.988   test_loss=2.211   test

epoch= 48   train_loss= 1.683   train_acc= 1.000   test_loss=1.933   test_acc= 0.889
epoch= 49   train_loss= 1.672   train_acc= 1.000   test_loss=1.913   test_acc= 0.889
run time: 0.4215832193692525 min
test_acc=0.889
run= 2   fold= 5
epoch= 0   train_loss= 2.911   train_acc= 0.614   test_loss=2.799   test_acc= 0.778
epoch= 1   train_loss= 2.620   train_acc= 0.867   test_loss=2.754   test_acc= 0.778
epoch= 2   train_loss= 2.497   train_acc= 0.952   test_loss=2.629   test_acc= 0.889
epoch= 3   train_loss= 2.437   train_acc= 0.940   test_loss=2.608   test_acc= 0.889
epoch= 4   train_loss= 2.379   train_acc= 0.952   test_loss=2.535   test_acc= 0.889
epoch= 5   train_loss= 2.328   train_acc= 0.976   test_loss=2.530   test_acc= 0.889
epoch= 6   train_loss= 2.298   train_acc= 0.988   test_loss=2.451   test_acc= 0.889
epoch= 7   train_loss= 2.258   train_acc= 0.988   test_loss=2.435   test_acc= 0.889
epoch= 8   train_loss= 2.225   train_acc= 0.988   test_loss=2.392   test_acc= 0.889
epoch= 9 

epoch= 44   train_loss= 1.736   train_acc= 1.000   test_loss=1.989   test_acc= 0.778
epoch= 45   train_loss= 1.720   train_acc= 1.000   test_loss=1.978   test_acc= 0.778
epoch= 46   train_loss= 1.713   train_acc= 1.000   test_loss=1.977   test_acc= 0.778
epoch= 47   train_loss= 1.703   train_acc= 1.000   test_loss=1.954   test_acc= 0.778
epoch= 48   train_loss= 1.690   train_acc= 1.000   test_loss=1.934   test_acc= 0.778
epoch= 49   train_loss= 1.684   train_acc= 1.000   test_loss=1.933   test_acc= 0.778
run time: 0.43807835976282755 min
test_acc=0.778
run= 2   fold= 7
epoch= 0   train_loss= 2.908   train_acc= 0.651   test_loss=2.976   test_acc= 0.667
epoch= 1   train_loss= 2.633   train_acc= 0.819   test_loss=2.787   test_acc= 0.778
epoch= 2   train_loss= 2.503   train_acc= 0.892   test_loss=2.850   test_acc= 0.667
epoch= 3   train_loss= 2.454   train_acc= 0.928   test_loss=2.715   test_acc= 0.778
epoch= 4   train_loss= 2.364   train_acc= 0.964   test_loss=2.770   test_acc= 0.667
epoc

epoch= 40   train_loss= 1.757   train_acc= 1.000   test_loss=2.019   test_acc= 0.889
epoch= 41   train_loss= 1.746   train_acc= 1.000   test_loss=2.007   test_acc= 0.889
epoch= 42   train_loss= 1.733   train_acc= 1.000   test_loss=1.997   test_acc= 0.889
epoch= 43   train_loss= 1.725   train_acc= 1.000   test_loss=1.994   test_acc= 0.889
epoch= 44   train_loss= 1.715   train_acc= 1.000   test_loss=1.981   test_acc= 0.889
epoch= 45   train_loss= 1.704   train_acc= 1.000   test_loss=1.973   test_acc= 0.889
epoch= 46   train_loss= 1.696   train_acc= 1.000   test_loss=1.959   test_acc= 0.889
epoch= 47   train_loss= 1.684   train_acc= 1.000   test_loss=1.952   test_acc= 0.889
epoch= 48   train_loss= 1.674   train_acc= 1.000   test_loss=1.940   test_acc= 0.889
epoch= 49   train_loss= 1.667   train_acc= 1.000   test_loss=1.927   test_acc= 0.889
run time: 0.4604421774546305 min
test_acc=0.889
run= 2   fold= 9
epoch= 0   train_loss= 2.842   train_acc= 0.723   test_loss=3.064   test_acc= 0.667
e

epoch= 36   train_loss= 1.814   train_acc= 1.000   test_loss=2.511   test_acc= 0.700
epoch= 37   train_loss= 1.806   train_acc= 1.000   test_loss=2.444   test_acc= 0.700
epoch= 38   train_loss= 1.794   train_acc= 1.000   test_loss=2.428   test_acc= 0.800
epoch= 39   train_loss= 1.783   train_acc= 1.000   test_loss=2.445   test_acc= 0.800
epoch= 40   train_loss= 1.771   train_acc= 1.000   test_loss=2.450   test_acc= 0.700
epoch= 41   train_loss= 1.760   train_acc= 1.000   test_loss=2.440   test_acc= 0.800
epoch= 42   train_loss= 1.752   train_acc= 1.000   test_loss=2.429   test_acc= 0.800
epoch= 43   train_loss= 1.742   train_acc= 1.000   test_loss=2.437   test_acc= 0.800
epoch= 44   train_loss= 1.729   train_acc= 1.000   test_loss=2.424   test_acc= 0.800
epoch= 45   train_loss= 1.720   train_acc= 1.000   test_loss=2.402   test_acc= 0.800
epoch= 46   train_loss= 1.711   train_acc= 1.000   test_loss=2.393   test_acc= 0.700
epoch= 47   train_loss= 1.700   train_acc= 1.000   test_loss=2.39

epoch= 32   train_loss= 1.842   train_acc= 1.000   test_loss=2.291   test_acc= 0.889
epoch= 33   train_loss= 1.831   train_acc= 1.000   test_loss=2.259   test_acc= 0.889
epoch= 34   train_loss= 1.830   train_acc= 1.000   test_loss=2.261   test_acc= 0.778
epoch= 35   train_loss= 1.809   train_acc= 1.000   test_loss=2.261   test_acc= 0.778
epoch= 36   train_loss= 1.799   train_acc= 1.000   test_loss=2.257   test_acc= 0.778
epoch= 37   train_loss= 1.786   train_acc= 1.000   test_loss=2.230   test_acc= 0.889
epoch= 38   train_loss= 1.777   train_acc= 1.000   test_loss=2.219   test_acc= 0.889
epoch= 39   train_loss= 1.764   train_acc= 1.000   test_loss=2.226   test_acc= 0.778
epoch= 40   train_loss= 1.756   train_acc= 1.000   test_loss=2.206   test_acc= 0.889
epoch= 41   train_loss= 1.744   train_acc= 1.000   test_loss=2.198   test_acc= 0.778
epoch= 42   train_loss= 1.735   train_acc= 1.000   test_loss=2.205   test_acc= 0.778
epoch= 43   train_loss= 1.722   train_acc= 1.000   test_loss=2.20

epoch= 28   train_loss= 1.904   train_acc= 1.000   test_loss=2.092   test_acc= 1.000
epoch= 29   train_loss= 1.889   train_acc= 1.000   test_loss=2.087   test_acc= 0.889
epoch= 30   train_loss= 1.882   train_acc= 1.000   test_loss=2.035   test_acc= 1.000
epoch= 31   train_loss= 1.869   train_acc= 1.000   test_loss=2.044   test_acc= 1.000
epoch= 32   train_loss= 1.857   train_acc= 1.000   test_loss=2.043   test_acc= 1.000
epoch= 33   train_loss= 1.847   train_acc= 1.000   test_loss=2.014   test_acc= 1.000
epoch= 34   train_loss= 1.832   train_acc= 1.000   test_loss=2.022   test_acc= 1.000
epoch= 35   train_loss= 1.819   train_acc= 1.000   test_loss=1.976   test_acc= 1.000
epoch= 36   train_loss= 1.810   train_acc= 1.000   test_loss=2.014   test_acc= 0.889
epoch= 37   train_loss= 1.799   train_acc= 1.000   test_loss=1.965   test_acc= 1.000
epoch= 38   train_loss= 1.789   train_acc= 1.000   test_loss=1.959   test_acc= 1.000
epoch= 39   train_loss= 1.777   train_acc= 1.000   test_loss=1.96

epoch= 24   train_loss= 1.960   train_acc= 1.000   test_loss=2.179   test_acc= 0.889
epoch= 25   train_loss= 1.956   train_acc= 1.000   test_loss=2.146   test_acc= 0.889
epoch= 26   train_loss= 1.939   train_acc= 1.000   test_loss=2.149   test_acc= 0.889
epoch= 27   train_loss= 1.924   train_acc= 1.000   test_loss=2.127   test_acc= 0.889
epoch= 28   train_loss= 1.914   train_acc= 1.000   test_loss=2.119   test_acc= 0.889
epoch= 29   train_loss= 1.899   train_acc= 1.000   test_loss=2.113   test_acc= 0.889
epoch= 30   train_loss= 1.907   train_acc= 0.988   test_loss=2.088   test_acc= 0.889
epoch= 31   train_loss= 1.876   train_acc= 1.000   test_loss=2.075   test_acc= 0.889
epoch= 32   train_loss= 1.863   train_acc= 1.000   test_loss=2.070   test_acc= 0.889
epoch= 33   train_loss= 1.851   train_acc= 1.000   test_loss=2.060   test_acc= 0.889
epoch= 34   train_loss= 1.841   train_acc= 1.000   test_loss=2.060   test_acc= 0.889
epoch= 35   train_loss= 1.830   train_acc= 1.000   test_loss=2.02

epoch= 20   train_loss= 2.021   train_acc= 1.000   test_loss=2.386   test_acc= 0.778
epoch= 21   train_loss= 1.996   train_acc= 1.000   test_loss=2.355   test_acc= 0.778
epoch= 22   train_loss= 1.986   train_acc= 1.000   test_loss=2.320   test_acc= 0.778
epoch= 23   train_loss= 1.978   train_acc= 1.000   test_loss=2.293   test_acc= 0.778
epoch= 24   train_loss= 1.956   train_acc= 1.000   test_loss=2.291   test_acc= 0.778
epoch= 25   train_loss= 1.942   train_acc= 1.000   test_loss=2.252   test_acc= 0.778
epoch= 26   train_loss= 1.928   train_acc= 1.000   test_loss=2.244   test_acc= 0.778
epoch= 27   train_loss= 1.923   train_acc= 1.000   test_loss=2.206   test_acc= 0.778
epoch= 28   train_loss= 1.906   train_acc= 1.000   test_loss=2.225   test_acc= 0.778
epoch= 29   train_loss= 1.894   train_acc= 1.000   test_loss=2.212   test_acc= 0.778
epoch= 30   train_loss= 1.877   train_acc= 1.000   test_loss=2.194   test_acc= 0.778
epoch= 31   train_loss= 1.873   train_acc= 1.000   test_loss=2.17

epoch= 16   train_loss= 2.090   train_acc= 1.000   test_loss=2.269   test_acc= 1.000
epoch= 17   train_loss= 2.078   train_acc= 0.988   test_loss=2.231   test_acc= 1.000
epoch= 18   train_loss= 2.046   train_acc= 1.000   test_loss=2.226   test_acc= 1.000
epoch= 19   train_loss= 2.042   train_acc= 1.000   test_loss=2.219   test_acc= 1.000
epoch= 20   train_loss= 2.018   train_acc= 1.000   test_loss=2.207   test_acc= 1.000
epoch= 21   train_loss= 2.005   train_acc= 1.000   test_loss=2.177   test_acc= 1.000
epoch= 22   train_loss= 1.988   train_acc= 1.000   test_loss=2.159   test_acc= 1.000
epoch= 23   train_loss= 1.982   train_acc= 1.000   test_loss=2.179   test_acc= 1.000
epoch= 24   train_loss= 1.969   train_acc= 1.000   test_loss=2.145   test_acc= 1.000
epoch= 25   train_loss= 1.958   train_acc= 1.000   test_loss=2.134   test_acc= 1.000
epoch= 26   train_loss= 1.945   train_acc= 1.000   test_loss=2.121   test_acc= 1.000
epoch= 27   train_loss= 1.927   train_acc= 1.000   test_loss=2.10

epoch= 12   train_loss= 2.146   train_acc= 1.000   test_loss=2.407   test_acc= 0.889
epoch= 13   train_loss= 2.126   train_acc= 1.000   test_loss=2.455   test_acc= 0.778
epoch= 14   train_loss= 2.113   train_acc= 1.000   test_loss=2.389   test_acc= 0.778
epoch= 15   train_loss= 2.088   train_acc= 1.000   test_loss=2.379   test_acc= 0.778
epoch= 16   train_loss= 2.075   train_acc= 1.000   test_loss=2.388   test_acc= 0.778
epoch= 17   train_loss= 2.057   train_acc= 1.000   test_loss=2.330   test_acc= 0.778
epoch= 18   train_loss= 2.042   train_acc= 1.000   test_loss=2.347   test_acc= 0.778
epoch= 19   train_loss= 2.030   train_acc= 1.000   test_loss=2.362   test_acc= 0.778
epoch= 20   train_loss= 2.016   train_acc= 1.000   test_loss=2.298   test_acc= 0.778
epoch= 21   train_loss= 2.002   train_acc= 1.000   test_loss=2.329   test_acc= 0.778
epoch= 22   train_loss= 1.988   train_acc= 1.000   test_loss=2.263   test_acc= 0.778
epoch= 23   train_loss= 1.988   train_acc= 1.000   test_loss=2.31

epoch= 8   train_loss= 2.214   train_acc= 0.988   test_loss=2.684   test_acc= 0.889
epoch= 9   train_loss= 2.174   train_acc= 1.000   test_loss=2.674   test_acc= 0.778
epoch= 10   train_loss= 2.167   train_acc= 1.000   test_loss=2.681   test_acc= 0.889
epoch= 11   train_loss= 2.140   train_acc= 1.000   test_loss=2.693   test_acc= 0.778
epoch= 12   train_loss= 2.132   train_acc= 1.000   test_loss=2.646   test_acc= 0.778
epoch= 13   train_loss= 2.107   train_acc= 1.000   test_loss=2.666   test_acc= 0.889
epoch= 14   train_loss= 2.092   train_acc= 1.000   test_loss=2.632   test_acc= 0.889
epoch= 15   train_loss= 2.075   train_acc= 1.000   test_loss=2.638   test_acc= 0.889
epoch= 16   train_loss= 2.058   train_acc= 1.000   test_loss=2.617   test_acc= 0.778
epoch= 17   train_loss= 2.043   train_acc= 1.000   test_loss=2.627   test_acc= 0.889
epoch= 18   train_loss= 2.035   train_acc= 1.000   test_loss=2.586   test_acc= 0.889
epoch= 19   train_loss= 2.015   train_acc= 1.000   test_loss=2.559 

epoch= 4   train_loss= 2.370   train_acc= 0.964   test_loss=2.714   test_acc= 0.778
epoch= 5   train_loss= 2.348   train_acc= 0.976   test_loss=2.661   test_acc= 0.778
epoch= 6   train_loss= 2.264   train_acc= 1.000   test_loss=2.730   test_acc= 0.667
epoch= 7   train_loss= 2.289   train_acc= 0.964   test_loss=2.654   test_acc= 0.667
epoch= 8   train_loss= 2.236   train_acc= 0.976   test_loss=2.602   test_acc= 0.778
epoch= 9   train_loss= 2.195   train_acc= 1.000   test_loss=2.552   test_acc= 0.778
epoch= 10   train_loss= 2.192   train_acc= 0.988   test_loss=2.570   test_acc= 0.778
epoch= 11   train_loss= 2.167   train_acc= 1.000   test_loss=2.508   test_acc= 0.778
epoch= 12   train_loss= 2.135   train_acc= 1.000   test_loss=2.430   test_acc= 0.889
epoch= 13   train_loss= 2.127   train_acc= 1.000   test_loss=2.452   test_acc= 0.778
epoch= 14   train_loss= 2.104   train_acc= 1.000   test_loss=2.461   test_acc= 0.778
epoch= 15   train_loss= 2.087   train_acc= 1.000   test_loss=2.465   te

epoch= 0   train_loss= 2.970   train_acc= 0.639   test_loss=2.753   test_acc= 0.889
epoch= 1   train_loss= 2.595   train_acc= 0.867   test_loss=2.645   test_acc= 0.889
epoch= 2   train_loss= 2.486   train_acc= 0.928   test_loss=2.613   test_acc= 0.778
epoch= 3   train_loss= 2.424   train_acc= 0.952   test_loss=2.643   test_acc= 0.778
epoch= 4   train_loss= 2.355   train_acc= 0.964   test_loss=2.656   test_acc= 0.778
epoch= 5   train_loss= 2.302   train_acc= 0.988   test_loss=2.507   test_acc= 0.778
epoch= 6   train_loss= 2.289   train_acc= 0.976   test_loss=2.559   test_acc= 0.778
epoch= 7   train_loss= 2.257   train_acc= 0.988   test_loss=2.571   test_acc= 0.778
epoch= 8   train_loss= 2.235   train_acc= 0.988   test_loss=2.442   test_acc= 0.778
epoch= 9   train_loss= 2.198   train_acc= 0.988   test_loss=2.444   test_acc= 0.778
epoch= 10   train_loss= 2.179   train_acc= 0.988   test_loss=2.505   test_acc= 0.778
epoch= 11   train_loss= 2.156   train_acc= 0.988   test_loss=2.352   test_a

epoch= 46   train_loss= 1.711   train_acc= 1.000   test_loss=1.740   test_acc= 1.000
epoch= 47   train_loss= 1.700   train_acc= 1.000   test_loss=1.731   test_acc= 1.000
epoch= 48   train_loss= 1.696   train_acc= 1.000   test_loss=1.724   test_acc= 1.000
epoch= 49   train_loss= 1.683   train_acc= 1.000   test_loss=1.711   test_acc= 1.000
run time: 0.43170444170633954 min
test_acc=1.000
MI-Net mean accuracy =  0.88555557
std =  0.087918304


In [10]:
from loader import parse_c45, bag_set
from __future__ import print_function, division
from sklearn.model_selection import StratifiedKFold
from score import result
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import time


In [13]:
# Load list of C4.5 Examples
example_set = parse_c45('fox')

# Get stats to normalize data
raw_data = np.array(example_set.to_float())
data_mean = np.average(raw_data, axis=0)
data_std  = np.std(raw_data, axis=0)
data_std[np.nonzero(data_std == 0.0)] = 1.0
def normalizer(ex):
    ex = np.array(ex)
    return ex


# Group examples into bags
bagset = bag_set(example_set)

molecule_names = []
conformation_names = []
bags = []
labels = []

#bag_id = 0
for bag in bagset: 
    for ro in bag.to_float(normalizer):
        if ro[0] not in molecule_names:
            molecule_names.append(ro[0])
            #bag_id += 1
        labels.append(int(((float(ro[-1]) * 2) - 1)))
        conformation_names.append(ro[1])
        bags.append(list(map(float, ro[2:-1])))

bags = np.array(bags,dtype="float")
labels = np.array(labels,dtype="int")
        
fold = StratifiedKFold(n_splits=5, shuffle=False, random_state=None)
splittt = 1
for train_index, test_index in fold.split(bags,labels):
    X_train, X_test = bags[train_index], bags[test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    epoch_time = time.time()
    model = MI_Net(bag_set)
    model.fit(X_train,y_train)
    start = time.time()
    predicted = model.predict(X_test)
    time_ep = time.time() - start
    result("MI-Net",y_test,predicted,time_ep, splittt)
    splittt += 1

TypeError: 'function' object is not subscriptable

### MI-net pooling deep supervision

In [None]:
MIimport numpy as np
import sys
import time
import random
from random import shuffle
import argparse

from keras.models import Model
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.layers import Input, Dense, Layer, Dropout, average

from mil_nets.dataset import load_dataset
from mil_nets.layer import Feature_pooling
from mil_nets.metrics import bag_accuracy
from mil_nets.objectives import bag_loss
from mil_nets.utils import convertToBatch

In [None]:
def test_eval(model, test_set):
    """Evaluate on testing set.
    Parameters
    -----------------
    model : keras.engine.training.Model object
        The training MI-Net with deep supervision model.
    test_set : list
        A list of testing set contains all training bags features and labels.
    Returns
    -----------------
    test_loss : float
        Mean loss of evaluating on testing set.
    test_acc : float
        Mean accuracy of evaluating on testing set.
    """
    num_test_batch = len(test_set)
    test_loss = np.zeros((num_test_batch, 1), dtype=np.float32)
    test_acc = np.zeros((num_test_batch, 1), dtype=np.float32)
    for ibatch, batch in enumerate(test_set):
        result = model.test_on_batch({'input':batch[0].astype(np.float32)}, {'fp1':batch[1].astype(np.float32), 'fp2':batch[1].astype(np.float32), 'fp3':batch[1].astype(np.float32), 'ave':batch[1].astype(np.float32)})
        test_loss[ibatch] = result[0]
        test_acc[ibatch] = result[-1]
    return np.mean(test_loss), np.mean(test_acc)

def train_eval(model, train_set):
    """Evaluate on training set.
    Parameters
    -----------------
    model : keras.engine.training.Model object
        The training MI-Net with deep supervision model.
    train_set : list
        A list of training set contains all training bags features and labels.
    Returns
    -----------------
    test_loss : float
        Mean loss of evaluating on traing set.
    test_acc : float
        Mean accuracy of evaluating on testing set.
    """
    num_train_batch = len(train_set)
    train_loss = np.zeros((num_train_batch, 1), dtype=np.float32)
    train_acc = np.zeros((num_train_batch, 1), dtype=np.float32)
    shuffle(train_set)
    for ibatch, batch in enumerate(train_set):
        result = model.train_on_batch({'input':batch[0].astype(np.float32)}, {'fp1':batch[1].astype(np.float32), 'fp2':batch[1].astype(np.float32), 'fp3':batch[1].astype(np.float32), 'ave':batch[1].astype(np.float32)})
        train_loss[ibatch] = result[0]
        train_acc[ibatch] = result[-1]
    return np.mean(train_loss), np.mean(train_acc)

def MI_Net_with_DS(dataset):
    """Train and evaluate on MI-Net with deep supervision.
    Parameters
    -----------------
    dataset : dict
        A dictionary contains all dataset information. We split train/test by keys.
    Returns
    -----------------
    test_acc : float
        Testing accuracy of MI-Net with deep supervision.
    """
    weight_decay=0.005
    init_lr=5e-4
    pooling_mode='max'
    momentum=0.9
    max_epoch=50
    # load data and convert type
    train_bags = dataset['train']
    test_bags = dataset['test']

    # convert bag to batch
    train_set = convertToBatch(train_bags)
    test_set = convertToBatch(test_bags)
    dimension = train_set[0][0].shape[1]
    weight = [1.0, 1.0, 1.0, 0.0]

    # data: instance feature, n*d, n = number of training instance
    data_input = Input(shape=(dimension,), dtype='float32', name='input')

    # fully-connected
    fc1 = Dense(256, activation='relu', kernel_regularizer=l2(weight_decay))(data_input)
    fc2 = Dense(128, activation='relu', kernel_regularizer=l2(weight_decay))(fc1)
    fc3 = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(fc2)

    # dropout
    dropout1 = Dropout(rate=0.5)(fc1)
    dropout2 = Dropout(rate=0.5)(fc2)
    dropout3 = Dropout(rate=0.5)(fc3)

    # features pooling
    fp1 = Feature_pooling(output_dim=1, kernel_regularizer=l2(weight_decay), pooling_mode=pooling_mode, name='fp1')(dropout1)
    fp2 = Feature_pooling(output_dim=1, kernel_regularizer=l2(weight_decay), pooling_mode=pooling_mode, name='fp2')(dropout2)
    fp3 = Feature_pooling(output_dim=1, kernel_regularizer=l2(weight_decay), pooling_mode=pooling_mode, name='fp3')(dropout3)

    # score average
    mg_ave =average([fp1,fp2,fp3], name='ave')

    model = Model(inputs=[data_input], outputs=[fp1, fp2, fp3, mg_ave])
    sgd = SGD(lr=init_lr, decay=1e-4, momentum=momentum, nesterov=True)
    model.compile(loss={'fp1':bag_loss, 'fp2':bag_loss, 'fp3':bag_loss, 'ave':bag_loss}, loss_weights={'fp1':weight[0], 'fp2':weight[1], 'fp3':weight[2], 'ave':weight[3]}, optimizer=sgd, metrics=[bag_accuracy])

    # train model
    t1 = time.time()
    num_batch = len(train_set)
    for epoch in range(max_epoch):
        train_loss, train_acc = train_eval(model, train_set)
        test_loss, test_acc = test_eval(model, test_set)
        print('epoch=', epoch, '  train_loss= {:.3f}'.format(train_loss), '  train_acc= {:.3f}'.format(train_acc), '  test_loss={:.3f}'.format(test_loss), '  test_acc= {:.3f}'.format(test_acc))
    t2 = time.time()
    print('run time:', (t2-t1) / 60, 'min')
    print('test_acc={:.3f}'.format(test_acc))

    return test_acc

In [None]:
# perform five times 10-fold cross=validation experiments
run = 5
n_folds = 10
acc = np.zeros((run, n_folds), dtype=float)
for irun in range(run):
    dataset = load_dataset('musk1', n_folds)
    for ifold in range(n_folds):
        print('run=', irun, '  fold=', ifold)
        acc[irun][ifold] = MI_Net_with_DS(dataset[ifold])
print('MI-Net with DS mean accuracy = ', np.mean(acc))
print('std = ', np.std(acc))

## Bag-Space

In [None]:
import pandas as pd
X = pd.read_table("./clean2.data") #pd.read_csv("sample_data/mnist_test.csv") 

In [None]:
from cknn import cknneighbors_graph

#ckng = cknneighbors_graph(X, n_neighbors=5, delta=1.0)

In [None]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.manifold import SpectralEmbedding
import matplotlib.pyplot as plt
from matplotlib import offsetbox
import seaborn as sns

from cknn import cknneighbors_graph

sns.set()


def plot2d_label(X, title=None):
    digits = load_digits()
    y = digits.target
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    X = (X - x_min) / (x_max - x_min)

    plt.figure()
    ax = plt.subplot(111)
    for i in range(X.shape[0]):
        plt.text(X[i, 0], X[i, 1], str(digits.target[i]),
                 color=plt.cm.Set1(y[i] / 10.),
                 fontdict={'weight': 'bold', 'size': 9})

    
    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title)
    plt.show()


def main():
    data = X
    print(data)
    n_neighbors = 2

    model_normal = SpectralEmbedding(n_components=2, n_neighbors=n_neighbors)
    y_normal = model_normal.fit_transform(data)
    plot2d_label(y_normal)

    #ckng = cknneighbors_graph(data, n_neighbors=n_neighbors, delta=1.5)
    #model_cknn = SpectralEmbedding(n_components=2, affinity='precomputed')
    #y_cknn = model_cknn.fit_transform(ckng.toarray())
    #plot2d_label(y_cknn)

main()

In [None]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.manifold import SpectralEmbedding
import matplotlib.pyplot as plt
from matplotlib import offsetbox
import seaborn as sns

sns.set()


def plot2d_label(X, title=None):
    y = X[1]
    x_min, x_max = np.min(X[0], 0), np.max(X[0], 0)
    X = (X - x_min) / (x_max - x_min)

    plt.figure()
    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title)
    plt.show()


def main():
    data = X
    n_neighbors = 10

    model_normal = SpectralEmbedding(n_components=2, n_neighbors=n_neighbors)
    y_normal = model_normal.fit_transform(data)
    #plot2d_label(y_normal)

    ckng = cknneighbors_graph(data, n_neighbors=n_neighbors, delta=1.5)
    model_cknn = SpectralEmbedding(n_components=2, affinity='precomputed')
    y_cknn = model_cknn.fit_transform(ckng.toarray())
    #plot2d_label(y_cknn)
    print(y_cknn)

main()

## instance-Space


MI-SVM and mi-SVM

In [4]:
import misvm
from loader import parse_c45, bag_set
from score import result
from __future__ import print_function, division
import numpy as np
from sklearn.model_selection import StratifiedKFold

In [8]:
# Load list of C4.5 Examples
for dataset in ['fox','musk','mutagenesis-atoms','mutagenesis-bonds','mutagenesis-chains','eastWest','elephant','tiger','westEast']:
    example_set = parse_c45(dataset)


    # Get stats to normalize data
    raw_data = np.array(example_set.to_float())
    data_mean = np.average(raw_data, axis=0)
    data_std  = np.std(raw_data, axis=0)
    data_std[np.nonzero(data_std == 0.0)] = 1.0
    def normalizer(ex):
        ex = np.array(ex)
        normed = ((ex - data_mean) / data_std)
        # The ...[:, 2:-1] removes first two columns and last column,
        # which are the bag/instance ids and class label, as part of the
        # normalization process
        return normed[2:-1]


    # Group examples into bags
    bagset = bag_set(example_set)

    # Convert bags to NumPy arrays
    bags = [np.array(b.to_float(normalizer)) for b in bagset]
    labels = np.array([b.label for b in bagset], dtype=float)
    # Convert 0/1 labels to -1/1 labels
    labels = 2 * labels - 1

    # Spilt dataset arbitrarily to train/test sets
    train_bags = bags[10:]
    train_labels = labels[10:]
    test_bags = bags[:10]
    test_labels = labels[:10]

    # Construct classifiers
    classifiers = {}

    # MISVM   : the MI-SVM algorithm of Andrews, Tsochantaridis, & Hofmann (2002)
    # miSVM   : the mi-SVM algorithm of Andrews, Tsochantaridis, & Hofmann (2002)

    #  : the semi-supervised learning approach of Zhou & Xu (2007)
    #     : the MI classification algorithm of Mangasarian & Wild (2008)
    # sMIL    : sparse MIL (Bunescu & Mooney, 2007)
    # stMIL   : sparse, transductive  MIL (Bunescu & Mooney, 2007)

    classifiers['MissSVM'] = misvm.MissSVM(kernel='linear', C=1.0, max_iters=20)
    classifiers['sbMIL'] = misvm.sbMIL(kernel='linear', eta=0.1, C=1e2)
    classifiers['SIL'] = misvm.SIL(kernel='linear', C=1.0)
    classifiers['STK'] = misvm.STK(kernel='linear', C=1.0)
    classifiers['NSK'] = misvm.NSK(kernel='linear', C=1.0)
    classifiers['MICA'] = misvm.MICA(kernel='linear', C=1.0)

    # Train/Evaluate classifiers
    accuracies = {}

    bags = np.array(bags,dtype=object)
    labels = np.array(labels,dtype=int)
    fold = StratifiedKFold(n_splits=5, shuffle=False, random_state=None)
    for algorithm, classifier in classifiers.items():
        nums = 1
        for train_index, test_index in fold.split(bags,labels):
            X_train, X_test = bags[train_index], bags[test_index]
            y_train, y_test = labels[train_index], labels[test_index]

            classifier.fit(X_train, y_train)
            predictions = classifier.predict(X_test)
            result(algorithm,y_test,predictions,time_ep, nums, dataset)
            print(algorithm, dataset)
            accuracies[algorithm + " " + str(nums)] = {"acc":np.average(y_test == np.sign(predictions)),"kfold":nums}
            nums+=1

    for algorithm, item in accuracies.items():
        print('\n%s, fold:%s Accuracy: %.f%%' % (algorithm,str(item["kfold"]), 100 * item["acc"]))

Non-random start...
     pcost       dcost       gap    pres   dres
 0: -7.0927e+02 -2.4429e+01  1e+04  1e+02  2e-12
 1: -1.9421e+01 -2.4263e+01  2e+02  2e+00  2e-12
 2: -8.0180e+00 -1.9588e+01  3e+01  2e-01  1e-13
 3: -7.3763e+00 -1.3596e+01  9e+00  5e-02  5e-14
 4: -7.6156e+00 -9.1125e+00  2e+00  4e-03  3e-14
 5: -7.9604e+00 -8.4305e+00  5e-01  1e-03  3e-14
 6: -8.0647e+00 -8.2601e+00  2e-01  4e-04  3e-14
 7: -8.1157e+00 -8.1806e+00  7e-02  1e-04  3e-14
 8: -8.1294e+00 -8.1612e+00  3e-02  4e-05  3e-14
 9: -8.1396e+00 -8.1474e+00  8e-03  7e-06  3e-14
10: -8.1423e+00 -8.1441e+00  2e-03  1e-06  3e-14
11: -8.1430e+00 -8.1432e+00  3e-04  6e-15  3e-14
12: -8.1431e+00 -8.1431e+00  3e-05  1e-14  3e-14
13: -8.1431e+00 -8.1431e+00  6e-07  6e-15  3e-14
Optimal solution found.

Iteration 1...
Linearizing constraints...
Computing slacks...
Linearizing...
Solving QP...
     pcost       dcost       gap    pres   dres
 0: -7.0701e+02 -2.4138e+01  1e+04  1e+02  2e-12
 1: -2.2821e+01 -2.3953e+01  3e+0

Solving QP...
     pcost       dcost       gap    pres   dres
 0: -7.0810e+02 -2.3542e+01  1e+04  1e+02  2e-12
 1: -2.3160e+01 -2.3353e+01  3e+02  3e+00  2e-12
 2: -7.5470e+00 -1.9191e+01  4e+01  2e-01  2e-13
 3: -6.2469e+00 -1.1715e+01  9e+00  5e-02  4e-14
 4: -6.2988e+00 -7.5717e+00  2e+00  8e-03  3e-14
 5: -6.4032e+00 -7.0727e+00  9e-01  3e-03  3e-14
 6: -6.4108e+00 -7.0417e+00  8e-01  3e-03  3e-14
 7: -6.4759e+00 -6.8257e+00  4e-01  1e-03  3e-14
 8: -6.5211e+00 -6.6705e+00  2e-01  3e-04  3e-14
 9: -6.5429e+00 -6.6191e+00  9e-02  1e-04  3e-14
10: -6.5575e+00 -6.5880e+00  4e-02  4e-05  3e-14
11: -6.5643e+00 -6.5753e+00  1e-02  1e-05  3e-14
12: -6.5666e+00 -6.5708e+00  4e-03  2e-07  3e-14
13: -6.5682e+00 -6.5689e+00  7e-04  3e-08  3e-14
14: -6.5684e+00 -6.5686e+00  3e-04  8e-09  3e-14
15: -6.5685e+00 -6.5685e+00  4e-05  1e-09  3e-14
16: -6.5685e+00 -6.5685e+00  9e-07  2e-11  3e-14
Optimal solution found.
delta obj ratio: 1.78e+05

Iteration 10...
Linearizing constraints...
Computing s

 4: -6.2100e+00 -7.4487e+00  2e+00  8e-03  3e-14
 5: -6.2996e+00 -7.1079e+00  1e+00  5e-03  3e-14
 6: -6.3437e+00 -6.8608e+00  6e-01  1e-03  3e-14
 7: -6.4323e+00 -6.6087e+00  2e-01  3e-04  3e-14
 8: -6.4654e+00 -6.5399e+00  9e-02  1e-04  3e-14
 9: -6.4747e+00 -6.5237e+00  6e-02  7e-05  3e-14
10: -6.4873e+00 -6.5017e+00  2e-02  2e-05  3e-14
11: -6.4917e+00 -6.4948e+00  4e-03  3e-06  3e-14
12: -6.4927e+00 -6.4933e+00  7e-04  5e-07  3e-14
13: -6.4929e+00 -6.4930e+00  1e-04  9e-08  3e-14
14: -6.4930e+00 -6.4930e+00  2e-05  9e-09  3e-14
15: -6.4930e+00 -6.4930e+00  3e-07  1e-10  3e-14
Optimal solution found.
delta obj ratio: 1.60e+05

Iteration 18...
Linearizing constraints...
Computing slacks...
Linearizing...
Solving QP...
     pcost       dcost       gap    pres   dres
 0: -7.0792e+02 -2.3764e+01  1e+04  1e+02  2e-12
 1: -2.2827e+01 -2.3578e+01  3e+02  3e+00  2e-12
 2: -7.7253e+00 -1.9431e+01  4e+01  3e-01  2e-13
 3: -6.5305e+00 -1.3170e+01  1e+01  7e-02  6e-14
 4: -6.4111e+00 -7.7909e+



NameError: name 'time_ep' is not defined

### mi-Net

In [None]:
import sys
import time
from random import shuffle
import numpy as np
import argparse

from keras.models import Model
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.layers import Input, Dense, Layer, Dropout

from mil_nets.dataset import load_dataset
from mil_nets.layer import Score_pooling
from mil_nets.metrics import bag_accuracy
from mil_nets.objectives import bag_loss
from mil_nets.utils import convertToBatch

In [None]:
def test_eval(model, test_set):
    """Evaluate on testing set.
    Parameters
    -----------------
    model : keras.engine.training.Model object
        The training mi-Net model.
    test_set : list
        A list of testing set contains all training bags features and labels.
    Returns
    -----------------
    test_loss : float
        Mean loss of evaluating on testing set.
    test_acc : float
        Mean accuracy of evaluating on testing set.
    """
    num_test_batch = len(test_set)
    test_loss = np.zeros((num_test_batch, 1), dtype=np.float32)
    test_acc = np.zeros((num_test_batch, 1), dtype=np.float32)
    for ibatch, batch in enumerate(test_set):
        result = model.test_on_batch({'input':batch[0].astype(np.float32)}, {'sp':batch[1].astype(np.float32)})
        test_loss[ibatch] = result[0]
        test_acc[ibatch] = result[1]
    return np.mean(test_loss), np.mean(test_acc)

def train_eval(model, train_set):
    """Evaluate on training set.
    Parameters
    -----------------
    model : keras.engine.training.Model object
        The training mi-Net model.
    train_set : list
        A list of training set contains all training bags features and labels.
    Returns
    -----------------
    test_loss : float
        Mean loss of evaluating on traing set.
    test_acc : float
        Mean accuracy of evaluating on testing set.
    """
    num_train_batch = len(train_set)
    train_loss = np.zeros((num_train_batch, 1), dtype=np.float32)
    train_acc = np.zeros((num_train_batch, 1), dtype=np.float32)
    shuffle(train_set)
    for ibatch, batch in enumerate(train_set):
        result = model.train_on_batch({'input':batch[0].astype(np.float32)}, {'sp':batch[1].astype(np.float32)})
        train_loss[ibatch] = result[0]
        train_acc[ibatch] = result[1]
    return np.mean(train_loss), np.mean(train_acc)

def mi_Net(dataset):
    weight_decay=0.005
    init_lr=5e-4
    pooling_mode='max'
    momentum=0.9
    max_epoch=50
    """Train and evaluate on mi-Net.
    Parameters
    -----------------
    dataset : dict
        A dictionary contains all dataset information. We split train/test by keys.
    Returns
    -----------------
    test_acc : float
        Testing accuracy of mi-Net.
    """
    # load data and convert type
    train_bags = dataset['train']
    test_bags = dataset['test']

    # convert bag to batch
    train_set = convertToBatch(train_bags)
    test_set = convertToBatch(test_bags)
    dimension = train_set[0][0].shape[1]

    # data: instance feature, n*d, n = number of training instance
    data_input = Input(shape=(dimension,), dtype='float32', name='input')

    # fully-connected
    fc1 = Dense(256, activation='relu', kernel_regularizer=l2(weight_decay))(data_input)
    fc2 = Dense(128, activation='relu', kernel_regularizer=l2(weight_decay))(fc1)
    fc3 = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(fc2)

    # dropout
    dropout = Dropout(rate=0.5)(fc3)

    # score pooling
    sp = Score_pooling(output_dim=1, kernel_regularizer=l2(weight_decay), pooling_mode=pooling_mode, name='sp')(dropout)

    model = Model(inputs=[data_input], outputs=[sp])
    sgd = SGD(lr=init_lr, decay=1e-4, momentum=momentum, nesterov=True)
    model.compile(loss=bag_loss, optimizer=sgd, metrics=[bag_accuracy])

    # train model
    t1 = time.time()
    num_batch = len(train_set)
    for epoch in range(max_epoch):
        train_loss, train_acc = train_eval(model, train_set)
        test_loss, test_acc = test_eval(model, test_set)
        print('epoch=', epoch, '  train_loss= {:.3f}'.format(train_loss), '  train_acc= {:.3f}'.format(train_acc), '  test_loss={:.3f}'.format(test_loss), '  test_acc= {:.3f}'.format(test_acc))
    t2 = time.time()
    print('run time:', (t2-t1) / 60.0, 'min')
    print('test_acc={:.3f}'.format(test_acc))

    return test_acc


In [None]:
# perform five times 10-fold cross-validation experiments
run = 5
n_folds = 10
acc = np.zeros((run, n_folds), dtype=np.float32)
for irun in range(run):
    dataset = load_dataset('musk1', n_folds)
    for ifold in range(n_folds):
        print('run=', irun, '  fold=', ifold)
        acc[irun][ifold] = mi_Net(dataset[ifold])
print('mi-net mean accuracy = ', np.mean(acc))
print('std = ', np.std(acc))