# Machine Learning project


In [None]:
pip install -r requirements.txt

## MI-Net

In [None]:
import numpy as np
import sys
import time
import random
from random import shuffle
import argparse

from keras.models import Model
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.layers import Input, Dense, Layer, Dropout

from mil_nets.dataset import load_dataset
from mil_nets.layer import Feature_pooling
from mil_nets.metrics import bag_accuracy
from mil_nets.objectives import bag_loss
from mil_nets.utils import convertToBatch

In [None]:
def test_eval(model, test_set):
    """Evaluate on testing set.
    Parameters
    -----------------
    model : keras.engine.training.Model object
        The training MI-Net model.
    test_set : list
        A list of testing set contains all training bags features and labels.
    Returns
    -----------------
    test_loss : float
        Mean loss of evaluating on testing set.
    test_acc : float
        Mean accuracy of evaluating on testing set.
    """
    num_test_batch = len(test_set)
    result = []
    for ibatch, batch in enumerate(test_set):
        predicted = model.predict_on_batch({'input':batch[0].astype(np.float32)})[0]

        act = list(batch[1].astype(np.float32))
        if predicted > 0.5:
            result += [1]
        else:
            result += [-1]
    return result

def train_eval(model, train_set):
    """Run one training pass over the training set, one bag per batch.

    Parameters
    -----------------
    model : keras.engine.training.Model object
        The MI-Net model being trained.
    train_set : list
        A list of (features, labels) tuples, one per training bag.
        Note that this list is shuffled in place on every call.
    Returns
    -----------------
    train_loss : float
        Mean of the first value returned by `train_on_batch` over all bags.
    train_acc : float
        Mean of the second value returned by `train_on_batch` over all bags.
    """
    num_train_batch = len(train_set)
    train_loss = np.zeros(num_train_batch, dtype=np.float32)
    train_acc = np.zeros(num_train_batch, dtype=np.float32)
    # Visit the bags in a different random order on every call.
    shuffle(train_set)
    for ibatch, (features, labels) in enumerate(train_set):
        result = model.train_on_batch(
            {'input': features.astype(np.float32)},
            {'fp': labels.astype(np.float32)},
        )
        train_loss[ibatch] = result[0]
        train_acc[ibatch] = result[1]
    return np.mean(train_loss), np.mean(train_acc)

def MI_Net(X_train, X_test,y_train, y_test,fold, dataset="musk1"):
    """Train MI-Net on one cross-validation fold and record its predictions.

    Parameters
    -----------------
    X_train : sequence of arrays
        Training bags; each bag is an (instances x features) array.
    X_test : sequence of arrays
        Testing bags, in the same format as `X_train`.
    y_train : sequence
        One label per training bag.
    y_test : sequence
        One label per testing bag.
    fold : int
        Index of the current fold; forwarded to `result`.
    dataset : str, optional
        Name of the dataset being evaluated; forwarded to `result`.
        Defaults to "musk1", the value that was previously hard-coded.
    Returns
    -----------------
    None
        The predictions on the testing bags are handed to `result`.
    """
    weight_decay=0.005
    init_lr=5e-4
    pooling_mode='max'
    momentum=0.9
    max_epoch=50

    # Every bag becomes one batch: its instances plus the bag label repeated
    # once per instance.
    # NOTE(review): the driver passes -1/+1 labels while test_eval thresholds
    # the output at 0.5 -- confirm bag_loss/bag_accuracy expect this encoding.
    train_set = [
        (bag, np.array([y_train[ibag]] * len(bag)))
        for ibag, bag in enumerate(X_train)
    ]
    # Test bags must be labelled from y_test (they were previously labelled
    # from y_train by mistake).
    test_set = [
        (bag, np.array([y_test[ibag]] * len(bag)))
        for ibag, bag in enumerate(X_test)
    ]

    dimension = train_set[0][0].shape[1]

    # data: instance feature, n*d, n = number of training instance
    data_input = Input(shape=(dimension,), dtype='float32', name='input')

    # fully-connected
    fc1 = Dense(256, activation='relu', kernel_regularizer=l2(weight_decay))(data_input)
    fc2 = Dense(128, activation='relu', kernel_regularizer=l2(weight_decay))(fc1)
    fc3 = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(fc2)

    # dropout
    dropout = Dropout(rate=0.5)(fc3)

    # features pooling
    fp = Feature_pooling(output_dim=1, kernel_regularizer=l2(weight_decay), pooling_mode=pooling_mode, name='fp')(dropout)

    model = Model(inputs=[data_input], outputs=[fp])
    sgd = SGD(learning_rate=init_lr, decay=1e-4, momentum=momentum, nesterov=True)
    model.compile(loss=bag_loss, optimizer=sgd, metrics=[bag_accuracy])

    # train model
    start = time.time()
    for epoch in range(max_epoch):
        print("epoch",epoch)
        train_eval(model, train_set)
    # Only the predictions of the fully trained model are reported, so the
    # test set is evaluated once after the last epoch instead of every epoch.
    predicted = test_eval(model, test_set)
    time_ep = time.time() - start
    # NOTE(review): the MI-SVM cell calls result(name, y_test, predictions, ...)
    # while this call passes the predictions first -- confirm the order that
    # score.result expects.
    result("MI-Net", predicted,y_test, time_ep, fold, dataset)


In [None]:
# Run the MI-Net cross-validation experiment on every dataset.
# NOTE: parse_c45, bag_set and StratifiedKFold are only imported in a later
# cell of this notebook, so that cell has to be executed before this one.
for dataset in ['fox','mutagenesis-atoms','mutagenesis-bonds','mutagenesis-chains','eastWest','elephant','tiger','westEast','musk1']:
    example_set = parse_c45(dataset)

    # Get stats to normalize data
    raw_data = np.array(example_set.to_float())
    data_mean = np.average(raw_data, axis=0)
    data_std  = np.std(raw_data, axis=0)
    # Constant columns get a divisor of 1 to avoid dividing by zero.
    data_std[np.nonzero(data_std == 0.0)] = 1.0
    def normalizer(ex):
        """Standardize one example and strip its leading/trailing columns."""
        normed = (np.array(ex) - data_mean) / data_std
        # musk1 drops two leading columns, every other dataset one; the last
        # column is always dropped (presumably ids and class label -- confirm).
        # The musk1 branch used to be a no-op because its `return` was missing.
        if dataset == "musk1":
            return normed[2:-1]
        return normed[1:-1]


    # Group examples into bags
    bagset = bag_set(example_set)

    # Convert bags to NumPy arrays
    bags = [np.array(b.to_float(normalizer)) for b in bagset]
    labels = np.array([b.label for b in bagset], dtype=float)
    # Convert 0/1 labels to -1/1 labels
    labels = 2 * labels - 1

    # perform `run` repetitions of `n_folds`-fold cross-validation
    run = 5
    n_folds = 5

    labels = np.array(labels,dtype=int)

    # dtype=object because the bags hold different numbers of instances.
    bags = np.array(bags,dtype=object)

    for irun in range(run):
        # NOTE(review): with shuffle=False every run produces the same splits;
        # only the network training differs between runs.
        fold = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
        for ifold, (train_index, test_index) in enumerate(fold.split(bags, labels), start=1):
            X_train, X_test = bags[train_index], bags[test_index]
            y_train, y_test = labels[train_index], labels[test_index]
            print('run=', irun, '  fold=', ifold)
            MI_Net(X_train, X_test,y_train, y_test,ifold)


In [None]:
from loader import parse_c45, bag_set
from __future__ import print_function, division
from sklearn.model_selection import StratifiedKFold
from score import result
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import time


# MI-SVM and mi-SVM

In [2]:
import misvm
import time
from loader import parse_c45, bag_set
from score import result
from __future__ import print_function, division
import numpy as np
from sklearn.model_selection import StratifiedKFold


In [None]:
# Load list of C4.5 Examples
#'fox','mutagenesis-atoms','mutagenesis-bonds',
for dataset in ['mutagenesis-chains','eastWest','elephant','tiger','westEast', 'musk1']:
    example_set = parse_c45(dataset)


    # Get stats to normalize data
    raw_data = np.array(example_set.to_float())
    data_mean = np.average(raw_data, axis=0)
    data_std  = np.std(raw_data, axis=0)
    # Constant columns get a divisor of 1 to avoid dividing by zero.
    data_std[np.nonzero(data_std == 0.0)] = 1.0
    def normalizer(ex):
        """Standardize one example and strip its leading/trailing columns."""
        normed = (np.array(ex) - data_mean) / data_std
        # For musk1 the slice [2:-1] removes the first two columns and the
        # last column (the bag/instance ids and class label); for every other
        # dataset only one leading column is removed.
        # The musk1 branch used to be a no-op because its `return` was missing.
        if dataset == "musk1":
            return normed[2:-1]
        return normed[1:-1]


    # Group examples into bags
    bagset = bag_set(example_set)

    # Convert bags to NumPy arrays
    bags = [np.array(b.to_float(normalizer)) for b in bagset]
    labels = np.array([b.label for b in bagset], dtype=float)
    # Convert 0/1 labels to -1/1 labels
    labels = 2 * labels - 1

    # Construct classifiers
    classifiers = {}

    # MISVM   : the MI-SVM algorithm of Andrews, Tsochantaridis, & Hofmann (2002)
    # miSVM   : the mi-SVM algorithm of Andrews, Tsochantaridis, & Hofmann (2002)

    # sMIL    : sparse MIL (Bunescu & Mooney, 2007)
    # stMIL   : sparse, transductive  MIL (Bunescu & Mooney, 2007)
    classifiers['MISVM'] = misvm.MISVM()
    classifiers['miSVM'] = misvm.miSVM()
    classifiers['MissSVM'] = misvm.MissSVM(kernel='linear', C=1.0, max_iters=20)
    classifiers['sbMIL'] = misvm.sbMIL(kernel='linear', eta=0.1, C=1e2)
    classifiers['SIL'] = misvm.SIL(kernel='linear', C=1.0)
    classifiers['STK'] = misvm.STK(kernel='linear', C=1.0)
    classifiers['NSK'] = misvm.NSK(kernel='linear', C=1.0)
    classifiers['MICA'] = misvm.MICA(kernel='linear')

    # Train/Evaluate classifiers
    accuracies = {}

    # dtype=object because the bags hold different numbers of instances.
    bags = np.array(bags,dtype=object)
    labels = np.array(labels,dtype=int)
    fold = StratifiedKFold(n_splits=5, shuffle=False, random_state=None)
    for algorithm, classifier in classifiers.items():
        for nums, (train_index, test_index) in enumerate(fold.split(bags, labels), start=1):
            X_train, X_test = bags[train_index], bags[test_index]
            y_train, y_test = labels[train_index], labels[test_index]

            # Time each fold on its own; the timer used to start once per
            # algorithm, so later folds reported cumulative time.
            start = time.time()
            classifier.fit(X_train, y_train)
            predictions = classifier.predict(X_test)
            # Turn the real-valued outputs into -1/+1 labels (0 counts as +1).
            predictions = np.where(np.asarray(predictions) < 0, -1.0, 1.0)
            time_ep = time.time() - start
            print(algorithm, dataset)
            print(y_test)
            print(predictions)
            result(algorithm, y_test, predictions, time_ep, nums, dataset)
            accuracies[algorithm + " " + str(nums)] = {"acc":np.average(y_test == predictions),"kfold":nums}

    for algorithm, item in accuracies.items():
        print('\n%s, fold:%s Accuracy: %.f%%' % (algorithm,str(item["kfold"]), 100 * item["acc"]))

Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.9058e+01 -4.5925e-01  4e+03  7e+01  1e-14
 1: -3.9588e+00 -4.2201e-01  2e+02  3e+00  1e-14
 2: -4.5934e-01 -3.2670e-01  1e+01  3e-01  2e-15
 3: -1.1535e-01 -2.9474e-01  7e-01  9e-03  2e-15
 4: -9.3349e-02 -1.6165e-01  7e-02  8e-05  6e-15
 5: -9.3709e-02 -1.0992e-01  2e-02  1e-06  2e-15
 6: -9.4368e-02 -9.6671e-02  2e-03  1e-07  7e-15
 7: -9.4555e-02 -9.5221e-02  7e-04  6e-17  1e-13
 8: -9.4600e-02 -9.4679e-02  8e-05  9e-18  2e-13
 9: -9.4606e-02 -9.4626e-02  2e-05  8e-18  1e-12
10: -9.4607e-02 -9.4613e-02  6e-06  8e-18  8e-12
11: -9.4607e-02 -9.4608e-02  7e-07  4e-17  2e-11
12: -9.4607e-02 -9.4607e-02  2e-08  2e-17  6e-11
Optimal solution found.
Recomputing classes...
Selector differences: 1057
Updating QP...

Iteration 2...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.1080e+01 -3.5107e-01  4e+03  7e+01  3e-14
 1: -1.7569e+00 -3.4638e-01  7e+01  1e+00  3e

 8: -9.4234e-02 -9.4939e-02  7e-04  1e-07  2e-13
 9: -9.4416e-02 -9.4693e-02  3e-04  1e-08  8e-13
10: -9.4499e-02 -9.4569e-02  7e-05  2e-09  1e-12
11: -9.4524e-02 -9.4547e-02  2e-05  4e-10  6e-12
12: -9.4534e-02 -9.4540e-02  6e-06  8e-18  9e-12
13: -9.4537e-02 -9.4539e-02  2e-06  3e-17  1e-11
14: -9.4538e-02 -9.4539e-02  6e-07  3e-18  7e-11
15: -9.4539e-02 -9.4539e-02  1e-07  4e-17  9e-11
16: -9.4539e-02 -9.4539e-02  3e-08  3e-17  2e-10
Optimal solution found.
Recomputing classes...
Selector differences: 0
MISVM mutagenesis-chains
[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1.]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -9.0275e+01 -4.2564e-01  4e+03  7e+01  1e-14
 1: -3.4590e+00 -3.7104e-01  1e+02  3e+00  1e-14
 2: -4.2718e-01 -3.0532e-01  1e+01  2e-01  2e-15
 3: -1.0991e-01 -2.7595e-01  6e-01  9e-03  2e-15
 4: -8.8570e-02 -1.5347e-01  7e-02  1e-04  1e-14
 5: -8.8731e-02 -1.0506e-01  2e-02  6e-18  2e-15
 6: -8.9316e-02 -9.1675e-02  2e-03  3e-18  6e-15
 7: -8.9480e-02 -8.9953e-02  5e-04  1e-17  7e-14
 8: -8.9521e-02 -8.9587e-02  7e-05  2e-17  4e-13
 9: -8.9524e-02 -8.9536e-02  1e-05  5e-17  1e-12
10: -8.9525e-02 -8.9529e-02  3e-06  4e-17  2e-11
11: -8.9526e-02 -8.9526e-02  2e-07  2e-18  1e-11
12: -8.9526e-02 -8.9526e-02  2e-09  2e-17  7e-11
Optimal solution found.
Recomputing classes...
Selector differences: 1117
Updating QP...

Iteration 2...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.2127e+01 -3.7545e-01  4e+03  7e+01  2e-14
 1: -2.1173e+00 -3.6351e-01  9e+01  2e+00  3e

 9: -8.9373e-02 -8.9713e-02  3e-04  3e-08  7e-13
10: -8.9455e-02 -8.9543e-02  9e-05  5e-09  1e-12
11: -8.9477e-02 -8.9520e-02  4e-05  4e-10  5e-12
12: -8.9490e-02 -8.9507e-02  2e-05  8e-11  4e-12
13: -8.9498e-02 -8.9501e-02  3e-06  7e-12  1e-11
14: -8.9499e-02 -8.9500e-02  1e-06  3e-12  3e-11
15: -8.9499e-02 -8.9500e-02  6e-07  2e-13  3e-10
16: -8.9500e-02 -8.9500e-02  2e-07  5e-14  2e-10
17: -8.9500e-02 -8.9500e-02  8e-08  1e-14  2e-10
Optimal solution found.
Recomputing classes...
Selector differences: 14
Updating QP...

Iteration 9...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.3431e+01 -3.1357e-01  4e+03  7e+01  3e-14
 1: -1.5816e+00 -3.1175e-01  7e+01  1e+00  3e-14
 2: -1.4972e-01 -2.9684e-01  3e+00  4e-02  2e-15
 3: -9.3076e-02 -2.2438e-01  3e-01  3e-03  3e-15
 4: -8.7906e-02 -1.4671e-01  8e-02  7e-04  2e-14
 5: -8.6791e-02 -1.0830e-01  2e-02  2e-17  1e-14
 6: -8.8181e-02 -9.2827e-02  5e-03  6e-17  1e-14
 7: -8.8744e-02 -9.1561e-02  3e-03  9e-18  1e-13




     pcost       dcost       gap    pres   dres
 0: -8.4385e+01 -3.0697e-01  4e+03  7e+01  6e-14
 1: -1.0488e+00 -3.0564e-01  4e+01  8e-01  8e-14
 2: -1.2293e-01 -2.8866e-01  1e+00  2e-02  3e-15
 3: -8.4732e-02 -1.8019e-01  1e-01  1e-04  7e-15
 4: -8.6303e-02 -1.0165e-01  2e-02  2e-05  2e-14
 5: -8.8576e-02 -9.2107e-02  4e-03  4e-07  9e-14
 6: -8.9229e-02 -9.0104e-02  9e-04  6e-08  8e-14
 7: -8.9451e-02 -8.9647e-02  2e-04  5e-09  3e-13
 8: -8.9508e-02 -8.9555e-02  5e-05  4e-17  3e-12
 9: -8.9522e-02 -8.9531e-02  9e-06  5e-17  4e-12
10: -8.9525e-02 -8.9527e-02  2e-06  1e-17  6e-11
11: -8.9525e-02 -8.9526e-02  4e-07  6e-18  1e-10
12: -8.9525e-02 -8.9526e-02  4e-07  6e-18  2e-10
13: -8.9525e-02 -8.9526e-02  3e-07  2e-17  2e-10
14: -8.9525e-02 -8.9526e-02  2e-07  2e-18  2e-10
15: -8.9525e-02 -8.9526e-02  6e-08  4e-17  3e-10
Optimal solution found.
Recomputing classes...
Selector differences: 52
Updating QP...

Iteration 12...
Training SVM...
     pcost       dcost       gap    pres   dres


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.9590e+01 -4.8006e-01  4e+03  7e+01  1e-14
 1: -3.9313e+00 -4.3627e-01  2e+02  3e+00  1e-14
 2: -5.1801e-01 -3.2158e-01  2e+01  3e-01  2e-15
 3: -1.1562e-01 -2.9466e-01  7e-01  1e-02  2e-15
 4: -9.2246e-02 -1.6257e-01  7e-02  3e-17  7e-15
 5: -9.2855e-02 -1.0531e-01  1e-02  8e-18  3e-15
 6: -9.3386e-02 -9.5859e-02  2e-03  2e-17  9e-15
 7: -9.3546e-02 -9.4782e-02  1e-03  2e-17  3e-13
 8: -9.3619e-02 -9.3774e-02  2e-04  2e-18  7e-14
 9: -9.3631e-02 -9.3659e-02  3e-05  4e-17  8e-13
10: -9.3632e-02 -9.3639e-02  7e-06  1e-17  4e-12
11: -9.3633e-02 -9.3634e-02  9e-07  2e-17  1e-11
12: -9.3633e-02 -9.3633e-02  2e-08  3e-17  4e-11
Optimal solution found.
Recomputing classes...
Selector differences: 1068
Updating QP...

Iteration 2...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.1650e+01 -3.4624e-01  4e+03  7e+01  3e-14
 1: -1.7600e+00 -3.4178e-01  7e+01  1e+00  3e



 1: -9.7843e-01 -3.2565e-01  4e+01  7e-01  3e-14
 2: -1.4778e-01 -3.0689e-01  2e+00  4e-02  2e-15
 3: -9.4803e-02 -2.2224e-01  2e-01  2e-03  4e-15
 4: -9.0070e-02 -1.4749e-01  7e-02  3e-04  3e-14
 5: -9.1141e-02 -1.0409e-01  1e-02  6e-05  6e-15
 6: -9.2430e-02 -9.6297e-02  4e-03  3e-17  4e-14
 7: -9.3155e-02 -9.4423e-02  1e-03  8e-17  1e-13
 8: -9.3442e-02 -9.3725e-02  3e-04  4e-17  4e-13
 9: -9.3518e-02 -9.3633e-02  1e-04  1e-17  2e-12
10: -9.3552e-02 -9.3595e-02  4e-05  5e-17  2e-12
11: -9.3568e-02 -9.3585e-02  2e-05  6e-17  7e-12
12: -9.3576e-02 -9.3580e-02  3e-06  2e-17  9e-12
13: -9.3578e-02 -9.3579e-02  8e-07  6e-18  1e-10
14: -9.3579e-02 -9.3579e-02  2e-07  8e-18  7e-11
15: -9.3579e-02 -9.3579e-02  3e-08  8e-18  4e-10
Optimal solution found.
Recomputing classes...
Selector differences: 9
Updating QP...

Iteration 8...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.0832e+01 -3.2814e-01  4e+03  7e+01  3e-14
 1: -1.0324e+00 -3.2773e-01  4e+01  8e-01  2e-14
 




Iteration 10...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.2549e+01 -3.2350e-01  4e+03  7e+01  3e-14
 1: -1.0997e+00 -3.2308e-01  4e+01  8e-01  3e-14
 2: -1.5100e-01 -3.0663e-01  2e+00  4e-02  2e-15
 3: -9.5158e-02 -2.2446e-01  2e-01  2e-03  3e-15
 4: -9.0726e-02 -1.5693e-01  9e-02  7e-04  2e-14
 5: -9.0472e-02 -1.0588e-01  2e-02  6e-18  5e-15
 6: -9.2196e-02 -9.7121e-02  5e-03  3e-17  2e-14
 7: -9.3036e-02 -9.4776e-02  2e-03  4e-17  9e-14
 8: -9.3381e-02 -9.3822e-02  4e-04  3e-17  3e-13
 9: -9.3445e-02 -9.3736e-02  3e-04  3e-17  5e-13
10: -9.3528e-02 -9.3613e-02  9e-05  2e-17  9e-13
11: -9.3559e-02 -9.3588e-02  3e-05  2e-18  5e-12
12: -9.3570e-02 -9.3580e-02  1e-05  1e-17  4e-12
13: -9.3574e-02 -9.3578e-02  4e-06  2e-17  1e-11
14: -9.3576e-02 -9.3577e-02  7e-07  2e-18  3e-11
15: -9.3577e-02 -9.3577e-02  1e-07  3e-17  1e-10
16: -9.3577e-02 -9.3577e-02  4e-08  5e-17  9e-10
Optimal solution found.
Recomputing classes...
Selector differences: 15
Updating QP...

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -9.0553e+01 -4.5905e-01  4e+03  7e+01  1e-14
 1: -4.0304e+00 -4.1761e-01  2e+02  3e+00  1e-14
 2: -4.5943e-01 -3.1808e-01  1e+01  3e-01  2e-15
 3: -1.1353e-01 -2.8906e-01  7e-01  9e-03  2e-15
 4: -9.1983e-02 -1.5885e-01  7e-02  3e-05  7e-15
 5: -9.2475e-02 -1.0779e-01  2e-02  3e-07  3e-15
 6: -9.3088e-02 -9.5458e-02  2e-03  4e-08  6e-15
 7: -9.3266e-02 -9.3916e-02  6e-04  5e-17  6e-14
 8: -9.3313e-02 -9.3403e-02  9e-05  4e-17  2e-13
 9: -9.3320e-02 -9.3334e-02  1e-05  4e-17  1e-12
10: -9.3321e-02 -9.3326e-02  5e-06  2e-17  1e-11
11: -9.3321e-02 -9.3322e-02  6e-07  4e-17  1e-11
12: -9.3321e-02 -9.3321e-02  1e-08  4e-17  7e-11
Optimal solution found.
Recomputing classes...
Selector differences: 1093
Updating QP...

Iteration 2...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.3891e+01 -3.2809e-01  4e+03  7e+01  4e-14
 1: -1.6279e+00 -3.2548e-01  7e+01  1e+00  3e

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -9.1085e+01 -4.6640e-01  4e+03  7e+01  1e-14
 1: -4.0247e+00 -4.2284e-01  2e+02  3e+00  1e-14
 2: -4.9984e-01 -3.1496e-01  2e+01  3e-01  2e-15
 3: -1.1432e-01 -2.8922e-01  7e-01  1e-02  2e-15
 4: -9.1643e-02 -1.6060e-01  7e-02  5e-17  7e-15
 5: -9.2234e-02 -1.0400e-01  1e-02  5e-17  3e-15
 6: -9.2751e-02 -9.5165e-02  2e-03  1e-17  8e-15
 7: -9.2898e-02 -9.4114e-02  1e-03  3e-17  2e-13
 8: -9.2967e-02 -9.3129e-02  2e-04  2e-17  8e-14
 9: -9.2979e-02 -9.3001e-02  2e-05  4e-18  7e-13
10: -9.2980e-02 -9.2985e-02  5e-06  3e-17  6e-12
11: -9.2981e-02 -9.2982e-02  2e-06  2e-18  5e-11
12: -9.2981e-02 -9.2981e-02  1e-07  3e-17  4e-11
Optimal solution found.
Recomputing classes...
Selector differences: 1097
Updating QP...

Iteration 2...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -8.4100e+01 -3.7197e-01  4e+03  7e+01  4e-14
 1: -1.4317e+00 -3.4147e-01  6e+01  1e+00  3e



Recomputing classes...
Selector differences: 2
Updating QP...

Iteration 10...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -9.2780e-02 -9.2780e-02  7e-09  3e-17  3e-02
MISVM mutagenesis-chains
[ 1  1  1  1  1  1  1  1  1  1  1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  1  1  1  1  1  1  1  1  1  1  1  1  1]
[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1.]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -1.3265e+03 -2.3628e+00  3e+04  2e+02  5e-13
 1: -1.6253e+01 -2.3606e+00  4e+02  2e+00  5e-13
 2: -1.2110e+00 -2.2931e+00  2e+01  1e-01  2e-14
 3: -4.7385e-01 -1.8707e+00  2e+00  4e-03  2e-15
 4: -4.3942e-01 -6.6635e-01  2e-01  4e-04  2e-15
 5: -4.4023e-01 -5.2741e-01  9e-02  7e-05  2e-15
 6: -4.4446e-01 -4.6988e-01  3e-02  2e-05  2e-15
 7: -4.4588e-01 -4.5307e-01  7e-03  3e-06  2e-15
 8: -4.4598e-01 -4.5214e-01  6e-03  2e-06  1e-15
 9: -4.4639e-01 -4.4897e-01  3e-03  6e-07  1e-15
10: -4.4660e-01 -4.4747e-01  9e-04  1e-07  2e-15
11: -4.4667e-01 -4.4709e-01  4e-04  6e-08  1e-15
12: -4.4671e-01 -4.4687e-01  2e-04  1e-08  2e-15
13: -4.4672e-01 -4.4678e-01  6e-05  4e-09  2e-15
14: -4.4673e-01 -4.4675e-01  2e-05  9e-10  2e-15
15: -4.4673e-01 -4.4674e-01  7e-06  2e-16  2e-15
16: -4.4673e-01 -4.4674e-01  3e-06  7e-17  2e-15
17: -4.4673e-01 -4.4673e-01  5e-07  1e-17  2e-15
18: -4.4673e-01 -4

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -1.4073e+03 -2.3406e+00  3e+04  2e+02  5e-13
 1: -1.5964e+01 -2.3395e+00  3e+02  2e+00  5e-13
 2: -1.0775e+00 -2.2877e+00  1e+01  7e-02  2e-14
 3: -4.8064e-01 -1.7695e+00  2e+00  2e-03  2e-15
 4: -4.6035e-01 -7.2431e-01  3e-01  4e-04  2e-15
 5: -4.5860e-01 -5.2776e-01  7e-02  4e-05  2e-15
 6: -4.6285e-01 -4.7369e-01  1e-02  3e-16  2e-15
 7: -4.6395e-01 -4.6722e-01  3e-03  5e-16  2e-15
 8: -4.6423e-01 -4.6569e-01  1e-03  5e-17  1e-15
 9: -4.6441e-01 -4.6468e-01  3e-04  8e-16  2e-15
10: -4.6444e-01 -4.6455e-01  1e-04  4e-16  1e-15
11: -4.6445e-01 -4.6449e-01  4e-05  6e-17  2e-15
12: -4.6446e-01 -4.6449e-01  3e-05  6e-16  1e-15
13: -4.6446e-01 -4.6447e-01  1e-05  3e-16  2e-15
14: -4.6446e-01 -4.6446e-01  3e-06  2e-17  2e-15
15: -4.6446e-01 -4.6446e-01  1e-06  1e-16  2e-15
16: -4.6446e-01 -4.6446e-01  4e-08  4e-16  2e-15
Optimal solution found.
Recomputing classes...
Class Changes: 7

It

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -1.3127e+03 -2.4191e+00  3e+04  2e+02  5e-13
 1: -1.4672e+01 -2.4179e+00  4e+02  2e+00  5e-13
 2: -1.0945e+00 -2.3603e+00  2e+01  8e-02  2e-14
 3: -4.6507e-01 -1.8662e+00  2e+00  2e-03  2e-15
 4: -4.4433e-01 -7.0543e-01  3e-01  4e-04  2e-15
 5: -4.4592e-01 -5.1372e-01  7e-02  7e-05  2e-15
 6: -4.4610e-01 -4.8410e-01  4e-02  1e-05  2e-15
 7: -4.4905e-01 -4.5651e-01  8e-03  3e-06  2e-15
 8: -4.4954e-01 -4.5222e-01  3e-03  4e-08  2e-15
 9: -4.4983e-01 -4.5052e-01  7e-04  1e-08  2e-15
10: -4.4990e-01 -4.5021e-01  3e-04  2e-09  1e-15
11: -4.4993e-01 -4.5005e-01  1e-04  8e-10  1e-15
12: -4.4993e-01 -4.5005e-01  1e-04  7e-10  1e-15
13: -4.4993e-01 -4.5003e-01  1e-04  4e-10  1e-15
14: -4.4994e-01 -4.4998e-01  4e-05  1e-10  1e-15
15: -4.4995e-01 -4.4997e-01  2e-05  6e-11  1e-15
16: -4.4995e-01 -4.4997e-01  2e-05  5e-11  1e-15
17: -4.4995e-01 -4.4997e-01  2e-05  4e-11  1e-15
18: -4.4995e-01 -4

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -1.3612e+03 -2.3180e+00  3e+04  2e+02  6e-13
 1: -1.5396e+01 -2.3168e+00  3e+02  2e+00  6e-13
 2: -1.1351e+00 -2.2623e+00  2e+01  8e-02  3e-14
 3: -4.7635e-01 -1.7869e+00  2e+00  2e-03  2e-15
 4: -4.5536e-01 -6.9812e-01  3e-01  3e-04  2e-15
 5: -4.5381e-01 -5.1565e-01  6e-02  3e-05  2e-15
 6: -4.5835e-01 -4.6862e-01  1e-02  2e-06  2e-15
 7: -4.5907e-01 -4.6342e-01  4e-03  3e-07  2e-15
 8: -4.5948e-01 -4.6073e-01  1e-03  7e-08  2e-15
 9: -4.5963e-01 -4.5987e-01  2e-04  2e-09  2e-15
10: -4.5966e-01 -4.5973e-01  8e-05  3e-10  2e-15
11: -4.5967e-01 -4.5971e-01  4e-05  1e-10  1e-15
12: -4.5967e-01 -4.5968e-01  9e-06  2e-11  2e-15
13: -4.5967e-01 -4.5967e-01  2e-06  9e-13  2e-15
14: -4.5967e-01 -4.5967e-01  5e-07  7e-16  2e-15
15: -4.5967e-01 -4.5967e-01  4e-08  7e-16  2e-15
Optimal solution found.
Recomputing classes...
Class Changes: 4

Iteration 2...
Training SVM...
     pcost       dco

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...

Iteration 1...
Training SVM...
     pcost       dcost       gap    pres   dres
 0: -1.3507e+03 -2.4325e+00  3e+04  2e+02  5e-13
 1: -1.4840e+01 -2.4316e+00  4e+02  2e+00  6e-13
 2: -1.1149e+00 -2.3786e+00  2e+01  8e-02  3e-14
 3: -4.7487e-01 -1.8850e+00  2e+00  2e-03  2e-15
 4: -4.5492e-01 -7.4111e-01  3e-01  4e-04  2e-15
 5: -4.5330e-01 -5.3253e-01  8e-02  3e-05  2e-15
 6: -4.5853e-01 -4.7831e-01  2e-02  3e-06  2e-15
 7: -4.6048e-01 -4.6366e-01  3e-03  3e-07  2e-15
 8: -4.6083e-01 -4.6164e-01  8e-04  3e-08  2e-15
 9: -4.6092e-01 -4.6117e-01  2e-04  7e-09  2e-15
10: -4.6095e-01 -4.6107e-01  1e-04  3e-09  1e-15
11: -4.6096e-01 -4.6101e-01  5e-05  4e-10  1e-15
12: -4.6096e-01 -4.6101e-01  5e-05  4e-10  1e-15
13: -4.6096e-01 -4.6100e-01  4e-05  1e-10  1e-15
14: -4.6097e-01 -4.6099e-01  2e-05  7e-11  1e-15
15: -4.6097e-01 -4.6098e-01  1e-05  4e-11  1e-15
16: -4.6097e-01 -4.6098e-01  7e-06  2e-11  1e-15
17: -4.6097e-01 -4.6097e-01  5e-06  7e-12  1e-15
18: -4.6097e-01 -4

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Non-random start...
     pcost       dcost       gap    pres   dres
 0: -3.7499e+03 -4.4224e+01  5e+04  2e+02  6e-13
 1: -5.6813e+01 -4.3857e+01  6e+02  3e+00  7e-13
 2: -1.7957e+01 -3.4025e+01  6e+01  2e-01  5e-14
 3: -1.6321e+01 -2.6918e+01  2e+01  7e-02  2e-14
 4: -1.5954e+01 -2.4399e+01  1e+01  4e-02  1e-14
 5: -1.6082e+01 -2.1097e+01  7e+00  2e-02  6e-15
 6: -1.6557e+01 -1.7812e+01  2e+00  4e-03  5e-15
 7: -1.6614e+01 -1.7474e+01  1e+00  2e-03  4e-15
 8: -1.6702e+01 -1.7122e+01  5e-01  7e-04  4e-15
 9: -1.6724e+01 -1.7034e+01  3e-01  3e-04  4e-15
10: -1.6760e+01 -1.6945e+01  2e-01  2e-04  4e-15
11: -1.6764e+01 -1.6929e+01  2e-01  9e-05  4e-15
12: -1.6779e+01 -1.6895e+01  1e-01  3e-05  5e-15
13: -1.6786e+01 -1.6883e+01  1e-01  2e-05  4e-15
14: -1.6796e+01 -1.6864e+01  7e-02  7e-06  5e-15
15: -1.6800e+01 -1.6856e+01  6e-02  5e-06  4e-15
16: -1.6802e+01 -1.6853e+01  5e-02  2e-06  4e-15
17: -1.6811e+01 -1.6840e+01  3e-02  1e-06  4e-15
18: -1.6811e+01 -1.6838e+01  3e-02  1e-06  4e-15
1

13: -1.2941e+01 -1.2959e+01  2e-02  6e-06  9e-15
14: -1.2942e+01 -1.2957e+01  2e-02  4e-06  9e-15
15: -1.2943e+01 -1.2952e+01  1e-02  2e-06  9e-15
16: -1.2943e+01 -1.2952e+01  1e-02  2e-06  9e-15
17: -1.2944e+01 -1.2949e+01  7e-03  1e-06  1e-14
18: -1.2944e+01 -1.2949e+01  7e-03  9e-07  1e-14
19: -1.2944e+01 -1.2948e+01  5e-03  4e-07  1e-14
20: -1.2944e+01 -1.2947e+01  4e-03  2e-07  2e-14
21: -1.2944e+01 -1.2947e+01  4e-03  2e-07  2e-14
22: -1.2944e+01 -1.2946e+01  3e-03  8e-08  3e-14
23: -1.2944e+01 -1.2946e+01  2e-03  4e-08  4e-14
24: -1.2945e+01 -1.2946e+01  2e-03  3e-08  5e-14
25: -1.2945e+01 -1.2946e+01  1e-03  2e-08  7e-14
26: -1.2945e+01 -1.2945e+01  6e-04  7e-09  8e-14
27: -1.2945e+01 -1.2945e+01  6e-04  6e-09  1e-13
28: -1.2945e+01 -1.2945e+01  3e-04  3e-09  9e-14
29: -1.2945e+01 -1.2945e+01  3e-04  3e-09  1e-13
30: -1.2945e+01 -1.2945e+01  2e-04  2e-09  1e-13
31: -1.2945e+01 -1.2945e+01  4e-05  2e-10  1e-13
32: -1.2945e+01 -1.2945e+01  2e-05  8e-11  1e-13
33: -1.2945e+01 -1.2

23: -1.2729e+01 -1.2730e+01  3e-04  1e-08  3e-14
24: -1.2729e+01 -1.2730e+01  2e-04  7e-09  3e-14
25: -1.2729e+01 -1.2730e+01  2e-04  4e-09  4e-14
26: -1.2729e+01 -1.2730e+01  2e-04  2e-09  6e-14
27: -1.2730e+01 -1.2730e+01  1e-04  2e-09  8e-14
28: -1.2730e+01 -1.2730e+01  3e-05  2e-11  1e-13
29: -1.2730e+01 -1.2730e+01  2e-05  9e-12  3e-13
30: -1.2730e+01 -1.2730e+01  1e-05  4e-12  1e-12
31: -1.2730e+01 -1.2730e+01  1e-05  2e-12  2e-12
Optimal solution found.
delta obj ratio: 5.37e+04

Iteration 13...
Linearizing constraints...
Computing slacks...
Linearizing...
Solving QP...
     pcost       dcost       gap    pres   dres
 0: -3.7502e+03 -4.3768e+01  5e+04  2e+02  6e-13
 1: -5.7390e+01 -4.3388e+01  6e+02  3e+00  6e-13
 2: -1.7587e+01 -3.3109e+01  6e+01  2e-01  5e-14
 3: -1.4125e+01 -2.1482e+01  2e+01  5e-02  2e-14
 4: -1.3192e+01 -1.4939e+01  3e+00  7e-03  2e-14
 5: -1.3068e+01 -1.3796e+01  1e+00  2e-03  2e-14
 6: -1.3082e+01 -1.3572e+01  7e-01  9e-04  1e-14
 7: -1.3103e+01 -1.3238e+

 7: -1.2756e+01 -1.2757e+01  2e-03  1e-06  7e-15
 8: -1.2756e+01 -1.2756e+01  3e-05  2e-08  7e-15
 9: -1.2756e+01 -1.2756e+01  1e-06  2e-10  7e-15
Optimal solution found.
delta obj ratio: 3.61e+05

Iteration 20...
Linearizing constraints...
Computing slacks...
Linearizing...
Solving QP...
     pcost       dcost       gap    pres   dres
 0: -3.7499e+03 -4.4010e+01  5e+04  2e+02  7e-13
 1: -5.8298e+01 -4.3627e+01  6e+02  3e+00  6e-13
 2: -1.7851e+01 -3.3352e+01  7e+01  3e-01  6e-14
 3: -1.3769e+01 -2.1071e+01  1e+01  4e-02  1e-14
 4: -1.2899e+01 -1.4696e+01  3e+00  7e-03  8e-15
 5: -1.2788e+01 -1.3311e+01  7e-01  2e-03  8e-15
 6: -1.2776e+01 -1.2958e+01  2e-01  5e-04  7e-15
 7: -1.2776e+01 -1.2875e+01  1e-01  2e-04  6e-15
 8: -1.2777e+01 -1.2829e+01  7e-02  8e-05  7e-15
 9: -1.2778e+01 -1.2788e+01  1e-02  1e-05  8e-15
10: -1.2779e+01 -1.2781e+01  3e-03  1e-06  8e-15
11: -1.2779e+01 -1.2779e+01  2e-04  7e-08  8e-15
12: -1.2779e+01 -1.2779e+01  3e-06  7e-10  8e-15
Optimal solution found.
d



MissSVM mutagenesis-chains
[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
[ 1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1.  1. -1. -1.
 -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1.
 -1. -1.]
Non-random start...
     pcost       dcost       gap    pres   dres
 0: -3.8137e+03 -4.8302e+01  5e+04  2e+02  5e-13
 1: -5.8557e+01 -4.7887e+01  6e+02  3e+00  5e-13
 2: -1.9149e+01 -3.7134e+01  7e+01  2e-01  4e-14
 3: -1.7092e+01 -2.6762e+01  2e+01  4e-02  9e-15
 4: -1.6825e+01 -2.6073e+01  1e+01  3e-02  7e-15
 5: -1.6927e+01 -2.4106e+01  1e+01  2e-02  5e-15
 6: -1.7304e+01 -2.1266e+01  5e+00  7e-03  4e-15
 7: -1.7842e+01 -1.9203e+01  2e+00  2e-03  5e-15
 8: -1.8001e+01 -1.8570e+01  6e-01  6e-04  5e-15
 9: -1.8071e+01 -1.8353e+01  3e-01  7e-05  5e-15
10: -1.8125e+01 -1.8249e+01  1e-01  2e-05  5e-15
11: -1.8149e+01 -1.8210e+01  6e-02  8e-06  5e-15
12: -1.8155e+01 -1.8200e+01  5e-02  4e-06  5

 1: -5.9287e+01 -4.8111e+01  6e+02  3e+00  5e-13
 2: -1.9407e+01 -3.6537e+01  7e+01  2e-01  4e-14
 3: -1.7232e+01 -3.0059e+01  4e+01  1e-01  2e-14
 4: -1.6334e+01 -2.6305e+01  3e+01  8e-02  1e-14
 5: -1.6003e+01 -2.1802e+01  1e+01  3e-02  9e-15
 6: -1.6235e+01 -1.8090e+01  3e+00  8e-03  8e-15
 7: -1.6342e+01 -1.7203e+01  1e+00  3e-03  7e-15
 8: -1.6333e+01 -1.7078e+01  1e+00  2e-03  7e-15
 9: -1.6364e+01 -1.6875e+01  7e-01  4e-04  8e-15
10: -1.6440e+01 -1.6671e+01  3e-01  2e-04  8e-15
11: -1.6477e+01 -1.6576e+01  1e-01  2e-05  9e-15
12: -1.6496e+01 -1.6533e+01  4e-02  6e-06  2e-14
13: -1.6501e+01 -1.6522e+01  3e-02  3e-06  1e-14
14: -1.6503e+01 -1.6519e+01  2e-02  2e-06  1e-14
15: -1.6505e+01 -1.6514e+01  1e-02  2e-07  2e-14
16: -1.6506e+01 -1.6511e+01  6e-03  6e-08  6e-14
17: -1.6507e+01 -1.6511e+01  5e-03  4e-08  2e-13
18: -1.6507e+01 -1.6510e+01  4e-03  2e-08  2e-13
19: -1.6508e+01 -1.6509e+01  2e-03  9e-09  1e-13
20: -1.6508e+01 -1.6509e+01  8e-04  3e-09  2e-13
21: -1.6508e+01 -1.6

delta obj ratio: 6.32e+05

Iteration 12...
Linearizing constraints...
Computing slacks...
Linearizing...
Solving QP...
     pcost       dcost       gap    pres   dres
 0: -3.8129e+03 -4.8148e+01  5e+04  2e+02  5e-13
 1: -5.9859e+01 -4.7721e+01  7e+02  3e+00  5e-13
 2: -1.9282e+01 -3.6709e+01  8e+01  3e-01  5e-14
 3: -1.6907e+01 -2.8328e+01  3e+01  1e-01  2e-14
 4: -1.5721e+01 -1.9978e+01  7e+00  2e-02  7e-15
 5: -1.5689e+01 -1.6875e+01  2e+00  3e-03  6e-15
 6: -1.5717e+01 -1.6364e+01  9e-01  1e-03  5e-15
 7: -1.5748e+01 -1.6084e+01  5e-01  7e-04  6e-15
 8: -1.5765e+01 -1.5971e+01  3e-01  3e-04  6e-15
 9: -1.5776e+01 -1.5905e+01  2e-01  2e-04  6e-15
10: -1.5788e+01 -1.5845e+01  8e-02  6e-05  6e-15
11: -1.5793e+01 -1.5824e+01  4e-02  2e-05  6e-15
12: -1.5797e+01 -1.5806e+01  1e-02  7e-06  6e-15
13: -1.5798e+01 -1.5801e+01  3e-03  1e-06  7e-15
14: -1.5799e+01 -1.5799e+01  7e-04  2e-07  6e-15
15: -1.5799e+01 -1.5799e+01  3e-04  5e-08  6e-15
16: -1.5799e+01 -1.5799e+01  3e-05  1e-09  8e-15


 3: -1.4917e+01 -2.3683e+01  2e+01  4e-02  2e-14
 4: -1.4162e+01 -1.5791e+01  2e+00  5e-03  2e-14
 5: -1.4145e+01 -1.4916e+01  1e+00  2e-03  1e-14
 6: -1.4152e+01 -1.4397e+01  3e-01  5e-04  2e-14
 7: -1.4158e+01 -1.4339e+01  2e-01  3e-04  1e-14
 8: -1.4168e+01 -1.4249e+01  1e-01  8e-05  2e-14
 9: -1.4174e+01 -1.4214e+01  5e-02  2e-05  2e-14
10: -1.4178e+01 -1.4197e+01  2e-02  1e-05  2e-14
11: -1.4180e+01 -1.4187e+01  8e-03  2e-06  2e-14
12: -1.4181e+01 -1.4183e+01  3e-03  4e-07  2e-14
13: -1.4181e+01 -1.4182e+01  2e-03  1e-07  2e-14
14: -1.4181e+01 -1.4182e+01  1e-03  8e-08  2e-14
15: -1.4181e+01 -1.4182e+01  9e-04  6e-08  2e-14
16: -1.4181e+01 -1.4182e+01  8e-04  5e-08  2e-14
17: -1.4181e+01 -1.4182e+01  5e-04  1e-08  3e-14
18: -1.4181e+01 -1.4182e+01  2e-04  5e-09  5e-14
19: -1.4181e+01 -1.4182e+01  1e-04  3e-09  5e-14
20: -1.4181e+01 -1.4182e+01  1e-04  2e-09  5e-14
21: -1.4181e+01 -1.4182e+01  1e-04  2e-09  7e-14
22: -1.4181e+01 -1.4182e+01  6e-05  8e-10  6e-14
23: -1.4181e+01 -1.4



MissSVM mutagenesis-chains
[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
[-1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1.]
Non-random start...
     pcost       dcost       gap    pres   dres
 0: -3.7273e+03 -4.6273e+01  5e+04  2e+02  6e-13
 1: -5.7930e+01 -4.5890e+01  6e+02  3e+00  6e-13
 2: -1.8534e+01 -3.5753e+01  7e+01  3e-01  5e-14
 3: -1.6318e+01 -2.7422e+01  3e+01  8e-02  2e-14
 4: -1.5995e+01 -2.5434e+01  2e+01  5e-02  1e-14
 5: -1.5897e+01 -2.3177e+01  1e+01  3e-02  7e-15
 6: -1.6328e+01 -1.9264e+01  4e+00  8e-03  5e-15
 7: -1.6676e+01 -1.7789e+01  1e+00  2e-03  5e-15
 8: -1.6787e+01 -1.7379e+01  7e-01  1e-03  5e-15
 9: -1.6844e+01 -1.7205e+01  4e-01  5e-04  4e-15
10: -1.6886e+01 -1.7095e+01  2e-01  2e-04  5e-15
11: -1.6893e+01 -1.7072e+01  2e-01  1e-04  5e-15
12: -1.6903e+01 -1.7051e+01  2e-01  8e-05  4

30: -1.6807e+01 -1.6807e+01  1e-04  7e-11  6e-13
31: -1.6807e+01 -1.6807e+01  8e-05  4e-11  1e-12
32: -1.6807e+01 -1.6807e+01  4e-05  7e-12  2e-12
33: -1.6807e+01 -1.6807e+01  3e-06  5e-13  2e-12
Optimal solution found.
delta obj ratio: 7.34e+05

Iteration 6...
Linearizing constraints...
Computing slacks...
Linearizing...
Solving QP...
     pcost       dcost       gap    pres   dres
 0: -3.7275e+03 -4.6291e+01  5e+04  2e+02  7e-13
 1: -5.7230e+01 -4.5888e+01  7e+02  3e+00  6e-13
 2: -1.8359e+01 -3.5093e+01  8e+01  3e-01  6e-14
 3: -1.4679e+01 -2.4732e+01  2e+01  8e-02  2e-14
 4: -1.3554e+01 -1.6280e+01  5e+00  1e-02  8e-15
 5: -1.3399e+01 -1.4218e+01  1e+00  2e-03  7e-15
 6: -1.3418e+01 -1.3630e+01  3e-01  5e-04  7e-15
 7: -1.3425e+01 -1.3527e+01  1e-01  2e-04  7e-15
 8: -1.3428e+01 -1.3501e+01  1e-01  1e-04  7e-15
 9: -1.3431e+01 -1.3467e+01  5e-02  5e-05  7e-15
10: -1.3432e+01 -1.3450e+01  2e-02  2e-05  6e-15
11: -1.3433e+01 -1.3444e+01  1e-02  1e-05  6e-15
12: -1.3433e+01 -1.3436e+0

 7: -1.2908e+01 -1.2950e+01  5e-02  3e-05  5e-15
 8: -1.2910e+01 -1.2925e+01  2e-02  3e-06  6e-15
 9: -1.2911e+01 -1.2914e+01  3e-03  5e-07  7e-15
10: -1.2911e+01 -1.2911e+01  4e-04  3e-08  6e-15
11: -1.2911e+01 -1.2911e+01  3e-05  2e-09  7e-15
12: -1.2911e+01 -1.2911e+01  4e-07  2e-11  1e-14
Optimal solution found.
delta obj ratio: 5.16e+04

Iteration 15...
Linearizing constraints...
Computing slacks...
Linearizing...
Solving QP...
     pcost       dcost       gap    pres   dres
 0: -3.7271e+03 -4.5922e+01  5e+04  2e+02  6e-13
 1: -5.8080e+01 -4.5523e+01  7e+02  3e+00  5e-13
 2: -1.7778e+01 -3.4683e+01  8e+01  3e-01  5e-14
 3: -1.4040e+01 -2.3352e+01  2e+01  6e-02  2e-14
 4: -1.2819e+01 -1.5036e+01  3e+00  8e-03  1e-14
 5: -1.2751e+01 -1.3445e+01  1e+00  2e-03  1e-14
 6: -1.2756e+01 -1.3137e+01  5e-01  1e-03  1e-14
 7: -1.2765e+01 -1.3030e+01  4e-01  6e-04  1e-14
 8: -1.2784e+01 -1.2897e+01  2e-01  2e-04  1e-14
 9: -1.2788e+01 -1.2881e+01  1e-01  1e-04  1e-14
10: -1.2789e+01 -1.2871e+

# mi-Net

In [90]:
import sys
import time
from random import shuffle
import numpy as np
import argparse

from keras.models import Model
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.layers import Input, Dense, Layer, Dropout

from mil_nets.dataset import load_dataset
from mil_nets.layer import Score_pooling
from mil_nets.metrics import bag_accuracy
from mil_nets.objectives import bag_loss
from mil_nets.utils import convertToBatch

In [93]:
def test_eval(model, test_set):
    """Evaluate on testing set.
    Parameters
    -----------------
    model : keras.engine.training.Model object
        The training mi-Net model.
    test_set : list
        A list of testing set contains all training bags features and labels.
    Returns
    -----------------
    test_loss : float
        Mean loss of evaluating on testing set.
    test_acc : float
        Mean accuracy of evaluating on testing set.
    """
    num_test_batch = len(test_set)
    result = []
    for ibatch, batch in enumerate(test_set):
        predicted = model.predict_on_batch({'input':batch[0].astype(np.float32)})[0]
        act = list(batch[1].astype(np.float32))
        if predicted > 0.5:
            result += [1]
        else:
            result += [-1]
    return result


def train_eval(model, train_set):
    """Run one training pass over all bags and report the mean metrics.

    Parameters
    -----------------
    model : keras.engine.training.Model object
        The mi-Net model being trained.
    train_set : list
        A list of (features, labels) tuples, one per training bag. The list
        is shuffled in place before the pass.
    Returns
    -----------------
    train_loss : float
        Mean of the first value returned by train_on_batch over all bags.
    train_acc : float
        Mean of the second value returned by train_on_batch over all bags.
    """
    n_bags = len(train_set)
    losses = np.zeros((n_bags, 1), dtype=np.float32)
    accuracies = np.zeros((n_bags, 1), dtype=np.float32)

    # Visit the bags in a fresh random order on every call.
    shuffle(train_set)

    for idx, bag in enumerate(train_set):
        inputs = {'input': bag[0].astype(np.float32)}
        targets = {'sp': bag[1].astype(np.float32)}
        metrics = model.train_on_batch(inputs, targets)
        losses[idx] = metrics[0]
        accuracies[idx] = metrics[1]

    return losses.mean(), accuracies.mean()

def _bags_to_batches(bags, labels):
    """Pair every bag with one copy of its bag label per instance."""
    if len(bags) != len(labels):
        raise ValueError(
            "got %d bags but %d labels" % (len(bags), len(labels)))
    return [(bag, np.array([label] * len(bag)))
            for bag, label in zip(bags, labels)]


def mi_Net(X_train, X_test, y_train, y_test, fold, dataset="musk1", *,
           weight_decay=0.005, init_lr=5e-4, pooling_mode='max',
           momentum=0.9, max_epoch=50):
    """Train mi-Net on one train/test split and report its test predictions.

    Parameters
    -----------------
    X_train, X_test : sequence of 2-D arrays
        Training / testing bags. The second dimension of the first training
        bag is used as the network input size.
    y_train, y_test : sequence
        One label per bag, aligned with X_train / X_test.
    fold : int
        Fold identifier, forwarded unchanged to result().
    dataset : str
        Dataset name forwarded to result(). Defaults to "musk1", the value
        that used to be hard-coded.
    weight_decay : float
        L2 regularization factor applied to every layer.
    init_lr : float
        Initial learning rate of the SGD optimizer.
    pooling_mode : str
        Pooling mode handed to Score_pooling.
    momentum : float
        Momentum of the SGD optimizer.
    max_epoch : int
        Number of training passes over the training bags.
    Returns
    -----------------
    None
        The predictions are handed to result() together with y_test, the
        elapsed time (training plus one prediction pass), fold and dataset.
    """
    # NOTE(review): the calling cell passes -1/1 labels; confirm that
    # bag_loss / bag_accuracy accept that encoding rather than 0/1.
    train_set = _bags_to_batches(X_train, y_train)
    # Test batches carry the *test* labels (previously y_train was used here).
    test_set = _bags_to_batches(X_test, y_test)
    dimension = train_set[0][0].shape[1]

    # data: instance feature, n*d, n = number of training instance
    data_input = Input(shape=(dimension,), dtype='float32', name='input')

    # fully-connected
    fc1 = Dense(256, activation='relu', kernel_regularizer=l2(weight_decay))(data_input)
    fc2 = Dense(128, activation='relu', kernel_regularizer=l2(weight_decay))(fc1)
    fc3 = Dense(64, activation='relu', kernel_regularizer=l2(weight_decay))(fc2)

    # dropout
    dropout = Dropout(rate=0.5)(fc3)

    # score pooling
    sp = Score_pooling(output_dim=1, kernel_regularizer=l2(weight_decay), pooling_mode=pooling_mode, name='sp')(dropout)

    model = Model(inputs=[data_input], outputs=[sp])
    # NOTE(review): `lr` / `decay` look like legacy SGD argument names;
    # confirm against the installed Keras version before renaming them.
    sgd = SGD(lr=init_lr, decay=1e-4, momentum=momentum, nesterov=True)
    model.compile(loss=bag_loss, optimizer=sgd, metrics=[bag_accuracy])

    # train model
    start = time.time()
    for epoch in range(max_epoch):
        print("epoch", epoch)
        train_eval(model, train_set)
    # Only the predictions of the final model are reported, so the test set
    # is scored once after training instead of after every epoch.
    predicted = test_eval(model, test_set)
    time_ep = time.time() - start
    result("mi-Net", predicted, y_test, time_ep, fold, dataset)


In [94]:
# Run repeated stratified k-fold cross-validation of mi-Net on every dataset.
for dataset in ['fox', 'mutagenesis-atoms', 'mutagenesis-bonds', 'mutagenesis-chains',
                'eastWest', 'elephant', 'tiger', 'westEast', 'musk1']:
    example_set = parse_c45(dataset)

    # Get stats to normalize data
    raw_data = np.array(example_set.to_float())
    data_mean = np.average(raw_data, axis=0)
    data_std = np.std(raw_data, axis=0)
    # Constant columns have zero deviation; use 1.0 to avoid dividing by zero.
    data_std[data_std == 0.0] = 1.0

    def normalizer(ex):
        """Standardize one example and strip its leading and trailing columns."""
        ex = np.array(ex)
        normed = (ex - data_mean) / data_std
        # musk1 drops two leading columns, every other dataset one; the last
        # column is always dropped. The musk1 slice used to be computed and
        # thrown away, so musk1 silently kept one extra column.
        # NOTE(review): presumably the dropped columns are ids and the class
        # label; confirm against the data files.
        first_feature = 2 if dataset == "musk1" else 1
        return normed[first_feature:-1]

    # Group examples into bags
    bagset = bag_set(example_set)

    # Convert bags to NumPy arrays
    bags = [np.array(b.to_float(normalizer)) for b in bagset]
    labels = np.array([b.label for b in bagset], dtype=float)
    # Convert 0/1 labels to -1/1 labels
    labels = 2 * labels - 1

    # perform `run` repetitions of `n_folds`-fold cross-validation
    run = 5
    n_folds = 5

    labels = np.array(labels, dtype=int)

    bags = np.array(bags, dtype=object)

    for irun in range(run):
        # NOTE(review): with shuffle=False every run uses the same splits;
        # confirm whether the repetitions were meant to reshuffle the bags.
        fold = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
        for splittt, (train_index, test_index) in enumerate(fold.split(bags, labels), start=1):
            X_train, X_test = bags[train_index], bags[test_index]
            y_train, y_test = labels[train_index], labels[test_index]
            print('run=', irun, '  fold=', splittt)
            # NOTE(review): mi_Net is not told which dataset this is, so its
            # results are reported under its built-in "musk1" label.
            mi_Net(X_train, X_test, y_train, y_test, splittt)

run= 0   fold= 1
epoch 0


  super().__init__(name, **kwargs)


epoch 1
epoch 2


KeyboardInterrupt: 