In [1]:
import pandas as pd
import numpy as np
from sklearn import tree, metrics
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import mode
import math
import itertools

import os

import GPyOpt
import numpy as np
import matplotlib.pyplot as plt
import torch
import numpy as np
from sklearn.metrics import roc_auc_score
from utils import plot_prototypes
from model import ShapeletGenerator, pairwise_dist
from mil import get_data
#from prototype_forest import PrototypeForest
import matplotlib.pyplot as plt
from pandas import DataFrame
import time
from os import listdir
from os.path import isfile, join
import os

In [2]:
class Node:
    def __init__(self):

        self.right = None
        self.left = None
        
        self.prototype = None
        
        self.column = None
        self.threshold = None
        
        self.probas = None
        self.depth = None
        
        self.is_terminal = False

class PrototypeTreeClassifier:
    def __init__(self, 
                 train_features,
                 feature_types = ["min", "mean", "max"], 
                 max_depth = 3, 
                 min_samples_leaf = 1, 
                 min_samples_split = 2, 
                 prototype_count = 1,
                 use_prototype_learner = True,
                 early_stopping_round = 10):
        
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.min_samples_split = min_samples_split
        self.prototype_count = prototype_count
        self.feature_types = feature_types
        self.use_prototype_learner = use_prototype_learner
        self.Tree = None
        self.train_features = train_features
        self.early_stopping_round = early_stopping_round
        
        
    def prototype(self, bags, features, labels, prototype_count):
        if self.use_prototype_learner:
            prototypes = find_prototype(bags, features, labels, self.early_stopping_round)
            check = prototypes.cpu().detach().numpy()

            check.resize(check.shape[1], check.shape[2])
            
            return check
        
        else:
            number_of_rows = self.train_features.shape[0]
            random_indices = np.random.choice(number_of_rows, 
                                              size=prototype_count, 
                                              replace=False)
            
            prot = self.train_features[random_indices, :]
            if len(prot.shape) == 1:
                prot = prot.reshape(1, prot.shape[0])
            return prot
            
    def nodeProbas(self, y):
        # for each unique label calculate the probability for it
        probas = []

        for one_class in self.classes:
            proba = y[y == one_class].shape[0] / y.shape[0]
            probas.append(proba)
        return np.asarray(probas)

    def features_via_prototype(self, feature_types, features, bag_ids, prototypes):
        distances = self.calculate_distances(features, prototypes)

        bin_count  = np.unique(bag_ids, return_counts=True)[1]
        ids, index  = np.unique(bag_ids, return_index=True)
        feature_list = []

        for i in range(0, prototypes.shape[0]):
            if "max" in feature_types:
                group_max = np.maximum.reduceat(distances[:, i], index)
                max_vals = np.repeat(group_max, bin_count)
                feature_list.append(max_vals)

            if "min" in feature_types:
                group_min = np.minimum.reduceat(distances[:, i], index)
                min_vals = np.repeat(group_min, bin_count)
                feature_list.append(min_vals)

            if "mean" in feature_types:
                group_sum = np.add.reduceat(distances[:, i], index)
                group_mean = group_sum/bin_count
                mean_vals = np.repeat(group_mean, bin_count)
                feature_list.append(mean_vals)

        return np.array(np.transpose(feature_list))
    
    def dist1d(self, features, prototypes, distance_type="l2"):
        if distance_type == "l2":
            distance = np.linalg.norm(features - prototypes, axis=1)
        elif distance_type == "l1":
            distance = np.abs(features - prototypes)
            distance = np.sum(distance, axis=1)
        
        return distance

    def calculate_distances(self, features, prototypes):
        feature_list = []
        for i in range(0, prototypes.shape[0]):
            data = self.dist1d(features, prototypes[i], distance_type="l2")
            feature_list.append(data)
        data = np.column_stack(feature_list)

        return data

    def calcBestSplit(self, features, features_updated, labels, bag_ids):
        bdc = tree.DecisionTreeClassifier(random_state=0, 
                                  max_depth=1, 
                                  criterion="entropy",
                                  min_samples_split=2)
        bdc.fit(features_updated, labels.flatten())
        
        threshold = bdc.tree_.threshold[0]
        split_col = bdc.tree_.feature[0]

        features_left = features[features_updated[:,split_col] <= bdc.tree_.threshold[0]]
        features_right = features[features_updated[:,split_col] > bdc.tree_.threshold[0]]
        
        labels_left = labels[features_updated[:,split_col] <= bdc.tree_.threshold[0]]
        labels_right = labels[features_updated[:,split_col] > bdc.tree_.threshold[0]]

        bag_ids_left = bag_ids[features_updated[:,split_col] <= bdc.tree_.threshold[0]]
        bag_ids_right = bag_ids[features_updated[:,split_col] > bdc.tree_.threshold[0]]

        return split_col, threshold, features_left, features_right, labels_left, labels_right, bag_ids_left, bag_ids_right
    
    def buildDT(self, features, labels, bag_ids, node):
            '''
            Recursively builds decision tree from the top to bottom
            '''
            # checking for the terminal conditions

            if node.depth >= self.max_depth:
                node.is_terminal = True
                return

            if features.shape[0] < self.min_samples_split:
                node.is_terminal = True
                return

            if np.unique(labels).shape[0] == 1:
                node.is_terminal = True
                return
            
            node.prototype = self.prototype(bag_ids, features, labels, self.prototype_count)
            features_updated = self.features_via_prototype(self.feature_types, features, bag_ids, node.prototype)
            
            # calculating current split
            (splitCol, 
             thresh, 
             features_left, 
             features_right, 
             labels_left, 
             labels_right, 
             bag_ids_left, 
             bag_ids_right) = self.calcBestSplit(features, 
                                                 features_updated, 
                                                 labels, 
                                                 bag_ids)

            if splitCol is None:
                node.is_terminal = True
                return
            
            if features_left.shape[0] < self.min_samples_leaf or features_right.shape[0] < self.min_samples_leaf:
                node.is_terminal = True
                return

            node.column = splitCol
            node.threshold = thresh

            # creating left and right child nodes
            node.left = Node()
            node.left.depth = node.depth + 1
            node.left.probas = self.nodeProbas(labels_left)

            node.right = Node()
            node.right.depth = node.depth + 1
            node.right.probas = self.nodeProbas(labels_right)

            # splitting recursevely

            self.buildDT(features_right, labels_right, bag_ids_right, node.right)
            self.buildDT(features_left, labels_left, bag_ids_left, node.left)
            
    def fit(self, features, labels, bag_ids):
        '''
        Standard fit function to run all the model training
        '''
        self.classes = np.unique(labels)
        
        self.Tree = Node()
        self.Tree.depth = 1
        
        self.buildDT(features, labels, bag_ids, self.Tree)
    
    def predictSample(self, features, bag_ids, node):
        '''
        Passes one object through decision tree and return the probability of it to belong to each class
        '''
       
        # if we have reached the terminal node of the tree
        if node.is_terminal:
            return node.probas
        
        features_updated = self.features_via_prototype(self.feature_types, features, bag_ids, node.prototype)

        if features_updated[0][node.column] > node.threshold:
            probas = self.predictSample(features, bag_ids, node.right)
        else:
            probas = self.predictSample(features, bag_ids, node.left)
            
        return probas
    
    def predict(self, features, bag_ids):
        '''
        Returns the labels for each X
        '''
        if type(features) == pd.DataFrame:
            X = np.asarray(features)
                
        sort_index = np.argsort(bag_ids)
        bag_ids = bag_ids[sort_index]
        features = features[sort_index]
    
        features_updated = self.features_via_prototype(self.feature_types, features, bag_ids, self.Tree.prototype)
        
        index  = np.unique(bag_ids, return_index=True)[1]
        count  = np.unique(bag_ids, return_counts=True)[1]
        index = np.append(index, bag_ids.shape[0])   
        predictions = []
        
        for i in range(0, len(index) - 1):
            pred = np.argmax(self.predictSample(features[index[i]:index[i+1]], 
                                                bag_ids[index[i]:index[i+1]], 
                                                self.Tree))
            pred = np.repeat(pred, count[i])
            predictions = np.concatenate((predictions, pred), axis=0)
        
        return np.asarray(predictions)       
        
class PrototypeForest:
    def __init__(self, size,
                feature_types = ["min", "mean", "max"], 
                max_depth = 8, min_samples_leaf = 2, min_samples_split = 2, stratified = True, sample_rate = 0.8,
                prototype_count = 1,
                use_prototype_learner = True,
                early_stopping_round = 10):
        self.size = size
        self._trees = []
        self._tuning_trees = []
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.min_samples_split = min_samples_split
        self.stratified = stratified
        self.sample_rate = sample_rate
        self.prototype_count = prototype_count
        self.use_prototype_learner = use_prototype_learner
        self.early_stopping_round = early_stopping_round
        
    def sample(self, features, labels, bag_ids, stratified, sample_rate):
        if stratified:
            ids, index  = np.unique(bag_ids, return_index=True)
            group_min = np.minimum.reduceat(labels, index)

            pos_bag_size = math.ceil(np.where(group_min == 1)[0].shape[0] * 0.8)
            neg_bag_size = math.ceil(np.where(group_min == 0)[0].shape[0] * 0.8)

            bags_pos = np.random.choice(np.where(group_min == 1)[0], pos_bag_size, replace=False)
            bags_neg = np.random.choice(np.where(group_min == 0)[0], neg_bag_size, replace=False)
            
            df = pd.DataFrame(np.concatenate([train_bag_ids.reshape(train_bag_ids.shape[0],1), 
                                              train_labels.reshape(train_labels.shape[0],1)], 
                                             axis=1))
            
            indices_pos = df[df[0].isin(bags_pos)].index.to_numpy()
            indices_neg = df[df[0].isin(bags_neg)].index.to_numpy()

            inbag_indices = np.concatenate((indices_pos, indices_neg))
        else:
            sample_size = math.ceil(labels.shape[0] * sample_rate)
            inbag_indices = np.random.choice(np.where(labels == 1)[0], sample_size, replace=False)
        
        oo_bag_mask = np.ones(labels.shape[0], dtype=bool)
        oo_bag_mask[inbag_indices] = False

        outbag_indices = np.where(oo_bag_mask == 1)

        return inbag_indices, outbag_indices
    
    def fit(self, features, labels, bag_ids):
        for i in range(self.size):
            if self.use_prototype_learner:
                print(f"Tree {i} will be trained")
            
            (inbag_indices,
             outbag_indices) = self.sample(features, labels, bag_ids, self.stratified, self.sample_rate)      

            inbag_features = features[inbag_indices]
            inbag_labels = labels[inbag_indices]
            inbag_bag_ids = bag_ids[inbag_indices]


            tree = PrototypeTreeClassifier(
                max_depth=self.max_depth, 
                min_samples_leaf=self.min_samples_leaf, 
                min_samples_split=self.min_samples_split,
                prototype_count = self.prototype_count,
                use_prototype_learner = self.use_prototype_learner,
                train_features = inbag_features,
                early_stopping_round = self.early_stopping_round
            )

            tree.fit(inbag_features, inbag_labels, inbag_bag_ids)
            preds = tree.predict(inbag_features, inbag_bag_ids)
            score = metrics.roc_auc_score(inbag_labels, preds)
            self._trees.append(tree)

    def predict(self, features, bag_ids):
        temp = [t.predict(features, bag_ids) for t in self._trees]
        preds = np.transpose(np.array(temp))

        return mode(preds,1)[0]
    
    def predict_proba(self, features, bag_ids):
        temp = [t.predict(features, bag_ids) for t in self._trees]
        preds = np.transpose(np.array(temp))
        
        return np.sum(preds==1, axis=1)/self.size

In [6]:
def sample(features, labels, bag_ids, stratified, sample_rate):
    bags = np.unique(bag_ids)
    positive_bags = np.unique(bag_ids[np.where(labels == 1)])
    negative_bags = np.unique(bag_ids[np.where(labels == 0)])
    if stratified:
        pos_sample_size = math.ceil(positive_bags.shape[0] * sample_rate)
        neg_sample_size = math.ceil(negative_bags.shape[0] * sample_rate)

        sample_pos_bags = np.random.choice(positive_bags, pos_sample_size, replace=False)
        sample_neg_bags = np.random.choice(negative_bags, neg_sample_size, replace=False)

        indices_pos = np.where(np.isin(bag_ids, sample_pos_bags) == 1)[0]
        indices_neg = np.where(np.isin(bag_ids, sample_neg_bags) == 1)[0]
        inbag_indices = np.concatenate((indices_pos, indices_neg))
    else:
        sample_size = math.ceil(bags.shape[0] * sample_rate)
        sample_bags = np.random.choice(bags, sample_size, replace=False)        
        inbag_indices = np.where(np.isin(bag_ids, sample_bags) == 1)[0]

    oo_bag_mask = np.ones(labels.shape[0], dtype=bool)
    oo_bag_mask[inbag_indices] = False

    outbag_indices = np.where(oo_bag_mask == 1)

    return inbag_indices, outbag_indices

def get_parameter_scores(features, labels, bag_ids, params, fit_on_full = True):
    keys, values = zip(*params.items())
    params_list = [dict(zip(keys, v)) for v in itertools.product(*values)]
    
    param_vals_scores = dict()
    for param_vals in params_list:
        if param_vals["explained_variance"] < 1:
            pipe = Pipeline([('pca', PCA(n_components = param_vals["explained_variance"], 
                             svd_solver = "full")), 
             ('scaler', StandardScaler()), ])
        else:
            pipe = Pipeline([('scaler', StandardScaler()), ])
        pipe.fit(features)

        train_features = pipe.transform(features)
        test_features = pipe.transform(features)

        score_list = []
        for i in range(0, param_vals["forest_size"]):
            (inbag_indices,
             outbag_indices) = sample(features, labels, bag_ids, stratified = True, sample_rate = 0.8)      

            inbag_features = features[inbag_indices]
            inbag_labels = labels[inbag_indices]
            inbag_bag_ids = bag_ids[inbag_indices]

            outbag_features = features[outbag_indices]
            outbag_labels = labels[outbag_indices]
            outbag_bag_ids = bag_ids[outbag_indices]

            model = PrototypeTreeClassifier(max_depth=param_vals["max_depth"], 
                                           min_samples_leaf=param_vals["min_samples_leaf"],
                                           min_samples_split=2)

            model.fit(inbag_features, inbag_labels, inbag_bag_ids)
            preds = model.predict(outbag_features, outbag_bag_ids)

            score = metrics.roc_auc_score(outbag_labels, preds)
            score_list.append(score)

        mean_score = sum(score_list)/len(score_list)
        key = frozenset(param_vals.items())
        param_vals_scores[key] = mean_score

    return param_vals_scores

def split_features_labels_bags(data):
    features = data[data.columns[~data.columns.isin([0,1])]].to_numpy()
    labels = data[0].to_numpy()
    bag_ids = data[1].to_numpy()
    
    sort_index = np.argsort(bag_ids)
    bag_ids = bag_ids[sort_index]
    features = features[sort_index]
    
    return (features, labels, bag_ids)

def train_test_split(dataset, rep, fold, explained_variance, fit_on_full = False):
    data = pd.read_csv(f"./datasets/{dataset}.csv", header=None)
    testbags =  pd.read_csv(f"./datasets/{dataset}.csv_rep{rep}_fold{fold}.txt", header=None)
    
    train_data = data[~data[1].isin(testbags[0].tolist())]    
    test_data = data[data[1].isin(testbags[0].tolist())]
    
    (train_features, train_labels, train_bag_ids) = split_features_labels_bags(train_data)
    (test_features, test_labels, test_bag_ids) = split_features_labels_bags(test_data)
    
    if explained_variance < 1:
        pipe = Pipeline([('pca', PCA(n_components = explained_variance, 
                         svd_solver = "full")), 
         ('scaler', StandardScaler()), ])
    else:
        pipe = Pipeline([('scaler', StandardScaler()), ])
    
    if fit_on_full:
        pipe.fit(data[data.columns[~data.columns.isin(['0','1'])]].to_numpy())
    else:
        pipe.fit(train_features)

    train_features = pipe.transform(train_features)
    test_features = pipe.transform(test_features)
    
    return (train_features, train_labels, train_bag_ids,
           test_features, test_labels, test_bag_ids)

In [7]:
def convert_to_bags(data,
                    split_instances=False,
                    instance_norm=True,
                    split_ratio=0.2,
                    stride_ratio=0.5):
  bags = []
  labels = []
  current_bag = []
  current_label = data[0, 0]
  cur = data[0, 1]
  instance_size = np.round(split_ratio * data[0, 2:].shape[0]).astype("int")
  stride = np.round(stride_ratio * instance_size).astype("int")

  for i in range(data.shape[0]):
    if data[i, 1] == cur:
      instance = data[i, 2:]
      if instance_norm:
        instance = (instance - np.mean(instance)) / (1e-08 + np.std(instance))
      if split_instances:
        size = instance.shape[0]
        window = instance_size
        while True:
          current_bag.append(instance[window - instance_size:window])
          window += stride
          if window >= size:
            window = size
            current_bag.append(instance[window - instance_size:window])
            break
      else:
        current_bag.append(instance)
    else:
      bags.append(np.array(current_bag))
      labels.append(np.array(current_label))
      current_label = data[i, 0]
      current_bag = []
      instance = data[i, 2:]
      if instance_norm:
        instance = (instance - np.mean(instance)) / (1e-08 + np.std(instance))
      if split_instances:
        size = instance.shape[0]
        window = instance_size
        while True:
          current_bag.append(instance[window - instance_size:window])
          window += stride
          if window >= size:
            window = size
            current_bag.append(instance[window - instance_size:window])
            break
      else:
        current_bag.append(instance)
      cur = data[i, 1]
  bags.append(np.array(current_bag))
  labels.append(np.array(current_label, dtype="int32"))
  return bags, labels

In [26]:
def find_prototype(bags,
                   features,
                   labels,
                   early_stopping_round = 10):
    
    n_classes=2
    n_epochs=100
    batch_size=1
    display_every=5
    final_vals = []
    reg_lambda_dist = random.choice(parameters[0])
    reg_lambda_w = random.choice(parameters[1])
    reg_lambda_p = random.choice(parameters[2])
    lr_prot = random.choice(parameters[3])
    lr_weights = random.choice(parameters[4])
    reg_w = random.choice(parameters[5])
    n_prototypes = random.choice(parameters[6])
    #reg_lambda_dist = 0.0005
    #reg_lambda_w = 0.005
    #reg_lambda_p = 0.00005
    #lr_prot = 0.00001
    #lr_weights = 0.00001
    #reg_w = 1
    #n_prototypes = 2
    #n_prototypes = n_prototypes*2
    
    data1 = np.vstack((labels, bags)).T
    data = np.concatenate([data1, features], axis=1)
    
    bags_train, labels_train = convert_to_bags(data)
    bags_train = np.array(bags_train)
    labels_train = np.array(labels_train)

    for rep in range(1, 2):
        vals = []
        for fold in range(1, 2):
            accs = [] 

            use_cuda = False

            bag_size = bags_train[0][0].shape[0]
            #step_per_epoch = len(bags_train)
            step_per_epoch = len(np.unique(bags))

            lr_step = (step_per_epoch * 40)
            display = (step_per_epoch * display_every)
            max_steps = n_epochs * step_per_epoch
            
            model = ShapeletGenerator(n_prototypes, bag_size, n_classes, features)

            if n_classes == 2:
                output_fn = torch.nn.Sigmoid()
            else:
                output_fn = torch.nn.Softmax()



            if n_classes == 2:
                loss = torch.nn.BCEWithLogitsLoss(reduction="mean")
            else:
                loss = torch.nn.CrossEntropyLoss(reduction="mean")

            optim1 = torch.optim.Adam([model.prototypes], lr=lr_prot)
            optim2 = torch.optim.Adam(list(model.linear_layer.parameters()),
                        lr=lr_weights)
            total_loss = 0
            correct = 0
            train_loss_hist, eval_loss_hist = [], []
            train_acc_hist, eval_acc_hist = [], []
            eval_aucs = []
            step_hist = []
            time_hist = []

            if use_cuda and torch.cuda.is_available():
                model = model.cuda()

            cont = True
            
            max_stagnation = 0 # number of epochs without improvement to tolerate
            best_prototype = None
            best_score = 0
            i = 0
            
            while i < max_steps and max_stagnation < early_stopping_round:
                i += 1
                np_idx = np.random.choice(bags_train.shape[0], batch_size)
                start_time = time.time()
                batch_inp = bags_train[np_idx]
                targets = torch.Tensor(labels_train[np_idx]).type(torch.int64)
                batch_inp = torch.Tensor(batch_inp[0])
                batch_inp = batch_inp.view(1, batch_inp.shape[0], batch_inp.shape[1])
                if use_cuda and torch.cuda.is_available():
                    targets = targets.cuda()
                    batch_inp = batch_inp.cuda()

                logits, distances = model(batch_inp)
                out = output_fn(logits)

                if n_classes == 2:
                    predicted = (out > 0.5).type(torch.int64)
                else:
                    _, predicted = torch.max(out, 1)
                correct += (predicted == targets).type(torch.float32).mean().item()

                batch_loss = loss(logits, targets.type(torch.float32))

                prototypes_pairwise = pairwise_dist(model.prototypes, model.prototypes)
                reg_prototypes = prototypes_pairwise.sum()

                weight_reg = 0
                for param in model.linear_layer.parameters():
                    weight_reg += param.norm(p=reg_w).sum()

                reg_loss = reg_lambda_w*weight_reg + reg_lambda_dist*distances.sum() + reg_prototypes*reg_lambda_p
                total_loss += batch_loss
                min_loss = batch_loss + reg_loss
                min_loss.backward()

                optim1.step()
                optim2.step()

                if (i + 1) % lr_step == 0:
                    print("LR DROP!")
                    optims = [optim1, optim2]
                    for o in optims:
                        for p in o.param_groups:
                            p["lr"] = p["lr"] / 2

                if (i + 1) % display == 0:
                    with torch.no_grad():
                        print("Step : ", str(i + 1), "Loss: ",
                        total_loss.item() / display, " accuracy: ", correct / (display))
                        train_loss_hist.append(total_loss.item() / display)
                        train_acc_hist.append(correct / display)
                        total_loss = 0
                        correct = 0
                        model = model.eval()
                        e_loss = 0
                        e_acc = 0
                        y_true = []
                        y_score = []

                        for i in range(len(bags_train)):
                            batch_inp = torch.Tensor(bags_train[i])
                            batch_inp = batch_inp.view(1, batch_inp.shape[0],
                                                  batch_inp.shape[1])
                            targets = torch.Tensor([labels_train[i]]).type(torch.int64)
                            logits, distances = model(batch_inp)
                            out = output_fn(logits)

                            if n_classes == 2:
                                predicted = (out > 0.5).type(torch.int64)
                            else:
                                _, predicted = torch.max(out, 1)
                            y_true.append(targets)
                            y_score.append(out)
                            correct = (predicted == targets).type(torch.float32).mean().item()
                            e_acc += correct
                            eval_loss = loss(logits, targets.type(torch.float32)).item()
                            e_loss += eval_loss

                        y_true_list = [x.tolist() for x in y_true]
                        y_score_list = [x.tolist() for x in y_score]
                        score_auc = roc_auc_score(y_true_list, y_score_list)
                        print("Eval Loss: ", e_loss / len(bags_train),
                            " Eval Accuracy:", e_acc / len(bags_train), " AUC: ",
                        score_auc)
                        
                        if score_auc > best_score:
                            best_score = score_auc
                            best_prototype = model.prototypes
                            max_stagnation = 0
                        else:
                            max_stagnation += 1
                        
                        print("max_stagnation ", max_stagnation)
                        eval_loss_hist.append(e_loss / len(bags_train))
                        eval_acc_hist.append(e_acc / len(bags_train))
                        eval_aucs.append(roc_auc_score(y_true_list, y_score_list))
                        accs.append(e_acc / len(bags_train))
                        step_hist.append(i+1)
                        model = model.train()

    return best_prototype

In [27]:
def get_auc(preds, test_labels, test_bag_ids):
    preds = preds.reshape(preds.shape[0])
    df = pd.DataFrame({"bag_ids": test_bag_ids, "preds": preds})
    predict_bags = df.groupby("bag_ids").mean()["preds"].to_numpy()

    df = pd.DataFrame({"bag_ids": test_bag_ids, "labels": test_labels})
    label_bags = df.groupby("bag_ids").mean()["labels"].to_numpy()
    
    return metrics.roc_auc_score(label_bags, predict_bags)

In [28]:
import torch
import torch.nn as nn
import numpy as np


def gram_matrix(mat):
  mat = mat.squeeze(dim=0)
  mat = torch.mm(mat, mat.t())
  return mat



def pairwise_dist(x, y):
  x_norm = (x.norm(dim=2)[:, :, None])
  y_t = y.permute(0, 2, 1).contiguous()
  y_norm = (y.norm(dim=2)[:, None])
  y_t = torch.cat([y_t] * x.shape[0], dim=0)
  dist = x_norm + y_norm - 2.0 * torch.bmm(x, y_t)
  return torch.clamp(dist, 0.0, np.inf)

class ShapeletGenerator(nn.Module):

    def __init__(self, n_prototypes, bag_size, n_classes, features):
        n_prototypes = int(n_prototypes)
        super(ShapeletGenerator, self).__init__()

        number_of_rows = number_of_rows = features.shape[0]

        random_indices = np.random.choice(number_of_rows, 
                                              size=1, 
                                              replace=False)
            
        prot = features[random_indices, :]
        prot = prot.reshape(1, n_prototypes, prot.shape[1])
        prot = prot.astype("float32")
        self.prototypes = torch.from_numpy(prot).requires_grad_()
        #self.prototypes = (torch.randn(
        #    (1, n_prototypes, bag_size))).requires_grad_()
        if n_classes == 2:
            n_classes = 1
        self.linear_layer = torch.nn.Linear(3 * n_prototypes, n_classes, bias=False)
        #self.linear_layer.weight = torch.nn.Parameter(self.linear_layer.weight/100000)
        self.n_classes = n_classes

    def pairwise_distances(self, x, y):
        x_norm = (x.norm(dim=2)[:, :, None])
        y_t = y.permute(0, 2, 1).contiguous()
        y_norm = (y.norm(dim=2)[:, None])
        y_t = torch.cat([y_t] * x.shape[0], dim=0)
        dist = x_norm + y_norm - 2.0 * torch.bmm(x, y_t)
        return torch.clamp(dist, 0.0, np.inf)

    def get_output(self, batch_inp):
        dist = self.pairwise_distances(batch_inp, self.prototypes)
        min_dist = dist.min(dim=1)[0]
        max_dist = dist.max(dim=1)[0]
        mean_dist = dist.mean(dim=1)
        all_features = torch.cat([min_dist, max_dist, mean_dist], dim=1)
        logits = self.linear_layer(all_features)

        return logits, all_features

    def forward(self, x):
        logits, distances = self.get_output(x)
        if self.n_classes == 1:
          logits = logits.view(1)
        return logits, distances


In [32]:
dataset = "BrownCreeper"

(train_features, 
     train_labels, 
     train_bag_ids,
     test_features, 
     test_labels, 
     test_bag_ids) = train_test_split(dataset, 1, 1, 1, fit_on_full = False)

In [36]:
import random

parameters = [[0.005, 0.01], [0.005, 0.01], [0.005, 0.01], [0.01, 0.02], [0.01, 0.05], [1],[1]]

reg_lambda_dist = parameters[0]
reg_lambda_w = parameters[1]
reg_lambda_p = parameters[2]
lr_prot = parameters[3]
lr_weights = parameters[4]
reg_w = parameters[5]
n_prototypes = parameters[6]


model = PrototypeForest(size = 1, 
                        max_depth = 3, 
                        min_samples_leaf= 3, 
                        min_samples_split=2,
                        prototype_count=1,
                        early_stopping_round=5,
                        use_prototype_learner = True)

model.fit(train_features, train_labels, train_bag_ids)

preds = model.predict(test_features, test_bag_ids)
metrics.accuracy_score(test_labels, preds)

Tree 0 will be trained
Step :  1780 Loss:  53.58619645365169  accuracy:  0.7033707865168539
Eval Loss:  27.422971244069508  Eval Accuracy: 0.8061797752808989  AUC:  0.8484599183888377
max_stagnation  0
Step :  1780 Loss:  74.3324613764045  accuracy:  0.601685393258427
Eval Loss:  208.71464168456154  Eval Accuracy: 0.7387640449438202  AUC:  0.7013623798867975
max_stagnation  1
Step :  1780 Loss:  124.14413623595506  accuracy:  0.6168539325842697
Eval Loss:  120.4287008609686  Eval Accuracy: 0.8089887640449438  AUC:  0.8115374489930236
max_stagnation  2
Step :  1780 Loss:  172.08114466292136  accuracy:  0.599438202247191
Eval Loss:  139.40592900349296  Eval Accuracy: 0.8314606741573034  AUC:  0.8086415690404107
max_stagnation  3
Step :  1780 Loss:  229.77189255617978  accuracy:  0.5831460674157304
Eval Loss:  217.47140509922414  Eval Accuracy: 0.6938202247191011  AUC:  0.6931354482032381
max_stagnation  4
Step :  1780 Loss:  174.2573911516854  accuracy:  0.6292134831460674
Eval Loss:  19

0.890020366598778

In [27]:
parameters = [0.005, 0.005, 0.005, 0.01,0.01,1,1]

model = PrototypeForest(size = 20, 
                        max_depth = 3, 
                        min_samples_leaf= 3, 
                        min_samples_split=2,
                        prototype_count=1,
                        use_prototype_learner = True,
                        early_stopping_round = 10)

model.fit(train_features, train_labels, train_bag_ids)

preds = model.predict(test_features, test_bag_ids)
metrics.accuracy_score(test_labels, preds)

Tree 0 will be trained
Step :  1765 Loss:  27.32640093838527  accuracy:  0.7019830028328612
Eval Loss:  50.92960374495119  Eval Accuracy: 0.6203966005665722  AUC:  0.6069919517102615
max_stagnation  0
Step :  1765 Loss:  38.93760180594901  accuracy:  0.6073654390934844
Eval Loss:  28.298802339695385  Eval Accuracy: 0.8016997167138811  AUC:  0.8491448692152918
max_stagnation  0
Step :  1765 Loss:  78.98396777620397  accuracy:  0.6124645892351275
Eval Loss:  54.36075382337296  Eval Accuracy: 0.8441926345609065  AUC:  0.8327297116029512
max_stagnation  1
Step :  870 Loss:  8.32175433279454  accuracy:  0.8908045977011494
Eval Loss:  17.695223753479706  Eval Accuracy: 0.9367816091954023  AUC:  0.49693251533742333
max_stagnation  0
Step :  870 Loss:  13.901850978807472  accuracy:  0.7034482758620689
Eval Loss:  15.730571966061646  Eval Accuracy: 0.9367816091954023  AUC:  0.49693251533742333
max_stagnation  1
Step :  895 Loss:  12.700563023743017  accuracy:  0.6256983240223464
Eval Loss:  4.9

AttributeError: 'PrototypeForest' object has no attribute '_train_perf'

In [56]:
dataset = "BrownCreeper"
accuracy_list = []

for repl in range(1, 2):
    for fold in range(1, 11):
        print(f"Fold is {fold}")
        (train_features, 
             train_labels, 
             train_bag_ids,
             test_features, 
             test_labels, 
             test_bag_ids) = train_test_split(dataset, repl, fold, 1, fit_on_full = False)

        parameters = [0.005, 0.005, 0.005, 0.01,0.01,1,1]

        model = PrototypeForest(size = 100, 
                                max_depth = 3, 
                                min_samples_leaf=1, 
                                min_samples_split=2,
                                prototype_count=1,
                                use_prototype_learner = False)

        model.fit(train_features, train_labels, train_bag_ids)

        preds = model.predict_proba(test_features, test_bag_ids)
        #acc = metrics.roc_auc_score(test_labels, preds)
        acc = get_auc(preds, test_labels, test_bag_ids)
        accuracy_list.append(acc)
        print(acc)

Fold is 1
> [0;32m<ipython-input-55-2b9f0df11447>[0m(160)[0;36mbuildDT[0;34m()[0m
[0;32m    158 [0;31m             [0mlabels_right[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    159 [0;31m             [0mbag_ids_left[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 160 [0;31m             [0mbag_ids_right[0m[0;34m)[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mcalcBestSplit[0m[0;34m([0m[0mfeatures[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    161 [0;31m                                                 [0mfeatures_updated[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    162 [0;31m                                                 [0mlabels[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> exit()


BdbQuit: 

In [296]:
def find_prototype(bags,
                   features,
                   labels):
    
    def experiment_fn(parameters,
                bags=bags,
                features=features,
                labels=labels,
                n_classes=2,
                folder=r"./datasets",
                n_epochs=100,
                batch_size=1,
                display_every=5,
                ):  
        final_vals = []
        reg_lambda_dist = 0.0005
        reg_lambda_w = 0.005
        reg_lambda_p = 0.00005
        lr_prot = 0.00001
        lr_weights = 0.00001
        reg_w = 1
        n_prototypes = 2
        
        for rep in range(1, 2):
            vals = []
            for fold in range(1, 2):
                accs = [] 

                use_cuda = False
                
                data1 = np.vstack((labels, bags)).T
                data = np.concatenate([data1, features], axis=1)

                bags_train, labels_train = convert_to_bags(data)
                bags_train = np.array(bags_train)
                labels_train = np.array(labels_train)

                bag_size = bags_train[0][0].shape[0]
                step_per_epoch = len(bags_train)
                lr_step = (step_per_epoch * 40)
                display = (step_per_epoch * display_every)
                max_steps = n_epochs * step_per_epoch

                model = ShapeletGenerator(n_prototypes, bag_size, n_classes)

                if n_classes == 2:
                    output_fn = torch.nn.Sigmoid()
                else:
                    output_fn = torch.nn.Softmax()

                if n_classes == 2:
                    loss = torch.nn.BCEWithLogitsLoss(reduction="mean")
                else:
                    loss = torch.nn.CrossEntropyLoss(reduction="mean")

                optim1 = torch.optim.Adam([model.prototypes], lr=lr_prot)
                optim2 = torch.optim.Adam(list(model.linear_layer.parameters()),
                            lr=lr_weights)
                total_loss = 0
                correct = 0
                train_loss_hist, eval_loss_hist = [], []
                train_acc_hist, eval_acc_hist = [], []
                eval_aucs = []
                step_hist = []
                time_hist = []

                if use_cuda and torch.cuda.is_available():
                    model = model.cuda()

                for i in range(max_steps):
                    np_idx = np.random.choice(bags_train.shape[0], batch_size)
                    start_time = time.time()
                    batch_inp = bags_train[np_idx]
                    targets = torch.Tensor(labels_train[np_idx]).type(torch.int64)
                    batch_inp = torch.Tensor(batch_inp[0])
                    batch_inp = batch_inp.view(1, batch_inp.shape[0], batch_inp.shape[1])
                    if use_cuda and torch.cuda.is_available():
                        targets = targets.cuda()
                        batch_inp = batch_inp.cuda()

                    logits, distances = model(batch_inp)
                    out = output_fn(logits)

                    if n_classes == 2:
                        predicted = (out > 0.5).type(torch.int64)
                    else:
                        _, predicted = torch.max(out, 1)
                    correct += (predicted == targets).type(torch.float32).mean().item()

                    batch_loss = loss(logits, targets.type(torch.float32))

                    # N_prot x N_prot
                    #M_prot_norm = torch.mm(prot_norms.transpose(0, 1), prot_norms)
                    #cos_loss = torch.bmm(prototypes, prototypes.transpose(1,2)).squeeze(0)
                    #cos_loss = cos_loss/M_prot_norm
                    #cos_norm = cos_loss.norm(dim=0).sum() 

                    # cos_loss = pd(model.prototypes, model.prototypes).sum()

                    #weight_reg = model.linear_layer.weight.norm(p=1).sum()

                    prototypes_pairwise = pairwise_dist(model.prototypes, model.prototypes)
                    reg_prototypes = prototypes_pairwise.sum()

                    weight_reg = 0
                    for param in model.linear_layer.parameters():
                        weight_reg += param.norm(p=reg_w).sum()

                    reg_loss = reg_lambda_w*weight_reg + reg_lambda_dist*distances.sum() + reg_prototypes*reg_lambda_p
                    total_loss += batch_loss
                    min_loss = batch_loss + reg_loss
                    min_loss.backward()

                    optim1.step()
                    optim2.step()

                    if (i + 1) % lr_step == 0:
                        print("LR DROP!")
                        optims = [optim1, optim2]
                        for o in optims:
                            for p in o.param_groups:
                                p["lr"] = p["lr"] / 2

                    if (i + 1) % display == 0:
                        with torch.no_grad():
                            print("Step : ", str(i + 1), "Loss: ",
                            total_loss.item() / display, " accuracy: ", correct / (display))
                            train_loss_hist.append(total_loss.item() / display)
                            train_acc_hist.append(correct / display)
                            total_loss = 0
                            correct = 0
                            model = model.eval()
                            e_loss = 0
                            e_acc = 0
                            y_true = []
                            y_score = []

                            for i in range(len(bags_train)):
                                batch_inp = torch.Tensor(bags_train[i])
                                batch_inp = batch_inp.view(1, batch_inp.shape[0],
                                                      batch_inp.shape[1])
                                targets = torch.Tensor([labels_train[i]]).type(torch.int64)
                                logits, distances = model(batch_inp)
                                out = output_fn(logits)

                                if n_classes == 2:
                                    predicted = (out > 0.5).type(torch.int64)
                                else:
                                    _, predicted = torch.max(out, 1)
                                y_true.append(targets)
                                y_score.append(out)
                                correct = (predicted == targets).type(torch.float32).mean().item()
                                e_acc += correct
                                eval_loss = loss(logits, targets.type(torch.float32)).item()
                                e_loss += eval_loss

                            y_true_list = [x.tolist() for x in y_true]
                            y_score_list = [x.tolist() for x in y_score]
                            print("Eval Loss: ", e_loss / len(bags_train),
                                " Eval Accuracy:", e_acc / len(bags_train), " AUC: ",
                            roc_auc_score(y_true_list, y_score_list))
                            eval_loss_hist.append(e_loss / len(bags_train))
                            eval_acc_hist.append(e_acc / len(bags_train))
                            eval_aucs.append(roc_auc_score(y_true_list, y_score_list))
                            accs.append(e_acc / len(bags_train))
                            step_hist.append(i+1)
                            model = model.train()

                print(str(rep), " ", str(fold), " Final Best AUC: ",
                    np.max(np.array(eval_aucs)))

                end_time = time.time()
                total_time = end_time - start_time
                time_hist.append([total_time]*len(accs))
                output_data = np.column_stack([step_hist, train_loss_hist,train_acc_hist, eval_loss_hist, eval_acc_hist, eval_aucs])
                df = DataFrame(output_data, columns = ["step_hist", "train_loss_hist","train_acc_hist", "eval_loss_hist", "eval_acc_hist", "eval_aucs"])
                df['dataset'] = dataset
                df['fold'] = fold
                df['rep'] = rep
                df['time_hist'] = total_time
                export_csv = df.to_csv (r'.\export_dataframe.csv', index = None, header=False, mode='a') #Don't forget to add '.csv' at the end of the path
                vals.append(np.max(np.array(eval_aucs)))
                prototypes = model.prototypes.squeeze(0).detach().numpy()
                figure_file = "shapelets_" + dataset + "_run_" + str(0) + "_" + str(
                rep) + "_" + str(fold) + ".png"
                files = "{}_{}_run_{}_{}_{}.png"
                loss_file = files.format("loss", dataset, "0", str(rep), str(fold))
                accuracy_file = files.format("acc", dataset, "0", str(rep), str(fold))

                plt.plot(train_loss_hist, label="train_loss")
                plt.plot(eval_loss_hist, label="eval_loss")
                plt.title("Loss History")
                plt.legend()
                plt.savefig(loss_file)
                plt.close()
                plt.plot(train_loss_hist, label="train_loss")
                plt.title("Only Training Loss History")
                plt.legend()
                plt.savefig("only_train_"+loss_file)
                plt.close()
                plt.plot(train_acc_hist, label="train_accuracy")
                plt.plot(eval_acc_hist, label="eval_accuracy")
                plt.title("Accuracy History")
                plt.legend()
                plt.savefig(accuracy_file)
                plt.close()
                plot_prototypes(prototypes, savefile=figure_file)

                final_vals.append(vals)

        print(np.mean(final_vals), "mean final vals")
    
        return np.mean(final_vals)

    BOUNDS = [
     {'name': 'reg_lambda_dist',
      'type': 'continuous',
      'domain': (0.0005, 0.005)},
     {'name': 'reg_lambda_w',
      'type': 'continuous',
      'domain': (0.005, 0.05)},
     {'name': 'reg_lambda_p',
      'type': 'continuous',
      'domain': (0.00005, 0.0005)},
     {'name': 'lr_prot',
      'type': 'continuous',
      'domain': (0.00001, 0.0001)},
     {'name': 'lr_weights',
      'type': 'continuous',
      'domain': (0.00001, 0.0001)},
     {'name': 'reg_w',
      'type': 'discrete',
      'domain': (1, 2)},
     {'name': 'n_prototypes',
      'type': 'discrete',
      'domain': (2, 6)}  # will be x2. ie if 2, then number of prototypes will actually be 4, if 4 then 8, etc.
    ]
    
    np.random.seed(777)
    optimizer = GPyOpt.methods.BayesianOptimization(
         f=experiment_fn, domain=BOUNDS,
         acquisition_type='MPI',
         acquisition_par=0.3,
         exact_eval=True,
         maximize=True
     )
    
    max_iter = 40
    optimizer.run_optimization(max_iter, max_time=3600)
    #con_plot = data_s + "_optimizer_bayesopt.png"
    
    parameters = optimizer.x_opt
    parameters_output = np.transpose([parameters])
    df_parameters = DataFrame(parameters_output)
    df_parameters = df_parameters.T
    
    n_classes=2
    n_epochs=100
    batch_size=1
    display_every=5
    print(parameters)
    final_vals = []
    reg_lambda_dist = parameters[0]
    reg_lambda_w = parameters[1]
    reg_lambda_p = parameters[2]
    lr_prot = parameters[3]
    lr_weights = parameters[4]
    reg_w = parameters[5]
    n_prototypes = parameters[6]
    n_prototypes = n_prototypes*2
    
    data1 = np.vstack((labels, bags)).T
    data = np.concatenate([data1, features], axis=1)

    bags_train, labels_train = convert_to_bags(data)
    bags_train = np.array(bags_train)
    labels_train = np.array(labels_train)

    for rep in range(1, 2):
        vals = []
        for fold in range(1, 2):
            accs = [] 

            use_cuda = False

            bag_size = bags_train[0][0].shape[0]
            step_per_epoch = len(bags_train)
            lr_step = (step_per_epoch * 40)
            display = (step_per_epoch * display_every)
            max_steps = n_epochs * step_per_epoch

            model = ShapeletGenerator(n_prototypes, bag_size, n_classes)

            if n_classes == 2:
                output_fn = torch.nn.Sigmoid()
            else:
                output_fn = torch.nn.Softmax()



            if n_classes == 2:
                loss = torch.nn.BCEWithLogitsLoss(reduction="mean")
            else:
                loss = torch.nn.CrossEntropyLoss(reduction="mean")

            optim1 = torch.optim.Adam([model.prototypes], lr=lr_prot)
            optim2 = torch.optim.Adam(list(model.linear_layer.parameters()),
                        lr=lr_weights)
            total_loss = 0
            correct = 0
            train_loss_hist, eval_loss_hist = [], []
            train_acc_hist, eval_acc_hist = [], []
            eval_aucs = []
            step_hist = []
            time_hist = []

            if use_cuda and torch.cuda.is_available():
                model = model.cuda()

            for i in range(max_steps):
                np_idx = np.random.choice(bags_train.shape[0], batch_size)
                start_time = time.time()
                batch_inp = bags_train[np_idx]
                targets = torch.Tensor(labels_train[np_idx]).type(torch.int64)
                batch_inp = torch.Tensor(batch_inp[0])
                batch_inp = batch_inp.view(1, batch_inp.shape[0], batch_inp.shape[1])
                if use_cuda and torch.cuda.is_available():
                    targets = targets.cuda()
                    batch_inp = batch_inp.cuda()

                logits, distances = model(batch_inp)
                out = output_fn(logits)

                if n_classes == 2:
                    predicted = (out > 0.5).type(torch.int64)
                else:
                    _, predicted = torch.max(out, 1)
                correct += (predicted == targets).type(torch.float32).mean().item()

                batch_loss = loss(logits, targets.type(torch.float32))

                # N_prot x N_prot
                #M_prot_norm = torch.mm(prot_norms.transpose(0, 1), prot_norms)
                #cos_loss = torch.bmm(prototypes, prototypes.transpose(1,2)).squeeze(0)
                #cos_loss = cos_loss/M_prot_norm
                #cos_norm = cos_loss.norm(dim=0).sum() 

                # cos_loss = pd(model.prototypes, model.prototypes).sum()

                #weight_reg = model.linear_layer.weight.norm(p=1).sum()

                prototypes_pairwise = pairwise_dist(model.prototypes, model.prototypes)
                reg_prototypes = prototypes_pairwise.sum()

                weight_reg = 0
                for param in model.linear_layer.parameters():
                    weight_reg += param.norm(p=reg_w).sum()

                reg_loss = reg_lambda_w*weight_reg + reg_lambda_dist*distances.sum() + reg_prototypes*reg_lambda_p
                total_loss += batch_loss
                min_loss = batch_loss + reg_loss
                min_loss.backward()

                optim1.step()
                optim2.step()

                if (i + 1) % lr_step == 0:
                    print("LR DROP!")
                    optims = [optim1, optim2]
                    for o in optims:
                        for p in o.param_groups:
                            p["lr"] = p["lr"] / 2

                if (i + 1) % display == 0:
                    with torch.no_grad():
                        print("Step : ", str(i + 1), "Loss: ",
                        total_loss.item() / display, " accuracy: ", correct / (display))
                        train_loss_hist.append(total_loss.item() / display)
                        train_acc_hist.append(correct / display)
                        total_loss = 0
                        correct = 0
                        model = model.eval()
                        e_loss = 0
                        e_acc = 0
                        y_true = []
                        y_score = []

                        for i in range(len(bags_train)):
                            batch_inp = torch.Tensor(bags_train[i])
                            batch_inp = batch_inp.view(1, batch_inp.shape[0],
                                                  batch_inp.shape[1])
                            targets = torch.Tensor([labels_train[i]]).type(torch.int64)
                            logits, distances = model(batch_inp)
                            out = output_fn(logits)

                            if n_classes == 2:
                                predicted = (out > 0.5).type(torch.int64)
                            else:
                                _, predicted = torch.max(out, 1)
                            y_true.append(targets)
                            y_score.append(out)
                            correct = (predicted == targets).type(torch.float32).mean().item()
                            e_acc += correct
                            eval_loss = loss(logits, targets.type(torch.float32)).item()
                            e_loss += eval_loss

                        y_true_list = [x.tolist() for x in y_true]
                        y_score_list = [x.tolist() for x in y_score]
                        print("Eval Loss: ", e_loss / len(bags_train),
                            " Eval Accuracy:", e_acc / len(bags_train), " AUC: ",
                        roc_auc_score(y_true_list, y_score_list))
                        eval_loss_hist.append(e_loss / len(bags_train))
                        eval_acc_hist.append(e_acc / len(bags_train))
                        eval_aucs.append(roc_auc_score(y_true_list, y_score_list))
                        accs.append(e_acc / len(bags_train))
                        step_hist.append(i+1)
                        model = model.train()

    return model.prototypes

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,192,193,194,195,196,197,198,199,200,201
0,1.0,1.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
1,1.0,1.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
2,1.0,1.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
3,1.0,1.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
4,1.0,1.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2757,0.0,100.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
2758,0.0,100.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
2759,0.0,100.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
2760,0.0,100.0,0.0,0.0,0.0,0.006834,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.006452,0.0,0.0,0.0,0.0


In [None]:
def find_prototype(bags,
                   features,
                   labels):
    
    n_classes=2
    n_epochs=100
    batch_size=1
    display_every=5
    final_vals = []
    reg_lambda_dist = parameters[0]
    reg_lambda_w = parameters[1]
    reg_lambda_p = parameters[2]
    lr_prot = parameters[3]
    lr_weights = parameters[4]
    reg_w = parameters[5]
    n_prototypes = parameters[6]
    #reg_lambda_dist = 0.0005
    #reg_lambda_w = 0.005
    #reg_lambda_p = 0.00005
    #lr_prot = 0.00001
    #lr_weights = 0.00001
    #reg_w = 1
    #n_prototypes = 2
    #n_prototypes = n_prototypes*2
    
    data1 = np.vstack((labels, bags)).T
    data = np.concatenate([data1, features], axis=1)
    
    bags_train, labels_train = convert_to_bags(data)
    bags_train = np.array(bags_train)
    labels_train = np.array(labels_train)

    for rep in range(1, 2):
        vals = []
        for fold in range(1, 2):
            accs = [] 

            use_cuda = False

            bag_size = bags_train[0][0].shape[0]
            #step_per_epoch = len(bags_train)
            step_per_epoch = len(np.unique(bags))

            lr_step = (step_per_epoch * 40)
            display = (step_per_epoch * display_every)
            max_steps = n_epochs * step_per_epoch

            model = ShapeletGenerator(n_prototypes, bag_size, n_classes)

            if n_classes == 2:
                output_fn = torch.nn.Sigmoid()
            else:
                output_fn = torch.nn.Softmax()



            if n_classes == 2:
                loss = torch.nn.BCEWithLogitsLoss(reduction="mean")
            else:
                loss = torch.nn.CrossEntropyLoss(reduction="mean")

            optim1 = torch.optim.Adam([model.prototypes], lr=lr_prot)
            optim2 = torch.optim.Adam(list(model.linear_layer.parameters()),
                        lr=lr_weights)
            total_loss = 0
            correct = 0
            train_loss_hist, eval_loss_hist = [], []
            train_acc_hist, eval_acc_hist = [], []
            eval_aucs = []
            step_hist = []
            time_hist = []

            if use_cuda and torch.cuda.is_available():
                model = model.cuda()

            for i in range(max_steps):
                np_idx = np.random.choice(bags_train.shape[0], batch_size)
                start_time = time.time()
                batch_inp = bags_train[np_idx]
                targets = torch.Tensor(labels_train[np_idx]).type(torch.int64)
                batch_inp = torch.Tensor(batch_inp[0])
                batch_inp = batch_inp.view(1, batch_inp.shape[0], batch_inp.shape[1])
                if use_cuda and torch.cuda.is_available():
                    targets = targets.cuda()
                    batch_inp = batch_inp.cuda()

                logits, distances = model(batch_inp)
                out = output_fn(logits)

                if n_classes == 2:
                    predicted = (out > 0.5).type(torch.int64)
                else:
                    _, predicted = torch.max(out, 1)
                correct += (predicted == targets).type(torch.float32).mean().item()

                batch_loss = loss(logits, targets.type(torch.float32))

                # N_prot x N_prot
                #M_prot_norm = torch.mm(prot_norms.transpose(0, 1), prot_norms)
                #cos_loss = torch.bmm(prototypes, prototypes.transpose(1,2)).squeeze(0)
                #cos_loss = cos_loss/M_prot_norm
                #cos_norm = cos_loss.norm(dim=0).sum() 

                # cos_loss = pd(model.prototypes, model.prototypes).sum()

                #weight_reg = model.linear_layer.weight.norm(p=1).sum()

                prototypes_pairwise = pairwise_dist(model.prototypes, model.prototypes)
                reg_prototypes = prototypes_pairwise.sum()

                weight_reg = 0
                for param in model.linear_layer.parameters():
                    weight_reg += param.norm(p=reg_w).sum()

                reg_loss = reg_lambda_w*weight_reg + reg_lambda_dist*distances.sum() + reg_prototypes*reg_lambda_p
                total_loss += batch_loss
                min_loss = batch_loss + reg_loss
                min_loss.backward()

                optim1.step()
                optim2.step()

                if (i + 1) % lr_step == 0:
                    print("LR DROP!")
                    optims = [optim1, optim2]
                    for o in optims:
                        for p in o.param_groups:
                            p["lr"] = p["lr"] / 2

                if (i + 1) % display == 0:
                    with torch.no_grad():
                        print("Step : ", str(i + 1), "Loss: ",
                        total_loss.item() / display, " accuracy: ", correct / (display))
                        train_loss_hist.append(total_loss.item() / display)
                        train_acc_hist.append(correct / display)
                        total_loss = 0
                        correct = 0
                        model = model.eval()
                        e_loss = 0
                        e_acc = 0
                        y_true = []
                        y_score = []

                        for i in range(len(bags_train)):
                            batch_inp = torch.Tensor(bags_train[i])
                            batch_inp = batch_inp.view(1, batch_inp.shape[0],
                                                  batch_inp.shape[1])
                            targets = torch.Tensor([labels_train[i]]).type(torch.int64)
                            logits, distances = model(batch_inp)
                            out = output_fn(logits)

                            if n_classes == 2:
                                predicted = (out > 0.5).type(torch.int64)
                            else:
                                _, predicted = torch.max(out, 1)
                            y_true.append(targets)
                            y_score.append(out)
                            correct = (predicted == targets).type(torch.float32).mean().item()
                            e_acc += correct
                            eval_loss = loss(logits, targets.type(torch.float32)).item()
                            e_loss += eval_loss

                        y_true_list = [x.tolist() for x in y_true]
                        y_score_list = [x.tolist() for x in y_score]
                        print("Eval Loss: ", e_loss / len(bags_train),
                            " Eval Accuracy:", e_acc / len(bags_train), " AUC: ",
                        roc_auc_score(y_true_list, y_score_list))
                        eval_loss_hist.append(e_loss / len(bags_train))
                        eval_acc_hist.append(e_acc / len(bags_train))
                        eval_aucs.append(roc_auc_score(y_true_list, y_score_list))
                        accs.append(e_acc / len(bags_train))
                        step_hist.append(i+1)
                        model = model.train()

    return model.prototypes