# Linear Classifiers
Use some kind of random patches and train multiple classifiers on very small parts of the dataset. Specifically, for each data sample, sample from its nearest same-class and different-class samples to create a small neighborhood. Then create a linear classifier based on the normal equation for fast inference.
Finally, find a smart way to combine these results (each model will occupy a specific point that would be the centroid of its neighborhood; we can then use the distance to that point as a weight when predicting for a new sample).

In [1]:
def plot_selected_points(clf, X, y, center_id, indices, s=100, title=None):
    """Plot the decision surface of ``clf`` and highlight selected samples.

    The first two columns of ``X`` are scattered over the classifier's
    decision regions; one "centre" sample is outlined in green and a set of
    selected samples is outlined in black.

    Parameters
    ----------
    clf : classifier exposing ``predict`` (forwarded to ``plot_contours``)
    X : 2-D array; columns 0 and 1 are used as the plot axes
    y : array of labels used to colour the scattered samples
    center_id : index into ``X`` of the sample outlined in green
    indices : indices into ``X`` of the samples outlined in black
    s : marker size passed to ``scatter``, optional
    title : axes title, optional. When omitted no title is set; previously
        the function read an undefined/global ``title`` name.
    """
    plt.figure(figsize=(10, 10))
    ax = plt.gca()

    X0, X1 = X[:, 0], X[:, 1]
    xx, yy = make_meshgrid(X0, X1)

    # Background: predicted class over the whole mesh.
    plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
    # All samples, coloured by label.
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=s, edgecolors='grey')
    # Hollow rings on top: green for the centre, black for the selection.
    ax.scatter(X[center_id, 0], X[center_id, 1], s=s,
               edgecolors='green', facecolors='none')
    ax.scatter(X[indices, 0], X[indices, 1], s=s,
               edgecolors='black', facecolors='none')

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel('Sepal length')
    ax.set_ylabel('Sepal width')
    ax.set_xticks(())
    ax.set_yticks(())
    if title is not None:
        ax.set_title(title)
    plt.show()


In [None]:
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets


def make_meshgrid(x, y, h=.02):
    """Build a regular 2-D grid that covers the given data with a margin.

    Each axis spans from one unit below the data minimum up to (but not
    including) one unit above the data maximum, sampled every ``h``.

    Parameters
    ----------
    x: data to base x-axis meshgrid on
    y: data to base y-axis meshgrid on
    h: stepsize for meshgrid, optional

    Returns
    -------
    xx, yy : ndarray
    """
    margin = 1
    x_axis = np.arange(x.min() - margin, x.max() + margin, h)
    y_axis = np.arange(y.min() - margin, y.max() + margin, h)
    xx, yy = np.meshgrid(x_axis, y_axis)
    return xx, yy


def plot_contours(ax, clf, xx, yy, **params):
    """Draw a classifier's predictions over a mesh as filled contours.

    Parameters
    ----------
    ax: matplotlib axes object
    clf: a classifier
    xx: meshgrid ndarray
    yy: meshgrid ndarray
    params: dictionary of params to pass to contourf, optional

    Returns
    -------
    Whatever ``ax.contourf`` returns.
    """
    # One row per mesh node: (x, y) coordinate pairs.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    # Predict every node, then fold the flat result back into mesh shape.
    predicted = clf.predict(grid_points).reshape(xx.shape)
    return ax.contourf(xx, yy, predicted, **params)


# Load the iris dataset; only its first two feature columns are kept so the
# decision surfaces can be drawn in 2-D (any two-dimensional dataset would
# make this slicing unnecessary).
iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target

from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier

C = 1.0  # SVM regularization parameter
# Base learner shared by every ensemble built below. The commented-out lines
# are alternative base learners that can be swapped in.
base =  AdaBoostClassifier()
        #KNeighborsClassifier()        
        #ExtraTreeClassifier()
        #DecisionTreeClassifier()
        #SGDClassifier() 
        #svm.SVC(kernel='linear', C=C, probability=True)
# Ensembles wrapping `base`; each of the first three uses half of the samples
# and half of the features, with a fixed seed.
# NOTE(review): BaggingClassifier, Vanilla_RP and Adversarial_Cascade are not
# imported/defined in this cell -- presumably provided by an earlier cell.
bagging = BaggingClassifier(base, max_samples=0.5, max_features=0.5, random_state=42)
rp = Vanilla_RP(base,max_samples=0.5, max_features=0.5, random_state=42)
rp_w = Vanilla_RP(base, max_samples=0.5, max_features=0.5, patcher='weighted', random_state=42)
ac = Adversarial_Cascade(base_estimator=base, num_adversaries_per_instance=10,
                         optim=False, 
                         parameters=None, oob=True, way='furthest')

# Classifiers to compare: two SVMs, a k-NN model, `pipeline`, and the four
# ensembles defined above. The data is used as-is (no scaling is applied).
# NOTE(review): `pipeline` is not defined in the visible source; it is paired
# with the 'SGD' title, so presumably an SGD pipeline from another cell.

models = (svm.SVC(kernel='linear', C=C),
          svm.SVC(kernel='rbf', gamma=0.7, C=C), 
          KNeighborsClassifier(),
         pipeline,
         bagging,
         rp,
         rp_w,
         ac)

# Lazy generator: each classifier is fitted on (X, y) only when the generator
# is consumed, and it can be iterated just once.
models = (clf.fit(X, y) for clf in models)

# Titles for the subplots, listed in the same order as the entries of `models`
# (they are paired positionally by the zip below).
titles = ('SVC with linear kernel',
          'SVC with RBF kernel',
          'KNN',
         'SGD', 
         "Bagging",
         "RP",
         "RP-W",
         'AC')

# Set up a 2x4 grid of axes, one per model.
fig, sub = plt.subplots(2, 4)
plt.subplots_adjust(wspace=0.4, hspace=0.4)

# Mesh over the two plotted features, shared by all subplots.
X0, X1 = X[:, 0], X[:, 1]
xx, yy = make_meshgrid(X0, X1)

for clf, title, ax in zip(models, titles, sub.flatten()):
    
    # Accuracy (in percent) on the same X, y that is plotted below.
    print("%s : %0.3f"% (title, 100*accuracy_score(y, clf.predict(X))))
    # Decision regions in the background, samples on top.
    plot_contours(ax, clf, xx, yy,
                  cmap=plt.cm.coolwarm, alpha=0.8)
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel('Sepal length')
    ax.set_ylabel('Sepal width')
    # Hide tick marks on both axes.
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)
# Enlarge the figure so the eight panels stay readable.
fig.set_figwidth(20)
fig.set_figheight(10)
plt.show()

In [None]:
class Linear_Classifiers(BaseEstimator, ClassifierMixin):
    """Cascade ensemble in which each new model is trained on current mistakes.

    The first base model is fitted on the whole training set.  Every later
    model is fitted on the samples that the ensemble built so far gets wrong
    (the "centroids") together with, for each of them, a few training samples
    of a *different* class (the "adversaries").  Predictions average the base
    models' ``predict_proba`` output, or fall back to majority voting when the
    base estimator has no ``predict_proba``.

    Parameters
    ----------
    base_estimator : estimator
        Prototype that is cloned for every member of the ensemble.
    n_estimators : int
        Maximum number of base models to train.
    acc_target : float
        Training stops as soon as the accuracy on the evaluation set (the
        held-out split when ``oob`` is true, the training set otherwise)
        exceeds this value.
    num_adversaries_per_instance : int
        Number of different-class samples selected per misclassified sample.
    way : {'prob', 'furthest', 'closest'}
        How adversaries are selected: sampled with probability proportional
        to their distance from the centroid, the furthest ones, or the
        closest ones.
    random_state : int, RandomState instance or None
        Converted with ``check_random_state``; drives the 'prob' sampling and
        the out-of-bag split.
    optim : bool
        When true, every base model is tuned with ``GridSearchCV``.
    parameters : dict or None
        Grid handed to ``GridSearchCV``; stored as ``None`` unless ``optim``.
    metric : str
        Scorer name resolved with ``get_scorer`` and stored as ``scoring``.
    oob : bool
        When true, a stratified hold-out split is used to find the mistakes.
    oob_size : float
        Fraction of the data put in that hold-out split.
    """

    _WAYS = ('prob', 'furthest', 'closest')

    def __init__(self, base_estimator=KNeighborsClassifier(), n_estimators=10, acc_target=0.99,
                 num_adversaries_per_instance=4, way = 'prob', 
                 random_state=42, optim=False, parameters=None, metric='accuracy', oob=False, oob_size=0.1):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.acc_target = acc_target
        self.num_adversaries_per_instance = num_adversaries_per_instance
        self.way = way
        self.random_state = check_random_state(random_state)
        self.optim = optim
        self.oob = oob
        # Honour the caller's value (it used to be overwritten with 0.1).
        self.oob_size = oob_size
        # Stored so that get_params()/clone() can read every __init__ argument.
        self.metric = metric
        self.X_oob = None
        self.y_oob = None
        self.parameters = parameters if self.optim else None
        self.scoring = get_scorer(metric)
        self.acc = 0
        self.ensemble = []
        self.selected_indices = []

    def fit(self, X, y):
        """Fit the cascade on ``X``, ``y`` and return ``self``."""
        return self._fit(X, y)

    def viz_fit(self, X, y):
        """Same as ``fit`` but plot the selected samples after every round.

        After each plot a line is read from standard input; entering ``q``
        stops training early.
        """
        return self._fit(X, y, visualize=True)

    def _fit(self, X, y, visualize=False):
        """Shared training loop behind ``fit`` and ``viz_fit``.

        Parameters
        ----------
        X, y : training data and labels.
        visualize : bool, optional
            Build each batch with ``viz_create_next_batch`` instead of
            ``_create_next_batch``.

        Returns
        -------
        self
        """
        X, y = check_X_y(
            X, y, ['csr', 'csc'], dtype=None, force_all_finite=False,
            multi_output=True)
        y = self._validate_y(y)

        # Start from a clean slate so that calling fit twice does not stack a
        # second ensemble on top of the first one.
        self.ensemble = []
        self.selected_indices = []
        self.acc = 0

        if self.oob:
            X, self.X_oob, y, self.y_oob = train_test_split(
                X, y, test_size=self.oob_size, stratify=y,
                random_state=self.random_state)
        self.n_features_ = X.shape[1]
        self.selected_indices.append(list(range(X.shape[0])))

        next_batch = self.viz_create_next_batch if visualize else self._create_next_batch
        cur_X, cur_y = X, y
        for _ in range(self.n_estimators):
            model = clone(self.base_estimator)
            if self.optim:
                search = GridSearchCV(model, self.parameters, n_jobs=-1, verbose=1, refit=True)
                search.fit(cur_X, cur_y)
                model = search.best_estimator_
            else:
                model.fit(cur_X, cur_y)
            self.ensemble.append(model)
            cur_X, cur_y, target_reached = next_batch(X, y)
            if target_reached:
                break

        # `y` is label-encoded here, so compare against encoded predictions.
        print("%d ESTIMATORS -- %0.3f" % (len(self.ensemble), 100*accuracy_score(y, self._predict_encoded(X), normalize=True)))
        return self

    def _create_next_batch(self, X, y, visualize=False):
        """Assemble the training batch for the next base model.

        Parameters
        ----------
        X, y : the training data and its label-encoded targets.
        visualize : bool, optional
            Plot the last centroid with its selected samples and wait for a
            line on standard input.

        Returns
        -------
        cur_X, cur_y, stop
            The batch and ``False``, or ``(None, None, True)`` when training
            should stop: the accuracy target was exceeded, nothing is
            misclassified any more, or the user entered ``q``.

        Raises
        ------
        ValueError
            If ``way`` is not one of 'prob', 'furthest', 'closest'.
        """
        if self.way not in self._WAYS:
            raise ValueError("way must be one of %r, got %r" % (self._WAYS, self.way))

        # Mistakes are looked for on the held-out split when one exists.
        if self.oob:
            eval_X, eval_y = self.X_oob, self.y_oob
        else:
            eval_X, eval_y = X, y
        preds = self._predict_encoded(eval_X)
        self.acc = accuracy_score(eval_y, preds, normalize=True)
        wrong = np.flatnonzero(preds != eval_y)
        if self.acc > self.acc_target or wrong.size == 0:
            return None, None, True

        centroids = eval_X[wrong]
        # Labels come from the same set as the centroids; in OOB mode the
        # indices in `wrong` do not refer to rows of the training `y`.
        centroid_labels = eval_y[wrong]
        distances = pairwise_distances(centroids, X)
        n_adv = self.num_adversaries_per_instance

        batch_X = [centroids]
        batch_y = [centroid_labels]
        last_selection = None
        for i_centr in range(centroids.shape[0]):
            other_class = y != centroid_labels[i_centr]
            n_other = int(other_class.sum())
            if n_other == 0:
                # No different-class sample exists, nothing to add.
                continue
            # Same-class samples get zero weight so they are never drawn.
            weights = np.where(other_class, distances[i_centr], 0.0)
            total = weights.sum()
            if total > 0:
                prob = weights / total
            else:
                # Every candidate sits on the centroid: fall back to uniform.
                prob = other_class / float(n_other)

            if self.way == 'prob':
                indices = self.random_state.choice(X.shape[0], n_adv, p=prob)
            elif self.way == 'furthest':
                ranked = np.argsort(np.where(other_class, prob, -np.inf))[::-1]
                indices = ranked[:min(n_adv, n_other)]
            else:  # 'closest'
                ranked = np.argsort(np.where(other_class, prob, np.inf))
                indices = ranked[:min(n_adv, n_other)]

            indices = self._fix_class_indices(y, indices)
            batch_X.append(X[indices])
            batch_y.append(y[indices])
            last_selection = (i_centr, indices)

        # Misclassified samples first, then each centroid's adversaries.
        cur_X = np.vstack(batch_X)
        cur_y = np.concatenate(batch_y)

        if visualize and last_selection is not None:
            i_centr, indices = last_selection
            if self.oob:
                # The centroid is not a row of X; append it for plotting only.
                plot_X = np.vstack((X, centroids[i_centr:i_centr + 1]))
                plot_y = np.append(y, centroid_labels[i_centr])
                center_id = X.shape[0]
            else:
                plot_X, plot_y, center_id = X, y, wrong[i_centr]
            plot_selected_points(self, plot_X, plot_y, center_id, indices)
            try:
                read_line = raw_input  # Python 2
            except NameError:
                read_line = input  # Python 3
            if read_line() == 'q':
                return None, None, True
        return cur_X, cur_y, False

    def viz_create_next_batch(self, X, y):
        """``_create_next_batch`` with plotting and the interactive prompt."""
        return self._create_next_batch(X, y, visualize=True)

    def _fix_class_indices(self, y, samples_indices):
        """Extend ``samples_indices`` so that every class of ``y`` appears.

        For each class missing from ``y[samples_indices]`` the index of its
        first occurrence in ``y`` is appended (in sorted class order).
        """
        samples_indices = np.asarray(samples_indices, dtype=int)
        missing = np.setdiff1d(np.unique(y), y[samples_indices])
        if missing.size == 0:
            return samples_indices
        extra = [np.flatnonzero(y == label)[0] for label in missing]
        return np.append(samples_indices, extra)

    def _validate_y(self, y):
        """Check ``y``, record ``classes_``/``n_classes_``, return encoded labels."""
        y = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y, return_inverse=True)
        self.n_classes_ = len(self.classes_)
        return y

    def _predict_encoded(self, X):
        """Predict positions into ``classes_`` rather than the labels themselves."""
        check_is_fitted(self, "classes_")
        if hasattr(self.base_estimator, "predict_proba"):
            return np.argmax(self.predict_proba(X), axis=1)
        # Majority vote: one column of (encoded) predictions per base model.
        votes = np.column_stack([ens.predict(X) for ens in self.ensemble])
        return np.array([most_common(row) for row in votes])

    def predict(self, X):
        """Predict class for X.
        The predicted class of an input sample is computed as the class with
        the highest mean predicted probability. If base estimators do not
        implement a ``predict_proba`` method, then it resorts to voting.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape = [n_samples, n_features]
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.
        Returns
        -------
        y : array of shape = [n_samples]
            The predicted classes, expressed with the original labels.
        """
        return self.classes_.take(self._predict_encoded(X), axis=0)

    def predict_proba(self, X):
        """Predict class probabilities for X.
        The predicted class probabilities of an input sample are the mean of
        the class probabilities predicted by the base estimators fitted so
        far.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape = [n_samples, n_features]
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.
        Returns
        -------
        p : array of shape = [n_samples, n_classes]
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        check_is_fitted(self, "classes_")
        # Check data
        X = check_array(
            X, accept_sparse=['csr', 'csc'], dtype=None,
            force_all_finite=False
        )

        if self.n_features_ != X.shape[1]:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is {0} and "
                             "input n_features is {1}."
                             "".format(self.n_features_, X.shape[1]))

        all_proba = np.zeros((X.shape[0], self.n_classes_))
        for ens in self.ensemble:
            all_proba += ens.predict_proba(X)
        # Average over the models that exist, which can be fewer than
        # n_estimators (during fitting, or after an early stop).
        all_proba /= len(self.ensemble)
        return all_proba

    @if_delegate_has_method(delegate='base_estimator')
    def decision_function(self, X):
        """Average of the decision functions of the base classifiers.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape = [n_samples, n_features]
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.
        Returns
        -------
        score : array
            Mean of the base models' ``decision_function`` outputs, with the
            same shape as a single base model's output.
        """
        check_is_fitted(self, "classes_")

        # Check data
        X = check_array(
            X, accept_sparse=['csr', 'csc'], dtype=None,
            force_all_finite=False
        )

        if self.n_features_ != X.shape[1]:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is {0} and "
                             "input n_features is {1} "
                             "".format(self.n_features_, X.shape[1]))

        total = None
        for ens in self.ensemble:
            scores = ens.decision_function(X)
            total = scores if total is None else total + scores
        return total / len(self.ensemble)
       

def most_common(lst):
    """Return the value that occurs most often in ``lst``.

    ``lst`` may be a list or a NumPy array; an array is converted to a plain
    list first so that ``count`` is available.
    """
    values = lst.tolist() if isinstance(lst, np.ndarray) else lst
    candidates = set(values)
    return max(candidates, key=values.count)