Code that implements global, local, and multi-task learning (MTL) binary classification using an SVM-type objective.

In [None]:
import numpy as np
import glob
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from scipy import linalg as LA
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
def read_prep_data():
    """Load the per-task feature and label matrices from CSV files.

    The current working directory must contain, for every task, one file
    matching ``features_*.csv`` and one matching ``labels_*.csv``.  Both file
    lists are sorted by name, so the i-th feature file is paired with the
    i-th label file.

    Returns:
        A pair ``(X, Y)`` of lists with one entry per task.  Each entry is the
        array ``np.genfromtxt`` produces for the corresponding comma-separated
        file.

    Raises:
        ValueError: if the number of feature files differs from the number of
            label files.
    """
    feature_list = sorted(glob.glob('features_*.csv'))
    label_list = sorted(glob.glob('labels_*.csv'))
    if len(label_list) != len(feature_list):
        # A bare `assert('...')` on a non-empty string can never fail, so the
        # mismatch has to be reported with an explicit exception.
        raise ValueError(
            'input disparity: found %d feature files but %d label files'
            % (len(feature_list), len(label_list)))
    X = [np.genfromtxt(f, delimiter=',') for f in feature_list]
    Y = [np.genfromtxt(el, delimiter=',') for el in label_list]
    return X,Y
def flatten_tasks(XX,YY):
    """Stack the per-task arrays into single arrays for global modeling.

    Args:
        XX: list of per-task feature arrays, all with the same number of
            columns.
        YY: list of per-task label arrays, aligned with ``XX``.

    Returns:
        A pair ``(X, Y)`` where ``X`` is every entry of ``XX`` concatenated
        along axis 0 (task order preserved) and ``Y`` is the same for ``YY``.
        Both are newly allocated arrays, even when there is only one task.

    Raises:
        ValueError: if no task is given or if ``XX`` and ``YY`` have different
            lengths.
    """
    if len(XX) == 0:
        raise ValueError('flatten_tasks needs at least one task')
    if len(XX) != len(YY):
        raise ValueError(
            'XX and YY must have one entry per task (got %d and %d)'
            % (len(XX), len(YY)))
    # A single concatenate copies every sample once; appending task by task
    # would re-copy the growing result on each step.
    X = np.concatenate(XX, axis=0)
    Y = np.concatenate(YY, axis=0)
    return X,Y

def mt_data_split(X, y, perc, random_state):
    """Split every task independently into a training and a test portion.

    Args:
        X: list of per-task feature arrays.
        y: list of per-task label arrays, aligned with ``X``.
        perc: forwarded to ``train_test_split`` as ``test_size``.
        random_state: accepted but not forwarded to ``train_test_split``;
            the split therefore uses that function's default randomness.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, four lists with one entry per
        task, in the same task order as the input.
    """
    # One split per task, performed in task order.
    per_task = [train_test_split(X[t], y[t], test_size=perc) for t in range(len(X))]

    X_train = [parts[0] for parts in per_task]
    X_test = [parts[1] for parts in per_task]
    y_train = [parts[2] for parts in per_task]
    y_test = [parts[3] for parts in per_task]
    return X_train, X_test, y_train, y_test


# Load every task's data once at module level: XX / YY hold the per-task
# arrays, X / Y hold the same samples stacked into single arrays.
XX,YY = read_prep_data()
X,Y = flatten_tasks(XX,YY)

The following cell defines the class for the MTL classifier.

In [None]:
class mtl:
    """Multi-task linear binary classifier trained by alternating updates.

    ``fit`` alternates between
      (a) clipped dual coordinate steps on the per-task weight vectors (the
          same form of update that ``simple_svm`` uses for a single task), and
      (b) recomputing the task-relationship matrix ``Sigma`` from the current
          weights.

    Data is always passed as lists with one entry per task: ``X[t]`` is an
    ``(n_t, d)`` array and ``y[t]`` holds the ``n_t`` labels of task ``t``.
    Predictions are produced with ``np.sign``, so labels are expected to be
    -1 / +1.

    Attributes set by ``fit``:
        num_tasks, d: number of tasks and number of features.
        n, totaln: list of per-task sample counts and their sum.
        model: ``(d, num_tasks)`` array; column ``t`` is the weight vector of
            task ``t``.
        alpha: list of ``(n_t, 1)`` arrays with the dual variables of each task.
        Sigma: ``(num_tasks, num_tasks)`` task-relationship matrix (trace 1
            after the first outer iteration).
        Omega: ``num_tasks * I``; initialized but not updated by ``fit``.
        train_mse_iter: ``(max_outer_iter, 1)`` array.  Despite the name it
            stores the training error rate (1 - mean task accuracy) measured at
            the start of every outer iteration.
    """

    def __init__(self, lam = 1.0 , max_outer_iter = 100, max_inner_iter = 10, max_local_iter = 1, random_seed = 3):
        self.lam = lam  # lambda: regularization parameter
        self.max_outer_iter = max_outer_iter  # rounds of (update W, update Sigma)
        self.max_inner_iter = max_inner_iter  # W-update rounds per outer iteration
        self.max_local_iter = max_local_iter  # coordinate steps per task, as a multiple of n_t
        self.random_seed = random_seed  # stored only; fit() does not seed the RNG with it

    def predict(self, X):
        """Return a list with one ``(n_t, 1)`` array of ``np.sign`` predictions per task."""
        return [
            np.sign(np.dot(X[t], self.model[:, t])).reshape((X[t].shape[0], 1))
            for t in range(self.num_tasks)
        ]

    def score(self, X, y):
        """Return a ``(num_tasks, 1)`` array with the accuracy on each task."""
        predictions = self.predict(X)
        score_vec = np.zeros((self.num_tasks, 1))
        for t in range(self.num_tasks):
            n_t = y[t].shape[0]
            mistakes = np.sum(predictions[t] != y[t].reshape(n_t, 1))
            score_vec[t] = 1.0 - mistakes * 1.0 / n_t
        return score_vec

    def fit(self, X, y):
        """Train the per-task weights and the task-relationship matrix.

        Args:
            X: list of ``(n_t, d)`` feature arrays, one per task.
            y: list of label arrays (length ``n_t``), one per task.

        Returns:
            ``self``.

        Note:
            The order in which samples are visited is drawn with
            ``np.random.permutation``, i.e. from NumPy's global random state;
            ``self.random_seed`` is not applied here.
        """
        # initialize
        self.num_tasks = len(X)
        self.d = X[0].shape[1]
        d = self.d
        m = self.num_tasks
        self.Sigma = np.eye(m) * (1.0 / m)
        self.Omega = np.eye(m) * (1.0 * m)
        rho = 1.0
        self.model = np.zeros((d, m))
        # W is an alias of self.model on purpose: the in-place column updates
        # below are what score() sees at the start of the next outer iteration.
        W = self.model

        self.n = [y[t].shape[0] for t in range(m)]
        self.totaln = sum(self.n)
        self.alpha = [np.zeros((n_t, 1)) for n_t in self.n]

        self.train_mse_iter = np.zeros((self.max_outer_iter, 1))
        for h in range(self.max_outer_iter):
            # training error rate of the model before this outer iteration
            self.train_mse_iter[h] = 1.0 - np.mean(self.score(X, y))

            ## update W
            for _ in range(self.max_inner_iter):
                deltaW = np.zeros((d, m))  # local change of each task's weights, scaled by Sigma[t, t]
                deltaB = np.zeros((d, m))  # unscaled change, mixed across tasks below

                ## going over tasks
                for t in range(m):
                    alpha_t = self.alpha[t]
                    curr_sig = self.Sigma[t, t]
                    perm_t = np.random.permutation(self.n[t])
                    # max_local_iter may be fractional; range() needs an int
                    local_iters_t = int(round(self.max_local_iter * self.n[t]))

                    for s in range(local_iters_t):
                        idx = perm_t[s % self.n[t]]
                        # get current variables
                        alpha_old = alpha_t[idx]
                        curr_x = X[t][idx, :].reshape((1, d))
                        curr_y = y[t][idx]
                        size_xx = np.dot(curr_x, curr_x.transpose())
                        # margin of this sample under the locally updated weights
                        update = curr_y * np.dot(curr_x, (W[:, t] + rho * deltaW[:, t]))
                        grad = self.lam * self.n[t] * (1.0 - update) / (curr_sig * rho * size_xx) + (alpha_old * curr_y)

                        # clip so that alpha * y stays inside [0, 1]
                        alpha_new = curr_y * max(0.0, min(1.0, grad))
                        deltaW[:, t] = deltaW[:, t] + curr_sig * (alpha_new - alpha_old) * curr_x.transpose().squeeze() / (self.lam * self.n[t])
                        deltaB[:, t] = deltaB[:, t] + (alpha_new - alpha_old) * curr_x.transpose().squeeze() / self.n[t]
                        alpha_t[idx] = alpha_new

                # combine updates globally: every task receives the changes of
                # all tasks, weighted by the matching entry of Sigma
                for t in range(m):
                    for tt in range(m):
                        W[:, t] = W[:, t] + deltaB[:, tt] * self.Sigma[t, tt] * (1.0 / self.lam)

            # update the Sigmas: Sigma = sqrt(W^T W) normalized to trace 1, with
            # the eigenvalues of W^T W floored at epsil
            epsil = 0.0000001
            A = np.dot(W.transpose(), W)
            D, V = LA.eigh(A)
            D = (D * (D > epsil)) + epsil * (D <= epsil)
            sqm = np.sqrt(D)
            sqm = sqm / np.sum(sqm)
            self.Sigma = np.dot(np.dot(V, np.diag(sqm)), V.transpose())
            # largest ratio of a column's absolute sum to its diagonal entry
            rho = max(np.sum(np.absolute(self.Sigma), 0) / np.diag(self.Sigma))

        return self

    def get_params(self, deep=True):
        """Return every constructor argument, keyed by its parameter name."""
        return {"lam": self.lam, "max_outer_iter": self.max_outer_iter, "max_inner_iter": self.max_inner_iter,
                "max_local_iter": self.max_local_iter, "random_seed": self.random_seed}

    def set_params(self, **parameters):
        """Set the given attributes on the estimator and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def data_split(self, X, y, perc, random_state):
        """Split every task independently into train and test parts.

        ``perc`` is forwarded to ``train_test_split`` as ``test_size``.
        ``random_state`` is accepted but not forwarded.  As a side effect
        ``self.num_tasks`` is set to ``len(X)``.

        Returns:
            ``(X_train, X_test, y_train, y_test)``, four lists with one entry
            per task.
        """
        self.num_tasks = len(X)
        X_train = []
        y_train = []
        X_test = []
        y_test = []
        for t in range(self.num_tasks):
            Xt_train, Xt_test, yt_train, yt_test = train_test_split(X[t], y[t], test_size=perc)
            X_train.append(Xt_train)
            X_test.append(Xt_test)
            y_train.append(yt_train)
            y_test.append(yt_test)
        return X_train, X_test, y_train, y_test

    def cross_validate(self, X, y, folds = 5, lam_range=(.1,1.0,10.0), outer_iters_range = (1,10,20), inner_iters_range = (1,2), local_iters_range = (.1,1.0)):
        """Grid-search the hyper-parameters, then refit on all of ``X, y``.

        Each of the ``folds`` rounds draws a fresh random split with test
        fraction ``1 / folds`` (the rounds are independent splits, not a
        partition of the data) and evaluates every parameter combination on
        it.  The combination with the highest mean test accuracy is stored in
        ``best_lam``, ``best_outer``, ``best_inner`` and ``best_local``,
        written back to the estimator, and used for a final ``fit(X, y)``.

        Returns:
            ``self.score(X, y)`` of the refitted model, i.e. the per-task
            accuracy on the data passed in.
        """
        grid_shape = (len(lam_range), len(outer_iters_range),
                      len(inner_iters_range), len(local_iters_range))
        score_results = np.zeros((folds,) + grid_shape)
        perc = 1.0 / folds
        for f in range(folds):
            random_state = np.random.randint(10000)
            X_train, X_test, y_train, y_test = self.data_split(X, y, perc, random_state=random_state)
            for el_it, lam in enumerate(lam_range):
                self.lam = lam
                for oi_it, outer_iters in enumerate(outer_iters_range):
                    self.max_outer_iter = outer_iters
                    for ii_it, inner_iters in enumerate(inner_iters_range):
                        self.max_inner_iter = inner_iters
                        for eli_it, local_iters in enumerate(local_iters_range):
                            self.max_local_iter = local_iters
                            self.fit(X_train, y_train)
                            score_results[f, el_it, oi_it, ii_it, eli_it] = np.mean(self.score(X_test, y_test))
        score_results_avg = np.mean(score_results, axis=0)

        # finding the best score
        args = np.unravel_index(np.argmax(score_results_avg), grid_shape)

        self.best_lam = lam_range[args[0]]
        self.best_outer = outer_iters_range[args[1]]
        self.best_inner = inner_iters_range[args[2]]
        self.best_local = local_iters_range[args[3]]

        self.lam = self.best_lam
        self.max_outer_iter = self.best_outer
        self.max_inner_iter = self.best_inner
        self.max_local_iter = self.best_local

        self.fit(X, y)
        return self.score(X, y)

The following cell implements a simple SVM solver (SDCA) that can be used for the local and global baselines.

In [None]:
class simple_svm:
    """Linear SVM solved with stochastic dual coordinate ascent (SDCA).

    Intended for the local and global baselines.  At each point the loss is
    g_i(w) = max(0, 1 - y_i x_i^T w) and the overall objective is
    (1/N) * sum_i g_i(w) + (lambda/2) * ||w||^2.

    Attributes set by ``fit``:
        model: ``(d, 1)`` weight vector.
        support_vector: ``(n, 1)`` array with the dual variable of every
            training sample.
    """

    def __init__(self, lam = 1.0, max_iter = 10, random_seed = 3):
        self.lam = lam  # lambda: regularization parameter
        self.max_iter = max_iter  # number of passes over the training set
        self.random_seed = random_seed  # stored only; fit() does not seed the RNG with it

    def fit(self, X,y):
        """Learn the weight vector from ``X`` (``(n, d)``) and ``y``.

        Args:
            X: input training data, an ``(n, d)`` array.
            y: output training data; should be -1 / +1 values.

        Returns:
            ``self``; the learned model is stored in ``self.model``.

        Note:
            Every pass visits the samples in an order drawn with
            ``np.random.permutation`` (NumPy's global random state).
        """
        ## initialize
        n, d = X.shape
        w = np.zeros((d, 1))
        alpha = np.zeros((n, 1))
        # squared norm of every sample, used to scale its coordinate step
        size_x = np.zeros((n, 1))
        for i in range(n):
            curr_x = X[i, :].reshape((1, d))
            size_x[i] = np.dot(curr_x, curr_x.transpose())

        for _ in range(self.max_iter):
            ## visit every coordinate once per pass, in random order
            for i in np.random.permutation(n):
                # get current variables
                alpha_old = alpha[i]
                curr_x = X[i, :].reshape((1, d))
                curr_y = y[i]

                # calculate update
                update = self.lam * n * (1.0 - (curr_y * np.dot(curr_x, w))) / size_x[i] + (alpha_old * curr_y)
                # apply update, clipping so that alpha * y stays inside [0, 1]
                alpha_new = curr_y * max(0, min(1.0, update))
                w = w + ((alpha_new - alpha_old) * curr_x.transpose() * (1.0 / (self.lam * n)))
                alpha[i] = alpha_new

        self.model = w
        self.support_vector = alpha
        return self

    def predict(self, X):
        """Return the ``np.sign`` predictions for ``X`` as an ``(n, 1)`` array."""
        return np.sign(np.dot(X, self.model)).reshape((X.shape[0], 1))

    def score(self, X, y):
        """Return the fraction of samples in ``X`` whose prediction equals ``y``."""
        return 1.0 - np.sum(self.predict(X) != y.reshape(len(y), 1)) * 1.0 / len(y)

    def get_params(self, deep=True):
        """Return every constructor argument, keyed by its parameter name."""
        return {"lam": self.lam, "max_iter": self.max_iter, "random_seed": self.random_seed}

    def set_params(self, **parameters):
        """Set the given attributes on the estimator and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

In [None]:
def run_local(Xtrain,ytrain, Xtest, ytest, cv = 5, lam_range = [1.0], max_iter_range = [10], tol_range = [.001]):
    """Train and evaluate one independent linear SVC per task.

    For every task a ``GridSearchCV`` over ``C`` and ``tol`` is fitted on that
    task's training data and scored on that task's test data.  Each lambda in
    ``lam_range`` is converted to ``C = 1 / (lambda * n_t)``, where ``n_t`` is
    the number of training samples of the task.

    ``max_iter_range`` is not used by the SVC-based search; it belongs to the
    alternative ``simple_svm`` search over ``{'lam', 'max_iter'}``.

    Returns:
        An ``(m, 1)`` array with the test score of every task.
    """
    num_tasks = len(Xtrain)
    results = np.zeros((num_tasks, 1))

    for task in range(num_tasks):
        # scikit-learn SVC, with the lambda grid translated into a C grid
        n_task = ytrain[task].shape[0]
        grid = [{'C': [1.0 / (lam * n_task) for lam in lam_range],
                 'tol': tol_range}]
        searcher = GridSearchCV(SVC(C=1.0, kernel='linear'), grid, cv = cv)

        searcher.fit(Xtrain[task], ytrain[task])
        results[task] = searcher.score(Xtest[task], ytest[task])

        print('local method best params for task '+ str(task)+':')
        print(searcher.best_params_)

    print('------')
    return results

def run_global(Xtrain,ytrain, Xtest, ytest, cv = 5, lam_range = [1.0], max_iter_range = [10], tol_range = [.001]):
    """Train one linear SVC on all tasks pooled together and score it per task.

    The training data of every task is stacked with ``flatten_tasks`` and a
    single ``GridSearchCV`` over ``C`` and ``tol`` is fitted on it.  Each
    lambda in ``lam_range`` is converted to ``C = 1 / (lambda * n)``, where
    ``n`` is the pooled number of training samples.

    ``max_iter_range`` is not used by the SVC-based search; it belongs to the
    alternative ``simple_svm`` search over ``{'lam', 'max_iter'}``.

    Returns:
        An ``(m, 1)`` array with the pooled model's test score on every task.
    """
    num_tasks = len(Xtrain)
    X_all, y_all = flatten_tasks(Xtrain, ytrain)

    # scikit-learn SVC, with the lambda grid translated into a C grid
    n_total = y_all.shape[0]
    grid = [{'C': [1.0 / (lam * n_total) for lam in lam_range],
             'tol': tol_range}]
    searcher = GridSearchCV(SVC(C=1.0, kernel='linear'), grid, cv = cv)
    searcher.fit(X_all, y_all)

    print('global method best params:')
    print(searcher.best_params_)
    print('----')

    # the single pooled model is evaluated separately on each task's test set
    results = np.zeros((num_tasks, 1))
    for task in range(num_tasks):
        results[task] = searcher.score(Xtest[task], ytest[task])
    return results

def run_mtl(Xtrain, ytrain, Xtest, ytest, cv = 5, lam_range=[1.0],
              outer_iters_range = [1], inner_iters_range = [1], 
              local_iters_range = [1.0]):
    """Cross-validate and train the ``mtl`` classifier, then score it per task.

    ``mtl.cross_validate`` selects the hyper-parameters over the given ranges
    (using ``cv`` as its number of folds) and refits on the full training
    data.  The selected values are printed.

    Returns:
        The result of ``mtl.score`` on the test data (one score per task).
    """
    search_space = {
        'folds': cv,
        'lam_range': lam_range,
        'outer_iters_range': outer_iters_range,
        'inner_iters_range': inner_iters_range,
        'local_iters_range': local_iters_range,
    }
    model = mtl()
    model.cross_validate(Xtrain, ytrain, **search_space)

    print('mtl best parameters:')
    print(['lambda:', model.best_lam, 'outer:', model.best_outer,
           'inner:', model.best_inner, 'local:', model.best_local])
    print('----')
    return model.score(Xtest, ytest)


The following cell runs the experiments by calling the methods defined above.

In [None]:
# Experiment configuration.
num_trials = 10   # number of independent random train/test splits
test_perc = .9    # passed to mt_data_split as the test fraction of every task
cv = 5            # folds used by every hyper-parameter search
m = len(XX)       # number of tasks
np.random.seed(0)

# One column per trial, one row per task.
local_results = np.zeros((m,num_trials))
global_results = np.zeros((m,num_trials))
mtl_results = np.zeros((m,num_trials))

# Hyper-parameter grids; they do not depend on the trial, so they are defined
# once here.
local_lam_range = [10, 1.0, .1, .01, .001, .0001, .00001, .000001]
local_max_iter_range = [1, 5, 10]
local_tol_range = [.1, .01, .001, .0001]

global_lam_range = [10, 1.0, .1, .01, .001, .0001, .00001, .000001]
global_max_iter_range = [1, 5, 10]
global_tol_range = [.1, .01, .001, .0001]

mtl_lam_range = [1.0, .1, .01, .001, .0001, .00001, .000001]
mtl_outer_iters_range = [5, 10, 50]
mtl_inner_iters_range = [1, 5, 10]
mtl_local_iters_range = [.5, 1.0, 2.0]

for t in range(num_trials):
    Xtrain, Xtest, Ytrain, Ytest = mt_data_split(XX, YY, test_perc, 10000*t+10000)

    # normalize the features and add the bias term
    for tasks in range(m):
        # The scaler is fitted on the training part only and then applied to
        # both parts.
        scaler = StandardScaler(copy = False)
        scaler.fit(Xtrain[tasks])
        # Keep the returned arrays: with copy=False scikit-learn only *tries*
        # to scale in place, so the result must not be assumed to be written
        # back into the input.
        Xtrain[tasks] = scaler.transform(Xtrain[tasks], copy = False)
        Xtest[tasks] = scaler.transform(Xtest[tasks], copy = False)

        # Prepend a constant column as the bias feature.  A relatively large
        # constant (10) is used so that regularizing the bias weight has
        # little effect.
        all_ones = 10*np.ones((Xtrain[tasks].shape[0],1))
        Xtrain[tasks] = np.append(all_ones,Xtrain[tasks],1)

        all_ones = 10*np.ones((Xtest[tasks].shape[0],1))
        Xtest[tasks] = np.append(all_ones,Xtest[tasks],1)

    local_results[:,t] = run_local(Xtrain,Ytrain, Xtest, Ytest,
                                   cv = cv, lam_range = local_lam_range,
                                   max_iter_range = local_max_iter_range, tol_range = local_tol_range).squeeze()

    global_results[:,t] = run_global(Xtrain,Ytrain, Xtest, Ytest,
                                   cv = cv, lam_range = global_lam_range,
                                   max_iter_range = global_max_iter_range, tol_range = global_tol_range).squeeze()

    mtl_results[:,t] = run_mtl(Xtrain, Ytrain, Xtest, Ytest, cv = cv, lam_range = mtl_lam_range,
                                  outer_iters_range = mtl_outer_iters_range, inner_iters_range = mtl_inner_iters_range,
                                  local_iters_range = mtl_local_iters_range).squeeze()


# Average over trials (axis 1): one mean score per task.
local_results_avg = np.mean(local_results, axis = 1)
global_results_avg = np.mean(global_results, axis = 1)
mtl_results_avg = np.mean(mtl_results, axis = 1)

print('local score:')
print(local_results_avg)

print('global score:')
print(global_results_avg)

print('mtl score:')
print(mtl_results_avg)
    
    
    
    
    
    
    
    



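Printing the results in another form: the scores are averaged over tasks for each trial, followed by the mean and standard deviation of those per-trial averages.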

In [None]:
# Average over tasks (axis 0): one mean score per trial.
local_results_avg = np.mean(local_results, axis = 0)
global_results_avg = np.mean(global_results, axis = 0)
mtl_results_avg = np.mean(mtl_results, axis = 0)

# First the per-trial averages of every method ...
for title, per_trial in (('local score:', local_results_avg),
                         ('global score:', global_results_avg),
                         ('mtl score:', mtl_results_avg)):
    print(title)
    print(per_trial)

# ... then the mean and standard deviation of those averages across trials.
for title, per_trial in (('local mean and std:', local_results_avg),
                         ('global mean and std:', global_results_avg),
                         ('mtl mean and std:', mtl_results_avg)):
    print(title)
    print(np.mean(per_trial))
    print(np.std(per_trial))