In [15]:
import pandas as pd
import numpy as np
import csv
import random

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier, AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.metrics import log_loss
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.base import BaseEstimator
from scipy.optimize import minimize
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.calibration import CalibratedClassifierCV
import xgboost as xgb
from xgboost import XGBClassifier
from sknn.mlp import Classifier, Layer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler



In [10]:
print('Load data...')
DATA_DIR = "/Users/patrickkennedy/Desktop/Data_Science_MISC/Kaggle"
train = pd.read_csv(DATA_DIR + "/BNP_Paribas/train.csv")
test = pd.read_csv(DATA_DIR + "/BNP_Paribas/test.csv")

target = train['target'].values

train = train.drop(['ID','target'],axis=1)
id_test = test['ID'].values
test = test.drop(['ID'],axis=1)

print('Clearing...')
for (train_name, train_series), (test_name, test_series) in zip(train.iteritems(),test.iteritems()):
    if train_series.dtype == 'O':
        #for objects: factorize
        train[train_name], tmp_indexer = pd.factorize(train[train_name])
        test[test_name] = tmp_indexer.get_indexer(test[test_name])
        #but now we have -1 values (NaN)
    else:
        #for int or float: fill NaN
        tmp_len = len(train[train_series.isnull()])
        if tmp_len>0:
            #print "mean", train_series.mean()
            train.loc[train_series.isnull(), train_name] = -9999 #train_series.mean()
        #and Test
        tmp_len = len(test[test_series.isnull()])
        if tmp_len>0:
            test.loc[test_series.isnull(), test_name] = -9999 #train_series.mean()  #TODO
            


Load data...
Clearing...


In [13]:
for_real=True   

In [25]:
%%time

#Defining the classifiers
clfs = {#'LR'  : LogisticRegression(), 
        #'SVM' : SVC(probability=True, random_state=random_state), 
        #'RF'  : RandomForestClassifier(),#n_estimators=100, n_jobs=-1), 
        #'GBM' : GradientBoostingClassifier(n_estimators=50), 
        'ETC' : ExtraTreesClassifier(),#n_estimators=500, 
                                     #max_features=60, 
                                     #max_depth=10, 
                                     #criterion='entropy', 
                                     #min_samples_split= 4, 
                                     #min_samples_leaf= 2, 
                                     #verbose = 0, 
                                     #n_jobs =-1),
        #'KNN' : KNeighborsClassifier(n_neighbors=30)}
        'XGBc': XGBClassifier(objective='binary:logistic'),
                              #colsample_bytree=0.77638333498678636,
                              #learning_rate=0.030567867858705199,
                              #max_delta_step=4.6626180513766657,
                              #min_child_weight=57.354121041109124,
                              #n_estimators=478,
                              #subsample=0.8069399976204783,
                              #max_depth=6,
                              #gamma=0.2966938071810209)#,
        #'NN'  : Pipeline([('min/max scaler', MinMaxScaler(feature_range=(-1.0, 1.0))),
        #                  ('neural network', Classifier(layers=[Layer("Rectifier", units=10),
        #                                                        Layer("Tanh", units=10),
        #                                                        Layer("Softmax")], 
        #                                                n_iter=5))])       
       }

jitters=10
p_valid, p_test, y_valid, y_test, p_valid_real, p_test_real, y_valid_real = run_classifiers(train, 
                                                                                            target, 
                                                                                            test, 
                                                                                            clfs, 
                                                                                            jitters)

#as long as i am better than .48 on my individual classifier tests, i am a-ok

#changing so that i am not using train_test_split on different data every time (duh)




Performance of individual classifiers (1st layer) on X_test
------------------------------------------------------------
ETC:       logloss  => 0.9041075
MEAN cv score : -0.92156415523
ETC:       logloss  => 0.9811521
MEAN cv score : -0.932532255621


KeyboardInterrupt: 

In [85]:
preds = optimize_and_score(p_valid, p_test, y_valid, y_test, p_valid_real, p_test_real, y_valid_real, jitters=100)
pd.DataFrame({"ID": id_test, "PredictedProb": preds[:,1]}).to_csv('xgb_native_with_etcmaxdepth10_params_100rounds.csv',index=False)

Performance of optimization based ensemblers (2nd layer) on X_test
------------------------------------------------------------
EN_optA:             logloss  => 0.5538136


KeyboardInterrupt: 

In [None]:
#3rd layer gives .549, kaggle gives .537

In [45]:
dummy = [x[:,0] for x in p_valid[:100]]

In [75]:
dummy[0]

array([ 0.28049038,  0.56250137,  0.10673434, ...,  0.13725468,
        0.21583141,  0.14018389])

In [76]:
df = pd.DataFrame()
for i in range(len(dummy)):
    df['etc'+str(i)+"-0"] = dummy[i]

(25277, 100)

In [81]:
p_valid[100:][0]

array([ 0.28520083,  0.85749406,  0.86651611, ...,  0.95152348,
        0.66153866,  0.89656085], dtype=float32)

In [19]:
def optimize_and_score(p_valid, p_test, y_valid, y_test, p_valid_real, p_test_real, y_valid_real, jitters):
    
    
    p_valid_df = pd.DataFrame()
    p_test_df = pd.DataFrame()
    p_valid_real_df = pd.DataFrame()
    p_test_real_df = pd.DataFrame()
    df_list = [p_valid_df, p_test_df, p_valid_real_df, p_test_real_df]
    preds_list = [p_valid, p_test, p_valid_real, p_test_real]
    
    for df, preds in zip(df_list, preds_list):
        dummy = [x[:,0] for x in preds[:100]] #p_valid[:jitters][:,0]
        for i in range(len(dummy)):
            df['etc'+str(i)+"-0"] = dummy[i]
        
        dummy = [x[:,1] for x in preds[:100]]
        for i in range(len(dummy)):
            df['etc'+str(i)+'-1'] = dummy[i]
            
        for i in range(len(preds[100:])):
            df['xgb'+str(i)+'-0'] = preds[100:][i]
            
        for i in range(len(preds[100:])):
            df['xgb'+str(i)+'-1'] = preds[100:][i]
            
            
    
    #p_valid_df["etc-1"] = [x[:,1] for x in p_valid[:100]] #p_valid[:jitters][:,1]
    #p_valid_df["xgb-1"] = p_valid[jitters:]
    #p_valid_df["xgb-0"] = 1-p_valid_df["xgb-1"]

    #p_test_df["etc-0"] = [x[:,0] for x in p_test[:100]] #p_test[:jitters][:,0]
    #p_test_df["etc-1"] = [x[:,0] for x in p_test[:100]] #p_test[:jitters][:,1]
    #p_test_df["xgb-1"] = p_test[jitters:]
    #p_test_df["xgb-0"] = 1-p_test_df["xgb-1"]

    #p_valid_real_df["etc-0"] = [x[:,0] for x in p_valid_real[:100]] #p_valid_real[:jitters][:,0]
    #p_valid_real_df["etc-1"] = [x[:,0] for x in p_valid_real[:100]] #p_valid_real[:jitters][:,1]
    #p_valid_real_df["xgb-1"] = p_valid_real[jitters:]
    #p_valid_real_df["xgb-0"] = 1-p_valid_real_df["xgb-1"]

    #p_test_real_df["etc-0"] = [x[:,0] for x in p_test_real[:100]] #p_test_real[:jitters][:,0]
    #p_test_real_df["etc-1"] = [x[:,0] for x in p_test_real[:100]] #p_test_real[:jitters][:,1]
    #p_test_real_df["xgb-1"] = p_test_real[jitters:]
    #p_test_real_df["xgb-0"] = 1-p_test_real_df["xgb-1"]

    cols = p_valid_df.columns
    cols = cols[:2] | cols[3:] | cols[2:3]
    p_valid_df = p_valid_df[cols]
    p_test_df = p_test_df[cols]
    p_valid_real_df = p_valid_real_df[cols]
    p_test_real_df = p_test_real_df[cols]

    
    
    print('Performance of optimization based ensemblers (2nd layer) on X_test')   
    print('------------------------------------------------------------')
    
    #Creating the data for the 2nd layer.
    #XV = np.hstack(p_valid)
    #XT = np.hstack(p_test)  

    XV = p_valid_df.as_matrix()
    XT = p_test_df.as_matrix()
    
    n_classes = 2

    #EN_optA
    enA = EN_optA(n_classes)
    enA.fit(XV, y_valid)
    w_enA = enA.w
    y_enA = enA.predict_proba(XT)
    print('{:20s} {:2s} {:1.7f}'.format('EN_optA:', 'logloss  =>', log_loss(y_test, y_enA)))
    
    #Calibrated version of EN_optA 
    cc_optA = CalibratedClassifierCV(enA, method='isotonic')
    cc_optA.fit(XV, y_valid)
    y_ccA = cc_optA.predict_proba(XT)
    print('{:20s} {:2s} {:1.7f}'.format('Calibrated_EN_optA:', 'logloss  =>', log_loss(y_test, y_ccA)))
        
    #EN_optB
    enB = EN_optB(n_classes) 
    enB.fit(XV, y_valid)
    w_enB = enB.w
    y_enB = enB.predict_proba(XT)
    print('{:20s} {:2s} {:1.7f}'.format('EN_optB:', 'logloss  =>', log_loss(y_test, y_enB)))

    #Calibrated version of EN_optB
    cc_optB = CalibratedClassifierCV(enB, method='isotonic')
    cc_optB.fit(XV, y_valid)
    y_ccB = cc_optB.predict_proba(XT)  
    print('{:20s} {:2s} {:1.7f}'.format('Calibrated_EN_optB:', 'logloss  =>', log_loss(y_test, y_ccB)))
    print('')


    print('REAL: Performance of optimization based ensemblers (2nd layer) on X_test')   
    print('------------------------------------------------------------')
    
    #Creating the data for the 2nd layer.
    #XV_real = np.hstack(p_valid_real)
    #XT_real = np.hstack(p_test_real)  
    
    XV_real = p_valid_real_df.as_matrix()
    XT_real = p_test_real_df.as_matrix()
    
    n_classes = 2

    #EN_optA
    enA_real = EN_optA(n_classes)
    enA_real.fit(XV_real, y_valid_real)
    w_enA_real = enA_real.w
    y_enA_real = enA_real.predict_proba(XT_real)
    #print('{:20s} {:2s} {:1.7f}'.format('EN_optA:', 'logloss  =>', log_loss(y_test, y_enA)))
    
    #Calibrated version of EN_optA 
    cc_optA_real = CalibratedClassifierCV(enA_real, method='isotonic')
    cc_optA_real.fit(XV_real, y_valid_real)
    y_ccA_real = cc_optA_real.predict_proba(XT_real)
    #print('{:20s} {:2s} {:1.7f}'.format('Calibrated_EN_optA:', 'logloss  =>', log_loss(y_test, y_ccA)))
        
    #EN_optB
    enB_real = EN_optB(n_classes) 
    enB_real.fit(XV_real, y_valid_real)
    w_enB_real = enB_real.w
    y_enB_real = enB_real.predict_proba(XT_real)
    #print('{:20s} {:2s} {:1.7f}'.format('EN_optB:', 'logloss  =>', log_loss(y_test, y_enB)))

    #Calibrated version of EN_optB
    cc_optB_real = CalibratedClassifierCV(enB_real, method='isotonic')
    cc_optB_real.fit(XV_real, y_valid_real)
    y_ccB_real = cc_optB_real.predict_proba(XT_real)  
    #print('{:20s} {:2s} {:1.7f}'.format('Calibrated_EN_optB:', 'logloss  =>', log_loss(y_test, y_ccB)))
    #print('')
    
    #ummm why am i optimizing on y_valid_real??? why not y_real?????
    
    
    
    
    #optimize weighting of the 3rd level - keep the same weighting for real data
    best_score = 10.0

    for i in range(10000):
        first = random.randint(0,20)
        second = random.randint(0,20)
        third = random.randint(0,20)
        fourth = random.randint(0,20)
        total = first + second + third + fourth
        first = first / (total * 1.0)
        second = second / (total * 1.0)
        third = third / (total * 1.0)
        fourth = fourth / (total * 1.0)
    
        y_3l = (y_enA * first) + (y_ccA * second) + (y_enB * third) + (y_ccB * fourth)
        current_score = log_loss(y_test, y_3l)
    
        if current_score < best_score:
            print('{:20s} {:2s} {:1.7f}'.format('3rd_layer:', 'logloss  =>', log_loss(y_test, y_3l)))
            #print first, second, third, fourth
            best_score = current_score
            best_first = first
            best_second = second
            best_third = third
            best_fourth = fourth
            
            
    preds = (best_first * y_enA_real) + (best_second * y_ccA_real) + \
            (best_third * y_enB_real) + (best_fourth * y_ccB_real) 
    return preds

In [24]:
def run_classifiers(train, target, test, clfs, jitters):
    #predictions on the validation and test sets
    p_valid = []
    p_test = []

    p_valid_real = []
    p_test_real = []
  
    holder = []

    
    #Spliting data into train and test sets.
    X, X_test, y, y_test = train_test_split(train, target, test_size=0.33, random_state=42)
    
    #Spliting train data into training and validation sets.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33, random_state=42)

    #print('Data shape:')
    #print('X_train: %s, X_valid: %s, X_test: %s \n' %(X_train.shape, X_valid.shape, 
    #                                          X_test.shape))

    if for_real:
        #take the train, target and test data, and come up with a validation set from train
        X_real = train
        test['labels'] = -1
        y_test_real = test['labels']
        test = test.drop('labels', axis=1)
        X_test_real = test

        y_real = target
    
        X_train_real, X_valid_real, y_train_real, y_valid_real = train_test_split(X_real, y_real, test_size=0.33,
                                                                                 random_state=42)
        
        
    print('Performance of individual classifiers (1st layer) on X_test')   
    print('------------------------------------------------------------')
   
    for nm, clf in clfs.items():
        for i in xrange(jitters):
            dummy = random.randint(1,10000)
            x = True
            while x == True:
                if dummy in holder:
                    dummy = random.randint(1,10000)
                else:
                    x = False
            holder.append(dummy)
    
            random.seed(dummy)
        

            if nm == 'NN':
                #First run. Training on (X_train, y_train) and predicting on X_valid.
                clf.fit(X_train.as_matrix(), y_train)
                yv = clf.predict_proba(X_valid.as_matrix())
                p_valid.append(yv)
        
                #Second run. Training on (X, y) and predicting on X_test.
                clf.fit(X.as_matrix(), y)
                yt = clf.predict_proba(X_test.as_matrix())
                p_test.append(yt)
        
                if for_real:
                    #First run. Training on (X_train, y_train) and predicting on X_valid.
                    clf.fit(X_train_real.as_matrix(), y_train_real)
                    yv_real = clf.predict_proba(X_valid_real.as_matrix())
                    p_valid_real.append(yv_real)
            
                    #Second run. Training on (X, y) and predicting on X_test.
                    clf.fit(X_real.as_matrix(), y_real)
                    yt_real = clf.predict_proba(X_test_real.as_matrix())
                    p_test_real.append(yt_real)
    
            elif nm == 'XGBc':
                #think about making a function that handles this part
                xgtrain = xgb.DMatrix(X_train, y_train)
                xgtest = xgb.DMatrix(X_valid, label=y_valid)
                params = {"objective":"binary:logistic",
                          "colsample_bytree":0.77638333498678636,
                          "learning_rate":0.030567867858705199,
                          "max_delta_step":4.6626180513766657,
                          "min_child_weight":57.354121041109124,
                          "n_estimators":478,
                          "subsample":0.8069399976204783,
                          "max_depth":6,
                          "gamma":0.2966938071810209,
                          "eval_metric":"logloss"}
                #can't use predict proba, so i'll need to generate the alternative scores for p_valid, p_test
                model = xgb.train(params, xgtrain, 500) 
                yv = model.predict(xgtest, ntree_limit=model.best_iteration)
                p_valid.append(yv)
        
                xgtrain = xgb.DMatrix(X, y)
                xgtest = xgb.DMatrix(X_test, label=y_test)
                model = xgb.train(params, xgtrain, 500)
                yt = model.predict(xgtest, ntree_limit=model.best_iteration)
                p_test.append(yt)
        
                if for_real:
                    xgtrain = xgb.DMatrix(X_train_real, y_train_real)
                    xgtest = xgb.DMatrix(X_valid_real, label=y_valid_real)
                    model = xgb.train(params, xgtrain, 500)
                    yv_real = model.predict(xgtest, ntree_limit=model.best_iteration)
                    p_valid_real.append(yv_real)
                
                    xgtrain = xgb.DMatrix(X_real, y_real)
                    xgtest = xgb.DMatrix(X_test_real, label=y_test_real)
                    model = xgb.train(params, xgtrain, 500)
                    yt_real = model.predict(xgtest, ntree_limit=model.best_iteration)
                    p_test_real.append(yt_real)
        
            else:
                #First run. Training on (X_train, y_train) and predicting on X_valid.
                clf.fit(X_train, y_train)
                yv = clf.predict_proba(X_valid)
                p_valid.append(yv)
        
                #Second run. Training on (X, y) and predicting on X_test.
                clf.fit(X, y)
                yt = clf.predict_proba(X_test)
                p_test.append(yt)
        
                if for_real:
                    #First run. Training on (X_train, y_train) and predicting on X_valid.
                    clf.fit(X_train_real, y_train_real)
                    yv_real = clf.predict_proba(X_valid_real)
                    p_valid_real.append(yv_real)
        
                    #Second run. Training on (X, y) and predicting on X_test.
                    clf.fit(X_real, y_real)
                    yt_real = clf.predict_proba(X_test_real)
                    p_test_real.append(yt_real)
       
            #Printing out the performance of the classifier
            print('{:10s} {:2s} {:1.7f}'.format('%s: ' %(nm), 'logloss  =>', log_loss(y_test, yt)))
            print('MEAN cv score : ' + str(np.mean(cross_val_score(clf, X_test, y_test, scoring='log_loss', cv=10))))
    print('')
    
    return p_valid, p_test, y_valid, y_test, p_valid_real, p_test_real, y_valid_real

In [21]:
def objf_ens_optA(w, Xs, y, n_class):
    """
    Function to be minimized in the EN_optA ensembler.
    
    Parameters:
    ----------
    w: array-like, shape=(n_preds)
       Candidate solution to the optimization problem (vector of weights).
    Xs: list of predictions to combine
       Each prediction is the solution of an individual classifier and has a
       shape=(n_samples, n_classes).
    y: array-like sahpe=(n_samples,)
       Class labels
    n_class: int
       Number of classes in the problem (12 in Airbnb competition)
    
    Return:
    ------
    score: Score of the candidate solution.
    """
    w = np.abs(w)
    sol = np.zeros(Xs[0].shape)
    for i in range(len(w)):
        sol += Xs[i] * w[i]
    #Using log-loss as objective function (different objective functions can be used here). 
    score = log_loss(y, sol)   
    return score
        

class EN_optA(BaseEstimator):
    """
    Given a set of predictions $X_1, X_2, ..., X_n$,  it computes the optimal set of weights
    $w_1, w_2, ..., w_n$; such that minimizes $log\_loss(y_T, y_E)$, 
    where $y_E = X_1*w_1 + X_2*w_2 +...+ X_n*w_n$ and $y_T$ is the true solution.
    """
    def __init__(self, n_class):
        super(EN_optA, self).__init__()
        self.n_class = n_class
        
    def fit(self, X, y):
        """
        Learn the optimal weights by solving an optimization problem.
        
        Parameters:
        ----------
        Xs: list of predictions to be ensembled
           Each prediction is the solution of an individual classifier and has 
           shape=(n_samples, n_classes).
        y: array-like
           Class labels
        """
        #print X.shape[1], self.n_class
        
        Xs = np.hsplit(X, X.shape[1]/self.n_class)
        #Initial solution has equal weight for all individual predictions.
        x0 = np.ones(len(Xs)) / float(len(Xs)) 
        #Weights must be bounded in [0, 1]
        bounds = [(0,1)]*len(x0)   
        #All weights must sum to 1
        cons = ({'type':'eq','fun':lambda w: 1-sum(w)})
        #Calling the solver
        res = minimize(objf_ens_optA, x0, args=(Xs, y, self.n_class), 
                       method='SLSQP', 
                       bounds=bounds,
                       constraints=cons
                       )
        self.w = res.x
        return self
    
    def predict_proba(self, X):
        """
        Use the weights learned in training to predict class probabilities.
        
        Parameters:
        ----------
        Xs: list of predictions to be blended.
            Each prediction is the solution of an individual classifier and has 
            shape=(n_samples, n_classes).
            
        Return:
        ------
        y_pred: array_like, shape=(n_samples, n_class)
                The blended prediction.
        """
        Xs = np.hsplit(X, X.shape[1]/self.n_class)
        y_pred = np.zeros(Xs[0].shape)
        for i in range(len(self.w)):
            y_pred += Xs[i] * self.w[i] 
        return y_pred

In [22]:
def objf_ens_optB(w, Xs, y, n_class):
    """
    Function to be minimized in the EN_optB ensembler.
    
    Parameters:
    ----------
    w: array-like, shape=(n_preds)
       Candidate solution to the optimization problem (vector of weights).
    Xs: list of predictions to combine
       Each prediction is the solution of an individual classifier and has a
       shape=(n_samples, n_classes).
    y: array-like sahpe=(n_samples,)
       Class labels
    n_class: int
       Number of classes in the problem, i.e. = 12
    
    Return:
    ------
    score: Score of the candidate solution.
    """
    #Constraining the weights for each class to sum up to 1.
    #This constraint can be defined in the scipy.minimize function, but doing 
    #it here gives more flexibility to the scipy.minimize function 
    #(e.g. more solvers are allowed).
    w_range = np.arange(len(w))%n_class 
    for i in range(n_class): 
        w[w_range==i] = w[w_range==i] / np.sum(w[w_range==i])
        
    sol = np.zeros(Xs[0].shape)
    for i in range(len(w)):
        sol[:, i % n_class] += Xs[int(i / n_class)][:, i % n_class] * w[i] 
        
    #Using log-loss as objective function (different objective functions can be used here). 
    score = log_loss(y, sol)   
    return score
    

class EN_optB(BaseEstimator):
    """
    Given a set of predictions $X_1, X_2, ..., X_n$, where each $X_i$ has
    $m=12$ clases, i.e. $X_i = X_{i1}, X_{i2},...,X_{im}$. The algorithm finds the optimal 
    set of weights $w_{11}, w_{12}, ..., w_{nm}$; such that minimizes 
    $log\_loss(y_T, y_E)$, where $y_E = X_{11}*w_{11} +... + X_{21}*w_{21} + ... 
    + X_{nm}*w_{nm}$ and and $y_T$ is the true solution.
    """
    def __init__(self, n_class):
        super(EN_optB, self).__init__()
        self.n_class = n_class
        
    def fit(self, X, y):
        """
        Learn the optimal weights by solving an optimization problem.
        
        Parameters:
        ----------
        Xs: list of predictions to be ensembled
           Each prediction is the solution of an individual classifier and has 
           shape=(n_samples, n_classes).
        y: array-like
           Class labels
        """
        #print X.shape[1], self.n_class
        
        Xs = np.hsplit(X, X.shape[1]/self.n_class)
        #Initial solution has equal weight for all individual predictions.
        x0 = np.ones(self.n_class * len(Xs)) / float(len(Xs)) 
        #Weights must be bounded in [0, 1]
        bounds = [(0,1)]*len(x0)   
        #Calling the solver (constraints are directly defined in the objective
        #function)
        res = minimize(objf_ens_optB, x0, args=(Xs, y, self.n_class), 
                       method='L-BFGS-B', 
                       bounds=bounds, 
                       )
        self.w = res.x
        return self
    
    def predict_proba(self, X):
        """
        Use the weights learned in training to predict class probabilities.
        
        Parameters:
        ----------
        Xs: list of predictions to be ensembled
            Each prediction is the solution of an individual classifier and has 
            shape=(n_samples, n_classes).
            
        Return:
        ------
        y_pred: array_like, shape=(n_samples, n_class)
                The ensembled prediction.
        """
        
        Xs = np.hsplit(X, X.shape[1]/self.n_class)
        y_pred = np.zeros(Xs[0].shape)
        for i in range(len(self.w)):
            y_pred[:, i % self.n_class] += \
                   Xs[int(i / self.n_class)][:, i % self.n_class] * self.w[i]  
        return y_pred