# Blend Predictions through Optimized Weighted Averaging

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt;

%matplotlib inline

from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression

from scipy.stats.mstats import gmean, hmean
from sklearn.preprocessing import minmax_scale

from scipy.optimize import minimize

from tqdm import tqdm

In [2]:
def read_predictions(prediction_dir, mode='valid', valid_columns=None, stacking_mode='flat'):
    """Load every single-model prediction file found under ``prediction_dir/mode``.

    Parameters
    ----------
    prediction_dir : str
        Directory containing ``sample_submission.csv`` and one sub-directory
        per ``mode`` holding the per-model prediction CSV files.
    mode : str, default 'valid'
        Name of the sub-directory of ``prediction_dir`` to read.
    valid_columns, stacking_mode
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    predictions : list of pandas.DataFrame
        One frame per file, in sorted path order, with the ``id`` column removed.
    sample_submission : pandas.DataFrame
        Contents of ``prediction_dir/sample_submission.csv``.
    filenames : list of str
        Base file name of each prediction file, aligned with ``predictions``.
    """
    sample_submission = pd.read_csv(os.path.join(prediction_dir, 'sample_submission.csv'))

    predictions = []
    filenames = []

    # sorted() keeps the model order deterministic, so weights fitted on one
    # mode line up with the files of another mode as long as both sort alike.
    for filepath in sorted(glob.glob(os.path.join(prediction_dir, mode, '*'))):
        prediction_single = pd.read_csv(filepath).drop(columns='id')
        predictions.append(prediction_single)
        # os.path.basename strips the directory on every platform; splitting
        # on a backslash only did so on Windows.
        filenames.append(os.path.basename(filepath))

    return predictions, sample_submission, filenames

In [3]:
# Target columns that are scored and blended one by one.
LABEL_COLUMNS = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]

# Root directory of the single-model prediction files.
SINGLE_DIR = 'single_model_predictions'
# Sample submission file located inside SINGLE_DIR.
SAMPLE_SUBMISSION_PATH = SINGLE_DIR + '/sample_submission.csv'

In [4]:
# Ground-truth labels of the validation split (the raw text column is not needed).
valid_actual = pd.read_csv(os.path.join(SINGLE_DIR, 'valid_split.csv')).drop('comment_text', axis=1)

# Independent working copy: objective() overwrites the label columns of
# valid_split with blended predictions, so it must not share data with
# valid_actual. Copying avoids parsing the same CSV a second time.
valid_split = valid_actual.copy()

valid_predictions, _, valid_names = read_predictions(SINGLE_DIR, mode='valid')

In [5]:
# Score every single model on the validation split: mean and standard
# deviation of the per-label ROC AUC.
# Rows are collected in a list and turned into a DataFrame once, because
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
score_records = []

for prediction, name in zip(valid_predictions, valid_names):
    scores = [roc_auc_score(valid_actual[label], prediction[label]) for label in LABEL_COLUMNS]
    score_records.append({'name': name, 'score': np.mean(scores), 'stddev': np.std(scores)})

# Passing columns= keeps the expected layout even when no prediction file was found.
pred_scores = pd.DataFrame(score_records, columns=['name', 'score', 'stddev'])

In [6]:
# Rank the single models from highest to lowest mean ROC AUC.
pred_scores.sort_values(by='score', ascending=False)

Unnamed: 0,name,score,stddev
4,18_03_11_LSTM_Valid.csv,0.990253,0.004114
2,18_03_11_DPCNN_SCNN_GRU_Valid.csv,0.99014,0.003719
3,18_03_11_FastTextGRU_Valid.csv,0.990036,0.003918
5,18_03_17_Pavel_Valid.csv,0.989998,0.003
6,Wordbatch_Merged_VALID.csv,0.987584,0.004614
0,18_02_16_BagOfWords_TFIDF_LogisticRegression_V...,0.985529,0.004436
1,18_02_18_pooledgru_valid.csv,0.984768,0.003736
8,lvl0_lgbm_clean_VALID.csv,0.983589,0.00697
7,char_vdcnn_valid.csv,0.973076,0.012755


# Blend Validation Predictions

In [13]:
# Display the label columns that the blend objective iterates over.
LABEL_COLUMNS

['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

In [7]:
def objective(x):
    """Return the negated mean per-label ROC AUC of the blend weighted by ``x``.

    ``x`` holds one weight per entry of ``valid_predictions``. The value is
    negated so that a minimiser maximises the AUC.

    Side effect: the label columns of the module-level ``valid_split`` frame
    are overwritten with the blended predictions on every call.
    """
    # Phase 1: write the weighted average of all models into valid_split.
    for column in LABEL_COLUMNS:
        per_model = [model_preds[column].values for model_preds in valid_predictions]
        valid_split[column] = np.average(per_model, axis=0, weights=x)

    # Phase 2: score each blended column against the ground truth.
    per_label_auc = [
        roc_auc_score(valid_actual[column], valid_split[column])
        for column in LABEL_COLUMNS
    ]

    mean_auc = np.mean(per_label_auc)
    return -mean_auc

In [8]:
# Random-restart search for the blend weights: every restart draws a starting
# point from a Dirichlet distribution (non-negative, sums to 1) and lets SLSQP
# refine it; the weights with the highest validation score are kept.
num_times = 2000    # number of random restarts
coef_dir = 1        # Dirichlet concentration used for the starting weights
random_seed = 42    # fixed seed so a re-run draws the same starting points

n_models = len(valid_predictions)
rng = np.random.RandomState(random_seed)

# Loop-invariant optimiser settings: each weight in [0, 1], weights sum to 1.
weight_bounds = [(0, 1)] * n_models
sum_to_one = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}

# Start below any reachable score so the first restart always sets best_variables.
best_score = -np.inf
best_variables = []

for restart in tqdm(range(num_times)):

    res = minimize(objective,
                   x0=rng.dirichlet(np.ones(n_models) * coef_dir),
                   bounds=weight_bounds,
                   constraints=sum_to_one,
                   method='SLSQP')

    # objective returns the negated score, so flip the sign back.
    if best_score < -res.fun:
        best_score = -res.fun
        best_variables = res.x

print(best_score)
print(best_variables)

100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [11:34<00:00,  2.88it/s]


0.992175196836278
[0.08792799 0.0310167  0.0315641  0.02459024 0.28716949 0.21458821
 0.20837474 0.02844955 0.08631898]


In [15]:
# Keep the winning score and weights under separate names so that re-running
# the search cell does not change what the test blend uses.
the_best_score, the_best_variables = best_score, best_variables

print(the_best_score, the_best_variables, sep='\n')

0.992175196836278
[0.08792799 0.0310167  0.0315641  0.02459024 0.28716949 0.21458821
 0.20837474 0.02844955 0.08631898]


# Blend Test Predictions

In [16]:
# Load the single-model test predictions and the submission template;
# the returned file names are not needed here.
predictions, sample_submission, _ = read_predictions(prediction_dir=SINGLE_DIR, mode='test')

In [17]:
# Fill each label column of the submission with the weighted average of the
# per-model test predictions, using the weights found on the validation split.
for label in LABEL_COLUMNS:
    per_model = [model_preds[label].values for model_preds in predictions]
    sample_submission[label] = np.average(
        per_model,
        axis=0,
        weights=the_best_variables,
    )

In [18]:
# Preview the first rows of the blended test submission.
sample_submission.head()

Unnamed: 0,id,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,00001cee341fdb12,0.945151,0.465507,0.925977,0.216399,0.893929,0.458318
1,0000247867823ef7,0.106829,0.10517,0.103594,0.104166,0.105452,0.105509
2,00013b17ad220c46,0.108799,0.104087,0.106297,0.104646,0.103966,0.104882
3,00017563c3f7919a,0.101106,0.103449,0.102196,0.104258,0.10242,0.103436
4,00017695ad8997eb,0.110721,0.10396,0.105147,0.104102,0.105305,0.105214


In [19]:
ENSEMBLE_SUBMISSION_PATH = 'submissions/18_03_19_OptimizedWeightedAverage_Updated.csv'

# Create the output directory first so that the write does not fail on a fresh checkout.
os.makedirs(os.path.dirname(ENSEMBLE_SUBMISSION_PATH), exist_ok=True)

# index=False (the documented boolean) keeps the row index out of the CSV.
sample_submission.to_csv(ENSEMBLE_SUBMISSION_PATH, index=False)