## Smart Blender..

![](http://thumbs.dreamstime.com/z/brain-blender-model-placed-34910730.jpg)

In [97]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook. `run_line_magic` is the
# supported IPython API; the older `magic()` shortcut is deprecated.
get_ipython().run_line_magic('matplotlib', 'inline')
# Default figure size (width, height) for the plots in this notebook.
plt.rcParams['figure.figsize'] = (8, 8)
from sklearn import metrics

import os
# Credit the input datasets: print one directory name per line.
# (The previous form wrapped print() in a list comprehension and then printed
# the resulting list, which emitted a trailing "[None, None, ...]" line.)
print("Shoutout to:")
for source_name in os.listdir("../input"):
    print(source_name)

# Scale
from sklearn.preprocessing import minmax_scale

# Source submissions to blend, keyed by a short model name. Kept as a dict so
# that more flavors can be added to the blend later; each CSV is indexed by
# its "id" column.
subs = {
    name: pd.read_csv(path, index_col="id")
    for name, path in (
        ("gru", '../input/pooled-gru-fasttext/submission.csv'),  # PL score 0.9829
        ("lstm_nb_svm", '../input/minimal-lstm-nb-svm-baseline-ensemble/submission.csv'),  # 0.9811
        ("lr", '../input/logistic-regression-with-words-and-char-n-grams/submission.csv'),  # 0.9788
        ("lgb", '../input/lightgbm-with-select-k-best-on-tfidf/lgb_submission.csv'),  # 0.9785
        ("bigru", "../input/bi-gru-cnn-poolings/submission.csv"),  # 0.9841
        # Disabled: ("high_blend", "../input/hight-of-blend-v2/hight_of_blend_v2.csv"),
    )
}

# Row index shared by every blended frame built below (taken from "bigru").
subindex = subs["bigru"].index

# Names of the submissions as loaded, so that blends added to `subs` later can
# be told apart from the originals.
OGsubs = list(subs)

# Scores
# Score recorded for each source submission, keyed by the same names as `subs`.
scores = {
    "gru": 0.9829,
    "lstm_nb_svm": 0.9811,
    "lr": 0.9788,
    "lgb": 0.9785,
    # Was 9841 (missing the leading "0."); every other score is a fraction
    # below 1 and the bigru submission is annotated with 0.9841 where it is loaded.
    "bigru": 0.9841,
}

# Label DF
# One entry per target label. Every label starts out pointing at the same
# empty placeholder frame; assemble() replaces each value with a real frame.
empty = pd.DataFrame()
classes = dict.fromkeys(
    ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"],
    empty,
)

# Assemble by Label, set model as column name
def assemble():
    """Rebuild ``classes`` so each label maps to a frame of model predictions.

    For every label key in the module-level ``classes`` dict, the column of
    that label is pulled from each submission in ``subs`` and stored in a new
    DataFrame whose column names are the submission keys. The new frame then
    replaces the previous value under that label.

    Mutates ``classes`` in place; returns ``None``.
    """
    for label in classes:
        per_model = pd.DataFrame()
        for model_name, submission in subs.items():
            per_model[model_name] = submission[label]
        classes[label] = per_model
# Heatmap
def heatclass():
    """Draw one model-vs-model correlation heatmap per label on a 3x2 grid.

    Reads the per-label frames from the module-level ``classes`` dict, computes
    each frame's correlation matrix, and plots it with annotated cells. Panels
    are filled left to right, top to bottom, in the iteration order of
    ``classes``.
    """
    fig, axes = plt.subplots(3, 2, figsize=[12, 12])
    for position, label in enumerate(classes):
        corr_matrix = classes[label].corr()
        # Two panels per row: position 0,1 -> row 0; 2,3 -> row 1; 4,5 -> row 2.
        grid_row, grid_col = divmod(position, 2)
        panel = axes[grid_row, grid_col]
        sns.heatmap(
            corr_matrix,
            annot=True,
            cmap="plasma",
            cbar_kws={'label': 'Correlation Coefficient'},
            ax=panel,
        )
        panel.set_title("{} Correlation ".format(label.capitalize()))
    plt.tight_layout(pad=0)

What I am trying to do is take the Titanic experimental stacking to another level, where I look at each label separately and find the most fruitful mix.

To do this, I will look for the submissions with the lowest correlations and stack them. In the future, I could iterate through multiple stacks.

In [98]:
def min_index(x):
    """Locate the smallest entry of the correlation matrix for label ``x``.

    Args:
        x: Label key into the module-level ``classes`` dict.

    Returns:
        A ``(mincol, minrow)`` tuple: the column whose minimum correlation is
        the lowest of all columns, and the row label at which that column
        reaches its minimum.
    """
    corr = classes[x].corr()
    column_minima = corr.min()
    mincol = column_minima.idxmin()
    minrow = corr[mincol].idxmin()
    return mincol, minrow

# Configure HIGH / LOW
def weighted_method(part1, part2, x, high=.5, low=.5):
    """Blend two predictions as the weighted sum ``part1*high + part2*low``.

    Args:
        part1: First prediction; multiplied by ``high``.
        part2: Second prediction; multiplied by ``low``.
        x: Label name. Accepted but not used by the computation.
        high: Weight applied to ``part1``.
        low: Weight applied to ``part2``.

    Returns:
        The weighted sum of the two inputs.
    """
    first_term = part1 * high
    second_term = part2 * low
    return first_term + second_term

# Standard_Blend
def minmax_weighted_method(part1, part2, x, high=.5, low=.5):
    """Blend two predictions after min-max scaling each of them.

    Each input's ``.values`` array is passed through ``minmax_scale`` and the
    scaled arrays are combined as ``scaled1*high + scaled2*low``.

    ``high`` and ``low`` used to be read as undefined free names, which raised
    ``NameError`` on the first call. They are now keyword parameters with the
    same 0.5 / 0.5 defaults as ``weighted_method``, so existing three-argument
    calls keep working.

    Args:
        part1: First prediction; must expose ``.values``. Weighted by ``high``.
        part2: Second prediction; must expose ``.values``. Weighted by ``low``.
        x: Label name. Accepted but not used by the computation.
        high: Weight applied to the scaled ``part1``.
        low: Weight applied to the scaled ``part2``.

    Returns:
        The weighted sum of the two scaled arrays (positional, not
        index-aligned, because the inputs are reduced to ``.values``).
    """
    scaled_first = minmax_scale(part1.values)
    scaled_second = minmax_scale(part2.values)
    return (scaled_first * high) + (scaled_second * low)

def minmaxer(rounds):
    """Build a min-max-scaled blend for every label.

    For each label, the pair of models found by ``min_index`` is blended with
    ``minmax_weighted_method``. Then, ``rounds`` times, the model least
    correlated with the current blend is looked up and the blend is recomputed
    from the first model of the pair and that model. Each round replaces the
    previous blend rather than accumulating onto it.

    Args:
        rounds: Number of re-blending passes per label.

    Returns:
        DataFrame indexed by ``subindex`` with one blended column per label.
    """
    blended = pd.DataFrame(index=subindex)
    for label in classes:
        anchor, partner = min_index(label)
        blended[label] = minmax_weighted_method(
            subs[anchor][label], subs[partner][label], label
        )
        for _ in range(rounds):
            # Re-pick the partner as the model least correlated with the blend so far.
            partner = classes[label].corrwith(blended[label]).idxmin()
            blended[label] = minmax_weighted_method(
                subs[anchor][label], subs[partner][label], label
            )
    return blended

def weighted_func(rounds):
    """Build a plain weighted blend for every label.

    For each label, the least-correlated pair of models is read off the
    correlation matrix of ``classes[label]`` and blended with
    ``weighted_method``. Then, ``rounds`` times, the model least correlated
    with the current blend is looked up and the blend is recomputed from the
    first model of the pair and that model. Each round replaces the previous
    blend rather than accumulating onto it.

    Args:
        rounds: Number of re-blending passes per label.

    Returns:
        DataFrame indexed by ``subindex`` with one blended column per label.
    """
    blended = pd.DataFrame(index=subindex)
    for label in classes:
        corr = classes[label].corr()
        anchor = corr.min().idxmin()
        partner = corr[anchor].idxmin()
        blended[label] = weighted_method(
            subs[anchor][label], subs[partner][label], label
        )
        for _ in range(rounds):
            # Re-pick the partner as the model least correlated with the blend so far.
            partner = classes[label].corrwith(blended[label]).idxmin()
            blended[label] = weighted_method(
                subs[anchor][label], subs[partner][label], label
            )
    return blended

In [99]:
# Build the per-label prediction frames from the loaded submissions, then plot
# the model-vs-model correlation heatmap for each label.
assemble()
heatclass()

In [100]:
# Add the two correlation-driven blends to `subs`, each built with two
# re-blending rounds per label: a plain weighted sum and a min-max-scaled one.
subs['weighted_blend'] = weighted_func(rounds=2)
subs['min_max'] = minmaxer(rounds=2)

## Descriptives..

In [101]:
# Gather Descriptive Statistics for Stacking
def manual_stack(data):
    """Summarise each row of ``data`` with four descriptive statistics.

    Args:
        data: DataFrame whose rows are summarised across its columns.

    Returns:
        DataFrame with the columns ``max``, ``min``, ``mean`` and ``median``
        (in that order), each computed row-wise over ``data``.
    """
    summary = pd.DataFrame()
    for stat in ('max', 'min', 'mean', 'median'):
        # axis=1 reduces across columns, giving one value per row.
        summary[stat] = getattr(data, stat)(axis=1)
    return summary

### Inner Band Blending

In [102]:
base = "bigru"
def slicer(base, up, low):
    """Blend only the inner band of the base model's predictions.

    For each label, the model least correlated with ``base`` is chosen as the
    partner. Rows where the base prediction lies strictly between ``low`` and
    ``up`` are replaced by ``0.2 * base + 0.8 * partner``; all other rows keep
    the base prediction unchanged.

    Args:
        base: Column name (model key) in each ``classes`` frame to start from.
        up: Exclusive upper bound of the band.
        low: Exclusive lower bound of the band.

    Returns:
        DataFrame indexed by ``subindex`` with one column per label.
    """
    result = pd.DataFrame(index=subindex)
    for label in classes:
        frame = classes[label]
        partner = frame.corrwith(frame[base]).idxmin()
        base_pred = frame[base].copy()
        partner_pred = frame[partner].copy()
        in_band = (base_pred > low) & (base_pred < up)
        band = base_pred.loc[in_band].index
        base_pred[band] = (base_pred[band] * .2) + (partner_pred[band] * .8)
        result[label] = base_pred.copy()
    return result
# Run the inner-band blend for several (upper, lower) bands on the bigru base
# and store each result in `subs` under "slicer<up><low>". The number after
# each band is the score noted for it.
for (up, low) in [
    (.9, .1),    # .9839
    (.85, .15),  # .9839
    (.8, .2),    # .9840
    (.7, .3),    # .9840
]:
    subs["slicer" + str(up) + str(low)] = slicer('bigru', up, low)

In [105]:
base = "bigru"
def norm_slicer(base, up, low):
    """Blend the inner band of the base predictions after min-max scaling.

    For each label, the model least correlated with ``base`` is chosen as the
    partner. Rows where the base prediction lies strictly between ``low`` and
    ``up`` are replaced by ``0.2 * scaled_base + 0.8 * scaled_partner``, where
    both operands are min-max scaled over the band rows only. All other rows
    keep the base prediction unchanged.

    Args:
        base: Column name (model key) in each ``classes`` frame to start from.
        up: Exclusive upper bound of the band.
        low: Exclusive lower bound of the band.

    Returns:
        DataFrame indexed by ``subindex`` with one column per label.
    """
    temp = pd.DataFrame(index=subindex)
    for x in classes:
        blend1 = classes[x].corrwith(classes[x][base]).idxmin()
        b1 = classes[x][base].copy()
        b2 = classes[x][blend1].copy()
        target_index = b1.loc[(b1 > low) & (b1 < up)].index
        # minmax_scale rejects empty input, so leave the base untouched when no
        # prediction falls inside the band (always the case when up == low).
        if len(target_index) > 0:
            b1[target_index] = (
                (minmax_scale(b1[target_index].values) * .2)
                + (minmax_scale(b2[target_index].values) * .8)
            )
        temp[x] = b1.copy()
    return temp


# Run the normalised inner-band blend for several (upper, lower) bands.
# This loop previously called slicer(), so the "norm_slicer*" entries were
# plain slicer outputs and norm_slicer() itself was never executed.
for (up, low) in [
    (.6, .4),
    (.5, .5),  # degenerate band: strict bounds select no rows, base is kept as-is
    (.8, .2),
    (.7, .3),
]:
    subs["norm_slicer"+str(up)+str(low)] = norm_slicer(base='bigru', up=up, low=low)

In [106]:
# Rebuild the per-label frames so they also include the blends added to `subs`
# since the first call, then re-plot the correlation heatmaps.
assemble()
heatclass()

In [107]:
# Export every blend created in this notebook (any entry of `subs` that was
# not one of the originally loaded submissions) to "<name>.csv", and print its
# name followed by a one-row preview with the id index dropped.
for x in filter(lambda name: name not in OGsubs, subs):
    subs[x].to_csv("{}.csv".format(x))
    print("\n{}".format(x))
    print(subs[x].reset_index(drop=True).head(1))

In [None]:
# subs['weighted_blend'].to_csv("weighted_blend.csv")
# subs['weighted_blend'].head(1)
# subs['min_max'].to_csv("min_max.csv")
# subs['min_max'].head(1)