In [1]:
import ast
import gc
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Global plot styling: select matplotlib's "fivethirtyeight" style sheet, then
# seaborn's "darkgrid" style. Both calls change process-wide plotting defaults.
plt.style.use("fivethirtyeight")
sns.set_style("darkgrid")

In [2]:
# Directory layout, relative to the notebook's working directory.
# DATA_DIR is not referenced by the cells visible in this notebook section.
DATA_DIR = '../dataset/raw'
# Folder the two model outputs ('output (1).csv', 'output (2).csv') are read from.
PROCESSED_DIR = '../dataset/processed'
# Folder the blended submission ('final.csv') is written to.
SUBMISSION_DIR = '../submission'

In [3]:
# Each submission stores its ranked recommendations as the text of a Python
# list (e.g. "[20, 21, 15]"). ast.literal_eval parses literals only, whereas the
# previous `eval` converter would execute any expression found in the CSV.
sub1 = pd.read_csv(
    os.path.join(PROCESSED_DIR, 'output (1).csv'),
    converters={"predicted_list": ast.literal_eval},
)
sub2 = pd.read_csv(
    os.path.join(PROCESSED_DIR, 'output (2).csv'),
    converters={"predicted_list": ast.literal_eval},
)

In [4]:
# Preview the first rows of the first model's submission.
sub1.head()

Unnamed: 0,profile_id,predicted_list
0,3,"[20, 21, 15, 25, 31, 30, 29, 28, 27, 26, 24, 1..."
1,5,"[128, 74, 75, 84, 317, 73, 86, 72, 90, 15, 134..."
2,7,"[237, 343, 255, 0, 125, 124, 65, 241, 127, 190..."
3,12,"[124, 16, 19, 65, 126, 125, 18, 424, 190, 339,..."
4,16,"[38, 39, 225, 136, 230, 40, 41, 42, 43, 44, 45..."


In [5]:
# Preview the first rows of the second model's submission.
sub2.head()

Unnamed: 0,profile_id,predicted_list
0,3,"[17, 16, 19, 18, 347, 124, 55, 224, 21, 23, 30..."
1,5,"[38, 39, 15, 42, 75, 78, 76, 80, 136, 19, 128,..."
2,7,"[343, 237, 255, 241, 347, 125, 124, 0, 339, 18..."
3,12,"[356, 357, 352, 353, 355, 3663, 2174, 426, 366..."
4,16,"[65, 124, 125, 339, 190, 2029, 127, 241, 50, 1..."


In [8]:
# Pair the two models' predictions by profile_id instead of by row position:
# positional assignment silently blends lists that belong to different profiles
# whenever the two files are ordered differently or cover different ids.
# how='left' keeps sub1's rows and order; validate='one_to_one' raises a
# MergeError if either file repeats a profile_id.
ensemble = (
    sub1[['profile_id', 'predicted_list']]
    .rename(columns={'predicted_list': 'prediction1'})
    .merge(
        sub2[['profile_id', 'predicted_list']]
        .rename(columns={'predicted_list': 'prediction2'}),
        on='profile_id',
        how='left',
        validate='one_to_one',
    )
)

# Fail here with a clear message rather than later inside the blending step.
assert ensemble['prediction2'].notna().all(), \
    "some profile_id values in sub1 have no prediction in sub2"

In [9]:
# Preview the frame holding both models' predictions side by side.
ensemble.head()

Unnamed: 0,profile_id,prediction1,prediction2
0,3,"[20, 21, 15, 25, 31, 30, 29, 28, 27, 26, 24, 1...","[17, 16, 19, 18, 347, 124, 55, 224, 21, 23, 30..."
1,5,"[128, 74, 75, 84, 317, 73, 86, 72, 90, 15, 134...","[38, 39, 15, 42, 75, 78, 76, 80, 136, 19, 128,..."
2,7,"[237, 343, 255, 0, 125, 124, 65, 241, 127, 190...","[343, 237, 255, 241, 347, 125, 124, 0, 339, 18..."
3,12,"[124, 16, 19, 65, 126, 125, 18, 424, 190, 339,...","[356, 357, 352, 353, 355, 3663, 2174, 426, 366..."
4,16,"[38, 39, 225, 136, 230, 40, 41, 42, 43, 44, 45...","[65, 124, 125, 339, 190, 2029, 127, 241, 50, 1..."


In [14]:
def cust_blend(dt, W=(1, 1, 1), cols=('prediction1', 'prediction2'), top_n=25):
    """Blend several ranked recommendation lists into one ranking.

    Each model ``m`` contributes ``W[m] / rank`` to every item it recommends,
    where ``rank`` is the item's 1-based position in that model's list.
    Contributions are summed per item and items are returned in order of
    decreasing total score.

    Parameters
    ----------
    dt : mapping or pandas.Series
        One row of the ensemble frame. ``dt[col]`` must be an iterable of
        hashable item ids, best first, for every name in ``cols``.
    W : sequence of numbers, optional
        Global weight per model, in the same order as ``cols``. Entries beyond
        ``len(cols)`` are ignored.
    cols : sequence of str, optional
        Names of the entries of ``dt`` that hold the per-model rankings.
    top_n : int, optional
        Maximum number of items to return (25 by default).

    Returns
    -------
    list
        At most ``top_n`` item ids, highest blended score first. Items with
        equal scores keep the order in which they were first encountered.

    Raises
    ------
    ValueError
        If ``W`` provides fewer weights than there are entries in ``cols``.
    """
    if len(W) < len(cols):
        raise ValueError(
            "expected at least %d weights, got %d" % (len(cols), len(W))
        )

    # Accumulate the reciprocal-rank score of every recommended item.
    scores = {}
    for weight, col in zip(W, cols):
        for rank, item in enumerate(dt[col], start=1):
            scores[item] = scores.get(item, 0) + weight / rank

    # sorted() is stable, so ties stay in first-seen (insertion) order.
    ranked = sorted(scores.items(), key=lambda pair: -pair[1])

    # Keep only the best top_n items.
    return [item for item, _ in ranked[:top_n]]

In [15]:
# Blend row by row (axis=1 hands each row to cust_blend); the first model is
# weighted 1.15 and the second 1.00.
ensemble['predicted_list'] = ensemble.apply(
    cust_blend,
    axis=1,
    W=[1.15, 1.00],
)

In [16]:
# Preview the frame again, now including the blended predicted_list column.
ensemble.head()

Unnamed: 0,profile_id,prediction1,prediction2,predicted_list
0,3,"[20, 21, 15, 25, 31, 30, 29, 28, 27, 26, 24, 1...","[17, 16, 19, 18, 347, 124, 55, 224, 21, 23, 30...","[20, 17, 21, 16, 19, 15, 25, 18, 31, 30, 124, ..."
1,5,"[128, 74, 75, 84, 317, 73, 86, 72, 90, 15, 134...","[38, 39, 15, 42, 75, 78, 76, 80, 136, 19, 128,...","[128, 38, 75, 74, 39, 15, 84, 73, 42, 317, 136..."
2,7,"[237, 343, 255, 0, 125, 124, 65, 241, 127, 190...","[343, 237, 255, 241, 347, 125, 124, 0, 339, 18...","[237, 343, 255, 0, 125, 241, 124, 347, 65, 339..."
3,12,"[124, 16, 19, 65, 126, 125, 18, 424, 190, 339,...","[356, 357, 352, 353, 355, 3663, 2174, 426, 366...","[124, 356, 16, 357, 19, 352, 65, 353, 126, 355..."
4,16,"[38, 39, 225, 136, 230, 40, 41, 42, 43, 44, 45...","[65, 124, 125, 339, 190, 2029, 127, 241, 50, 1...","[38, 65, 39, 124, 225, 125, 136, 339, 230, 190..."


In [17]:
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs(SUBMISSION_DIR, exist_ok=True)

# Only the id and the blended ranking go into the submission file.
# The 'utf-8-sig' encoding writes a UTF-8 byte-order mark at the start of the file.
(ensemble[['profile_id', 'predicted_list']]
 .to_csv(os.path.join(SUBMISSION_DIR, 'final.csv'), index=False, encoding='utf-8-sig')
)