In [None]:
# Import Required packages
import os
import numpy as np
import pandas as pd
import gc
from datetime import datetime, timedelta


In [None]:
# Input from all best performing models
def _read_submission(csv_path):
    """Load one submission CSV ordered by customer_id with a fresh 0..n-1 index."""
    frame = pd.read_csv(csv_path)
    frame = frame.sort_values('customer_id')
    return frame.reset_index(drop=True)


sub1 = _read_submission('submission_ltr.csv')
sub2 = _read_submission('submission_age.csv')
sub3 = _read_submission('submission_trending.csv')
sub4 = _read_submission('submission_regbole.csv')

In [None]:
# Merge all submissions
# Predictions are joined on customer_id instead of being assigned by row
# position: positional assignment silently pairs a prediction with the wrong
# customer (or leaves NaN) whenever the submission files do not contain exactly
# the same customers.
sub1.columns = ['customer_id', 'prediction1']
for model_no, other in enumerate((sub2, sub3, sub4), start=2):
    pred_col = f'prediction{model_no}'
    sub1 = sub1.merge(
        other[['customer_id', 'prediction']].rename(columns={'prediction': pred_col}),
        on='customer_id',
        how='left',              # keep sub1's customers and their order
        validate='one_to_one',   # fail loudly on duplicated customer_ids
    )
    # A customer missing from a model contributes an empty recommendation
    # list, so the later `.split()` calls keep working.
    sub1[pred_col] = sub1[pred_col].fillna('')
sub1.head()

In [None]:
# Save for later
# Untouched snapshot of the merged predictions; the weight search and the
# final blend both rebuild their working frame from this copy.
sub_copy = sub1.copy(deep=True)

In [None]:
# Creating Recommendation based on weights
def cust_blend(dt, W, k=12):
    """Blend the four models' ranked recommendations into one ranked list.

    Every article receives ``W[m] / rank`` for each model ``m`` that lists it,
    where ``rank`` is the article's 1-based position in that model's list.
    Contributions are summed per article and the articles with the highest
    totals are returned. Ties keep first-seen order (model 1 first, then
    model 2, ...).

    Parameters
    ----------
    dt : mapping or pandas row
        Must provide 'prediction1' .. 'prediction4', each a whitespace-separated
        string of article ids. Non-string values (NaN / None, e.g. a customer
        missing from one model) are treated as an empty list.
    W : sequence of numbers
        One weight per model, in the order prediction1 .. prediction4.
    k : int, default 12
        Number of articles to return.

    Returns
    -------
    str
        Up to ``k`` article ids joined by single spaces.
    """
    pred_cols = ('prediction1', 'prediction2', 'prediction3', 'prediction4')

    # Accumulate the rank-discounted weight of every article
    totals = {}
    for model_idx, col in enumerate(pred_cols):
        raw = dt[col]
        if not isinstance(raw, str):
            # NaN/None cannot be split; skip instead of raising AttributeError
            continue
        weight = W[model_idx]
        for rank, article in enumerate(raw.split(), start=1):
            totals[article] = totals.get(article, 0.0) + weight / rank

    # Highest total first; sorted() is stable, so ties keep insertion order
    ranked = sorted(totals, key=lambda article: -totals[article])

    # Returning the top k
    return ' '.join(ranked[:k])

### Mean Average Precision (MAP@12)


In [None]:
# Functions to perform MAP check using the validation dataset
def apk(actual, predicted, k=12):
    """Average precision at k for a single customer.

    Only the first ``k`` entries of ``predicted`` are considered. A prediction
    counts as a hit when it occurs in ``actual`` and has not already appeared
    earlier in the truncated prediction list. Returns 0.0 when ``actual`` is
    empty; otherwise the summed precision-at-hit is divided by
    ``min(len(actual), k)``.
    """
    top_k = predicted[:k] if len(predicted) > k else predicted

    hits = 0.0
    precision_sum = 0.0
    for rank, item in enumerate(top_k, start=1):
        # Repeated predictions are only credited the first time they appear
        first_occurrence = item not in top_k[:rank - 1]
        if first_occurrence and item in actual:
            hits += 1.0
            precision_sum += hits / rank

    if not actual:
        return 0.0

    return precision_sum / min(len(actual), k)

def mapk(actual, predicted, k=12):
    """Mean of ``apk`` over paired entries of ``actual`` and ``predicted``.

    The two iterables are walked in lockstep with ``zip``, so pairing stops at
    the shorter of the two.
    """
    per_customer = []
    for truth, guess in zip(actual, predicted):
        per_customer.append(apk(truth, guess, k))
    return np.mean(per_customer)

#### Creating the validation dataset

In [None]:
# Input - Transactions
# Only the purchase date, customer and article columns are loaded;
# article_id is parsed as int32.
transactions = pd.read_csv(
    'transactions_train.csv',
    dtype={'article_id': 'int32'},
    usecols=['t_dat', 'customer_id', 'article_id'],
)



In [None]:
# input customers
# Only the customer_id column is read.
customer_columns = ['customer_id']
customers = pd.read_csv('customers.csv', usecols=customer_columns)

In [None]:
# Type Conversions
transactions['t_dat'] = pd.to_datetime(transactions['t_dat'])

# Splitting: validation is every transaction dated 2020-09-16 or later
in_validation_window = transactions['t_dat'] >= '2020-09-16'
valid_df = transactions.loc[in_validation_window]

# Sorting: descending on both customer_id and t_dat
valid_df = valid_df.sort_values(by=["customer_id", "t_dat"], ascending=False)


In [None]:
# Formatting
# Order purchases per customer chronologically, collect them into one list per
# customer, then render each list as a space-separated string in which every
# article id is prefixed with '0' (the ids were read as integers).
valid_df = valid_df.sort_values(by=['customer_id', 't_dat'], ascending=True)
valid_cust = (
    valid_df
    .groupby('customer_id')['article_id']
    .apply(list)
    .reset_index()
)
valid_cust['valid_true'] = valid_cust['article_id'].map(
    lambda articles: ' '.join('0' + str(article) for article in articles)
)


In [None]:
# Run the MAP check using different weights and save scores
k = 12 #recommendations
N_TRIALS = 100          # number of random weight combinations to evaluate
CHECKPOINT_EVERY = 15   # write scores.csv every this many trials
SEED = 42               # fixed seed so the random search is reproducible
rng = np.random.default_rng(SEED)

pred_cols = ['prediction1', 'prediction2', 'prediction3', 'prediction4']

# Only validation customers are scored, so attach their model predictions once
# instead of re-blending every customer on every trial. Customers without
# predictions get empty strings so that `.split()` never sees NaN.
valid_inputs = valid_cust.merge(sub_copy, on='customer_id', how='left')
valid_inputs[pred_cols] = valid_inputs[pred_cols].fillna('')

# The ground truth does not depend on the weights: split it a single time.
valid_truth = [truth.split() for truth in valid_inputs['valid_true']]

# Collect one record per trial and build the frame from the list;
# DataFrame.append is deprecated and copies the whole frame on every call.
score_records = []
df_score = pd.DataFrame()

for i in range(N_TRIALS):

    print(i)
    W1 = round(rng.uniform(0.00, 1.90), 2)
    W2 = round(rng.uniform(1.90, 2.50), 2)
    W3 = round(rng.uniform(0.00, 1.90), 2)
    W4 = round(rng.uniform(0.00, 1.90), 2)
    weights = [W1, W2, W3, W4]

    blended = valid_inputs.apply(cust_blend, W=weights, axis=1)
    submission = valid_inputs.drop(columns=pred_cols).assign(prediction=blended)

    score = mapk(
        valid_truth,
        [prediction.split() for prediction in submission['prediction']],
        k=k,
    )

    score_records.append({"weight": weights, "score": score})

    # Periodic checkpoint so a long run can be inspected or resumed by hand
    if i % CHECKPOINT_EVERY == 0:
        df_score = pd.DataFrame(score_records)
        df_score.to_csv("scores.csv", index=False)

df_score = pd.DataFrame(score_records)
    

In [None]:
# Persist the complete score table once the search has finished
df_score.to_csv(path_or_buf="scores.csv", index=False)

In [None]:
# Predict recommendations for optimal weights - Entire dataset
# NOTE(review): the second weight (0.86) lies outside the 1.90-2.50 range that
# the random search samples for W2 - confirm where these weights came from.
sub1 = sub_copy.copy()
sub1['prediction'] = sub1.apply(cust_blend, W=[0.24, 0.86, 1.01, 0.74], axis=1)

# Keep only customer_id and the blended prediction
sub1 = sub1.drop(columns=['prediction1', 'prediction2', 'prediction3', 'prediction4'])

In [None]:
# Output
# Write the blended recommendations without the index column
sub1.to_csv(path_or_buf="submission.csv", index=False)