In [None]:
import numpy as np
import pandas as pd

from pathlib import Path
from tqdm import tqdm

In [None]:
# Number of article ids kept per ranking (passed to `nlargest` further down).
N = 12

# Directory holding the input CSV files read by this notebook
# (transactions_train.csv and sample_submission.csv).
data_path = Path(
    '../input/h-and-m-personalized-fashion-recommendations/'
)

In [None]:
# Load the transaction log, keeping only the purchase date, the buyer and the
# item. `article_id` is read as text so the ids stay exactly as written.
transactions_csv = data_path / 'transactions_train.csv'
wanted_columns = ['t_dat', 'customer_id', 'article_id']
df = pd.read_csv(
    transactions_csv,
    usecols=wanted_columns,
    dtype={'article_id': str},
)

# Parse the dates; the most recent one is the reference point from which
# purchase age is measured later on.
df['t_dat'] = pd.to_datetime(df['t_dat'])
last_ts = df['t_dat'].max()

In [None]:
# Per-article transaction counts: after dropping `customer_id`, `count()`
# leaves the number of rows per article in the remaining `t_dat` column.
sales = df.drop(columns='customer_id').groupby('article_id').count()

# The N most frequently bought articles, used as the fallback recommendation.
transactions_per_article = sales['t_dat']
general_pred = transactions_per_article.nlargest(N).index.to_list()

In [None]:
# Build purchase_dict: customer_id -> {article_id: accumulated score}.
#
# Each transaction contributes max(0, a / sqrt(x) + b * exp(-c * x) - d),
# where x is the purchase age in whole days relative to `last_ts`, floored
# at 1. Contributions of repeated (customer, article) pairs are summed.
#
# The score is computed once for the whole column with numpy instead of
# per row through `df.at`, and the constants are defined once outside the
# loop. Rows are still visited in frame order, so the summation order and
# the insertion order of the dictionaries match a row-by-row pass.
a, b, c, d = 2.5e4, 1.5e5, 2e-1, 1e3

# Age of every purchase in days; the floor of 1 keeps same-day purchases
# from dividing by sqrt(0).
days_ago = np.maximum((last_ts - df['t_dat']).dt.days.to_numpy(), 1)

# Decay score per transaction, clipped so old purchases contribute zero
# rather than a negative amount.
weights = np.maximum(a / np.sqrt(days_ago) + b * np.exp(-c * days_ago) - d, 0.0)

purchase_dict = {}

rows = zip(df['customer_id'].to_numpy(), df['article_id'].to_numpy(), weights)
for cust_id, art_id, weight in tqdm(rows, total=len(df)):
    # Zero-weight rows are still recorded so the customer stays present in
    # purchase_dict, exactly as in a plain row-by-row accumulation.
    cust_scores = purchase_dict.setdefault(cust_id, {})
    cust_scores[art_id] = cust_scores.get(art_id, 0) + weight

In [None]:
# Produce one prediction string per customer listed in the sample submission
# and write the result to submission.csv.
sub = pd.read_csv(data_path / 'sample_submission.csv')

pred_list = []
for cust_id in tqdm(sub['customer_id']):
    if cust_id in purchase_dict:
        # Rank this customer's articles by score, ignoring zero scores.
        series = pd.Series(purchase_dict[cust_id])
        series = series[series > 0]
        picks = series.nlargest(N).index.tolist()
        if len(picks) < N:
            # Pad with best sellers, skipping ids already picked so the same
            # article is never listed twice in one prediction. The padded
            # list may therefore end up shorter than N.
            already_picked = set(picks)
            fillers = [art for art in general_pred if art not in already_picked]
            picks = picks + fillers[:N - len(picks)]
    else:
        # No purchase history for this customer: fall back to best sellers.
        picks = general_pred
    pred_list.append(' '.join(picks))

sub['prediction'] = pred_list
# index=False omits the row index from the output file.
sub.to_csv('submission.csv', index=False)