# Create RecBole "atomic files" (.item and .inter) from the H&M data

In [None]:
import pandas as pd
import numpy as np
# Article metadata. article_id is read as text instead of being parsed as a number.
df_item = pd.read_csv(
    r"/kaggle/input/h-and-m-personalized-fashion-recommendations/articles.csv",
    dtype={'article_id': 'str'},
)

# Phrase-embedding table with its 'article_id' and 'Unnamed: 0' columns removed,
# so only the remaining columns are kept.
# NOTE(review): once article_id is dropped, only row order ties these rows to
# df_item - confirm both files list the articles in the same order.
df_phrase_embeddings = pd.read_csv(
    '../input/handmarticledescriptionembeddings/phrase_embeddings.csv'
).drop(columns=['article_id', 'Unnamed: 0'])

# Transaction log; article_id is again kept as text.
df_inter = pd.read_csv(
    r"/kaggle/input/h-and-m-personalized-fashion-recommendations/transactions_train.csv",
    dtype={'article_id': 'str'},
)

In [None]:
# Remove the descriptive text columns from the item table.
df_item = df_item.drop(
    columns=[
        'product_type_name',
        'graphical_appearance_name',
        'colour_group_name',
        'perceived_colour_value_name',
        'perceived_colour_master_name',
        'index_name',
        'index_group_name',
        'section_name',
        'garment_group_name',
        'prod_name',
        'department_name',
        'detail_desc',
    ]
)

## items + phrase encodings

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Standardise every embedding column before the projection.
x = df_phrase_embeddings.to_numpy()
x_scaled = StandardScaler().fit_transform(x)

# A fractional n_components combined with svd_solver='full' makes PCA pick the
# number of components from the explained-variance ratio (0.5 here) instead of
# using a fixed count.
x_pca = PCA(svd_solver='full', n_components=0.5).fit_transform(x_scaled)

# Columns of the resulting frame are labelled 0..k-1 by default.
df_phrase = pd.DataFrame(data=x_pca)
df_phrase.head()

In [None]:
# Persist the reduced embeddings next to the notebook; the index is written too
# because to_csv keeps it by default.
df_phrase.to_csv(path_or_buf='phrase_embeddings_pca.csv')

In [None]:
# Append the PCA columns to the right of the item table. Column-wise concat
# aligns rows on the index.
# NOTE(review): this relies on df_item and df_phrase having the same length and
# row order; a length mismatch would pad with NaN rather than fail - confirm.
df_item = pd.concat(objs=[df_item, df_phrase], axis='columns')
df_item.head(n=3)

In [None]:
# Give every item column a `name:type` header. The integer-labelled PCA columns
# (0..k-1) all become `<i>:float`.
# NOTE(review): product_type_no is tagged :float while the other *_no / *_code
# identifiers are tagged :token - confirm this is intended.
temp = df_item.rename(
    {
        'article_id': 'item_id:token',
        'product_code': 'product_code:token',
        'product_type_no': 'product_type_no:float',
        'product_group_name': 'product_group_name:token_seq',
        'graphical_appearance_no': 'graphical_appearance_no:token',
        'colour_group_code': 'colour_group_code:token',
        'perceived_colour_value_id': 'perceived_colour_value_id:token',
        'perceived_colour_master_id': 'perceived_colour_master_id:token',
        'department_no': 'department_no:token',
        'index_code': 'index_code:token',
        'index_group_no': 'index_group_no:token',
        'section_no': 'section_no:token',
        'garment_group_no': 'garment_group_no:token',
        **{pca_col: f'{pca_col}:float' for pca_col in range(df_phrase.shape[1])},
    },
    axis='columns',
)
temp.head()

In [None]:
import os

# Create the output folder in pure Python. exist_ok=True makes re-running this
# cell harmless, whereas a plain `mkdir` reports an error once the folder exists.
os.makedirs('/kaggle/working/hm_data', exist_ok=True)

# Write the item table as a tab-separated file without the index column.
temp.to_csv(r'/kaggle/working/hm_data/hm_data.item', index=False, sep='\t')

## interactions

In [None]:
# Parse the transaction date (YYYY-MM-DD) into a datetime column.
df_inter['t_dat'] = pd.to_datetime(df_inter['t_dat'], format="%Y-%m-%d")

# Unix timestamp in whole seconds. Subtracting the epoch and floor-dividing by
# one second yields the same integers as casting nanosecond values to int64 and
# dividing by 10**9, but stays correct whatever unit the datetime column is
# stored in.
df_inter['timestamp'] = (df_inter['t_dat'] - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")
df_inter.head()

In [None]:
# Keep interactions strictly after the cutoff (1585620000 s since the epoch is
# 2020-03-31 02:00:00 UTC), project to the three needed columns and give them
# `name:type` headers.
temp = (
    df_inter
    .loc[df_inter['timestamp'] > 1585620000, ['customer_id', 'article_id', 'timestamp']]
    .rename(
        columns={
            'customer_id': 'user_id:token',
            'article_id': 'item_id:token',
            'timestamp': 'timestamp:float',
        }
    )
)
temp

In [None]:
# Write the interaction table as a tab-separated file without the index column.
temp.to_csv('/kaggle/working/hm_data/hm_data.inter', sep='\t', index=False)

# Default recommendations (weighted blend of three pre-computed submissions)

In [None]:
import os
import numpy as np
import pandas as pd

In [None]:
# Load the three pre-computed submissions the same way: order by customer_id and
# renumber the index from 0.
sub0, sub1, sub2 = (
    pd.read_csv(csv_path).sort_values('customer_id').reset_index(drop=True)
    for csv_path in (
        '../input/hm-pre-recommendation/submissio_byfone_chris.csv',
        '../input/hm-pre-recommendation/submission_trending.csv',
        '../input/hm-pre-recommendation/submission_exponential_decay.csv',
    )
)

# Quick look at the three shapes side by side.
sub0.shape, sub1.shape, sub2.shape

In [None]:
# Column assignment aligns on the index, and all three frames were sorted by
# customer_id and re-indexed from 0 when loaded, so the assignments below are
# effectively positional. Verify the customer_id columns really match first;
# otherwise one customer's row would silently receive another customer's
# predictions.
if not (sub0['customer_id'].equals(sub1['customer_id'])
        and sub0['customer_id'].equals(sub2['customer_id'])):
    raise ValueError(
        "customer_id columns of the three submissions differ; "
        "their predictions cannot be combined row by row"
    )

# Gather every model's prediction string into sub0 under numbered column names.
sub0.columns = ['customer_id', 'prediction0']
sub0['prediction1'] = sub1['prediction']
sub0['prediction2'] = sub2['prediction']

# The extra frames are no longer needed; release them.
del sub1, sub2

In [None]:
def cust_blend(dt, W = [1,1,1]):
    """Blend one customer's three ranked recommendation lists into one string.

    Each list is a whitespace-separated string of item ids, best first. An item
    at 0-based position ``n`` in model ``M``'s list contributes
    ``W[M] / (n + 1)`` to its score; contributions for the same item are summed
    across models. Items are ordered by descending score, ties keeping the
    order in which the items were first seen.

    Parameters
    ----------
    dt : mapping-like (e.g. a DataFrame row)
        Must provide the keys 'prediction0', 'prediction1' and 'prediction2'.
        A value that is not a string (such as NaN or None) is treated as an
        empty list instead of raising.
    W : sequence of numbers
        Global weight per model, indexed 0..2. It is only read, never mutated.

    Returns
    -------
    str
        At most 12 item ids joined by single spaces.
    """
    scores = {}
    for M, column in enumerate(('prediction0', 'prediction1', 'prediction2')):
        raw = dt[column]
        # Missing predictions arrive as non-string values; skip them.
        ranked = raw.split() if isinstance(raw, str) else []
        for n, v in enumerate(ranked):
            # Rank-discounted vote, scaled by the model's global weight.
            scores[v] = scores.get(v, 0) + W[M] / (n + 1)

    # Stable sort on the negated score: highest first, ties in insertion order.
    ordered = sorted(scores, key=lambda item: -scores[item])

    # Return the top 12 items only.
    return ' '.join(ordered[:12])

# Blend row by row; the weights give the first model slightly more influence
# and the third slightly less.
sub0['prediction'] = sub0.apply(cust_blend, axis=1, W=[1.05, 1.00, 0.95])
sub0.head()

In [None]:
# Remove the per-model helper columns in place, leaving customer_id and the
# blended prediction.
sub0.drop(columns=['prediction0', 'prediction1', 'prediction2'], inplace=True)

# Write the final submission without the index column.
sub0.to_csv('submission.csv', index=False)