# **DATA DOWNLOAD**

In [None]:
import pandas as pd
import os

# All competition files sit in the same Kaggle input directory.
_INPUT_DIR = '../input/h-and-m-personalized-fashion-recommendations'

# Paths to the transactions, customers and articles CSV files.
fname_tran = f'{_INPUT_DIR}/transactions_train.csv'
fname_cus = f'{_INPUT_DIR}/customers.csv'
fname_article = f'{_INPUT_DIR}/articles.csv'

In [None]:
# Load the complete transaction log (a comma is already pandas' default
# separator, so it is not spelled out).
data = pd.read_csv(fname_tran)
data

# **PREPROCESS DATA**

In [None]:
# Columns kept for the rest of the preprocessing; every other column of the
# transaction log is dropped by the selection below.
using_cols = ['customer_id', 'article_id', 'price']
data = data[using_cols]
# Preview the first ten rows of the reduced frame.
data.head(10)

In [None]:
# Every transaction row counts as one purchase. Summing these 1s per
# (customer, article) pair gives how often a customer bought the same article.
# `assign` returns a new frame, so the column is never written in place into a
# frame that itself came from a column selection (the in-place `.loc` write is
# what makes pandas emit SettingWithCopyWarning here).
data = data.assign(count=1)

In [None]:
data = data.groupby(['customer_id','article_id'], as_index=False).sum()
#because price is subordinated to article

In [None]:
data

In [None]:
user_unique = data['customer_id'].unique()
article_unique = data['article_id'].unique()

In [None]:
user_unique

In [None]:
user_to_idx = {v:k for k,v in enumerate(user_unique)}
article_to_idx = {v:k for k,v in enumerate(article_unique)}

In [None]:
# Replace raw customer ids by their dense indices. A failed lookup would turn
# into a missing value and be dropped, so a shorter result means some ids
# could not be mapped; in that case the column is left untouched.
temp_user_data = data['customer_id'].map(user_to_idx.get).dropna()
if len(temp_user_data) != len(data):
    print('detect null')
else:
    print('no-null')
    data['customer_id'] = temp_user_data

# Same procedure for the article ids.
temp_artist_data = data['article_id'].map(article_to_idx.get).dropna()
if len(temp_artist_data) != len(data):
    print('detect null')
else:
    print('no-null')
    data['article_id'] = temp_artist_data

data

# ALS

In [None]:
from scipy.sparse import csr_matrix

# Matrix dimensions: number of distinct customer indices (rows) and number of
# distinct article indices (columns).
num_user = data['customer_id'].nunique()
num_artist = data['article_id'].nunique()

# Sparse interaction matrix: the entry at (customer, article) is the purchase
# count of that pair.
interaction_values = data['count']
row_col_index = (data.customer_id, data.article_id)
csr_data = csr_matrix((interaction_values, row_col_index), shape=(num_user, num_artist))
csr_data

In [None]:
from implicit.als import AlternatingLeastSquares
import os
import numpy as np

In [None]:
# Environment switches for the numeric back ends: one thread each for OpenBLAS
# and MKL, plus KMP_DUPLICATE_LIB_OK.
# NOTE(review): these are set after pandas/numpy/implicit were already
# imported -- confirm the libraries still honour them at this point.
os.environ.update({
    'OPENBLAS_NUM_THREADS': '1',
    'KMP_DUPLICATE_LIB_OK': 'True',
    'MKL_NUM_THREADS': '1',
})

# Articles have 12 features, customers an estimated 6 and the transaction data
# 5 (presumably the reason for factors = 12 * 6 * 5 = 360).
als_model = AlternatingLeastSquares(
    factors=360,
    regularization=0.01,
    use_gpu=True,
    iterations=5,
    dtype=np.float32,
    calculate_training_loss=True,
)

In [None]:
csr_data_transpose = csr_data.T
csr_data_transpose

In [None]:
history = als_model.fit(csr_data_transpose)

In [None]:
# Pick one sample customer and one sample article by their raw ids ...
a_user = user_to_idx['000058a12d5b43e67d225668fa1f8d618c13dc232df0cad8ffe7ad4a1091e318']
a_article = article_to_idx[176209023]
# ... and fetch the matching rows of the model's factor matrices.
a_user_vector = als_model.user_factors[a_user]
a_article_vector = als_model.item_factors[a_article]

In [None]:
# Score of the sample customer for article 176209023: the dot product of the
# customer's factor vector with the article's factor vector.
test_article = article_to_idx[176209023]
test_article_vector = als_model.item_factors[test_article]
# The customer's factor vector must be used here -- `a_user` is only the
# integer row index, and multiplying by it would merely scale the article
# vector instead of producing a score.
np.dot(a_user_vector, test_article_vector)

In [None]:
a_test_article = 176209023
movie_id = article_to_idx[a_test_article]
similar_article = als_model.similar_items(movie_id, N=15)
similar_article

In [None]:
idx_to_article = {v:k for k,v in article_to_idx.items()}
j = [idx_to_article[i[0]] for i in similar_article]
print(j)

In [None]:
from IPython.display import Image
# Show the product image of one of the similar articles listed in `j`.
num_sim = 4
shown_article = j[num_sim]
image_root = '../input/h-and-m-personalized-fashion-recommendations/images'
# Folder name is '0' + the first two digits of the id, file name is '0' + the
# full id.
Image(f'{image_root}/0{str(shown_article)[:2]}/0{int(shown_article)}.jpg', width=200)

In [None]:
user = user_to_idx['00000dbacae5abe5e23885899a1fa44253a17956c6d1c3d25f88aa139fdfc657']
# recommend에서는 user*item CSR Matrix를 받습니다.
article_recommended = als_model.recommend(user, csr_data, N=12, filter_already_liked_items=True)
article_recommended

In [None]:
k = [str(idx_to_article[i[0]]) for i in article_recommended]
print(' '.join(k))

In [None]:
from IPython.display import Image
# Show the product image of one of the recommended articles listed in `k`.
num = 1
recommended_article = k[num]
image_root = '../input/h-and-m-personalized-fashion-recommendations/images'
# Folder name is '0' + the first two characters of the id, file name is '0' +
# the id as an integer.
Image(f'{image_root}/0{str(recommended_article)[:2]}/0{int(recommended_article)}.jpg', width=200)

# **SUBMISSION**

In [None]:
data_R = pd.read_csv(fname_tran, sep=',')

In [None]:
submission=pd.read_csv('../input/h-and-m-personalized-fashion-recommendations/sample_submission.csv')

In [None]:
data_R['customer_id']

In [None]:
users_sub = submission.drop_duplicates(['customer_id'], keep = 'first', ignore_index = True)

In [None]:
users_sub

In [None]:
if users_sub['customer_id'].head(1).to_numpy()[0] in user_to_idx:
    print("contain")

In [None]:
users_a = users_sub['customer_id']

In [None]:
import time
import tensorflow as tf

def run(fallback_prediction=None):
    """Build the prediction string for every customer in ``users_a``.

    Customers present in ``user_to_idx`` get their top-12 recommendations from
    ``als_model``. Customers missing from ``user_to_idx`` have no row in the
    model, so they receive ``fallback_prediction`` rather than the
    personalised recommendations of whichever customer happens to sit at
    index 0.

    Args:
        fallback_prediction: Prediction string used for customers missing from
            ``user_to_idx``. When ``None`` it is built from the (up to) 12
            articles with the highest total purchase count in ``csr_data``.

    Returns:
        list[str]: One space-separated string of article ids per entry of
        ``users_a``, in the same order. Ids are left-padded with zeros to
        10 characters.
    """
    if fallback_prediction is None:
        # Column sums of the customer x article matrix = purchases per article.
        purchase_totals = np.asarray(csr_data.sum(axis=0)).ravel()
        most_purchased = np.argsort(-purchase_totals, kind='stable')[:12]
        fallback_prediction = ' '.join(
            str(idx_to_article[int(item)]).zfill(10) for item in most_purchased
        )

    result_list = []
    total_users = len(users_a)
    elapsed = 0.0

    for num, user in enumerate(users_a):
        start = time.time()

        user_id = user_to_idx.get(user)
        if user_id is None:
            # Cold-start customer: nothing personalised is available.
            result_list.append(fallback_prediction)
        else:
            article_recommended = als_model.recommend(
                user_id, csr_data, N=12, filter_already_liked_items=False
            )
            result_list.append(
                ' '.join(str(idx_to_article[i[0]]).zfill(10) for i in article_recommended)
            )

        elapsed += time.time() - start

        if num % 1000 == 0:
            print('.', end=' ')
        if num % 10000 == 0:
            # Estimate the remaining time from the average per-customer time
            # so far; a single-sample window at num == 0 would otherwise be
            # scaled as if it covered 10000 customers.
            processed = num + 1
            time_left = (total_users - processed) * (elapsed / processed) / 60
            print(f"{elapsed:.5f} sec / TIME_LEFT(min): ", time_left)
    return result_list

In [None]:
result_lists = run()

In [None]:
final_list = [result_lists[i] if i<len(result_lists) else '0110065001 0110065001 0699080001 0838055001 0351484027 0351484033 0458543009 0715303001 0759871001 0566140001 0351484026 0399223029' for i in range(len(submission))]

In [None]:
submission['customer_id'] = users_sub['customer_id']
submission['prediction'] = final_list
submission

In [None]:
submission.to_csv('prediction.csv', index=False)