In [1]:
# Pin OpenBLAS to a single thread. Kept in the first cell so the variable is
# set before the numeric libraries are imported in the next cell.
# NOTE(review): presumably this avoids thread oversubscription with implicit's
# own multithreaded ALS solver — confirm against the implicit documentation.
%env OPENBLAS_NUM_THREADS=1

env: OPENBLAS_NUM_THREADS=1


In [2]:
import implicit
import pandas as pd
import scipy.sparse as sparse

from tqdm import tqdm
from collections import defaultdict

In [3]:
# Read the training interactions and the test users from the working directory.
raw_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Discard every ROW that has a missing value in any column (dropna() with no
# arguments works row-wise), and take an explicit copy so later column
# assignments operate on an independent frame rather than on raw_data.
data = raw_data.dropna().copy()

In [4]:
# Preview the first ten rows of the cleaned training frame.
data.head(10)

Unnamed: 0,UserId,productid,Quantity,OrderDate
0,18075,12322648,1,01/04/18
1,6820,12371370,1,01/04/18
2,6820,12973004,1,01/04/18
3,6820,12657560,1,01/04/18
4,6820,11659914,1,01/04/18
5,18075,12322648,1,01/04/18
6,26784,12658110,1,01/04/18
7,16920,11659532,1,01/04/18
8,26784,12407108,1,01/04/18
9,26784,11660062,1,01/04/18


In [5]:
# Interaction strength is the purchased quantity, cast to float for the model.
quantity = data['Quantity'].astype(float)
product_idx = data['productid']
user_idx = data['UserId']

# The raw ids are used directly as row/column indices, so each matrix is sized
# by the largest id it sees. Repeated (row, col) pairs are summed by scipy when
# the coordinate data is converted to CSR.
sparse_item_user = sparse.csr_matrix((quantity, (product_idx, user_idx)))
sparse_user_item = sparse.csr_matrix((quantity, (user_idx, product_idx)))

In [6]:
#### Sort the purchases chronologically.
# NOTE(review): values look like '01/04/18'; pd.to_datetime parses that
# month-first by default. If the export is day-first, pass dayfirst=True or an
# explicit format= — confirm against the data source.
data['OrderDate'] = pd.to_datetime(data['OrderDate'])

# Use a stable sort: many rows share the same OrderDate, and the default
# quicksort may reorder them arbitrarily. The history built from this frame
# treats the final entry as the user's "last purchased item", so ties must keep
# their original file order.
data = data.sort_values('OrderDate', kind='mergesort')

In [7]:
##### user_2_prod maps each UserId to the products that user bought, listed in
##### the row order of `data` at the time this cell runs.
user_2_prod = defaultdict(list)

purchase_pairs = data[['UserId', 'productid']].itertuples(index=False, name=None)
for buyer, product in purchase_pairs:
    user_2_prod[buyer].append(product)

In [8]:
# Column names of the submission file and the distinct test users, in order of
# first appearance in test.csv.
fields = ['UserId', 'product_list']
userids = test_data['UserId'].drop_duplicates().tolist()

# Scale the raw quantities by alpha before fitting.
# NOTE(review): the names suggest this is the usual implicit-feedback
# confidence weighting — confirm the choice of 25.
alpha_val = 25
data_conf = (alpha_val * sparse_item_user).astype('double')

# Configure the ALS model; training loss is reported in the progress bar.
model = implicit.als.AlternatingLeastSquares(
    factors=40,
    regularization=0.2,
    iterations=20,
    calculate_training_loss=True,
)

# Fit on the item-user matrix.
# NOTE(review): newer implicit releases expect a user-item matrix here —
# confirm which version this notebook targets before upgrading.
model.fit(data_conf)

100%|██████████| 20.0/20 [00:46<00:00,  1.95s/it, loss=2.3e-6] 


In [9]:
# Number of similar items requested from the model for every user
# (loop-invariant, so it is set once up front).
n_similar = 10

final_recommendation_list = []

# similar_items() depends only on the seed item, and several users can share
# the same last-purchased product, so each seed item is looked up at most once.
similar_cache = {}

for user in tqdm(userids):

    # .get() rather than [] so that unknown users do not insert empty entries
    # into the defaultdict as a side effect of the lookup.
    history = user_2_prod.get(user)
    if not history:
        # Test user with no purchases in the training data: indexing [-1]
        # would raise IndexError. Emit an empty recommendation so the list
        # stays aligned one-to-one with `userids`.
        final_recommendation_list.append([])
        continue

    # Recommendations are the items most similar to the user's last entry in
    # the purchase history.
    last_item = history[-1]
    if last_item not in similar_cache:
        # Each result is indexed as (item id, score); only the id is kept.
        # Cast to a plain int so the value written to the CSV does not depend
        # on how the installed NumPy prints its scalar types.
        similar = model.similar_items(last_item, n_similar)
        similar_cache[last_item] = [int(obj[0]) for obj in similar]

    # Hand out a copy so rows never share one mutable list object.
    final_recommendation_list.append(list(similar_cache[last_item]))

100%|██████████| 2350/2350 [08:23<00:00,  4.89it/s]


In [10]:
#### Write the submission file: one row per test user with its recommended
#### product ids.
filename = 'submission.csv'

# `columns=fields` fixes the column order to the submission layout.
sub = pd.DataFrame(
    {
        'UserId': userids,
        'product_list': final_recommendation_list,
    },
    columns=fields,
)
sub.to_csv(filename, index=False)

In [11]:
#### Score on Public LB: Mean Average Precision @10: