# Data loading 
The next cells load the data we need.

In [None]:
import pandas as pd
import numpy as np 
import scipy.sparse as sps
import os
import sys
import matplotlib.pyplot as plt
sys.path.append('..')


In [None]:
# Interaction triplets: one record per (user, item) pair, where `row` is the
# user, `col` is the item and `data` is the implicit interaction value.
dataset = pd.read_csv(filepath_or_buffer='data_train.csv')
dataset

In [None]:
# Pull the three triplet columns out of the interaction table.
users, items, data = dataset['row'], dataset['col'], dataset['data']

# Assemble the user-rating matrix in COO form and convert it to CSR right
# away: CSR gives fast row access, i.e. fast access to a user's interactions.
URM_all = sps.coo_matrix((data, (users, items))).tocsr()
URM_all.shape

In [None]:
# `indptr` marks where each row (CSR) or column (CSC) starts, so the difference
# between consecutive entries is the number of stored values in that row/column.
items_per_user = np.diff(URM_all.indptr)
users_per_item = np.diff(URM_all.tocsc().indptr)

# Keep working with the CSR representation from here on.
URM_all = sps.csr_matrix(URM_all)

In [None]:
# Order both count vectors ascending. Sorting works on the bare values, so the
# link between a count and the user/item it belongs to is lost.
items_per_user, users_per_item = (
    np.sort(counts) for counts in (items_per_user, users_per_item)
)

In [None]:
# Item-content triplets (title/abstract features); the columns read from this
# table later on are `row`, `col` and `data`.
ICM_df = pd.read_csv(filepath_or_buffer='data_ICM_title_abstract.csv')
ICM_df

In [None]:
# Pull the three triplet columns out of the content table: rows are items,
# columns are features.
items, features, data = ICM_df['row'], ICM_df['col'], ICM_df['data']

# Assemble the item-content matrix in COO form and convert it to CSR, which
# gives fast row access, i.e. fast access to an item's features.
ICM_all = sps.coo_matrix((data, (items, features))).tocsr()
ICM_all.shape

In [None]:
# Row view (CSR): consecutive indptr differences count the stored features of
# each item.
ICM_all = ICM_all.tocsr()
features_per_item = np.diff(ICM_all.indptr)

# Column view (CSC): the same differences now count the items carrying each
# feature.
ICM_all = ICM_all.tocsc()
items_per_feature = np.diff(ICM_all.indptr)

# Switch back to CSR, the format the rest of the notebook works with.
ICM_all = ICM_all.tocsr()

In [None]:
# Order both count vectors ascending; as with the URM statistics, the mapping
# back to the individual item/feature is not kept.
features_per_item, items_per_feature = (
    np.sort(counts) for counts in (features_per_item, items_per_feature)
)

In [None]:
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# Split the full interaction matrix in two with train_percentage=0.85; the
# second matrix returned is kept aside for validation.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.85,
)

# Evaluator bound to the validation matrix, reporting at cutoff 10.
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
)


In [None]:
from GraphBased.P3alphaRecommender import P3alphaRecommender
from MatrixFactorization.PureSVDRecommender import PureSVDRecommender
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from KNN.ItemKNNSimilarityHybridRecommender import ItemKNNSimilarityHybridRecommender
from KNN.ItemKNNScoresHybridRecommender_Normalized import ItemKNNScoresHybridRecommender_Normalized
from MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_AsySVD_Cython

# Graph-based P3alpha model fitted on the training split.
recommender_alpha = P3alphaRecommender(URM_train)
recommender_alpha.fit(
    topK=475,
    alpha=0.45,
    implicit=True,
)

# AsySVD matrix factorization on the same split, with the library's default
# fit arguments.
recommender_asy_svd = MatrixFactorization_AsySVD_Cython(URM_train)
recommender_asy_svd.fit()

In [None]:
# Content-side model: P3alpha run on the transposed ICM, with the same
# hyperparameters the final-training cell uses. It is built here because the
# similarity hybrid below reads its W_sparse and no earlier cell defines it;
# running the notebook top to bottom used to fail with NameError.
recommender_alpha_ICM = P3alphaRecommender(ICM_all.T)
recommender_alpha_ICM.fit(topK=175, alpha=0.45)
# The model was constructed on the ICM, so point it at the interaction split
# used for validation.
recommender_alpha_ICM.URM_train = URM_train

# First stage: blend the content-based and the collaborative similarity
# matrices into a single item-item similarity model.
hybridrecommender = ItemKNNSimilarityHybridRecommender(
    URM_train,
    recommender_alpha_ICM.W_sparse,
    recommender_alpha.W_sparse,
)
hybridrecommender.fit(topK=600, alpha=0.45)

# Second stage: combine the scores of the similarity hybrid with those of the
# factorization model. The original referenced `recommender_SVD`, which is not
# defined anywhere in the notebook; `recommender_asy_svd` is the only
# factorization model that is actually fitted, so it is used here.
# NOTE(review): if PureSVDRecommender was the intended second model, fit it on
# URM_train and pass it instead.
hybridrecommender_final = ItemKNNScoresHybridRecommender_Normalized(
    URM_train,
    hybridrecommender,
    recommender_asy_svd,
)
hybridrecommender_final.fit(alpha=0.6)



In [None]:
# Score the final hybrid with the validation evaluator; being the last
# expression of the cell, the returned results are displayed.
evaluator_validation.evaluateRecommender(
    hybridrecommender_final,
)

In [None]:
# Users for which recommendations must be produced; the `user_id` column is
# read from this table further down.
test_users = pd.read_csv(filepath_or_buffer='data_target_users_test.csv')
test_users

In [None]:
# Final training: rebuild every component on the full interaction matrix
# (URM_all) before generating the submission.

# Collaborative P3alpha model.
recommender_alpha = P3alphaRecommender(URM_all)
recommender_alpha.fit(topK=475, alpha=0.45, implicit=True)

# Content-side P3alpha model, run on the transposed ICM.
recommender_alpha_ICM = P3alphaRecommender(ICM_all.T)
recommender_alpha_ICM.fit(topK=175, alpha=0.45)
# Everything in this cell works on the full data, so attach URM_all rather
# than the 85% training split the original assigned here.
recommender_alpha_ICM.URM_train = URM_all

# Factorization model retrained on the full data. The original passed an
# undefined `recommender_rp3` to the score hybrid below (NameError); AsySVD is
# the second-stage model fitted elsewhere in this notebook, so it is reused.
# NOTE(review): swap in an RP3beta model here if that was the intended partner.
recommender_asy_svd = MatrixFactorization_AsySVD_Cython(URM_all)
recommender_asy_svd.fit()

# First stage: blend content-based and collaborative item-item similarities.
hybridrecommender = ItemKNNSimilarityHybridRecommender(
    URM_all,
    recommender_alpha_ICM.W_sparse,
    recommender_alpha.W_sparse,
)
hybridrecommender.fit(topK=600, alpha=0.45)

# Second stage: score-level hybrid of the similarity hybrid and AsySVD.
hybridrecommender_final = ItemKNNScoresHybridRecommender_Normalized(
    URM_all,
    hybridrecommender,
    recommender_asy_svd,
)
hybridrecommender_final.fit(alpha=0.45)

user_id = test_users['user_id']
# NOTE(review): as in the original, the submission is produced by the
# first-stage `hybridrecommender`, not by `hybridrecommender_final` (the model
# that the validation cell evaluates). Confirm which one should be submitted.
recommendations = hybridrecommender.recommend(user_id, cutoff=10)


In [None]:
# Replace each per-user recommendation with a NumPy array, updating the
# container in place, then report how many users were processed.
for position, item_list in enumerate(recommendations):
    recommendations[position] = np.array(item_list)
print(len(recommendations))

In [None]:
# Serialise each user's recommendations as item ids separated by one space.
# The ids are joined explicitly instead of going through str(ndarray) and
# stripping the brackets: NumPy pads elements to a common width
# (e.g. '[  1  23 456]') and wraps long rows over several lines, which leaves
# irregular whitespace or newlines inside the `item_list` field.
test_users['item_list'] = [
    ' '.join(str(item) for item in user_items)
    for user_items in recommendations
]

# Write the submission file without the DataFrame index column.
test_users.to_csv('submission.csv', index=False)
