In [1]:
import pandas as pd
import random
from scipy.sparse import coo_matrix, csr_matrix
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
interactions = pd.read_csv("D:\RecSys\syntetic_data.csv", usecols=['uid', 'item_id'])

In [3]:
df_weights = pd.DataFrame({"page_type": ["bought", "added to wishlist"],
                           'weight': [1, 1]})
df_weights

Unnamed: 0,page_type,weight
0,bought,1
1,added to wishlist,1


In [4]:
interactions2 = pd.DataFrame([],columns=['uid', 'item_id', 'page_type'])
lst_prods = []
for uid in range(0,len(interactions.uid)):
    lst_prods = list(filter(lambda x: x.isdigit(), [i.strip('[]\n') for i in interactions[interactions['uid']==uid]['item_id'].to_list()[0].split(' ')]))
    for prod in lst_prods:
        interactions2 = interactions2.append({'uid':uid, 'item_id':prod, 'page_type':random.sample(df_weights['page_type'].to_list(),1)[0]}, ignore_index=True)

In [5]:
products_dict = {}
new_products_dict = {}
n = 0
for i in set(interactions2['item_id']):
    products_dict[int(i)] = n
    new_products_dict[n] = int(i)
    n += 1

In [6]:
interactions2['new_item_id'] = [products_dict[int(item)] for item in interactions2['item_id']]
interactions2

Unnamed: 0,uid,item_id,page_type,new_item_id
0,0,11802,bought,175
1,0,11893,bought,449
2,0,11900,bought,531
3,0,11936,added to wishlist,417
4,0,12023,added to wishlist,19
...,...,...,...,...
4320,199,529589,bought,84
4321,199,574155,added to wishlist,309
4322,199,611127,added to wishlist,320
4323,199,645909,added to wishlist,118


In [7]:
data = pd.read_csv("D:\RecSys\products.csv")

In [8]:
weighted = interactions2.merge(df_weights, on="page_type")\
                  .groupby(["uid", "item_id", "new_item_id"])["weight"]\
                  .sum()\
                  .reset_index()

In [9]:
matrix = coo_matrix((weighted["weight"], (weighted["uid"], weighted["new_item_id"])), 
                    shape=(len(set(weighted["uid"])), len(set(weighted["new_item_id"]))))

In [10]:
class ColloborativeFiltering():
        df_weights_user = {"bought":3, "added to wishlist":1}
        def __init__(self, interactions):
            df_weights = pd.DataFrame({"page_type": ["bought", "added to wishlist"],
                           'weight': [1, 1]})
            #create table with weights and clean data
            interactions2 = pd.DataFrame([],columns=['uid', 'item_id', 'page_type'])
            lst_prods = []
            for uid in range(0,len(interactions.uid)):
                lst_prods = list(filter(lambda x: x.isdigit(), [i.strip('[]\n') for i in interactions[interactions['uid']==uid]['item_id'].to_list()[0].split(' ')]))
                for prod in lst_prods:
                    interactions2 = interactions2.append({'uid':uid, 'item_id':prod, 'page_type':random.sample(df_weights['page_type'].to_list(),1)[0]}, ignore_index=True)
            #create dict with new_products indexes
            self.products_dict = {}
            n = 0
            for i in set(interactions2['item_id']):
                self.products_dict[int(i)] = n
                n += 1
            #make new column with new item ids
            interactions2['new_item_id'] = [self.products_dict[int(item)] for item in interactions2['item_id']]
            #make new column with weights
            weighted = interactions2.merge(df_weights, on="page_type")\
                  .groupby(["uid", "item_id", "new_item_id"])["weight"]\
                  .sum()\
                  .reset_index()
            #create coordinate matrix for users and items
            self.matrix = coo_matrix((weighted["weight"], (weighted["uid"], weighted["new_item_id"])), 
                    shape=(len(set(weighted["uid"])), len(set(weighted["new_item_id"]))))
            
        def get_rec_U2I(self, users_products,page_types):
            #create user_interactions array
            usr_int_arr = np.zeros(matrix.shape[1])
            for i in users_products:
                usr_int_arr[self.products_dict[i]]=self.df_weights_user[page_types[i]]
            #косинусная близость между пользователем и остальными пользователями
            metrics = cosine_similarity([usr_int_arr], self.matrix).reshape(-1, 1)
            #умножаем  все взаимодействия каждого пользователя с товарами на косинусную близость с этим пользователем
            rates = csr_matrix.multiply(self.matrix.copy(), metrics)
            #складываем коэффициенты для каждого товара по всем пользователям и домножаем на 0 те товары, которые уже есть в выборке
            total_rate = (1 - usr_int_arr.astype(bool)) * np.array(np.sum(rates, axis=0))[0]
            return total_rate


In [11]:
CB = ColloborativeFiltering(interactions)

In [12]:
users_products = [96087,114517,70110,110624]

In [13]:
page_types = {96087:"added to wishlist",114517:"bought",70110:"added to wishlist",110624:"added to wishlist"}

In [14]:
rates = CB.get_rec_U2I(users_products, page_types)

In [15]:
prod_metrics = {}
for i in range(0,len(rates)):
    prod_metrics[i] = rates[i]
sorted_list = sorted(prod_metrics.items(), key=lambda x: x[1], reverse=True)
sorted_list[:15]

[(11, 1.492318137393784),
 (575, 1.3713927451162833),
 (554, 1.2030940095329288),
 (53, 1.1341577649026067),
 (167, 1.1205497351180314),
 (35, 1.106428375874431),
 (288, 1.0774776207203083),
 (96, 1.067815737922685),
 (322, 1.061315703020014),
 (374, 1.015931875230642),
 (28, 1.0011227764771293),
 (377, 0.9574028507427479),
 (503, 0.9433877495581668),
 (141, 0.9430039530018304),
 (264, 0.9421972113782504)]

In [16]:
user_df = pd.DataFrame([], columns=['name','article'])
for product in users_products:
    user_df = user_df.append({'name':data[data['article']==product]['name'],'article':product}, ignore_index=True)
user_df

Unnamed: 0,name,article
0,"273 Кукла Роралай CAVE CLUB Name: name, dty...",96087
1,287 Набор с куклой Ася Семья TOYSLAB Name: ...,114517
2,370 LEGO City Fire Лесные пожарные арт. 602...,70110
3,371 Lego 60279 пожарный автомобиль Name: na...,110624


In [17]:
results = pd.DataFrame([],columns=['name','article','metric'])
for i in sorted_list[:15]:
    results = results.append({'name' : data[data['article']==new_products_dict[i[0]]]['name'].to_list()[0], 'article' : int(new_products_dict[i[0]]), 'metric' : i[1]}, ignore_index=True)

In [18]:
results

Unnamed: 0,name,article,metric
0,Набор кукла с мебелью Miliya Valori,119664,1.492318
1,Lego 60304 перекрёсток,110621,1.371393
2,Lego 60287 трактор,110601,1.203094
3,Lego 10953 Единорог,110607,1.134158
4,Кукла Ася. Розовый в моде ToysLab,114518,1.12055
5,Lego 71381 кусалкин на цепи,110620,1.106428
6,Кукла Disney Frozen Холодное сердце 2 в ассорт...,111843,1.077478
7,LEGO City Fire Пожар в бургер-кафе арт. 60214,659801,1.067816
8,Набор кукла с коляской Nini love Valori,119669,1.061316
9,Barbie кукла космонавт,112553,1.015932
