In [1]:
#importing necessary libraries 
import json
import pandas as pd
import scipy.sparse as sp
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from flask import Flask,request,jsonify
from flask_cors import CORS


In [2]:
#reading meta data set
with open("C:/Users/DELL/Desktop/meta.json",encoding='utf-8') as df:
    meta = json.load(df)
Meta = pd.DataFrame(meta['meta'])

In [3]:
#checking missing values in meta data
Meta.isnull().any()

productid      True
brand          True
category       True
subcategory    True
name           True
dtype: bool

In [4]:
#removing missing values in meta data 
Meta=Meta.dropna()

In [5]:
#changing format of columns by making upper case
Meta['brand']=Meta['brand'].str.upper()
Meta['category']=Meta['category'].str.upper()
Meta['subcategory']=Meta['subcategory'].str.upper()
Meta['name']=Meta['name'].str.upper()

Meta = Meta.replace(',','', regex=True)


In [6]:
#reading event data set
with open("C:/Users/DELL/Desktop/events.json",encoding='utf-8') as df2:
    Event_Data = json.load(df2)
Events = pd.DataFrame(Event_Data['events'])


In [7]:
#cheking missing values in Events data
Events.isnull().any()

event        False
sessionid    False
eventtime    False
price         True
productid     True
dtype: bool

In [8]:
#removing missing values in meta data 
Events=Events.dropna()

In [9]:
#this function creates combining text field by using name, subcategory, brand and category
def PhraseData(data):
        data_phrase = data.drop(columns=['productid'])
        data_phrase['phrase'] = data_phrase[data_phrase.columns[0:4]].apply(lambda x: ','.join(x.dropna().astype(str)),axis=1)
        data_phrase= data_phrase.drop(columns=['name','subcategory','brand','category'])
       
        return data_phrase

In [None]:
c=PhraseData(Meta)
c.head()

In [11]:
#this function creates vectors by transformed text columns
def TransformedData(data_phrase, data):
        object_count = CountVectorizer()
        count_matrix = object_count.fit_transform(data['name'])

        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(data_phrase['phrase'])

        phrase_sparse = sp.hstack([count_matrix, tfidf_matrix], format='csr')
        cosine_sim = cosine_similarity(phrase_sparse, phrase_sparse)
        
        return cosine_sim

In [12]:
t=TransformedData(c,Meta)
print(t)

[[1.         0.         0.         ... 0.00195029 0.00184491 0.00146839]
 [0.         1.         0.         ... 0.         0.         0.        ]
 [0.         0.         1.         ... 0.         0.         0.        ]
 ...
 [0.00195029 0.         0.         ... 1.         0.21658714 0.01102765]
 [0.00184491 0.         0.         ... 0.21658714 1.         0.00868691]
 [0.00146839 0.         0.         ... 0.01102765 0.00868691 1.        ]]


Vector transformed version of 'phase' and 'name' attributes.

In [13]:
#finding products list in the same sessions 
def FindProductsWithRelatedSessionId(sessionId,dataFrame):

    itemsList = []
    CardItemsId = []
    values = dataFrame.loc[dataFrame["sessionid"] == sessionId]
    itemsList.append(list(values["productid"]))
    count = 0
    for i in range(len(itemsList[0])):
        CardItemsId.append(itemsList[0][i])
        count = count + 1

    ItemsCount = count
    print(CardItemsId)
    return CardItemsId
    

In [14]:
#for example this user has two products in her card
cardsitem=FindProductsWithRelatedSessionId('a0655eee-d2ea7bd3-9235-4a9f-a9ea-d7f296e71318',Events)

['HBV00000U2B18', 'HBV00000NE1S6', 'HBV00000NE1S6', 'HBV00000NE1S6', 'HBV00000NE1S6', 'HBV00000PQOY1', 'HBV00000PVQUP', 'HBV00000O2SGS', 'HBV00000NGSRL', 'ZYBAR7300400117951', 'HBV00000NFMU3', 'HBV00000NFMU3']


In [15]:
#creating explanation of items in cart
for i in range(len(cardsitem)):
    itemsList = []
    values = Meta.loc[Meta["productid"] == cardsitem[i]]
    itemsList.append(list(values["name"]))
    print(values)
                           

          productid   brand       category subcategory  \
9175  HBV00000U2B18  ORAL-B  KIŞISEL BAKIM  AĞIZ BAKIM   

                                               name  
9175  DIŞ FIRÇASI YEDEK BAŞLIĞI STAGES ÇOCUK 2 ADET  
          productid brand            category subcategory  \
7979  HBV00000NE1S6  İÇIM  KAHVALTILIK VE SÜT         SÜT   

                                              name  
7979  İÇIM PASTORIZE TAM YAĞLI GÜNLÜK SÜT 1 LT PET  
          productid brand            category subcategory  \
7979  HBV00000NE1S6  İÇIM  KAHVALTILIK VE SÜT         SÜT   

                                              name  
7979  İÇIM PASTORIZE TAM YAĞLI GÜNLÜK SÜT 1 LT PET  
          productid brand            category subcategory  \
7979  HBV00000NE1S6  İÇIM  KAHVALTILIK VE SÜT         SÜT   

                                              name  
7979  İÇIM PASTORIZE TAM YAĞLI GÜNLÜK SÜT 1 LT PET  
          productid brand            category subcategory  \
7979  HBV00000NE1S6  İÇIM  

In [16]:
#final recommendation function that returns top 10 related products in the same session
def RecommendedProducts(cardlist, data, combine, transform):
        
        newlist = list(dict.fromkeys(cardlist))
        simScoresForAllProducts =  list()
        for i in range(len(newlist)):
            
            try:
                indices = pd.Series(data.index, index = data['productid'])
                #print(indices['HBV00000NVZE8'])
                # print(cardlist[i])
                index = indices[newlist[i]]

                sim_scores = list(enumerate(transform[index]))
                sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
                sim_scores = sim_scores[1:11]

                for i in range(len(sim_scores)):
                    simScoresForAllProducts.append(sim_scores[i])
            except:
                print("Product has no brand")

        
        simScoresForAllProducts.sort()
        print(simScoresForAllProducts)
        
        product_indices = [i[0] for i in simScoresForAllProducts[1:11]]

        product_id = data['productid'].iloc[product_indices]
        product_brand = data['brand'].iloc[product_indices]
        product_category = data['category'].iloc[product_indices]
        product_subcategory = data['subcategory'].iloc[product_indices]
        product_name = data['name'].iloc[product_indices]

        recommendation_data = pd.DataFrame(columns=['productid','brand', 'category','subcategory','name'])

        recommendation_data['productid'] = product_id
        recommendation_data['brand'] = product_brand
        recommendation_data['category'] = product_category
        recommendation_data['subcategory'] = product_subcategory
        recommendation_data['name'] = product_name

        return recommendation_data

In [17]:
recommendations=RecommendedProducts(cardsitem, Meta, c, t)
print(recommendations)

Product has no brand
[(134, 0.6387672866124542), (239, 0.536218017745264), (243, 0.3861344203244569), (385, 0.2188381618488833), (1346, 0.47385012050342123), (1565, 0.6461689572634213), (1601, 0.529964168043756), (1712, 0.31162479869036736), (1773, 0.6040475504900477), (1873, 0.892673324478071), (1935, 0.2188381618488833), (2021, 0.5348260870184335), (2040, 0.5252697470306805), (2581, 0.6391727713032002), (2725, 0.6148965791528653), (3142, 0.7226849561152398), (3292, 0.17554228838268354), (3449, 0.6508572619583131), (3504, 0.49717157747450996), (3506, 0.375454199966814), (3599, 0.5201129017785204), (3621, 0.3205790434906542), (3674, 0.41438354836911456), (3732, 0.5298945882893689), (3913, 0.38704417503767913), (3951, 0.30647541759089214), (4112, 0.37446986056602644), (4466, 0.6179964299781096), (4503, 0.5245648491650026), (4516, 0.505959193208145), (4612, 0.2260669899880376), (4743, 0.22606698998803756), (5394, 0.7640662681783955), (5406, 0.1978552472350105), (5643, 0.6389063514991251)

In [18]:
#recommended top 10 products for 
recommendation_dict=recommendations.to_dict("records")
print(recommendation_dict)

[{'productid': 'HBV00000NFMLA', 'brand': 'CARREFOUR', 'category': 'TEMEL GIDA', 'subcategory': 'BAHARAT HARÇ VE BULYON', 'name': 'CARREFOUR NANE 70 GR'}, {'productid': 'HBV00000OE7R9', 'brand': 'AKMINA', 'category': 'İÇECEKLER', 'subcategory': 'MADEN SULARI', 'name': 'AKMINA EXTRA MADEN SUYU CAM 200 ML'}, {'productid': 'KDBR90003', 'brand': 'DOĞAN BURDA DERGI', 'category': 'OYUNCAK VE KIRTASIYE', 'subcategory': 'GAZETE VE DERGI', 'name': 'ATLAS'}, {'productid': 'HBV00000PVR36', 'brand': 'VILEDA', 'category': 'EV BAKIM VE TEMIZLIK', 'subcategory': 'EV TEMIZLIK ÜRÜNLERI', 'name': 'VILEDA EXTRA SENSATION ELDIVEN (KÜÇÜK)'}, {'productid': 'HBV00000PV7ED', 'brand': 'FA', 'category': 'SAĞLIK VE KOZMETIK', 'subcategory': 'PARFÜM DEODORANT', 'name': 'SPORT ERKEK ROLL-ON 50 ML'}, {'productid': 'HBV00000NE0ZQ', 'brand': 'CARREFOUR', 'category': 'TEMEL GIDA', 'subcategory': 'BAHARAT HARÇ VE BULYON', 'name': 'CARREFOUR KÖRI 70 GR'}, {'productid': 'HBV00000SP811', 'brand': 'ÜLKER', 'category': 'TATL

In [None]:
#api application
#it could be tested with Postman
#after running Postman get the link of session and run
#for a0655eee-1267-4820-af21-ad8ac068ff7a session use 'http://192.168.1.5:5000/events?sessionid=a0655eee-d2ea7bd3-9235-4a9f-a9ea-d7f296e71318'
app = Flask(__name__)
CORS(app) 
        
@app.route('/events', methods=['GET'])
def Recommended_Products():

        SessionId =  request.args.get('sessionid')
        CardList = FindProductsWithRelatedSessionId(SessionId,Events)
        RecommendData = RecommendedProducts(cardsitem, Meta, c, t)
        recommendation_dict=RecommendData.to_dict("records")
        print(recommendations)
        
        return jsonify(recommendation_dict)

app.run(host='0.0.0.0')

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses.
 * Running on http://192.168.1.5:5000/ (Press CTRL+C to quit)
192.168.1.5 - - [02/Jun/2022 22:24:27] "GET /events?sessionid=a0655eee-d2ea7bd3-9235-4a9f-a9ea-d7f296e71318 HTTP/1.1" 200 -


[]
Product has no brand
[(134, 0.6387672866124542), (239, 0.536218017745264), (243, 0.3861344203244569), (385, 0.2188381618488833), (1346, 0.47385012050342123), (1565, 0.6461689572634213), (1601, 0.529964168043756), (1712, 0.31162479869036736), (1773, 0.6040475504900477), (1873, 0.892673324478071), (1935, 0.2188381618488833), (2021, 0.5348260870184335), (2040, 0.5252697470306805), (2581, 0.6391727713032002), (2725, 0.6148965791528653), (3142, 0.7226849561152398), (3292, 0.17554228838268354), (3449, 0.6508572619583131), (3504, 0.49717157747450996), (3506, 0.375454199966814), (3599, 0.5201129017785204), (3621, 0.3205790434906542), (3674, 0.41438354836911456), (3732, 0.5298945882893689), (3913, 0.38704417503767913), (3951, 0.30647541759089214), (4112, 0.37446986056602644), (4466, 0.6179964299781096), (4503, 0.5245648491650026), (4516, 0.505959193208145), (4612, 0.2260669899880376), (4743, 0.22606698998803756), (5394, 0.7640662681783955), (5406, 0.1978552472350105), (5643, 0.63890635149912

192.168.1.5 - - [02/Jun/2022 22:24:35] "GET /events?sessionid=a0655eee-d2ea7bd3-9235-4a9f-a9ea-d7f296e71318 HTTP/1.1" 200 -


[]
Product has no brand
[(134, 0.6387672866124542), (239, 0.536218017745264), (243, 0.3861344203244569), (385, 0.2188381618488833), (1346, 0.47385012050342123), (1565, 0.6461689572634213), (1601, 0.529964168043756), (1712, 0.31162479869036736), (1773, 0.6040475504900477), (1873, 0.892673324478071), (1935, 0.2188381618488833), (2021, 0.5348260870184335), (2040, 0.5252697470306805), (2581, 0.6391727713032002), (2725, 0.6148965791528653), (3142, 0.7226849561152398), (3292, 0.17554228838268354), (3449, 0.6508572619583131), (3504, 0.49717157747450996), (3506, 0.375454199966814), (3599, 0.5201129017785204), (3621, 0.3205790434906542), (3674, 0.41438354836911456), (3732, 0.5298945882893689), (3913, 0.38704417503767913), (3951, 0.30647541759089214), (4112, 0.37446986056602644), (4466, 0.6179964299781096), (4503, 0.5245648491650026), (4516, 0.505959193208145), (4612, 0.2260669899880376), (4743, 0.22606698998803756), (5394, 0.7640662681783955), (5406, 0.1978552472350105), (5643, 0.63890635149912

192.168.1.5 - - [02/Jun/2022 22:24:44] "GET /events?sessionid=a0655eee-d2ea7bd3-9235-4a9f-a9ea-d7f296e71318 HTTP/1.1" 200 -


[]
Product has no brand
[(134, 0.6387672866124542), (239, 0.536218017745264), (243, 0.3861344203244569), (385, 0.2188381618488833), (1346, 0.47385012050342123), (1565, 0.6461689572634213), (1601, 0.529964168043756), (1712, 0.31162479869036736), (1773, 0.6040475504900477), (1873, 0.892673324478071), (1935, 0.2188381618488833), (2021, 0.5348260870184335), (2040, 0.5252697470306805), (2581, 0.6391727713032002), (2725, 0.6148965791528653), (3142, 0.7226849561152398), (3292, 0.17554228838268354), (3449, 0.6508572619583131), (3504, 0.49717157747450996), (3506, 0.375454199966814), (3599, 0.5201129017785204), (3621, 0.3205790434906542), (3674, 0.41438354836911456), (3732, 0.5298945882893689), (3913, 0.38704417503767913), (3951, 0.30647541759089214), (4112, 0.37446986056602644), (4466, 0.6179964299781096), (4503, 0.5245648491650026), (4516, 0.505959193208145), (4612, 0.2260669899880376), (4743, 0.22606698998803756), (5394, 0.7640662681783955), (5406, 0.1978552472350105), (5643, 0.63890635149912