In [1]:
#importing necessary libraries 
import json
import pandas as pd
import scipy.sparse as sp
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from flask import Flask,request,jsonify
from flask_cors import CORS


In [2]:
#reading meta data set
with open("C:/Users/DELL/Desktop/meta.json",encoding='utf-8') as df:
    meta = json.load(df)
Meta = pd.DataFrame(meta['meta'])

In [3]:
#checking missing values in meta data
Meta.isnull().any()

productid      True
brand          True
category       True
subcategory    True
name           True
dtype: bool

In [4]:
#removing missing values in meta data 
Meta=Meta.dropna()

In [5]:
#changing format of columns by making upper case
Meta['brand']=Meta['brand'].str.upper()
Meta['category']=Meta['category'].str.upper()
Meta['subcategory']=Meta['subcategory'].str.upper()
Meta['name']=Meta['name'].str.upper()

Meta = Meta.replace(',','', regex=True)


In [6]:
#reading event data set
with open("C:/Users/DELL/Desktop/events.json",encoding='utf-8') as df2:
    Event_Data = json.load(df2)
Events = pd.DataFrame(Event_Data['events'])


In [7]:
#cheking missing values in Events data
Events.isnull().any()

event        False
sessionid    False
eventtime    False
price         True
productid     True
dtype: bool

In [8]:
#removing missing values in meta data 
Events=Events.dropna()

In [9]:
#this function creates combining text field by using name, subcategory, brand and category
def PhraseData(data):
        data_phrase = data.drop(columns=['productid'])
        data_phrase['phrase'] = data_phrase[data_phrase.columns[0:4]].apply(lambda x: ','.join(x.dropna().astype(str)),axis=1)
        data_phrase= data_phrase.drop(columns=['name','subcategory','brand','category'])
       
        return data_phrase

In [10]:
c=PhraseData(Meta)
c.head()

Unnamed: 0,phrase
0,"PALETTE,KIŞISEL BAKIM,SAÇ BAKIMI,PALETTE KALIC..."
1,"BEST,PET SHOP,KEDI,BEST PET JÖLE İÇINDE PARÇA ..."
2,"TARIM KREDI,TEMEL GIDA,BAKLIYAT PIRINÇ MAKARNA..."
3,"NAMET,ET BALIK ŞARKÜTERI,ŞARKÜTERI,NAMET FISTI..."
4,"MURATBEY,KAHVALTILIK VE SÜT,PEYNIR,MURATBEY BU..."


In [11]:
#this function creates vectors by transformed text columns
def TransformedData(data_phrase, data):
        object_count = CountVectorizer()
        count_matrix = object_count.fit_transform(data['name'])

        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(data_phrase['phrase'])

        phrase_sparse = sp.hstack([count_matrix, tfidf_matrix], format='csr')
        cosine_sim = cosine_similarity(phrase_sparse, phrase_sparse)
        
        return cosine_sim

In [12]:
t=TransformedData(c,Meta)
print(t)

[[1.         0.         0.         ... 0.00195029 0.00184491 0.00146839]
 [0.         1.         0.         ... 0.         0.         0.        ]
 [0.         0.         1.         ... 0.         0.         0.        ]
 ...
 [0.00195029 0.         0.         ... 1.         0.21658714 0.01102765]
 [0.00184491 0.         0.         ... 0.21658714 1.         0.00868691]
 [0.00146839 0.         0.         ... 0.01102765 0.00868691 1.        ]]


In [13]:
#finding products list in the same sessions 
def FindProductsWithRelatedSessionId(sessionId,dataFrame):

    itemsList = []
    CardItemsId = []
    values = dataFrame.loc[dataFrame["sessionid"] == sessionId]
    itemsList.append(list(values["productid"]))
    count = 0
    for i in range(len(itemsList[0])):
        CardItemsId.append(itemsList[0][i])
        count = count + 1

    ItemsCount = count
    print(CardItemsId)
    return CardItemsId
    

In [14]:
#for example this user has two products in her card
cardsitem=FindProductsWithRelatedSessionId('a0655eee-1267-4820-af21-ad8ac068ff7a',Events)

['HBV00000NVZE8', 'HBV00000NVZE8', 'HBV00000NVZEQ', 'HBV00000NVZFS', 'HBV00000NVZDA', 'HBV00000NVZDA', 'HBV00000NE1Z6', 'HBV00000NE10E', 'HBV00000NE10A', 'HBV00000NFGX7', 'HBV00000O3C6Z', 'HBV00000O2SDB', 'HBV00000NVZBQ', 'HBV00000NVZ9S', 'HBV00000NVZAM', 'HBV00000NVZAM', 'HBV00000NVZAM', 'HBV00000P7QHC', 'HBV00000P7QHC', 'HBV00000P7QHC', 'HBV00000NFMQ2', 'HBV000002O5M7', 'ZYULKER0216308', 'ZYPEYMAN200061', 'ZYTAD7401033', 'HBV00000NE0XW', 'ZYPEYMAN202256', 'ZYPEYMAN202294', 'ZYTAD7121030', 'HBV00000NFGQQ', 'ZYTAD7201015', 'HBV00000NFGQQ', 'HBV00000SP6Y8', 'HBV00000QU4CI', 'HBV00000QU4CI', 'HBV00000JUHKQ', 'HBV00000NFGTT', 'HBV00000NFGTP', 'HBV00000NFMUH', 'HBV00000NFMU9', 'HBV00000NFHGX', 'HBV00000NFHIL', 'ZYSINAN93634200018', 'ZYSINAN93634200018', 'HBV000008QBGR', 'ZYBICN9286983', 'HBV00000NE17E', 'HBV00000KJMHN', 'HBV0000040DB0', 'HBV00000PQJYY', 'HBV00000PQK3S', 'HBV00000NE0TW', 'ZYHPREISBBKL008', 'ZYHPREISBBKL008', 'HBV00000NE0TW', 'ZYDUNUTE80176800', 'HBV00000QU3Z9', 'HBV00000MKE

In [15]:
#creating explanation of items in cart
for i in range(len(cardsitem)):
    itemsList = []
    values = Meta.loc[Meta["productid"] == cardsitem[i]]
    itemsList.append(list(values["name"]))
    print(values)
                           

Empty DataFrame
Columns: [productid, brand, category, subcategory, name]
Index: []
Empty DataFrame
Columns: [productid, brand, category, subcategory, name]
Index: []
Empty DataFrame
Columns: [productid, brand, category, subcategory, name]
Index: []
Empty DataFrame
Columns: [productid, brand, category, subcategory, name]
Index: []
Empty DataFrame
Columns: [productid, brand, category, subcategory, name]
Index: []
Empty DataFrame
Columns: [productid, brand, category, subcategory, name]
Index: []
         productid brand            category subcategory  \
643  HBV00000NE1Z6  İÇIM  KAHVALTILIK VE SÜT      YOĞURT   

                         name  
643  ICIM SUZME YOGURT 650 GR  
          productid      brand    category             subcategory  \
6966  HBV00000NE10E  CARREFOUR  TEMEL GIDA  BAHARAT HARÇ VE BULYON   

                       name  
6966  CARREFOUR SUSAM 90 GR  
         productid      brand    category             subcategory  \
479  HBV00000NE10A  CARREFOUR  TEMEL GIDA  BAHA

8251  FILIZ MAKARNA BURGU 500 GR  
          productid  brand            category            subcategory  \
2984  HBV00000MKEF7  KOSKA  KAHVALTILIK VE SÜT  TAHIN PEKMEZ VE HELVA   

                    name  
2984  KOSKA 620 GR TAHIN  
          productid  brand            category      subcategory  \
7352  HBV00000SP80T  ÜLKER  KAHVALTILIK VE SÜT  KREMA VE KAYMAK   

                           name  
7352  İÇIM ŞEF %18 KREMA 200 ML  
          productid  brand            category      subcategory  \
7352  HBV00000SP80T  ÜLKER  KAHVALTILIK VE SÜT  KREMA VE KAYMAK   

                           name  
7352  İÇIM ŞEF %18 KREMA 200 ML  
          productid brand            category subcategory            name
9143  HBV00000PKHK6  İÇIM  KAHVALTILIK VE SÜT         SÜT  İÇIM SÜT 4X1 L
          productid  brand            category subcategory  \
1543  HBV00000SP80V  ÜLKER  KAHVALTILIK VE SÜT      YOĞURT   

                            name  
1543  İÇIM ORGANIK YOĞURT 750 GR  
          produ

In [16]:
#final recommendation function that returns top 10 related products in the same session
def RecommendedProducts(cardlist, data, combine, transform):
        
        newlist = list(dict.fromkeys(cardlist))
        simScoresForAllProducts =  list()
        for i in range(len(newlist)):
            
            try:
                indices = pd.Series(data.index, index = data['productid'])
                #print(indices['HBV00000NVZE8'])
                # print(cardlist[i])
                index = indices[newlist[i]]

                sim_scores = list(enumerate(transform[index]))
                sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
                sim_scores = sim_scores[1:11]

                for i in range(len(sim_scores)):
                    simScoresForAllProducts.append(sim_scores[i])
            except:
                print("Product has no brand")

        
        simScoresForAllProducts.sort()
        print(simScoresForAllProducts)
        
        product_indices = [i[0] for i in simScoresForAllProducts[1:11]]

        product_id = data['productid'].iloc[product_indices]
        product_brand = data['brand'].iloc[product_indices]
        product_category = data['category'].iloc[product_indices]
        product_subcategory = data['subcategory'].iloc[product_indices]
        product_name = data['name'].iloc[product_indices]

        recommendation_data = pd.DataFrame(columns=['productid','brand', 'category','subcategory','name'])

        recommendation_data['productid'] = product_id
        recommendation_data['brand'] = product_brand
        recommendation_data['category'] = product_category
        recommendation_data['subcategory'] = product_subcategory
        recommendation_data['name'] = product_name

        return recommendation_data

In [17]:
recommendations=RecommendedProducts(cardsitem, Meta, c, t)
print(recommendations)

Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
[(8, 0.6973428000444871), (9, 0.10595699676400382), (19, 0.45674974686606573), (74, 0.48900755668842655), (128, 0.49360552929443163), (181, 0.4496336621014074), (202, 0.5508089510064973), (239, 0.4678345279223045), (246, 0.6246456272673361), (281, 0.7201514436941098), (290, 0.40777793849066835), (314, 0.41253471852611817), (326, 0.7023530010966923), (340, 0.5549902207206056), (341, 0.6165347903048579), (353, 0.5856553202489198), (372, 0.6024978238301524), (393, 0.7663884072724076), (469, 0.5768549620677105), (494, 0.6376424529288379), (495, 0.939568931810817), (523, 0.8336032902145583), (547, 0.2205308817370708), (562, 0.5298800992444802), (564, 0.56630795

In [18]:
recommendation_dict=recommendations.to_dict("records")
print(recommendation_dict)

[{'productid': 'HBV00000OMGZY', 'brand': 'DOĞAN KITAP', 'category': 'OYUNCAK VE KIRTASIYE', 'subcategory': 'GAZETE VE DERGI', 'name': 'CUMHURIYET GAZETESI'}, {'productid': 'HBV00000PV8KX', 'brand': 'HOT WHEELS', 'category': 'OYUNCAK VE KIRTASIYE', 'subcategory': 'OYUNCAK', 'name': 'HOT WHEELS TAŞIYICI TIRLAR'}, {'productid': 'HBV00000PQJKH', 'brand': 'FRISKIES', 'category': 'PET SHOP', 'subcategory': 'KEDI', 'name': 'FRISKIES YAVRU KEDI MAMASI 15 KG'}, {'productid': 'HBV00000NFGX7', 'brand': 'ZÜBER', 'category': 'ATIŞTIRMALIK', 'subcategory': 'ÇIKOLATA GOFRET VE BARLAR', 'name': 'ZÜBER ISPANAKLI KAYISILI SEBZELI MEYVE TATLISI'}, {'productid': 'HBV00000PLGLF', 'brand': 'DE CECCO', 'category': 'ORGANIK VE DIYET', 'subcategory': 'ORGANIK BAKLIYAT MAKARNA', 'name': 'DE CECCO SPAGHETTINI MAKARNA 500 G'}, {'productid': 'HBV00000PVQ77', 'brand': 'DURU', 'category': 'KIŞISEL BAKIM', 'subcategory': 'DUŞ JELLERI VE SABUNLAR', 'name': 'DURU SABUN SAF-DOĞAL LAVANTA 4*70 GR'}, {'productid': 'HBV000

In [None]:
#api application
#it could be tested with Postman
#after running Postman get the link of session and run
#for a0655eee-1267-4820-af21-ad8ac068ff7a session use 'http://192.168.1.5:5000/events?sessionid=a0655eee-1267-4820-af21-ad8ac068ff7a'
app = Flask(__name__)
CORS(app) 
        
@app.route('/events', methods=['GET'])
def Recommended_Products():

        SessionId =  request.args.get('sessionid')
        CardList = FindProductsWithRelatedSessionId(SessionId,Events)
        RecommendData = RecommendedProducts(cardsitem, Meta, c, t)
        recommendation_dict=RecommendData.to_dict("records")
        
        return jsonify(recommendation_dict)

app.run(host='0.0.0.0')

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses.
 * Running on http://192.168.1.5:5000/ (Press CTRL+C to quit)


['HBV00000NVZE8', 'HBV00000NVZE8', 'HBV00000NVZEQ', 'HBV00000NVZFS', 'HBV00000NVZDA', 'HBV00000NVZDA', 'HBV00000NE1Z6', 'HBV00000NE10E', 'HBV00000NE10A', 'HBV00000NFGX7', 'HBV00000O3C6Z', 'HBV00000O2SDB', 'HBV00000NVZBQ', 'HBV00000NVZ9S', 'HBV00000NVZAM', 'HBV00000NVZAM', 'HBV00000NVZAM', 'HBV00000P7QHC', 'HBV00000P7QHC', 'HBV00000P7QHC', 'HBV00000NFMQ2', 'HBV000002O5M7', 'ZYULKER0216308', 'ZYPEYMAN200061', 'ZYTAD7401033', 'HBV00000NE0XW', 'ZYPEYMAN202256', 'ZYPEYMAN202294', 'ZYTAD7121030', 'HBV00000NFGQQ', 'ZYTAD7201015', 'HBV00000NFGQQ', 'HBV00000SP6Y8', 'HBV00000QU4CI', 'HBV00000QU4CI', 'HBV00000JUHKQ', 'HBV00000NFGTT', 'HBV00000NFGTP', 'HBV00000NFMUH', 'HBV00000NFMU9', 'HBV00000NFHGX', 'HBV00000NFHIL', 'ZYSINAN93634200018', 'ZYSINAN93634200018', 'HBV000008QBGR', 'ZYBICN9286983', 'HBV00000NE17E', 'HBV00000KJMHN', 'HBV0000040DB0', 'HBV00000PQJYY', 'HBV00000PQK3S', 'HBV00000NE0TW', 'ZYHPREISBBKL008', 'ZYHPREISBBKL008', 'HBV00000NE0TW', 'ZYDUNUTE80176800', 'HBV00000QU3Z9', 'HBV00000MKE

192.168.1.5 - - [02/Jun/2022 22:13:29] "GET /events?sessionid=a0655eee-1267-4820-af21-ad8ac068ff7a HTTP/1.1" 200 -


Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
[(8, 0.6973428000444871), (9, 0.10595699676400382), (19, 0.45674974686606573), (74, 0.48900755668842655), (128, 0.49360552929443163), (181, 0.4496336621014074), (202, 0.5508089510064973), (239, 0.4678345279223045), (246, 0.6246456272673361), (281, 0.7201514436941098), (290, 0.40777793849066835), (314, 0.41253471852611817), (326, 0.7023530010966923), (340, 0.5549902207206056), (341, 0.6165347903048579), (353, 0.5856553202489198), (372, 0.6024978238301524), (393, 0.7663884072724076), (469, 0.5768549620677105), (494, 0.6376424529288379), (495, 0.939568931810817), (523, 0.8336032902145583), (547, 0.2205308817370708), (562, 0.5298800992444802), (564, 0.5663079513814688), (581, 0.6863833845946723), (664, 0.37645576574078693), (685, 0.5296591707785016), (744, 0.7681208701361567), (754, 0.32633763537509947), (789, 0.5640919756881734), (799, 0.6100409673149507), (841, 0.5315158828883368), (8

192.168.1.5 - - [02/Jun/2022 22:13:33] "GET / HTTP/1.1" 404 -


['HBV00000NVZE8', 'HBV00000NVZE8', 'HBV00000NVZEQ', 'HBV00000NVZFS', 'HBV00000NVZDA', 'HBV00000NVZDA', 'HBV00000NE1Z6', 'HBV00000NE10E', 'HBV00000NE10A', 'HBV00000NFGX7', 'HBV00000O3C6Z', 'HBV00000O2SDB', 'HBV00000NVZBQ', 'HBV00000NVZ9S', 'HBV00000NVZAM', 'HBV00000NVZAM', 'HBV00000NVZAM', 'HBV00000P7QHC', 'HBV00000P7QHC', 'HBV00000P7QHC', 'HBV00000NFMQ2', 'HBV000002O5M7', 'ZYULKER0216308', 'ZYPEYMAN200061', 'ZYTAD7401033', 'HBV00000NE0XW', 'ZYPEYMAN202256', 'ZYPEYMAN202294', 'ZYTAD7121030', 'HBV00000NFGQQ', 'ZYTAD7201015', 'HBV00000NFGQQ', 'HBV00000SP6Y8', 'HBV00000QU4CI', 'HBV00000QU4CI', 'HBV00000JUHKQ', 'HBV00000NFGTT', 'HBV00000NFGTP', 'HBV00000NFMUH', 'HBV00000NFMU9', 'HBV00000NFHGX', 'HBV00000NFHIL', 'ZYSINAN93634200018', 'ZYSINAN93634200018', 'HBV000008QBGR', 'ZYBICN9286983', 'HBV00000NE17E', 'HBV00000KJMHN', 'HBV0000040DB0', 'HBV00000PQJYY', 'HBV00000PQK3S', 'HBV00000NE0TW', 'ZYHPREISBBKL008', 'ZYHPREISBBKL008', 'HBV00000NE0TW', 'ZYDUNUTE80176800', 'HBV00000QU3Z9', 'HBV00000MKE

192.168.1.5 - - [02/Jun/2022 22:13:45] "GET /events?sessionid=a0655eee-1267-4820-af21-ad8ac068ff7a HTTP/1.1" 200 -


Product has no brand
Product has no brand
Product has no brand
Product has no brand
Product has no brand
[(8, 0.6973428000444871), (9, 0.10595699676400382), (19, 0.45674974686606573), (74, 0.48900755668842655), (128, 0.49360552929443163), (181, 0.4496336621014074), (202, 0.5508089510064973), (239, 0.4678345279223045), (246, 0.6246456272673361), (281, 0.7201514436941098), (290, 0.40777793849066835), (314, 0.41253471852611817), (326, 0.7023530010966923), (340, 0.5549902207206056), (341, 0.6165347903048579), (353, 0.5856553202489198), (372, 0.6024978238301524), (393, 0.7663884072724076), (469, 0.5768549620677105), (494, 0.6376424529288379), (495, 0.939568931810817), (523, 0.8336032902145583), (547, 0.2205308817370708), (562, 0.5298800992444802), (564, 0.5663079513814688), (581, 0.6863833845946723), (664, 0.37645576574078693), (685, 0.5296591707785016), (744, 0.7681208701361567), (754, 0.32633763537509947), (789, 0.5640919756881734), (799, 0.6100409673149507), (841, 0.5315158828883368), (8