## Calculating lift values for pairs of products added to the same cart

In [3]:
import pandas as pd
import numpy as np
import json

# Load the raw event log and the product metadata.
# NOTE(review): this is an absolute path on the author's machine; point DATA_DIR
# at your own copy of the data before running.
DATA_DIR = 'C:\\Users\\User\\Desktop\\hepsiburada\\data'

# `with` closes each file handle as soon as the JSON has been parsed
# (the handles were previously left open for the lifetime of the kernel).
with open(DATA_DIR + '\\events.json', encoding='utf-8') as events_file:
    events_raw = json.load(events_file)

with open(DATA_DIR + '\\meta.json', encoding='utf-8') as meta_file:
    meta_raw = json.load(meta_file)

# Convert to dataframes: one row per record under the top-level
# 'events' / 'meta' key of the respective file.
events = pd.json_normalize(events_raw, record_path=['events'])
meta = pd.json_normalize(meta_raw, record_path=['meta'])

In [8]:
# Distinct product ids and session ids present in the event log.
products = pd.unique(events['productid']).tolist()
sessions = pd.unique(events['sessionid']).tolist()

n_products = len(products)
n_sessions = len(sessions)
print("number of unique products: ", n_products)
print("number of sessions: ", n_sessions)

number of unique products:  10236
number of sessions:  54442


In [5]:
# Number of events (non-null sessionid values) recorded for each product.
order_counts = (
    events.groupby(['productid'])['sessionid']
    .count()
    .rename('order_count')
    .reset_index()
)
# Most frequently seen products first.
order_counts_sort = order_counts.sort_values(by='order_count', ascending=False)
order_counts_sort['order_count'].describe()

count    10235.000000
mean        37.874939
std        231.855446
min          1.000000
25%          3.000000
50%          8.000000
75%         24.000000
max      17082.000000
Name: order_count, dtype: float64

In [106]:
# 90th percentile of the per-product event counts.
# numeric_only=True skips the string `productid` column; without it pandas >= 2.0
# raises TypeError, because numeric_only defaults to False there.
order_counts_sort.quantile(0.90, numeric_only=True)

order_count    66.0
Name: 0.9, dtype: float64

In [107]:
# Keep only the most frequently seen products: those at or above the 90th
# percentile of order_count. On this data the percentile is 66, so `>=` selects
# exactly the same products as the former hard-coded `> 65` cut-off.
TOP_QUANTILE = 0.90
min_order_count = order_counts_sort['order_count'].quantile(TOP_QUANTILE)

# NOTE: the "99pct" variable names are historical; they are kept unchanged
# because later cells refer to them.
product_top99pct = order_counts_sort.loc[order_counts_sort.order_count >= min_order_count]
# The message used to say "top 80%", which did not match the 90th-percentile cut.
print("product count for top 10% = ", product_top99pct.shape)
product_list_99pct = product_top99pct.productid.unique()

# All events that involve one of the retained products.
product_99pct = events.loc[events.productid.isin(product_list_99pct)]

product count for top 80% =  (1029, 2)


In [109]:
# Session x product matrix: one row per session, one column per retained
# product, each cell holding how many events link that session to that product.
order_matrix = pd.crosstab(
    index=product_99pct['sessionid'],
    columns=product_99pct['productid'],
    rownames=['sessionid'],
    colnames=['productid'],
)
order_matrix

productid,AILEETI1811300,HBV000001K3DI,HBV0000020P42,HBV000002B6KD,HBV000002O5M7,HBV000002XD9R,HBV0000031A41,HBV0000047XQO,HBV000004FX7K,HBV000004FX7O,...,ZYULKER0009204,ZYULKER0270504,ZYULKERSBZ004,ZYULKERSSU002,ZYULUDAMSY001,ZYULUDAMSY003,ZYUNI20052929,ZYUNIL20040349,ZYUNIL21127409,ZYUNIL581595
sessionid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000280f4-62fc-4dcd-b51d-c66ac14d7d8c,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0002e53b-1f60-4309-8380-31ca03de51f8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0002ef34-6bee-4953-874b-8298ec26b625,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000618de-d415-408c-863e-6124db43f529,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000770d6-c2d4-4ad2-bb2c-b35274bc5e7e,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fffb235e-4745-49fb-a452-9d18d54b186b,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fffb5e6a-2676-4cd9-b4e4-ab8b6621e0fe,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fffbba74-6999-460f-bd5f-70eaebe689cf,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fffd3c61-2f71-4437-986c-e1c30ef5b5fe,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [110]:
# For every product column: the number of sessions in which it appears at least once.
product_map = {
    product_id: int((order_matrix[product_id] > 0).sum())
    for product_id in order_matrix
}
product_map

{'AILEETI1811300': 54,
 'HBV000001K3DI': 151,
 'HBV0000020P42': 120,
 'HBV000002B6KD': 83,
 'HBV000002O5M7': 93,
 'HBV000002XD9R': 56,
 'HBV0000031A41': 70,
 'HBV0000047XQO': 65,
 'HBV000004FX7K': 70,
 'HBV000004FX7O': 82,
 'HBV000005E662': 117,
 'HBV00000609PC': 68,
 'HBV000006IYE3': 72,
 'HBV000006IYE9': 108,
 'HBV000006IYJM': 78,
 'HBV000006IYLM': 88,
 'HBV000006IYQ6': 56,
 'HBV000006IYQ8': 54,
 'HBV000006IYTS': 53,
 'HBV000006IYWI': 44,
 'HBV000006IYY4': 333,
 'HBV000006IYYA': 75,
 'HBV000006IYYQ': 193,
 'HBV000006IZ6Z': 54,
 'HBV000006IZ7R': 54,
 'HBV000006JCZD': 43,
 'HBV000007N2WJ': 61,
 'HBV000009ILS2': 74,
 'HBV00000BE1FW': 67,
 'HBV00000DJ9PH': 32,
 'HBV00000DJ9S1': 171,
 'HBV00000DJ9WH': 114,
 'HBV00000DJ9X5': 309,
 'HBV00000DJ9Y5': 169,
 'HBV00000DW9NT': 76,
 'HBV00000DW9NZ': 118,
 'HBV00000DW9O1': 135,
 'HBV00000FSLRU': 63,
 'HBV00000GI4SZ': 45,
 'HBV00000GNAXL': 60,
 'HBV00000H1O7F': 139,
 'HBV00000IFZG2': 76,
 'HBV00000IGET2': 51,
 'HBV00000IGF3P': 85,
 'HBV00000IQDIZ': 

In [112]:
# Lift for every unordered pair of products that share a session (cart):
#
#     lift(A, B) = confidence(A -> B) / support(B)
#                = (n_AB / n_A) / (n_B / total_order)
#
# where n_X is the number of sessions containing X. Lift is symmetric, so each
# pair is reported once, with Product_A being the product that comes first in
# order_matrix's column order.

# NOTE(review): `total_order` was never assigned anywhere in the notebook, so
# this cell raised NameError on a fresh kernel. It is assumed here to be the
# number of sessions in order_matrix -- confirm this is the intended denominator.
total_order = order_matrix.shape[0]

# 1.0 where the product occurs in the session, 0.0 otherwise. float32 halves the
# memory footprint; the counts derived from it stay exact as long as there are
# fewer than 2**24 sessions.
presence = (order_matrix > 0).to_numpy(dtype=np.float32)
product_ids = order_matrix.columns.to_numpy()

# n_A for every product and n_AB for every pair from one matrix product,
# instead of filtering the whole DataFrame once per pair.
session_counts = presence.sum(axis=0, dtype=np.float64)
pair_counts = (presence.T @ presence).astype(np.float64)

# All column-index pairs (i, j) with i < j, in row-major order -- the same
# order in which the nested loop used to emit its rows.
idx_a, idx_b = np.triu_indices(len(product_ids), k=1)
trans_product_A_B = pair_counts[idx_a, idx_b]
confidence_A_B = trans_product_A_B / session_counts[idx_a]
support_B = session_counts[idx_b] / total_order

lift_df = pd.DataFrame({
    'Product_A': product_ids[idx_a],
    'Product_B': product_ids[idx_b],
    'Lift': confidence_A_B / support_B,
})

# Pair co-occurrence counts keyed as "<larger id>#<smaller id>", kept for
# compatibility with the previous version of this cell.
product_pair_map = {
    str(max(a, b)) + '#' + str(min(a, b)): int(n_ab)
    for a, b, n_ab in zip(lift_df['Product_A'], lift_df['Product_B'], trans_product_A_B)
}

lift_df.head()

Unnamed: 0,Product_A,Product_B,Lift
0,AILEETI1811300,HBV000001K3DI,0.0
1,AILEETI1811300,HBV0000020P42,0.0
2,AILEETI1811300,HBV000002B6KD,0.0
3,AILEETI1811300,HBV000002O5M7,0.0
4,AILEETI1811300,HBV000002XD9R,0.0


In [114]:
# Attach human-readable product names to both sides of every pair.
# A single productid -> name lookup is built up front instead of scanning `meta`
# once per row; keeping the first metadata row per product matches the previous
# "first match wins" behaviour.
name_by_product = meta.drop_duplicates(subset='productid').set_index('productid')['name']

# A product that is missing from `meta` now gets NaN instead of raising IndexError.
product_A_name = lift_df.Product_A.map(name_by_product)
product_B_name = lift_df.Product_B.map(name_by_product)
lift_df['product_A_name'] = product_A_name
lift_df['product_B_name'] = product_B_name

# Strongest associations first.
lift_df_sorted = lift_df.sort_values(by=['Lift'], ascending=False)
lift_df_sorted.head(200)

Unnamed: 0,Product_A,Product_B,Lift,product_A_name,product_B_name
484953,HBV00000QU3WO,HBV00000QU3YT,907.520000,Damla Su 0.5 lt,Erikli Su 1 lt
80028,HBV00000NE0SG,HBV00000NE0SI,605.013333,Pınar Kido Muzlu Süt 180 ml,Pınar Kido Çilekli Süt 180 ml
488350,HBV00000QU3ZX,ZYHPDANONSSU009,583.882353,"Hayat Su 0,5 Lt",Hayat 1 Lt Su Pet Sise
485107,HBV00000QU3WO,ZYBICN9286940,533.835294,Damla Su 0.5 lt,Nestle Su 1.5 Lt
485106,HBV00000QU3WO,ZYBICN9286939,533.835294,Damla Su 0.5 lt,Nestle Su 0.5 Lt
...,...,...,...,...,...
242046,HBV00000NFI14,HBV00000NG8S5,114.585859,Sırma C Vitaminli Elma 200 ml,Frutti Karpuz-Çilek Aromalı 200 ml
161207,HBV00000NE1VR,ZYHPULKERKGF024,113.581977,Ülker Probis Atıştırmalık Sandviç Bisküvi 75 gr,Ülker Albeni Atıştırmalık 72 gr
492694,HBV00000QU4CM,ZYBICN9286256,113.440000,Ülker Coco Star Bar 25 Gr,Snickers 50 Gr
195404,HBV00000NFGWV,ZYBICN9286256,113.440000,Ülker Metro Çikolata Kaplamalı Bar 36 gr,Snickers 50 Gr


In [115]:
# Ten products most strongly associated with one target product.
# lift_df_sorted holds each unordered pair only once, so the target may sit in
# either Product_A or Product_B. Both sides are searched and the name of the
# *other* product in the pair is returned (filtering on Product_A alone missed
# every partner stored with the target on the Product_B side).
target_product = 'HBV00000NVZ9S'
target_is_A = lift_df_sorted.Product_A == target_product
target_is_B = lift_df_sorted.Product_B == target_product

# Boolean selection keeps the Lift-descending row order of lift_df_sorted.
target_pairs = lift_df_sorted.loc[target_is_A | target_is_B]
partner_names = target_pairs.product_B_name.where(
    target_pairs.Product_A == target_product,
    target_pairs.product_A_name,
)
partner_names.rename('partner_name').head(10)

291684        Malta Eriği Paket 500 gr
291521                     Kivi 250 gr
291217            Elma Starking 500 gr
291220               Siyah Üzüm 500 gr
291301                    Avokado Adet
291218        Granny Smith Elma 500 gr
291604                 Hayat Su 0,5 Lt
291219              Golden Elma 500 gr
291271    Sultan Mantar Izgaralık 1 kg
291716              Kiraz Paket 500 gr
Name: product_B_name, dtype: object

In [116]:
# Persist the ranked product pairs as a ';'-separated text file.
output_path = 'C:\\Users\\User\\Desktop\\hepsiburada\\data\\product_90pct.txt'
lift_df_sorted.to_csv(output_path, sep=';')