In [1]:
!pip install researchpy



In [6]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.21.0-py2.py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 227 kB/s eta 0:00:01
[?25hCollecting scikit-learn>=1.0.2
  Downloading scikit_learn-1.1.3-cp39-cp39-macosx_10_9_x86_64.whl (8.7 MB)
[K     |████████████████████████████████| 8.7 MB 789 kB/s eta 0:00:01     |█████████████████████████▌      | 7.0 MB 1.5 MB/s eta 0:00:02
Installing collected packages: scikit-learn, mlxtend
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 0.24.2
    Uninstalling scikit-learn-0.24.2:
      Successfully uninstalled scikit-learn-0.24.2
Successfully installed mlxtend-0.21.0 scikit-learn-1.1.3


In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import missingno as msno
from datetime import date
import researchpy as rp
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 500)
#pd.set_option('display.float_format', lambda x: '%.4f' % x)

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
df=pd.read_csv('/Users/serhandulger/armut_data.csv')

In [3]:
df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate
0,25446,4,5,2017-08-06 16:11:00
1,22948,48,5,2017-08-06 16:12:00
2,10618,0,8,2017-08-06 16:13:00
3,7256,9,4,2017-08-06 16:14:00
4,25446,48,5,2017-08-06 16:16:00


In [4]:
df.shape

(162523, 4)

In [5]:
def check_df(dataframe, head=3):
    print("##################### Shape #####################")
    print(dataframe.shape)
    print("##################### Types #####################")
    print(dataframe.dtypes)
    print("##################### Head #####################")
    print(dataframe.head(head))
    print("##################### Tail #####################")
    print(dataframe.tail(head))
    print("##################### NA #####################")
    print(dataframe.isnull().sum())
    print("##################### NA SUM #####################")
    print(dataframe.isnull().sum().sum())
    print("##################### Describe #####################")
    print(dataframe.describe())
    print("##################### Nunique #####################")
    print(dataframe.nunique())

In [6]:
check_df(df)

##################### Shape #####################
(162523, 4)
##################### Types #####################
UserId         int64
ServiceId      int64
CategoryId     int64
CreateDate    object
dtype: object
##################### Head #####################
   UserId  ServiceId  CategoryId           CreateDate
0   25446          4           5  2017-08-06 16:11:00
1   22948         48           5  2017-08-06 16:12:00
2   10618          0           8  2017-08-06 16:13:00
##################### Tail #####################
        UserId  ServiceId  CategoryId           CreateDate
162520   10591         31           6  2018-08-06 14:47:00
162521   12666         38           4  2018-08-06 16:01:00
162522   17497         47           7  2018-08-06 16:04:00
##################### NA #####################
UserId        0
ServiceId     0
CategoryId    0
CreateDate    0
dtype: int64
##################### NA SUM #####################
0
##################### Describe #####################
          

In [7]:
import datetime as dt
df["CreateDate"] = pd.to_datetime(df["CreateDate"]).dt.normalize()

In [11]:
# Creating NEW Variables

df["Service"] = [str(row[1]) + '_' + str(row[2]) for row in df.values]
df["Basket"] = df["UserId"].astype(str) + '_' + df["CreateDate"].astype(str).str[:7]

In [12]:
df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Service,Basket
0,25446,4,5,2017-08-06,4_5,25446_2017-08
1,22948,48,5,2017-08-06,48_5,22948_2017-08
2,10618,0,8,2017-08-06,0_8,10618_2017-08
3,7256,9,4,2017-08-06,9_4,7256_2017-08
4,25446,48,5,2017-08-06,48_5,25446_2017-08


In [22]:
basket = (df.groupby(["Basket", "Service"])["ServiceId"]
              .count().unstack().fillna(0)
              .applymap(lambda x: 1 if x > 0 else 0))

In [24]:
basket.head()

Service,0_8,10_9,11_11,12_7,13_11,14_7,15_1,16_8,17_5,18_4,19_6,1_4,20_5,21_5,22_0,23_10,24_10,25_0,26_7,27_7,28_4,29_0,2_0,30_2,31_6,32_4,33_4,34_6,35_11,36_1,37_0,38_4,39_10,3_5,40_8,41_3,42_1,43_2,44_0,45_6,46_4,47_7,48_5,49_1,4_5,5_11,6_7,7_3,8_5,9_4
Basket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
0_2017-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0
0_2017-09,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0
0_2018-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
0_2018-04,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
10000_2017-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [25]:
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)



In [32]:
frequent_itemsets.sort_values(by='support',ascending=False).head(10)

Unnamed: 0,support,itemsets
8,0.238121,(18_4)
19,0.130286,(2_0)
5,0.120963,(15_1)
39,0.067762,(49_1)
28,0.066568,(38_4)
3,0.056627,(13_11)
12,0.047515,(22_0)
9,0.045563,(19_6)
15,0.042895,(25_0)
7,0.041533,(17_5)


In [33]:
rules = association_rules(frequent_itemsets, metric="support", min_threshold=0.01)

In [35]:
rules.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2_0),(13_11),0.130286,0.056627,0.012819,0.098394,1.737574,0.005442,1.046325
1,(13_11),(2_0),0.056627,0.130286,0.012819,0.226382,1.737574,0.005442,1.124216
2,(2_0),(15_1),0.130286,0.120963,0.033951,0.260588,2.154278,0.018191,1.188833
3,(15_1),(2_0),0.120963,0.130286,0.033951,0.280673,2.154278,0.018191,1.209066
4,(15_1),(33_4),0.120963,0.02731,0.011233,0.092861,3.400299,0.007929,1.072262
5,(33_4),(15_1),0.02731,0.120963,0.011233,0.411311,3.400299,0.007929,1.493211
6,(38_4),(15_1),0.066568,0.120963,0.011177,0.167897,1.388001,0.003124,1.056404
7,(15_1),(38_4),0.120963,0.066568,0.011177,0.092397,1.388001,0.003124,1.028458
8,(15_1),(49_1),0.120963,0.067762,0.010011,0.082763,1.221375,0.001815,1.016354
9,(49_1),(15_1),0.067762,0.120963,0.010011,0.147741,1.221375,0.001815,1.03142


In [36]:
sorted_rules = rules.sort_values("lift", ascending=False)

In [39]:
sorted_rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
10,(22_0),(25_0),0.047515,0.042895,0.01112,0.234043,5.456141,0.009082,1.249553
11,(25_0),(22_0),0.042895,0.047515,0.01112,0.259247,5.456141,0.009082,1.285834
19,(38_4),(9_4),0.066568,0.041393,0.010067,0.151234,3.653623,0.007312,1.129413
18,(9_4),(38_4),0.041393,0.066568,0.010067,0.243216,3.653623,0.007312,1.233418
4,(15_1),(33_4),0.120963,0.02731,0.011233,0.092861,3.400299,0.007929,1.072262


In [47]:
def arl_recommender(rules_df, product_id, rec_count=1):
    sorted_rules = rules_df.sort_values("lift", ascending=False)
    recommendation_list = []
    
    for i, product in sorted_rules["antecedents"].items():
        for j in list(product):  #antecedent i yani urunu secti
            if j == product_id:
                recommendation_list.append(list(sorted_rules.iloc[i]["consequents"]))
                
    recommendation_list = list({item for item_list in recommendation_list for item in item_list})
    
    return recommendation_list[:rec_count]

In [52]:
arl_recommender(rules, "2_0", 3)

['9_4', '38_4', '25_0']

In [53]:
arl_recommender(rules, "2_0", 4)

['9_4', '38_4', '25_0', '2_0']