<a href="https://www.kaggle.com/code/osmanacar/armut-association-rule-based-learning?scriptVersionId=187685848" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

**Business Problem**

Armut is one of the largest service platforms in Türkiye.

It connects with people through its website or mobile app for services such as cleaning, renovation and shipping.

The company wants to create a recommender system based on Association Rule Learning for users who purchase services.



**Dataset Information**

UserId: Customer number

ServiceId: Anonymous services for each category

CategoryId: Anonymous categories

CreateDate: The date service was purchased 



In [31]:
# import some library and display options

import pandas as pd

pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 10)
pd.set_option("display.width", 500)
# ensures the output is printed on a single line (wide frames are not wrapped)
pd.set_option("display.expand_frame_repr", False)
from mlxtend.frequent_patterns import apriori, association_rules

In [32]:
# read the data
# NOTE: this is an absolute Kaggle input path; outside an environment where the
# "armutcom" input is mounted under /kaggle/input the path must be adjusted.

df = pd.read_csv("/kaggle/input/armutcom/armut_data.csv")

In [33]:
# Data Information
def check_df(dataframe, head=5):
    """Print a short structural summary of a DataFrame.

    Three sections are written to stdout, each preceded by a banner line:
    the frame's shape, the dtype of every column, and the number of null
    values per column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        The frame to summarise.
    head : int, default 5
        Currently unused; it is accepted only so existing calls keep working.

    Returns
    -------
    None
    """
    # Each report is computed lazily, right after its banner is printed.
    reports = (
        ("shape", lambda: dataframe.shape),
        ("types", lambda: dataframe.dtypes),
        ("na", lambda: dataframe.isnull().sum()),
    )
    for label, compute in reports:
        print(f"################# {label} #################")
        print(compute())
    
check_df(df)

################# shape #################
(162523, 4)
################# types #################
UserId         int64
ServiceId      int64
CategoryId     int64
CreateDate    object
dtype: object
################# na #################
UserId        0
ServiceId     0
CategoryId    0
CreateDate    0
dtype: int64


In [34]:
df.head(10)

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate
0,25446,4,5,2017-08-06 16:11:00
1,22948,48,5,2017-08-06 16:12:00
2,10618,0,8,2017-08-06 16:13:00
3,7256,9,4,2017-08-06 16:14:00
4,25446,48,5,2017-08-06 16:16:00
5,14354,15,1,2017-08-06 16:27:00
6,14162,21,5,2017-08-06 16:28:00
7,21230,46,4,2017-08-06 16:34:00
8,25446,6,7,2017-08-06 16:39:00
9,10659,4,5,2017-08-06 16:44:00


In [35]:
df.tail(10)

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate
162513,6680,28,4,2018-08-06 13:58:00
162514,1532,32,4,2018-08-06 14:01:00
162515,13231,46,4,2018-08-06 14:35:00
162516,11014,15,1,2018-08-06 14:38:00
162517,10591,22,0,2018-08-06 14:38:00
162518,10591,25,0,2018-08-06 14:40:00
162519,10591,2,0,2018-08-06 14:43:00
162520,10591,31,6,2018-08-06 14:47:00
162521,12666,38,4,2018-08-06 16:01:00
162522,17497,47,7,2018-08-06 16:04:00


In [36]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
UserId,162523.0,13089.803862,7325.81606,0.0,6953.0,13139.0,19396.0,25744.0
ServiceId,162523.0,21.64114,13.774405,0.0,13.0,18.0,32.0,49.0
CategoryId,162523.0,4.325917,3.129292,0.0,1.0,4.0,6.0,11.0


In [37]:
# Build a unique service key of the form "<ServiceId>_<CategoryId>".
# Both ids are cast to str first so they can be concatenated.
df["ServiceId"] = df["ServiceId"].astype(str)
df["CategoryId"] = df["CategoryId"].astype(str)

# Vectorised string concatenation; produces the same values as joining the two
# columns row by row, without calling a Python function once per row.
df["Service"] = df["ServiceId"] + "_" + df["CategoryId"]

df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Service
0,25446,4,5,2017-08-06 16:11:00,4_5
1,22948,48,5,2017-08-06 16:12:00,48_5
2,10618,0,8,2017-08-06 16:13:00,0_8
3,7256,9,4,2017-08-06 16:14:00,9_4
4,25446,48,5,2017-08-06 16:16:00,48_5


In [38]:
# Create the new column NewDate holding the "YYYY-MM" part of CreateDate.
# UserId is cast to str here because it is concatenated into a string key later.
df["UserId"] = df["UserId"].astype(str)

# Split CreateDate on "-" once and reuse the pieces: element 0 is the year,
# element 1 is the month.
create_date_parts = df["CreateDate"].str.split("-")
df["NewDate"] = create_date_parts.str[0] + "-" + create_date_parts.str[1]

df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Service,NewDate
0,25446,4,5,2017-08-06 16:11:00,4_5,2017-08
1,22948,48,5,2017-08-06 16:12:00,48_5,2017-08
2,10618,0,8,2017-08-06 16:13:00,0_8,2017-08
3,7256,9,4,2017-08-06 16:14:00,9_4,2017-08
4,25446,48,5,2017-08-06 16:16:00,48_5,2017-08


In [39]:
# A basket is everything one user purchased within one calendar month, so the
# basket key is "<UserId>_<NewDate>". Both columns are already strings, so the
# key is built with vectorised concatenation instead of a per-row join.
df["BasketID"] = df["UserId"] + "_" + df["NewDate"]

df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Service,NewDate,BasketID
0,25446,4,5,2017-08-06 16:11:00,4_5,2017-08,25446_2017-08
1,22948,48,5,2017-08-06 16:12:00,48_5,2017-08,22948_2017-08
2,10618,0,8,2017-08-06 16:13:00,0_8,2017-08,10618_2017-08
3,7256,9,4,2017-08-06 16:14:00,9_4,2017-08,7256_2017-08
4,25446,48,5,2017-08-06 16:16:00,48_5,2017-08,25446_2017-08


**Association Rule Learning**

This method's main goal is to recommend another service after the customer has received a service. 

These recommendations must be related to and relevant to that transaction.

In [40]:
# Create the Basket x Service pivot table: one row per BasketID, one column per
# Service. Each cell is 1 if the basket contains that service at least once and
# 0 otherwise.

basket_service_pivot_table = (
    df.pivot_table(index="BasketID", columns="Service", values="ServiceId", aggfunc="count", fill_value=0)
    # Binarise the counts with a vectorised comparison instead of mapping a
    # Python lambda over every cell; the resulting 0/1 values are the same.
    .gt(0)
    .astype(int)
)

basket_service_pivot_table.head(10)

Service,0_8,10_9,11_11,12_7,13_11,14_7,15_1,16_8,17_5,18_4,19_6,1_4,20_5,21_5,22_0,23_10,24_10,25_0,26_7,27_7,28_4,29_0,2_0,30_2,31_6,32_4,33_4,34_6,35_11,36_1,37_0,38_4,39_10,3_5,40_8,41_3,42_1,43_2,44_0,45_6,46_4,47_7,48_5,49_1,4_5,5_11,6_7,7_3,8_5,9_4
BasketID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
0_2017-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0
0_2017-09,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0
0_2018-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
0_2018-04,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
10000_2017-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
10000_2017-12,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10000_2018-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10001_2017-09,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10001_2018-05,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
10001_2018-06,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [41]:
# The Apriori algorithm finds the itemsets (service combinations) that appear
# in at least `min_support` of all baskets.
# The 0/1 matrix is cast to bool for the call: mlxtend emits a deprecation
# warning for non-bool input; the itemsets found are the same.
basket_service_itemSets = apriori(basket_service_pivot_table.astype(bool), min_support=0.01, use_colnames=True)
# Derive association rules from the frequent itemsets, keeping the rules whose
# support is at least 0.01.
asc_rules = association_rules(basket_service_itemSets, metric="support", min_threshold=0.01)



In [42]:
asc_rules.sort_values("support", ascending=False).head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
2,(15_1),(2_0),0.120963,0.130286,0.033951,0.280673,2.154278,0.018191,1.209066,0.609539
3,(2_0),(15_1),0.130286,0.120963,0.033951,0.260588,2.154278,0.018191,1.188833,0.616073
12,(22_0),(2_0),0.047515,0.130286,0.016568,0.3487,2.676409,0.010378,1.33535,0.657611
13,(2_0),(22_0),0.130286,0.047515,0.016568,0.127169,2.676409,0.010378,1.09126,0.720197
14,(2_0),(25_0),0.130286,0.042895,0.013437,0.103136,2.404371,0.007849,1.067168,0.67159
15,(25_0),(2_0),0.042895,0.130286,0.013437,0.313257,2.404371,0.007849,1.266432,0.610268
0,(13_11),(2_0),0.056627,0.130286,0.012819,0.226382,1.737574,0.005442,1.124216,0.449965
1,(2_0),(13_11),0.130286,0.056627,0.012819,0.098394,1.737574,0.005442,1.046325,0.488074
4,(15_1),(33_4),0.120963,0.02731,0.011233,0.092861,3.400299,0.007929,1.072262,0.803047
5,(33_4),(15_1),0.02731,0.120963,0.011233,0.411311,3.400299,0.007929,1.493211,0.725728


In [43]:
# Create the recommender function: list the services to recommend for a given service

def arl_recommender(rules, service_id, rec_count=1):
    """Recommend services that are associated with ``service_id``.

    The rules are ranked by lift (highest first). Every rule whose antecedents
    contain ``service_id`` contributes its consequents to the result, so the
    strongest associations come first.

    Parameters
    ----------
    rules : pandas.DataFrame
        Association rules with at least the columns ``"antecedents"`` and
        ``"consequents"`` (iterable collections of items, e.g. frozensets)
        and ``"lift"``.
    service_id : str
        The service the recommendations are based on.
    rec_count : int, default 1
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Up to ``rec_count`` distinct recommended services, ordered by the lift
        of the rule that first produced them. Empty if no rule has
        ``service_id`` among its antecedents.
    """
    sorted_asc_rules = rules.sort_values("lift", ascending=False)

    recommendation_list = []
    already_recommended = set()
    # Walk antecedents and consequents together instead of looking each
    # matching row up again by position.
    for antecedents, consequents in zip(sorted_asc_rules["antecedents"],
                                        sorted_asc_rules["consequents"]):
        if service_id not in antecedents:
            continue
        # Sorted iteration keeps the result deterministic for multi-item
        # consequents (set iteration order is arbitrary) and keeps every item
        # of the consequent instead of only one of them.
        for candidate in sorted(consequents):
            # Several rules can point to the same service; recommend it once.
            if candidate not in already_recommended:
                already_recommended.add(candidate)
                recommendation_list.append(candidate)

    return recommendation_list[0:rec_count]

In [44]:
arl_recommender(asc_rules, "2_0", 1)

['22_0']

In [45]:
arl_recommender(asc_rules, "2_0", 3)

['22_0', '25_0', '15_1']