In [1]:
import numpy as np
import pandas as pd

### Import dataset

In [2]:
# Read the groceries purchase log from disk and preview its first five rows.
df_raw = pd.read_csv('../Data/Groceries_dataset.csv')
df_raw.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


### Pre-processing

In [3]:
# Keep the frame loaded from disk untouched; later cells work on this copy.
df = df_raw.copy()

# Sanity checks on the raw data: dimensions, then missing values per column.
print(df_raw.shape)
print(df_raw.isna().sum())

(38765, 3)
Member_number      0
Date               0
itemDescription    0
dtype: int64


#### Encode item description column

In [4]:
# One indicator column per distinct value of itemDescription.
# NOTE(review): df_encoded is not referenced by any later cell visible here — confirm it is still needed.
df_encoded = pd.get_dummies(df.loc[:, 'itemDescription'])

### Setup

In [5]:
# Wildcard import of pycaret's association-rules module into the notebook namespace.
# NOTE(review): the visible cells only call `setup` and `create_model` — consider importing them by name.
from pycaret.arules import *
# Configure the experiment: Member_number identifies a transaction and
# itemDescription supplies the item labels.
# NOTE(review): Date is not part of the transaction key, so every purchase of a
# member is pooled into a single basket — confirm this is intended.
exp_ar = setup(df, transaction_id='Member_number', item_id='itemDescription')

Description,Value
session_id,7712.0
# Transactions,3898.0
# Items,167.0
Ignore Items,


### Create model and get rules

In [6]:
# Build the rules table for the experiment configured by setup() above.
# No arguments are passed, so the library's default settings apply.
model1 = create_model()



In [7]:
# (number of rules, number of columns) in the rules table.
model1.shape

(30, 9)

In [8]:
# Preview the first ten rules rather than rendering the whole rules table,
# matching how the second model's rules are displayed.
model1.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(other vegetables, bottled water)",(whole milk),0.0939,0.4582,0.0562,0.5984,1.3059,0.0132,1.349
1,"(other vegetables, yogurt)",(whole milk),0.1203,0.4582,0.0718,0.597,1.303,0.0167,1.3445
2,"(yogurt, rolls/buns)",(whole milk),0.1113,0.4582,0.0659,0.5922,1.2924,0.0149,1.3285
3,"(other vegetables, rolls/buns)",(whole milk),0.1467,0.4582,0.0821,0.5594,1.221,0.0149,1.2298
4,"(soda, yogurt)",(whole milk),0.0975,0.4582,0.0544,0.5579,1.2176,0.0097,1.2255
5,"(other vegetables, soda)",(whole milk),0.1242,0.4582,0.0693,0.5579,1.2175,0.0124,1.2254
6,"(tropical fruit, other vegetables)",(whole milk),0.0913,0.4582,0.0505,0.5534,1.2077,0.0087,1.2131
7,"(soda, rolls/buns)",(whole milk),0.1198,0.4582,0.0652,0.5439,1.1871,0.0103,1.1879
8,(shopping bags),(whole milk),0.1683,0.4582,0.0913,0.5427,1.1844,0.0142,1.1848
9,"(other vegetables, sausage)",(whole milk),0.0929,0.4582,0.0503,0.5414,1.1817,0.0077,1.1816


### Import second dataset

In [10]:
# Load the second dataset (one boolean column per item) and discard the
# 'Unnamed: 0' column — presumably an index column left over from an earlier export.
df_raw = pd.read_csv('./data/basket_analysis.csv').drop(columns='Unnamed: 0')
df_raw.head()

Unnamed: 0,Apple,Bread,Butter,Cheese,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Sugar,Unicorn,Yogurt,chocolate
0,False,True,False,False,True,True,False,True,False,False,False,False,True,False,True,True
1,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
2,True,False,True,False,False,True,False,True,False,True,False,False,False,False,True,True
3,False,False,True,True,False,True,False,False,False,True,True,True,False,False,False,False
4,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False


### Combine the one-hot encoded columns into per-basket item lists

In [11]:

def item(row):
    """Return the labels of a one-hot encoded row whose flag is set.

    Parameters
    ----------
    row : pandas.Series
        One row of the basket table, indexed by item name, as passed by
        ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    list
        Labels, in row order, whose value equals ``True``.
    """
    # Walk the row's own labels instead of the notebook-global ``df_raw.columns``,
    # so the function works on any row and does not depend on hidden state.
    # The explicit ``== True`` is deliberate: plain truthiness would also accept
    # non-flag cells (e.g. a non-empty list) as "present".
    return [label for label, flag in row.items() if flag == True]  # noqa: E712

# Collapse every one-hot row into the list of item names it contains and
# store that list in a new 'items' column.
df_raw['items'] = df_raw.apply(item, axis=1)

# Keep only the basket lists; the row index stays as the basket identifier.
df_new = df_raw.loc[:, ['items']]
df_new.head()

Unnamed: 0,items
0,"[Bread, Corn, Dill, Ice cream, Sugar, Yogurt, ..."
1,[Milk]
2,"[Apple, Butter, Dill, Ice cream, Milk, Yogurt,..."
3,"[Butter, Cheese, Dill, Milk, Nutmeg, Onion]"
4,"[Apple, Bread]"


In [12]:
# Long format: one row per (basket, item) pair; the original row index is repeated.
df_exploded = df_new['items'].explode()

# Turn the basket index into an explicit 'Id' column.
df = (
    df_exploded
    .to_frame()
    .reset_index()
    .rename(columns={'index': 'Id'})
)
df.head()

Unnamed: 0,Id,items
0,0,Bread
1,0,Corn
2,0,Dill
3,0,Ice cream
4,0,Sugar


### Setup 2

In [13]:
# Same wildcard import as the first experiment.
# NOTE(review): likely redundant on a top-to-bottom run, and only `setup` /
# `create_model` are used in the visible cells.
from pycaret.arules import *
# Configure the second experiment on the long-format frame: 'Id' identifies a
# basket and 'items' holds the item label.
exp_ar = setup(df, transaction_id='Id', item_id='items')

Description,Value
session_id,635.0
# Transactions,999.0
# Items,16.0
Ignore Items,


### Create model and show rules

In [14]:
# Build the rules table for the second experiment; no arguments are passed,
# so the library's default settings apply.
model2 = create_model()

In [15]:
# (number of rules, number of columns) in the second rules table.
model2.shape

(1494, 9)

In [16]:
# Preview the first ten rules only; the full table is much larger (see the shape above).
model2.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(Dill, Milk, Unicorn)",(chocolate),0.0911,0.4214,0.0621,0.6813,1.6167,0.0237,1.8155
1,"(Sugar, Cheese, Unicorn)",(Kidney Beans),0.0811,0.4084,0.0541,0.6667,1.6324,0.0209,1.7748
2,"(Cheese, Ice cream, Yogurt)",(Kidney Beans),0.0871,0.4084,0.0571,0.6552,1.6042,0.0215,1.7156
3,"(Cheese, Dill, Milk)",(chocolate),0.0841,0.4214,0.0551,0.6548,1.5537,0.0196,1.6759
4,"(Apple, Corn, Onion)",(Sugar),0.0831,0.4094,0.0541,0.6506,1.5891,0.02,1.6903
5,"(Nutmeg, Milk, Corn)",(Kidney Beans),0.0851,0.4084,0.0551,0.6471,1.5843,0.0203,1.6762
6,"(Dill, Kidney Beans, Onion)",(Cheese),0.0851,0.4044,0.0551,0.6471,1.6,0.0206,1.6875
7,"(Cheese, Dill, Unicorn)",(chocolate),0.0821,0.4214,0.0531,0.6463,1.5337,0.0185,1.636
8,"(Butter, Dill, Unicorn)",(chocolate),0.0791,0.4214,0.0511,0.6456,1.5319,0.0177,1.6324
9,"(Dill, Unicorn, Onion)",(chocolate),0.0931,0.4214,0.0601,0.6452,1.5309,0.0208,1.6305
