In [1]:
import os
import random

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

In [None]:
# Seed Python's built-in random generator so the random draws made in later
# cells are repeatable from one run to the next.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

In [2]:
# Directory that holds the input CSV files.
# Set the DATA_SOURCE environment variable to point the notebook at another
# location; when it is not set, the original local path is used unchanged.
# os.path.join(..., "") guarantees a trailing path separator, because the
# loading cell builds file paths by plain string concatenation
# (DATA_SOURCE + "orders.csv").
DATA_SOURCE = os.path.join(
    os.environ.get(
        "DATA_SOURCE",
        "/Users/mac/Desktop/DSTI_Project/Machine_Learning/Project 2/archive/",
    ),
    "",
)

In [3]:
# Read the four input tables from DATA_SOURCE. DATA_SOURCE is used as a plain
# string prefix, so it must already end with a path separator.
orders = pd.read_csv(f"{DATA_SOURCE}orders.csv")
order_products_train = pd.read_csv(f"{DATA_SOURCE}order_products__train.csv")
order_products_prior = pd.read_csv(f"{DATA_SOURCE}order_products__prior.csv")
products = pd.read_csv(f"{DATA_SOURCE}products.csv")

In [4]:
# Give every product a synthetic quantity: one independent draw from 1..30
# (both ends inclusive) per row, taken from the stdlib `random` generator.
# NOTE(review): the column name is spelled "quanity" (sic). It is kept as-is
# because renaming it would change the key that other cells may look up.
n_products = products.shape[0]
products["quanity"] = [random.randint(1, 30) for _row in range(n_products)]

In [6]:
# Stack the "train" and "prior" order lines into a single table.
# Both inputs come straight from read_csv and therefore each carry their own
# default index; without ignore_index=True the combined frame would contain
# every low index label twice, which makes label-based lookups ambiguous.
order_products = pd.concat(
    [order_products_train, order_products_prior],
    ignore_index=True,
)

In [7]:
# Attach the product name to every order line.
# Only the join key and product_name are taken from `products`: the next step
# keeps just order_id and product_name, so merging the remaining product
# columns onto the full order-line table would only cost memory.
# how="left" keeps order lines whose product_id has no match (name is NaN).
# NOTE(review): assumes product_name is a column of `products` — confirm.
data = order_products.merge(
    products[["product_id", "product_name"]],
    on="product_id",
    how="left",
)

In [9]:
# Keep only the two columns the basket analysis needs: which order a line
# belongs to and which product it refers to.
data = data.loc[:, ["order_id", "product_name"]]

In [10]:
# Number of most frequently occurring products to keep for the analysis.
TOP_N_PRODUCTS: int = 10

In [11]:
# Count how often each product name occurs (value_counts sorts the counts in
# descending order) and keep the names of the first TOP_N_PRODUCTS entries.
product_frequency = data["product_name"].value_counts()
top_products = product_frequency.iloc[:TOP_N_PRODUCTS].index

In [12]:
# Drop every order line whose product is not one of the top products.
is_top_product = data["product_name"].isin(top_products)
data = data.loc[is_top_product]

In [13]:
# Preview the first ten remaining order lines.
data.head(n=10)

Unnamed: 0,order_id,product_name
5,1,Bag of Organic Bananas
6,1,Organic Hass Avocado
35,98,Bag of Organic Bananas
85,112,Organic Hass Avocado
94,170,Bag of Organic Bananas
104,170,Organic Strawberries
115,226,Banana
143,393,Strawberries
154,456,Large Lemon
156,473,Banana


In [14]:
# Build one transaction per order: the list of product names in that order.
# `basket` ends up as a plain Python list of lists.
products_per_order = data.groupby("order_id")["product_name"].apply(list)
basket = products_per_order.tolist()

In [16]:
# One-hot encode the transactions: one row per basket and one column per
# distinct product, as produced by the TransactionEncoder.
te = TransactionEncoder()
te.fit(basket)  # learns the set of distinct items (exposed as te.columns_)
te_ary = te.transform(basket)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

In [18]:
# Mine frequent itemsets from the one-hot table with FP-Growth.
# use_colnames=True reports itemsets by column (product) name, and max_len=3
# limits itemsets to at most three products.
frequent_itemsets = fpgrowth(
    df,
    min_support=0.00006,
    use_colnames=True,
    max_len=3,
)

In [19]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose lift reaches the 1.5 threshold.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.5)

In [20]:
# Order the rules from highest lift to lowest.
rules = rules.sort_values("lift", ascending=False)

In [21]:
# Show the ten highest-lift rules, restricted to the columns of interest.
rule_columns = ["antecedents", "consequents", "support", "confidence", "lift"]
rules.loc[:, rule_columns].head(10)

Unnamed: 0,antecedents,consequents,support,confidence,lift
73,(Large Lemon),"(Limes, Strawberries)",0.00156,0.015156,2.81969
70,"(Limes, Strawberries)",(Large Lemon),0.00156,0.290154,2.81969
41,"(Limes, Organic Avocado)",(Large Lemon),0.004214,0.285677,2.776188
44,(Large Lemon),"(Limes, Organic Avocado)",0.004214,0.040947,2.776188
43,(Limes),"(Large Lemon, Organic Avocado)",0.004214,0.044893,2.722507
42,"(Large Lemon, Organic Avocado)",(Limes),0.004214,0.255531,2.722507
68,(Large Lemon),"(Banana, Limes)",0.005247,0.050985,2.406656
67,"(Banana, Limes)",(Large Lemon),0.005247,0.247651,2.406656
47,"(Organic Hass Avocado, Large Lemon)",(Limes),0.002259,0.224298,2.389744
48,(Limes),"(Organic Hass Avocado, Large Lemon)",0.002259,0.024069,2.389744
