Introduction to Market Basket Analysis in Python

http://pbpython.com/market-basket-analysis.html

In [None]:
!pip install mlxtend

In [None]:
import mlxtend

# Display the installed mlxtend version so the run can be reproduced later.
mlxtend.__version__

In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [None]:
# Display the pandas version used for this run.
pd.__version__

In [None]:
# Read the transactions CSV straight from GitHub and preview the first rows.
bb_df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/manaranjanp/IIMBClasses/main/recsys/BB_2014.csv",
)
bb_df.head()

In [None]:
# Number of distinct values in the Order column (a missing value counts as one).
bb_df["Order"].nunique(dropna=False)

In [None]:
# Number of distinct values in the SKU column (a missing value counts as one).
bb_df["SKU"].nunique(dropna=False)

In [None]:
# SKU -> Description lookup; when a SKU appears on several rows the last row wins.
sku_desc_dict = {
    sku: description
    for sku, description in zip(bb_df["SKU"], bb_df["Description"])
}

In [None]:
# Number of SKUs in the lookup: one entry per distinct SKU value.
len(sku_desc_dict)

In [None]:
# The description now lives in sku_desc_dict, so drop it from the frame.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps the
# cell idempotent: re-running it no longer raises KeyError for the missing column.
bb_df = bb_df.drop(columns="Description", errors="ignore")

In [None]:
# Constant 1 on every row; it becomes the cell value when the frame is pivoted.
bb_df = bb_df.assign(Quantity=1)

In [None]:
# Preview the frame after dropping Description and adding Quantity.
bb_df.head()

In [None]:
# Column dtypes, non-null counts and memory footprint of the transaction frame.
bb_df.info()

In [None]:
# One row per Order, one column per SKU; a cell holds Quantity where the order
# contains that SKU and NaN otherwise. The Order labels are then discarded in
# favour of a plain positional index.
bb_pivot_df = (
    bb_df
    .pivot(index="Order", columns="SKU", values="Quantity")
    .reset_index(drop=True)
)

In [None]:
# Preview the first five rows of the pivoted order x SKU matrix.
bb_pivot_df.head(5)

In [None]:
# A missing cell means the SKU is absent from the order. Convert the matrix to
# booleans: mlxtend's apriori expects one-hot input and emits a deprecation
# warning (and runs slower) on non-bool DataFrames. Rebinding instead of
# inplace=True keeps the cell safe to re-run.
bb_pivot_df = bb_pivot_df.fillna(0).astype(bool)

In [None]:
# Preview the first five rows of the order x SKU matrix again.
bb_pivot_df.head(5)

In [None]:
# Dtypes and memory footprint of the order x SKU matrix.
bb_pivot_df.info()

In [None]:
# (rows, columns) of the matrix: one row per order, one column per SKU.
bb_pivot_df.shape

In [None]:
# Mine itemsets whose support (share of rows containing the itemset) is at
# least 0.003; use_colnames=True reports itemsets by column label (the SKU)
# rather than by column position.
frequent_itemsets = apriori(
    bb_pivot_df,
    use_colnames=True,
    min_support=0.003,
)

In [None]:
# Show the last ten rows of the frequent-itemset table.
frequent_itemsets.tail(10)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Histogram of itemset support. Uses the explicit Axes interface and labels the
# figure so it can be read on its own; the trailing `;` hides the return repr.
fig, ax = plt.subplots()
ax.hist(frequent_itemsets["support"], bins=20)
ax.set(
    title="Distribution of frequent-itemset support",
    xlabel="Support",
    ylabel="Number of itemsets",
);

In [None]:
# Derive association rules from the frequent itemsets, keeping those whose
# lift is at least 1, then show the first 100.
rules = association_rules(
    frequent_itemsets,
    min_threshold=1,
    metric="lift",
)
rules.head(100)

In [None]:
# Column dtypes and non-null counts of the rules table.
rules.info()

In [None]:
def get_sku_description(skus, descriptions=None):
    """Return the comma-separated descriptions for a collection of SKUs.

    Parameters
    ----------
    skus : iterable
        SKU identifiers, e.g. the frozenset held in a rule's ``antecedents``
        or ``consequents`` cell.
    descriptions : mapping, optional
        SKU -> description lookup. When omitted, the notebook-level
        ``sku_desc_dict`` is used, which matches the original behaviour.

    Returns
    -------
    str
        The descriptions joined with "," in the iteration order of ``skus``;
        an empty string when ``skus`` is empty.

    Raises
    ------
    KeyError
        If a SKU is not present in the lookup.
    """
    # Resolve the global only when no explicit lookup is supplied, so the
    # function can be used (and tested) independently of notebook state.
    lookup = sku_desc_dict if descriptions is None else descriptions
    return ",".join(lookup[sku] for sku in skus)

In [None]:
# Inspect the antecedent itemset of the rule labelled 0.
rules.loc[0, "antecedents"]

In [None]:
# Describe every SKU in the antecedent itemset. Taking list(rec)[0] described
# only one arbitrary SKU and silently dropped the rest of a multi-item itemset.
rules['antecedents_desc'] = rules.antecedents.map(get_sku_description)

In [None]:
# Describe every SKU in the consequent itemset. Taking list(rec)[0] described
# only one arbitrary SKU and silently dropped the rest of a multi-item itemset.
rules['consequents_desc'] = rules.consequents.map(get_sku_description)

In [None]:
# Rank the rules from highest to lowest confidence.
rules.sort_values(by="confidence", ascending=False)