In [2]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.23.4-py3-none-any.whl.metadata (7.3 kB)
Collecting scikit-learn>=1.3.1 (from mlxtend)
  Downloading scikit_learn-1.6.1-cp311-cp311-win_amd64.whl.metadata (15 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn>=1.3.1->mlxtend)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading mlxtend-0.23.4-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
    --------------------------------------- 0.0/1.4 MB ? eta -:--:--
   - -------------------------------------- 0.0/1.4 MB 653.6 kB/s eta 0:00:03
   -- ------------------------------------- 0.1/1.4 MB 762.6 kB/s eta 0:00:02
   --- ------------------------------------ 0.1/1.4 MB 722.1 kB/s eta 0:00:02
   ------ --------------------------------- 0.2/1.4 MB 1.1 MB/s eta 0:00:02
   -------- ------------------------------- 0.3/1.4 MB 1.1 MB/s eta 0:00:01
   ----------

In [None]:
import numpy as np 
import pandas as pd 
from mlxtend.frequent_patterns import apriori, association_rules 

In [None]:
# Read the Excel workbook into a DataFrame. The path is relative, so the file
# must sit in the kernel's working directory.
cust_data = pd.read_excel('Customer_Shopping_data.xlsx') 
# Preview the first five rows to check the columns that were loaded.
cust_data.head() 


In [None]:
# Clean the raw transactions in one non-mutating chain:
#   1. strip leading/trailing whitespace from the product descriptions,
#   2. discard rows that have no invoice number,
#   3. store the invoice number as a string.
# The null rows are dropped before the string cast because the cast would
# otherwise turn a missing invoice number into ordinary text.
cust_data = (
    cust_data
    .assign(Description=lambda frame: frame['Description'].str.strip())
    .dropna(axis=0, subset=['InvoiceNo'])
    .assign(InvoiceNo=lambda frame: frame['InvoiceNo'].astype('str'))
)


In [None]:
# Render the current contents of cust_data for a visual check.
cust_data

In [None]:
# Count the rows recorded for each country (value_counts lists the most frequent first).
cust_data['Country'].value_counts()

In [None]:
# Basket matrix for the United Kingdom: one row per invoice, one column per
# product description, each cell holding the summed quantity of that product
# on that invoice (0 where the product does not appear).
cust_data_UK = (
    cust_data.loc[cust_data['Country'] == "United Kingdom"]
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    # Pivot the descriptions into columns; InvoiceNo stays as the row index.
    .unstack('Description')
    # Combinations that never occurred come out of the pivot as NaN.
    .fillna(0)
)

In [None]:
# Render the invoice-by-description quantity table for a visual check.
cust_data_UK

In [None]:
def one_hot_encoding(x):
    """Binarise a quantity: 1 if it is strictly positive, otherwise 0.

    Parameters
    ----------
    x : int or float
        A single numeric cell value (in this notebook, the summed quantity
        of one product on one invoice).

    Returns
    -------
    int
        1 when ``x > 0``; 0 for zero, negative values and NaN.

    Notes
    -----
    The result is always 0 or 1. A pair of ``x <= 0`` / ``x >= 1`` checks
    would return ``None`` for values strictly between 0 and 1 and for NaN.
    """
    # NaN compares False against everything, so it falls into the 0 branch.
    return 1 if x > 0 else 0

In [None]:
# Binarise every cell of the basket matrix, then cast to bool. mlxtend's
# apriori accepts 0/1 integers but emits a DeprecationWarning and takes a
# slower path for non-boolean frames; the mined itemsets are the same.
cust_data_UK_encoded = cust_data_UK.map(one_hot_encoding).astype(bool)

In [None]:
# Render the encoded basket matrix for a visual check.
cust_data_UK_encoded

In [None]:
# Mine the frequent itemsets: keep every itemset whose support in the encoded
# basket matrix is at least 0.01, labelled with column names rather than
# column positions.
frq_items = apriori(cust_data_UK_encoded, min_support=0.01, use_colnames=True)

# Derive the association rules whose lift is at least 1, ordered by confidence
# and then by lift, both from highest to lowest.
rules = association_rules(frq_items, metric="lift", min_threshold=1).sort_values(
    by=['confidence', 'lift'], ascending=False
)

In [None]:
# Show the first five rows of the rules table.
rules.head()

In [None]:
# Experiment with different min_support values.
# Each trial uses its own names so that the notebook-level `frq_items` and
# `rules` are not silently replaced by the last trial's unsorted output.
for min_sup in [0.01, 0.02, 0.05]:
    trial_itemsets = apriori(cust_data_UK_encoded, min_support=min_sup, use_colnames=True)
    # association_rules raises ValueError when given an empty itemset frame,
    # so a support level that no itemset reaches is reported as zero rules.
    if trial_itemsets.empty:
        n_rules = 0
    else:
        n_rules = len(association_rules(trial_itemsets, metric="lift", min_threshold=1))
    print(f"Number of rules with min_support {min_sup}: {n_rules}")