In [5]:
# Import libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori ,association_rules
import matplotlib.pyplot as plt
import seaborn as snsno

In [2]:
#  Load data from an Excel file named "Basket_Cosmetic.xlsx"
df_sale_product=pd.read_excel("Basket_Cosmetic.xlsx")

In [3]:
#Check the shape (number of rows and columns) of the dataset
df_sale_product.shape

(4792, 10)

In [4]:
# Get information about the dataset, including data types and non-null counts
df_sale_product.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4792 entries, 0 to 4791
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   OrderId                4792 non-null   int64  
 1   OrderQty               4792 non-null   int64  
 2   SaleAmount             4792 non-null   float64
 3   CountOrder             4792 non-null   int64  
 4   AttributeSetName       4792 non-null   object 
 5   AttributeSetId         4792 non-null   int64  
 6   CategoryName_Level2    4792 non-null   object 
 7   ProductMiddleCategory  4792 non-null   object 
 8   ProductName            4792 non-null   object 
 9   ProductCode            4792 non-null   int64  
dtypes: float64(1), int64(5), object(4)
memory usage: 374.5+ KB


In [6]:
# display the first two rows and the last row of the data to get an overview.
df_sale_product.head(2)
df_sale_product.tail(2)

Unnamed: 0,OrderId,OrderQty,SaleAmount,CountOrder,AttributeSetName,AttributeSetId,CategoryName_Level2,ProductMiddleCategory,ProductName,ProductCode
4790,365641,1,280000.0,1,آرايشي و بهداشتي,17,لوازم آرایشی,آرايشي و بهداشتي,highlighter,357021
4791,358833,1,168000.0,1,آرايشي و بهداشتي,17,لوازم آرایشی,آرايشي و بهداشتي,highlighter,357021


In [7]:
# Generate descriptive statistics for the dataset, including count, mean, min, max, etc.
df_sale_product.describe(include = 'all')

Unnamed: 0,OrderId,OrderQty,SaleAmount,CountOrder,AttributeSetName,AttributeSetId,CategoryName_Level2,ProductMiddleCategory,ProductName,ProductCode
count,4792.0,4792.0,4792.0,4792.0,4792,4792.0,4792,4792,4792,4792.0
unique,,,,,1,,6,3,32,
top,,,,,آرايشي و بهداشتي,,لوازم آرایشی,آرايشي و بهداشتي,perfume,
freq,,,,,4792,,1985,4699,750,
mean,366736.435309,1.137312,146812.3,1.000835,,17.0,,,,474233.797579
std,8837.134977,0.735771,210654.1,0.028883,,0.0,,,,91167.328871
min,352685.0,0.0,0.0,1.0,,17.0,,,,253867.0
25%,358701.0,1.0,44000.0,1.0,,17.0,,,,473203.0
50%,366208.0,1.0,89000.0,1.0,,17.0,,,,501795.0
75%,374290.0,1.0,178000.0,1.0,,17.0,,,,540831.0


In [8]:
# Insert a new column named 'quantity' with a default value of 1 at position 9
df_sale_product.insert(9, 'quantity',1)

In [10]:
# Group and pivot the data to create a basket of products by OrderId
product_basket = (df_sale_product.groupby(['OrderId', 'ProductName'])['quantity']).sum().unstack().reset_index().fillna(0).set_index('OrderId')

In [11]:
# Define a function to convert values to 0 if they are less than or equal to 0, and 1 if greater than or equal to 1
def convto0(x):
    if (x<=0):
        return 0
    if (x>=1):
        return 1
# Apply the conversion function to the product_basket dataset
Basket_sets = product_basket.applymap(convto0)
Basket_sets.head()

ProductName,Gel creame,Mascara,Rough skin,Sunscreen,Toner,Women's mom stick,body lotion,body shampoo,body splash,coclear stick,...,moisturizing lotion,nail polish,perfume,shampoo,skin cream,skin foam,skin primer,soap,soft cream,spray
OrderId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
352685,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,0,0,0
352697,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
352740,0,0,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
352743,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0
352761,1,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Use the Apriori algorithm to mine frequent itemsets from the basket dataset
frequent_itemsets = apriori(Basket_sets.astype('bool'), min_support = 0.0008, use_colnames = True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.058560,(Gel creame),1
1,0.059374,(Mascara),1
2,0.010167,(Rough skin),1
3,0.103294,(Sunscreen),1
4,0.017893,(Toner ),1
...,...,...,...
702,0.000813,"(eye shadow, nail polish, eye liner, perfume, ...",5
703,0.000813,"(micellar water, nail polish, eye liner, perfu...",5
704,0.000813,"(micellar water, eye shadow, nail polish, eye ...",5
705,0.000813,"(micellar water, eye shadow, nail polish, perf...",5


In [13]:
# Filter the itemsets where the length is greater than or equal to 1
lenght=frequent_itemsets['itemsets'].str.len()
Filter1=lenght>=1
frequent_itemsets[Filter1]

Unnamed: 0,support,itemsets,length
0,0.058560,(Gel creame),1
1,0.059374,(Mascara),1
2,0.010167,(Rough skin),1
3,0.103294,(Sunscreen),1
4,0.017893,(Toner ),1
...,...,...,...
702,0.000813,"(eye shadow, nail polish, eye liner, perfume, ...",5
703,0.000813,"(micellar water, nail polish, eye liner, perfu...",5
704,0.000813,"(micellar water, eye shadow, nail polish, eye ...",5
705,0.000813,"(micellar water, eye shadow, nail polish, perf...",5


In [14]:
# Generate association rules from the frequent itemsets based on support
association_rules_data = association_rules(frequent_itemsets, metric = 'support', min_threshold = 0.0008)
association_rules_data

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Gel creame),(Mascara),0.058560,0.059374,0.002033,0.034722,0.584808,-0.001444,0.974462
1,(Mascara),(Gel creame),0.059374,0.058560,0.002033,0.034247,0.584808,-0.001444,0.974824
2,(Sunscreen),(Gel creame),0.103294,0.058560,0.006507,0.062992,1.075678,0.000458,1.004730
3,(Gel creame),(Sunscreen),0.058560,0.103294,0.006507,0.111111,1.075678,0.000458,1.008794
4,(Gel creame),(Women's mom stick),0.058560,0.013827,0.001220,0.020833,1.506740,0.000410,1.007156
...,...,...,...,...,...,...,...,...,...
3365,(eye shadow),"(micellar water, nail polish, eye liner, perfu...",0.042294,0.000813,0.000813,0.019231,23.644231,0.000779,1.018779
3366,(nail polish),"(micellar water, eye shadow, eye liner, perfum...",0.071980,0.000813,0.000813,0.011299,13.892655,0.000755,1.010606
3367,(eye liner),"(micellar water, eye shadow, nail polish, perf...",0.028467,0.000813,0.000813,0.028571,35.128571,0.000790,1.028575
3368,(perfume),"(micellar water, eye shadow, nail polish, eye ...",0.228955,0.000813,0.000813,0.003552,4.367673,0.000627,1.002749


In [15]:
association_rules_data.dtypes

antecedents            object
consequents            object
antecedent support    float64
consequent support    float64
support               float64
confidence            float64
lift                  float64
leverage              float64
conviction            float64
dtype: object

In [16]:
# Export the rules to an Excel file named 'BasketAnalysisOutput.xlsx'
excell=association_rules_data.to_excel('BasketAnalysisOutput.xlsx')