In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

from google.colab import files
# Open Colab's file picker so the workbook can be uploaded into the runtime's working directory
uploaded = files.upload()

### Apply association rule mining to the Online Retail dataset

In [None]:
# Read the workbook without a header row, so the first line is kept as a
# transaction instead of being consumed as column names
workbook_name = r"Copy of Online retail.xlsx"
data = pd.read_excel(io=workbook_name, header=None)

# Preview the first few raw transactions
data.head()

Unnamed: 0,0
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."


In [None]:
# Summarize the frame: index range, column dtypes, and non-null counts
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7501 entries, 0 to 7500
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       7501 non-null   object
dtypes: object(1)
memory usage: 58.7+ KB


In [None]:
# Count missing values per column (isna is the same check as isnull)
data.isna().sum()

0    0
dtype: int64

In [None]:
# Summary statistics for the single object column: count, unique, top, freq
data.describe()

Unnamed: 0,0
count,7501
unique,5176
top,cookies
freq,223


In [None]:
# Replace the default integer column label (the file was read with header=None)
# with a descriptive name
data.columns = ['Transactions']

In [None]:
# Split each comma-separated transaction string into a clean list of items
def _split_items(raw):
    """Return the items of one transaction as a list of stripped names.

    Parameters
    ----------
    raw : str or list of str
        Either the raw comma-separated transaction string, or a list that
        was already produced by an earlier run of this cell.

    Returns
    -------
    list of str
        Item names with surrounding whitespace removed and empty
        fragments dropped.
    """
    # Accept an already-split list so re-running the cell does not raise
    # AttributeError ('list' object has no attribute 'split').
    parts = raw.split(',') if isinstance(raw, str) else raw
    # Strip whitespace so variants such as " asparagus" and "asparagus" are
    # encoded as one item rather than two separate columns.
    cleaned = (part.strip() for part in parts)
    # Drop empty fragments, e.g. those produced by a trailing comma.
    return [item for item in cleaned if item]


data['Transactions'] = data['Transactions'].apply(_split_items)

In [None]:
# One-hot encode the item lists: one row per transaction, one boolean
# column per distinct item, which is the input layout apriori expects
te = TransactionEncoder()
te_ary = te.fit_transform(data['Transactions'])
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7497,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7498,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7499,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Mine the itemsets whose support is at least 0.01 (1% of transactions),
# labelling them with item names rather than column indices
frequent_itemsets = apriori(
    df,
    min_support=0.01,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.020397,(almonds)
1,0.033329,(avocado)
2,0.010799,(barbecue sauce)
3,0.014265,(black tea)
4,0.011465,(body spray)
...,...,...
252,0.011065,"(milk, mineral water, ground beef)"
253,0.017064,"(ground beef, mineral water, spaghetti)"
254,0.015731,"(milk, mineral water, spaghetti)"
255,0.010265,"(mineral water, olive oil, spaghetti)"


In [None]:
# Derive association rules from the frequent itemsets, keeping those whose
# lift meets the threshold of 1
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
)

# Show the first few rules
rules_preview = rules.head()
print(rules_preview)

       antecedents      consequents  antecedent support  consequent support  \
0  (mineral water)        (avocado)            0.238368            0.033329   
1        (avocado)  (mineral water)            0.033329            0.238368   
2        (burgers)           (cake)            0.087188            0.081056   
3           (cake)        (burgers)            0.081056            0.087188   
4        (burgers)      (chocolate)            0.087188            0.163845   

    support  confidence      lift  leverage  conviction  zhangs_metric  
0  0.011598    0.048658  1.459926  0.003654    1.016113       0.413630  
1  0.011598    0.348000  1.459926  0.003654    1.168147       0.325896  
2  0.011465    0.131498  1.622319  0.004398    1.058080       0.420238  
3  0.011465    0.141447  1.622319  0.004398    1.063198       0.417434  
4  0.017064    0.195719  1.194537  0.002779    1.039630       0.178411  


In [None]:
# Show the five strongest rules under each ranking metric
ranking_sections = (
    ("Top 5 rules based on lift:", 'lift'),
    ("\nTop 5 rules based on confidence:", 'confidence'),
    ("\nTop 5 rules based on support:", 'support'),
)
for heading, metric in ranking_sections:
    print(heading)
    print(rules.nlargest(5, metric))

# Count how often each itemset occurs on the left-hand and right-hand side
# of a rule, keeping the most common ones
frequent_antecedents = rules['antecedents'].value_counts().head()
frequent_consequents = rules['consequents'].value_counts().head()

for heading, counts in (
    ("\nMost frequent antecedents:", frequent_antecedents),
    ("\nMost frequent consequents:", frequent_consequents),
):
    print(heading)
    print(counts)

# Spell out the first rule in the table as a worked example
example_rule = rules.iloc[0]
antecedents, consequents = (
    list(example_rule[side]) for side in ('antecedents', 'consequents')
)
confidence, lift = example_rule['confidence'], example_rule['lift']

print(f"\nExample Rule Interpretation:\nIf a customer buys {antecedents}, they are likely to buy {consequents} as well.")
print(f"Confidence: {confidence}, Lift: {lift}")

Top 5 rules based on lift:
                    antecedents                 consequents  \
214             (herb & pepper)               (ground beef)   
215               (ground beef)             (herb & pepper)   
384  (mineral water, spaghetti)               (ground beef)   
385               (ground beef)  (mineral water, spaghetti)   
395  (mineral water, spaghetti)                 (olive oil)   

     antecedent support  consequent support   support  confidence      lift  \
214            0.049460            0.098254  0.015998    0.323450  3.291994   
215            0.098254            0.049460  0.015998    0.162822  3.291994   
384            0.059725            0.098254  0.017064    0.285714  2.907928   
385            0.098254            0.059725  0.017064    0.173677  2.907928   
395            0.059725            0.065858  0.010265    0.171875  2.609786   

     leverage  conviction  zhangs_metric  
214  0.011138    1.332860       0.732460  
215  0.011138    1.135410       0