In [1]:
# Colab-only setup: attach the user's Google Drive to the runtime so that the
# dataset stored there can be read through an ordinary filesystem path.
from google.colab import drive

# Directory inside the Colab VM under which Drive becomes visible.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Location of the sales workbook on the mounted Drive.
file_path = '/content/drive/MyDrive/Data Mining Project/sales short.xlsx'

# Load and clean in one pass:
#   1. read the workbook,
#   2. parse 'Date' as datetime (unparseable values become NaT),
#   3. discard every row that has a missing value in any column
#      (this also removes the rows whose date failed to parse).
data = (
    pd.read_excel(file_path)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'))
    .dropna()
)

# Pivot to basket format: one row per invoice, one column per item, each cell
# holding the summed quantity of that item on that invoice (0 when absent).
basket = (
    data.groupby(['Invoice Number', 'Item Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('Invoice Number')
)

# Encode the quantities: if the quantity is greater than 0, set it to 1, otherwise 0
def encode_units(x):
    """Binarise one basket cell into a presence flag.

    Parameters
    ----------
    x : int or float
        Quantity value of a single basket cell.

    Returns
    -------
    int
        1 when ``x`` is strictly greater than 0, otherwise 0.
    """
    # Strictly-positive test (not ``>= 1``) so that fractional quantities such
    # as 0.5 still count as "item present". Zero, negative values and NaN all
    # compare False here and therefore map to 0.
    return 1 if x > 0 else 0

# Binarise the basket element-wise with encode_units.
# `DataFrame.applymap` is deprecated since pandas 2.1 in favour of
# `DataFrame.map`; pick whichever this pandas version provides (checked on the
# class so a column that happens to be called 'map' cannot interfere).
_elementwise = basket.map if hasattr(pd.DataFrame, 'map') else basket.applymap

# Cast the 0/1 flags to bool: mlxtend's apriori warns on non-bool frames and
# documents bool input as the supported, faster path.
basket = _elementwise(encode_units).astype(bool)

# Apply the Apriori algorithm to find item sets present in at least 1% of invoices
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)

# Generate the association rules, keeping those with lift >= 1
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Order by lift, strongest first (re-assignment instead of inplace=True so the
# statement has no hidden in-place side effect), then preview the top rows.
rules = rules.sort_values('lift', ascending=False)
rules.head()




Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
17,(Face mask Disposable 3PLY Of 50 Pcs),(Surgical Gloves 7.5),0.193031,0.039024,0.016028,0.083032,2.127708,0.008495,1.047993,0.656792
16,(Surgical Gloves 7.5),(Face mask Disposable 3PLY Of 50 Pcs),0.039024,0.193031,0.016028,0.410714,2.127708,0.008495,1.369401,0.551534
3,(Ibuprofen Denk 400 Mg Of 10 Tabs.),(Face mask Disposable 3PLY Of 50 Pcs),0.027875,0.193031,0.01115,0.4,2.072202,0.005769,1.344948,0.532258
2,(Face mask Disposable 3PLY Of 50 Pcs),(Ibuprofen Denk 400 Mg Of 10 Tabs.),0.193031,0.027875,0.01115,0.057762,2.072202,0.005769,1.031719,0.641192
19,(Volfast Powder 50 Mg),(Face mask Disposable 3PLY Of 50 Pcs),0.044599,0.193031,0.017422,0.390625,2.023635,0.008813,1.324256,0.529453


In [3]:
import plotly.express as px

def _itemset_to_text(items):
    """Render an item set as a comma-separated string.

    Values that are already strings are returned unchanged, which makes this
    cell safe to re-run: joining an already-converted string would otherwise
    split it into single characters. Items are sorted so that the label does
    not depend on set iteration order.
    """
    if isinstance(items, str):
        return items
    return ', '.join(sorted(items))


# Convert frozensets to strings (plotly cannot display frozensets directly)
rules['antecedents'] = rules['antecedents'].apply(_itemset_to_text)
rules['consequents'] = rules['consequents'].apply(_itemset_to_text)

# Visualize the support, confidence, and lift of the association rules
fig = px.scatter(rules, x='support', y='confidence',
                 size='lift', color='lift',
                 hover_data=['antecedents', 'consequents'],
                 title='Association Rules')

fig.show()

# Visualize the top 10 association rules by lift.
# The x axis uses a unique "antecedent -> consequent" label per rule: with
# x='antecedents', rules that share an antecedent land on the same x position
# and are stacked into a single bar, which misrepresents their individual lift.
top10_rules = rules.nlargest(10, 'lift')
top10_rules = top10_rules.assign(
    rule=top10_rules['antecedents'] + ' -> ' + top10_rules['consequents']
)
fig2 = px.bar(top10_rules, x='rule', y='lift', color='confidence',
              hover_data=['antecedents', 'consequents', 'support'],
              title='Top 10 Association Rules by Lift')

fig2.show()


  and should_run_async(code)


In [4]:
def _format_itemset(items):
    """Return an item set as display text.

    Strings pass through unchanged; any other iterable of item names (e.g. a
    frozenset, if the rules were not converted to text beforehand) is joined
    with ', ' in sorted order.
    """
    return items if isinstance(items, str) else ', '.join(sorted(items))


# Output rules in Markdown format.
# Built by iterating the needed columns in lock-step rather than with
# `rules.apply(..., axis=1)`: on an empty frame `apply` returns an empty
# DataFrame, and looping over that would print column names instead of rules.
markdown_rules = pd.Series(
    [
        f"- **Rule:** {_format_itemset(ante)} -> {_format_itemset(cons)}\n  - Support: {sup:.4f}\n  - Confidence: {conf:.4f}\n  - Lift: {lift:.4f}"
        for ante, cons, sup, conf, lift in zip(
            rules['antecedents'], rules['consequents'],
            rules['support'], rules['confidence'], rules['lift'],
        )
    ],
    index=rules.index,
    dtype=object,  # explicit dtype keeps an empty result a well-defined string Series
)

# Print the rules in Markdown format, one blank line between rules
for rule in markdown_rules:
    print(rule)
    print()


- **Rule:** Face mask  Disposable 3PLY Of 50 Pcs -> Surgical Gloves  7.5
  - Support: 0.0160
  - Confidence: 0.0830
  - Lift: 2.1277

- **Rule:** Surgical Gloves  7.5 -> Face mask  Disposable 3PLY Of 50 Pcs
  - Support: 0.0160
  - Confidence: 0.4107
  - Lift: 2.1277

- **Rule:** Ibuprofen Denk 400 Mg Of 10 Tabs. -> Face mask  Disposable 3PLY Of 50 Pcs
  - Support: 0.0111
  - Confidence: 0.4000
  - Lift: 2.0722

- **Rule:** Face mask  Disposable 3PLY Of 50 Pcs -> Ibuprofen Denk 400 Mg Of 10 Tabs.
  - Support: 0.0111
  - Confidence: 0.0578
  - Lift: 2.0722

- **Rule:** Volfast Powder 50 Mg -> Face mask  Disposable 3PLY Of 50 Pcs
  - Support: 0.0174
  - Confidence: 0.3906
  - Lift: 2.0236

- **Rule:** Face mask  Disposable 3PLY Of 50 Pcs -> Volfast Powder 50 Mg
  - Support: 0.0174
  - Confidence: 0.0903
  - Lift: 2.0236

- **Rule:** Paracetamol 500 Mg Of 10 Tabs. -> Face mask  Disposable 3PLY Of 50 Pcs
  - Support: 0.0230
  - Confidence: 0.3837
  - Lift: 1.9879

- **Rule:** Face mask  Dis


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.

