<a href="https://colab.research.google.com/github/juliuslaggah/Calculator_using_C-/blob/main/BasketAssignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mlxtend
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

1. Uploading the Files

In [None]:
# Colab-only step: the google.colab package is importable only inside a
# Google Colab runtime, so this cell will fail in a plain Jupyter kernel.
from google.colab import files
# Prompt for the input files. Later cells open 'Sales1998.txt' and
# 'productList.txt' by relative path, so both must be provided here under
# exactly those names.
uploaded = files.upload()  # presumably a mapping of uploaded file names to contents — confirm in the Colab docs


2. Loading and Exploring the Data

In [None]:
# Load the raw transactions from Sales1998.txt, one basket per line.
# Blank lines (e.g. a trailing empty line at the end of the file) are skipped:
# otherwise they would be counted as transactions and later turn into empty
# baskets, inflating the transaction count and deflating every support value.
with open('Sales1998.txt', 'r') as f:
    transactions = [line for line in f if line.strip()]

print("Number of transactions in Sales1998.txt:", len(transactions))

# Read productList.txt and build product_dict: product ID (str) -> product name.
with open('productList.txt', 'r') as f:
    product_lines = f.readlines()

# Use regex to extract product ID and name.
# A product line is expected to look like:  <digits> "<product name>"
import re
product_line_pattern = re.compile(r'(\d+)\s+"(.+)"')

product_dict = {}
unparsed_product_lines = []  # non-blank lines that did not match the pattern
for line in product_lines:
    stripped = line.strip()
    if not stripped:
        continue
    match = product_line_pattern.match(stripped)
    if match:
        prod_id, prod_name = match.groups()
        product_dict[prod_id] = prod_name
    else:
        unparsed_product_lines.append(stripped)

print("Number of products in productList.txt:", len(product_dict))

# Surface malformed lines instead of dropping them silently; products missing
# from product_dict would otherwise only show up later as "Unknown_<id>" items.
if unparsed_product_lines:
    print(f"Warning: {len(unparsed_product_lines)} line(s) in productList.txt could not be parsed, e.g. {unparsed_product_lines[0]!r}")

# Optional: Preview a few mappings
print("\nSample Product Mappings:")
for pid, pname in list(product_dict.items())[:10]:
    print(f"{pid}: {pname}")


3. Preparing the Transactions Data

In [None]:

# Break every raw transaction line into its list of product IDs.
# str.split() with no separator splits on any whitespace run and ignores
# leading/trailing whitespace, so no explicit strip() is required.
transactions_split = list(map(str.split, transactions))
print("Example transaction (product IDs):", transactions_split[0])


In [None]:
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd
# Tokenise each transaction line into product IDs (whitespace separated).
transactions_split = list(map(str.split, transactions))

# Swap each product ID for its name; IDs missing from product_dict are kept
# visible under an "Unknown_<id>" placeholder rather than being dropped.
transactions_named = [
    [
        product_dict[item_id] if item_id in product_dict else f"Unknown_{item_id}"
        for item_id in id_list
    ]
    for id_list in transactions_split
]

# One-hot encode the named baskets: one row per transaction, one boolean
# column per distinct product name.
te = TransactionEncoder()
te.fit(transactions_named)
te_array = te.transform(transactions_named)
df = pd.DataFrame(data=te_array, columns=te.columns_)

print("One-hot encoded transaction sample:")
print(df.head(10))


4. Applying the FP-Growth Algorithm

In [None]:
from mlxtend.frequent_patterns import fpgrowth
# Mine frequent itemsets with FP-Growth on the one-hot encoded transactions.
# min_support is a fraction of all transactions: 0.0001 keeps itemsets that
# occur in at least 0.01% of the baskets.
frequent_itemsets_fp = fpgrowth(df, min_support=0.0001, use_colnames=True)

# Number of products in each itemset (kept on the frame for later filtering).
frequent_itemsets_fp['item_count'] = frequent_itemsets_fp['itemsets'].apply(len)

# Work on a copy so the frozenset column of frequent_itemsets_fp stays intact
# for association_rules(); only the copy gets the human-readable strings.
multi_itemsets = frequent_itemsets_fp[frequent_itemsets_fp['item_count'] > 1].copy()

# Sort the names before joining: iteration order of a frozenset of strings can
# change between kernel runs (string hash randomisation), which would make the
# printed itemsets differ from run to run.
multi_itemsets['itemsets'] = multi_itemsets['itemsets'].apply(
    lambda items: ', '.join(sorted(items))
)

print(f"Total Multi-Item Frequent Itemsets Found: {len(multi_itemsets)}")
print(multi_itemsets[['support', 'itemsets']].head(10))


5. Generating Association Rules

In [48]:
from mlxtend.frequent_patterns import association_rules

# Generate association rules from the frequent itemsets, keeping every rule
# whose confidence is at least 0.05.
rules_fp = association_rules(frequent_itemsets_fp, metric="confidence", min_threshold=0.05)

# Convert the frozensets to lists so they can be joined for display.
# The lists are sorted alphabetically because frozenset iteration order for
# strings is not stable across kernel runs, which would otherwise make the
# printed rules differ from run to run.
rules_fp['antecedents'] = rules_fp['antecedents'].apply(lambda items: sorted(items))
rules_fp['consequents'] = rules_fp['consequents'].apply(lambda items: sorted(items))

# Sort by confidence, strongest rules first
rules_fp_sorted = rules_fp.sort_values(by='confidence', ascending=False)

# Display the top rules in plain language. The header reports the number of
# rules actually shown, which is smaller than top_n when fewer rules exist.
top_n = 20
top_rules = rules_fp_sorted.head(top_n)

print(f"\nTotal Association Rules Found: {len(rules_fp_sorted)}")
print(f"\nTop {len(top_rules)} Association Rules in Plain Language:\n")
for _, row in top_rules.iterrows():
    antecedents = ', '.join(row['antecedents'])
    consequents = ', '.join(row['consequents'])
    print(f"If a customer buys [{antecedents}], they are also likely to buy [{consequents}].")
    print(f"  - Support: {row['support']:.4f}, Confidence: {row['confidence']:.2f}, Lift: {row['lift']:.2f}\n")



Total Association Rules Found: 6421

Top 20 Association Rules in Plain Language:

If a customer buys [Super Creamy Peanut Butter, Ebony Garlic], they are also likely to buy [Faux Products Silky Smooth Hair Conditioner].
  - Support: 0.0001, Confidence: 1.00, Lift: 448.29

If a customer buys [Golden Frozen Mushroom Pizza, Lake Low Fat Cole Slaw], they are also likely to buy [Club Low Fat Cottage Cheese].
  - Support: 0.0001, Confidence: 1.00, Lift: 370.33

If a customer buys [Lake Low Fat Cole Slaw, Club Low Fat Cottage Cheese], they are also likely to buy [Golden Frozen Mushroom Pizza].
  - Support: 0.0001, Confidence: 1.00, Lift: 306.94

If a customer buys [Gulf Coast White Chocolate Bar, American Corned Beef], they are also likely to buy [High Top Garlic].
  - Support: 0.0001, Confidence: 1.00, Lift: 330.78

If a customer buys [Thresher Mint Chocolate Bar, Big Time Popsicles], they are also likely to buy [Bravo Canned Tuna in Oil].
  - Support: 0.0001, Confidence: 1.00, Lift: 324.48