In [1]:
# 📘 Frequent Itemsets Analysis - Group 10
# Kevin Korir - Data Simulation Task

# %% [markdown]
# ## 🛍️ PART 1: Simulate Supermarket Transactions
# [Student: Kevin] This section generates 3000 fake supermarket transactions.
# Each transaction contains 2–7 distinct items randomly selected from a pool of 32 items.
# The output is saved to `supermarket_transactions.csv`.

# %%
# [Student: Kevin] Import required libraries
import random
import pandas as pd

# [Student: Kevin] Simulation parameters, kept in one place so they are easy to tune
RANDOM_SEED = 42                # fixed seed so every run regenerates the same dataset
N_TRANSACTIONS = 3000           # number of simulated transactions
MIN_ITEMS, MAX_ITEMS = 2, 7     # inclusive bounds on the number of items per transaction
OUTPUT_CSV = "supermarket_transactions.csv"

# [Student: Kevin] Define the pool of unique supermarket items (32 in total)
items = [
    'Milk', 'Bread', 'Butter', 'Cheese', 'Eggs', 'Juice', 'Apples', 'Bananas',
    'Chicken', 'Beef', 'Fish', 'Rice', 'Pasta', 'Tomatoes', 'Onions', 'Potatoes',
    'Carrots', 'Peppers', 'Yogurt', 'Cereal', 'Chips', 'Soda', 'Cookies', 'Ice Cream',
    'Toilet Paper', 'Soap', 'Shampoo', 'Toothpaste', 'Detergent', 'Coffee', 'Tea', 'Water'
]

# [Student: Kevin] Generate the transactions.
# A dedicated, seeded generator is used instead of the module-level `random`
# functions so that "Restart Kernel -> Run All" always produces the same CSV
# and the result does not depend on any other random state in the kernel.
rng = random.Random(RANDOM_SEED)
transactions = [
    # sample() draws without replacement, so an item never repeats within a transaction
    rng.sample(items, k=rng.randint(MIN_ITEMS, MAX_ITEMS))
    for _ in range(N_TRANSACTIONS)
]

# [Student: Kevin] Sanity-check the simulated data before it is written to disk
assert len(transactions) == N_TRANSACTIONS
assert all(MIN_ITEMS <= len(t) <= MAX_ITEMS for t in transactions)
assert all(len(set(t)) == len(t) for t in transactions)

# [Student: Kevin] Convert list of transactions into a DataFrame for inspection
transaction_df = pd.DataFrame({'Transaction': transactions})
# NOTE: a bare expression is only rendered when it is the last statement of a
# notebook cell; run this line in its own cell to actually see the preview.
transaction_df.head()

# [Student: Kevin] Save transactions to CSV.
# The file has a single 'Transaction' column whose cells contain the text form
# of a Python list (e.g. "['Milk', 'Bread']"), so readers must parse each cell
# back into a list (for example with ast.literal_eval).
transaction_df.to_csv(OUTPUT_CSV, index=False)
print(f"✅ {OUTPUT_CSV} created with {len(transactions)} transactions.")

# %% [markdown]
# ## 🔢 PART 2 and Beyond (To be implemented by other group members)
# [Student: Margaret] One-hot encode transactions for Apriori
# [Student: Geoffrey] Generate frequent itemsets using apriori()
# [Student: Bricole] Identify closed frequent itemsets
# [Student: Bricole] Identify maximal frequent itemsets
# Each member must add inline comments to their section of code
# All output files will be saved in .csv format as required


✅ supermarket_transactions.csv created with 3000 transactions.


## PART 2 and Beyond (To be implemented by other group members)
[Student: Margaret] One-hot encode transactions for Apriori


In [2]:
# PART 2: One-Hot Encoding for Apriori
# [Student: Margaret] This section converts the transaction data into one-hot encoded format.
# The output is saved as `onehot_transactions.csv` for use in Apriori analysis.

# %%
# [Student: Margaret] Import required libraries
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
import ast

# %%
# [Student: Margaret] Read the transaction file back into a DataFrame
df = pd.read_csv("supermarket_transactions.csv")

# Every cell of the 'Transaction' column is parsed with ast.literal_eval,
# turning its text form back into a Python object (expected: a list of items)
transactions = [ast.literal_eval(row) for row in df['Transaction']]

# %%
# [Student: Margaret] One-hot encode: fit the encoder on the transactions first,
# then transform the same transactions into the encoded array
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)

# Wrap the encoded array in a DataFrame labelled with the encoder's columns
onehot_df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Preview (only rendered when this is the last expression of its cell)
onehot_df.head()

# %%
# [Student: Margaret] Write the encoded table to disk without the index column
onehot_df.to_csv("onehot_transactions.csv", index=False)
print("✅ onehot_transactions.csv created for Apriori analysis.")


✅ onehot_transactions.csv created for Apriori analysis.
