In [70]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori

In [71]:
def generate_dataset(num_transactions, num_items, avg_items_per_transaction, seed=42):
	"""Generate a reproducible list of synthetic market-basket transactions.

	Items are named ``item1`` .. ``item<num_items>``. The size of each
	transaction is drawn from a Poisson distribution and that many distinct
	items are sampled without replacement.

	Parameters
	----------
	num_transactions : int
		Number of transactions to generate.
	num_items : int
		Size of the item catalogue to sample from.
	avg_items_per_transaction : float
		Mean of the Poisson distribution used for the transaction size.
	seed : int or None, default 42
		Value passed to ``np.random.seed``. Note that this reseeds NumPy's
		global random state as a side effect.

	Returns
	-------
	list of list of str
		One inner list of item names per transaction. An inner list may be
		empty when the Poisson draw is 0.
	"""
	np.random.seed(seed)
	items = [f"item{i}" for i in range(1, num_items + 1)]
	data = []
	for _ in range(num_transactions):
		# A Poisson draw has no upper bound, but sampling without replacement
		# cannot return more items than exist, so cap the size at num_items
		# instead of letting np.random.choice raise ValueError.
		num_items_in_transaction = min(np.random.poisson(avg_items_per_transaction), num_items)
		transaction = np.random.choice(items, size=num_items_in_transaction, replace=False)
		data.append(transaction.tolist())
	return data

In [87]:
# Synthetic basket data: 10 transactions over a 6-item catalogue,
# with a Poisson mean of 5 items per transaction.
data = generate_dataset(
	num_transactions=10,
	num_items=6,
	avg_items_per_transaction=5,
)
# One row per transaction; columns are item positions within the transaction.
df = pd.DataFrame(data)
# Support threshold handed to apriori in the mining cell.
min_support = 0.2

In [112]:
# One-hot encode the transactions: stack() flattens the frame to one item per
# (transaction, position) entry, get_dummies() creates one indicator column per
# item, and the per-transaction max collapses the entries back to one row each.
encoded_data = (
	pd.get_dummies(df.stack())
	.groupby(level=0)
	.max()
	# stack() can drop transactions that contain no items; restore them as
	# all-False rows so support stays a fraction of *all* transactions.
	.reindex(df.index, fill_value=False)
	# apriori expects boolean indicators (older pandas yields uint8 dummies).
	.astype(bool)
)
frequent_item_sets = apriori(encoded_data, min_support=min_support, use_colnames=True, low_memory=True)
# Keep only itemsets with at least two items; single items are not of interest here.
frequent_item_sets = frequent_item_sets[frequent_item_sets['itemsets'].apply(len) > 1]

sorted_freq = frequent_item_sets.sort_values(by='support', ascending=False)
print(sorted_freq)

    support                             itemsets
16      0.6                       (item3, item5)
10      0.5                       (item1, item6)
14      0.5                       (item2, item6)
6       0.4                       (item2, item1)
18      0.4                       (item4, item5)
36      0.4                (item2, item6, item5)
35      0.4                (item4, item2, item6)
7       0.4                       (item1, item3)
24      0.4                (item2, item1, item6)
20      0.4                       (item6, item5)
19      0.4                       (item4, item6)
33      0.4                (item2, item3, item6)
17      0.4                       (item3, item6)
9       0.4                       (item1, item5)
13      0.4                       (item2, item5)
12      0.4                       (item4, item2)
15      0.4                       (item4, item3)
8       0.4                       (item4, item1)
11      0.4                       (item2, item3)
54      0.3         