In [None]:
from itertools import combinations

In [None]:
def load_data(filename):
    """Read a comma-separated basket file into a list of transactions.

    Every non-blank line of the file becomes one transaction: the list of
    item names found on that line, in file order.

    Whitespace around each item is trimmed and empty fields are discarded, so
    trailing commas, doubled commas and blank lines do not create a bogus
    ``''`` item or an empty transaction that would distort support values.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read; one transaction per line, items
        separated by commas.

    Returns
    -------
    list[list[str]]
        One inner list of item names per non-blank line.
    """
    transactions = []
    with open(filename, 'r') as file:
        for line in file:
            # Trim each field and keep only the non-empty ones.
            items = [field.strip() for field in line.split(',')]
            items = [item for item in items if item]
            if items:
                transactions.append(items)
    return transactions
# Load the baskets from 'store_data.csv' (a path relative to the working
# directory); the result is a list with one list of item names per line.
transactions = load_data('store_data.csv')

In [None]:
# Display the loaded transactions to check that the file was parsed as expected.
# NOTE(review): this renders the entire list; a slice such as transactions[:5]
# would keep the notebook output short.
transactions

[['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  'green grapes',
  'whole weat flour',
  'yams',
  'cottage cheese',
  'energy drink',
  'tomato juice',
  'low fat yogurt',
  'green tea',
  'honey',
  'salad',
  'mineral water',
  'salmon',
  'antioxydant juice',
  'frozen smoothie',
  'spinach',
  'olive oil'],
 ['burgers', 'meatballs', 'eggs'],
 ['chutney'],
 ['turkey', 'avocado'],
 ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea'],
 ['low fat yogurt'],
 ['whole wheat pasta', 'french fries'],
 ['soup', 'light cream', 'shallot'],
 ['frozen vegetables', 'spaghetti', 'green tea'],
 ['french fries'],
 ['eggs', 'pet food'],
 ['cookies'],
 ['turkey', 'burgers', 'mineral water', 'eggs', 'cooking oil'],
 ['spaghetti', 'champagne', 'cookies'],
 ['mineral water', 'salmon'],
 ['mineral water'],
 ['shrimp',
  'chocolate',
  'chicken',
  'honey',
  'oil',
  'cooking oil',
  'low fat yogurt'],
 ['turkey', 'eggs'],
 ['turkey',
  'fresh tuna',
  'tomatoes',
  'spagh

In [None]:
def generate_candidates(prev_candidates, k):
    """Build candidate k-itemsets by joining pairs of previous itemsets.

    Two itemsets are joined when, after sorting their items, their first
    ``k - 2`` items are identical. The new candidate is the sorted union of
    both itemsets. This is the join step of Apriori; it is meant to be fed
    the (k-1)-itemsets of the previous level.

    Parameters
    ----------
    prev_candidates : dict
        Mapping whose keys are itemsets (tuples of items). Only the keys are
        used; the values are ignored.
    k : int
        Size of the itemsets to generate.

    Returns
    -------
    dict
        Maps every generated candidate (a sorted tuple) to an initial
        count of 0.
    """
    prefix_len = k - 2
    # Sort each itemset once up front instead of re-sorting it for every pair.
    ordered = [sorted(itemset) for itemset in prev_candidates.keys()]

    candidates = {}
    for left, right in combinations(ordered, 2):
        if left[:prefix_len] == right[:prefix_len]:
            candidates[tuple(sorted(set(left + right)))] = 0
    return candidates

In [None]:
def generate_frequent_itemsets(transactions, min_support):
    """Find all frequent itemsets with a level-wise (Apriori) search.

    The support of an itemset is the fraction of transactions that contain
    every item of the itemset. Itemsets of size 1 are counted directly;
    larger candidates come from ``generate_candidates`` and are kept while
    their support stays at or above ``min_support``.

    Parameters
    ----------
    transactions : list of iterables
        Each element holds the items of one transaction.
    min_support : float
        Minimum support (a fraction between 0 and 1) for an itemset to be
        kept.

    Returns
    -------
    dict
        Maps each frequent itemset (a tuple of items) to its support as a
        fraction of the number of transactions. Every size uses the same
        unit, so the values can be divided to obtain rule confidences.
        An empty ``transactions`` yields an empty dict.
    """
    total = len(transactions)
    if total == 0:
        return {}

    # Build each transaction's item set once; it is reused for every candidate.
    baskets = [set(transaction) for transaction in transactions]

    item_counts = {}
    for transaction in transactions:
        # dict.fromkeys removes repeated items while keeping first-seen order,
        # so an item counts once per transaction and result order is stable.
        for item in dict.fromkeys(transaction):
            item_counts[item] = item_counts.get(item, 0) + 1

    frequent_itemsets = {}
    candidates = {}
    for item, count in item_counts.items():
        support = count / total
        if support >= min_support:
            frequent_itemsets[(item,)] = support
            candidates[(item,)] = count

    k = 2
    while candidates:
        candidates = generate_candidates(candidates, k)
        candidate_sets = [(candidate, frozenset(candidate)) for candidate in candidates]
        for basket in baskets:
            # A basket with fewer than k items cannot contain a k-itemset.
            if len(basket) < k:
                continue
            for candidate, members in candidate_sets:
                if members <= basket:
                    candidates[candidate] += 1

        # Keep raw counts for the survivors; only their keys feed the next join.
        candidates = {itemset: count for itemset, count in candidates.items()
                      if count / total >= min_support}
        # Store supports, not counts, so all sizes share one unit.
        for itemset, count in candidates.items():
            frequent_itemsets[itemset] = count / total
        k += 1
    return frequent_itemsets

In [None]:
def generate_association_rules(frequent_itemsets, min_confidence):
    """Derive association rules ``antecedent -> consequent`` from itemsets.

    For every itemset with at least two items, each non-empty proper subset
    is tried once as the antecedent; the consequent is the rest of the
    itemset. The confidence of a rule is the value stored for the full
    itemset divided by the value stored for the antecedent.

    Parameters
    ----------
    frequent_itemsets : dict
        Maps itemsets (tuples of items) to their support. The values of all
        itemsets must use the same unit for the confidence to be meaningful.
    min_confidence : float
        Minimum confidence for a rule to be reported.

    Returns
    -------
    list of tuple
        ``(antecedent, consequent, confidence)`` triples, where antecedent
        and consequent are tuples of items. Each rule appears once.
    """
    association_rules = []

    for itemset, itemset_support in frequent_itemsets.items():
        if len(itemset) < 2:
            continue

        # Sizes 1 .. len-1 enumerate every non-empty proper subset exactly once.
        for size in range(1, len(itemset)):
            for antecedent in combinations(itemset, size):
                consequent = tuple(sorted(set(itemset) - set(antecedent)))
                if antecedent not in frequent_itemsets or consequent not in frequent_itemsets:
                    continue

                antecedent_support = frequent_itemsets[antecedent]
                # A zero-support antecedent has no defined confidence; skip it
                # rather than dividing by zero.
                if not antecedent_support:
                    continue

                confidence = itemset_support / antecedent_support
                if confidence >= min_confidence:
                    association_rules.append((antecedent, consequent, confidence))

    return association_rules


In [None]:
# Thresholds handed to the two mining functions below.
min_support = 0.01
min_confidence = 0.01

# Mine the itemsets first, then derive the rules from them.
frequent_itemsets = generate_frequent_itemsets(transactions, min_support)
association_rules = generate_association_rules(frequent_itemsets, min_confidence)

# Report every itemset together with the value stored for it.
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(itemset, "Support:", frequent_itemsets[itemset])

# Report every rule as "antecedent -> consequent" with its confidence.
print("\nAssociation Rules:")
for antecedent, consequent, confidence in association_rules:
    print(antecedent, "->", consequent, "Confidence:", confidence)


Frequent Itemsets:
('shrimp',) Support: 0.07145713904812692
('almonds',) Support: 0.020397280362618318
('avocado',) Support: 0.03332888948140248
('vegetables mix',) Support: 0.025729902679642713
('yams',) Support: 0.011465137981602452
('cottage cheese',) Support: 0.03186241834422077
('energy drink',) Support: 0.026663111585121985
('tomato juice',) Support: 0.030395947207039063
('low fat yogurt',) Support: 0.07652313024930009
('green tea',) Support: 0.13211571790427942
('honey',) Support: 0.047460338621517134
('mineral water',) Support: 0.23836821757099053
('salmon',) Support: 0.04252766297826956
('frozen smoothie',) Support: 0.06332489001466471
('olive oil',) Support: 0.0658578856152513
('burgers',) Support: 0.0871883748833489
('meatballs',) Support: 0.020930542594320756
('eggs',) Support: 0.17970937208372217
('turkey',) Support: 0.06252499666711105
('milk',) Support: 0.12958272230369283
('energy bar',) Support: 0.027063058258898813
('whole wheat rice',) Support: 0.058525529929342755
(