In [9]:
import pandas as pd
from itertools import combinations

In [10]:
dataset = pd.read_csv('./data_10.csv', header=None)
dataset = dataset.fillna(0)

In [11]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7501 entries, 0 to 7500
Data columns (total 20 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       7501 non-null   object
 1   1       7501 non-null   object
 2   2       7501 non-null   object
 3   3       7501 non-null   object
 4   4       7501 non-null   object
 5   5       7501 non-null   object
 6   6       7501 non-null   object
 7   7       7501 non-null   object
 8   8       7501 non-null   object
 9   9       7501 non-null   object
 10  10      7501 non-null   object
 11  11      7501 non-null   object
 12  12      7501 non-null   object
 13  13      7501 non-null   object
 14  14      7501 non-null   object
 15  15      7501 non-null   object
 16  16      7501 non-null   object
 17  17      7501 non-null   object
 18  18      7501 non-null   object
 19  19      7501 non-null   object
dtypes: object(20)
memory usage: 1.1+ MB


In [12]:
transactions = []
for i in range(0, 1000):
    transactions.append([str(dataset.values[i,j]) for j in range(0, 20)])

In [13]:
def generate_candidates(prev_itemsets, k):
    candidates = set()
    for itemset1 in prev_itemsets:
        for itemset2 in prev_itemsets:
            if len(itemset1.union(itemset2)) == k:
                candidate = itemset1.union(itemset2)
                if candidate not in candidates:
                    candidates.add(candidate)
    return candidates

def prune_itemsets(itemsets, prev_itemsets):
    pruned_itemsets = []
    for itemset in itemsets:
        subsets = list(combinations(itemset, len(itemset) - 1))
        if all(frozenset(subset) in prev_itemsets for subset in subsets):
            pruned_itemsets.append(itemset)
    return pruned_itemsets

def apriori(transactions, min_support):
    item_counts = {}
    for transaction in transactions:
        for item in transaction:
            if item not in item_counts:
                item_counts[item] = 0
            item_counts[item] += 1

    frequent_itemsets = [frozenset([item]) for item, count in item_counts.items() if count >= min_support]
    frequent_itemsets = [itemset for itemset in frequent_itemsets]

    all_frequent_itemsets = frequent_itemsets
    k = 2
    while frequent_itemsets:
        candidates = generate_candidates(frequent_itemsets, k)
        candidate_counts = {}
        for transaction in transactions:
            for candidate in candidates:
                if candidate.issubset(transaction):
                    if candidate not in candidate_counts:
                        candidate_counts[candidate] = 0
                    candidate_counts[candidate] += 1
        frequent_itemsets = [itemset for itemset, count in candidate_counts.items() if count >= min_support]
        frequent_itemsets = prune_itemsets(frequent_itemsets, all_frequent_itemsets)
        all_frequent_itemsets += frequent_itemsets
        k += 1

    return all_frequent_itemsets

In [14]:
min_support = 100
frequent_itemsets = apriori(transactions, min_support)
print("Frequent itemsets:")
for itemset in frequent_itemsets:
    print(itemset)

Frequent itemsets:
frozenset({'green tea'})
frozenset({'mineral water'})
frozenset({'eggs'})
frozenset({'0'})
frozenset({'milk'})
frozenset({'french fries'})
frozenset({'spaghetti'})
frozenset({'chocolate'})
frozenset({'0', 'eggs'})
frozenset({'mineral water', '0'})
frozenset({'green tea', '0'})
frozenset({'milk', '0'})
frozenset({'french fries', '0'})
frozenset({'spaghetti', '0'})
frozenset({'chocolate', '0'})
