In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, fpgrowth

In [2]:
dataset = [
    ['I1', 'I2', 'I5'],
    ['I2', 'I4'],        
    ['I2', 'I3'],        
    ['I1', 'I2', 'I4'],  
    ['I1', 'I3'],        
    ['I2', 'I3'],        
    ['I1', 'I3'],        
    ['I1', 'I2', 'I3', 'I5'],  
    ['I1', 'I2', 'I3']   
]

In [3]:
rows = []
for transaction in dataset:
    row = {item: 1 for item in transaction}
    rows.append(row)
df = pd.DataFrame(rows).fillna(0)

In [4]:
min_support = 0.4
frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.666667,(I1)
1,0.777778,(I2)
2,0.666667,(I3)
3,0.444444,"(I1, I2)"
4,0.444444,"(I1, I3)"
5,0.444444,"(I3, I2)"


In [5]:
closed_patterns = []
for i, row in frequent_itemsets.iterrows():
    is_closed = True
    for j, superset in frequent_itemsets.iterrows():
        if set(row['itemsets']).issubset(set(superset['itemsets'])) and row['support'] == superset['support'] and row['itemsets'] != superset['itemsets']:
            is_closed = False  
            break
    if is_closed:
        closed_patterns.append(row)

In [6]:
closed_frequent_itemsets = pd.DataFrame(closed_patterns)
print("Closed Frequent Itemsets:")
closed_frequent_itemsets

Closed Frequent Itemsets:


Unnamed: 0,support,itemsets
0,0.666667,(I1)
1,0.777778,(I2)
2,0.666667,(I3)
3,0.444444,"(I1, I2)"
4,0.444444,"(I1, I3)"
5,0.444444,"(I3, I2)"


In [7]:
fp_frequent_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True)
fp_frequent_itemsets



Unnamed: 0,support,itemsets
0,0.777778,(I2)
1,0.666667,(I1)
2,0.666667,(I3)
3,0.444444,"(I1, I2)"
4,0.444444,"(I1, I3)"
5,0.444444,"(I3, I2)"


In [8]:
maximal_patterns = []
for _, row in fp_frequent_itemsets.iterrows():
    is_maximal = True
    for _, superset in fp_frequent_itemsets.iterrows():
        if row['itemsets'] < superset['itemsets'] and row['support'] <= superset['support']:
            is_maximal = False
            break 

    if is_maximal:
        maximal_patterns.append(row)

In [9]:
maximal_frequent_itemsets = pd.DataFrame(maximal_patterns)
print("Maximal Frequent Itemsets:")
maximal_frequent_itemsets

Maximal Frequent Itemsets:


Unnamed: 0,support,itemsets
0,0.777778,(I2)
1,0.666667,(I1)
2,0.666667,(I3)
3,0.444444,"(I1, I2)"
4,0.444444,"(I1, I3)"
5,0.444444,"(I3, I2)"


In [10]:
vertical_db = {}
for tid in range(len(dataset)):
    for item in dataset[tid]:
        if item not in vertical_db:
            vertical_db[item] = set()
        vertical_db[item].add(tid)
vertical_db

{'I1': {0, 3, 4, 6, 7, 8},
 'I2': {0, 1, 2, 3, 5, 7, 8},
 'I5': {0, 7},
 'I4': {1, 3},
 'I3': {2, 4, 5, 6, 7, 8}}

In [11]:
min_support_count = min_support * len(dataset)
frequent_patterns = {}
items = list(vertical_db.keys())
for i in range(len(items)):
    for j in range(i + 1, len(items)):
        pattern = {items[i], items[j]}
        intersection = vertical_db[items[i]] & vertical_db[items[j]]
        if len(intersection) >= min_support_count:
            frequent_patterns[frozenset(pattern)] = len(intersection) / len(dataset)

In [12]:
print("Frequent Patterns using Naive Intersection:")
frequent_patterns

Frequent Patterns using Naive Intersection:


{frozenset({'I1', 'I2'}): 0.4444444444444444,
 frozenset({'I1', 'I3'}): 0.4444444444444444,
 frozenset({'I2', 'I3'}): 0.4444444444444444}