In [1]:
import csv
import numpy as np
from itertools import combinations
from collections import defaultdict
import itertools
from pandas import read_csv

def get_column_count(file_path):
    """Return the number of fields in the first record of a CSV file.

    Only the first record is inspected, so for a file whose rows differ in
    length the result describes that first row alone.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to inspect.

    Returns
    -------
    int
        Number of fields in the first record, or 0 when the file contains no
        records at all.
    """
    # newline='' hands line-ending handling to the csv module, which its
    # documentation asks for so quoted fields with embedded line breaks are
    # parsed correctly.
    with open(file_path, 'r', newline='') as csvfile:
        # Fall back to an empty row so an empty file yields 0 instead of
        # leaking StopIteration to the caller.
        first_row = next(csv.reader(csvfile), [])
    return len(first_row)

In [2]:
filepath = 'SuperCenterDataNew.csv'
column_count = get_column_count(filepath)
# Map every column position to str so each column is read as text rather
# than being type-inferred by pandas.
df = read_csv(
    filepath,
    header=None,
    dtype=dict.fromkeys(range(column_count), str),
)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,66,67,68,69,70,71,72,73,74,75
0,39,48,130.0,147.0,475.0,2998.0,4792.0,5478.0,7146.0,7160.0,...,,,,,,,,,,
1,32,48,,,,,,,,,...,,,,,,,,,,
2,48,79,232.0,340.0,396.0,649.0,911.0,1016.0,1020.0,1172.0,...,,,,,,,,,,
3,32,48,65.0,130.0,772.0,816.0,1344.0,3220.0,4445.0,6536.0,...,,,,,,,,,,
4,39,175,421.0,806.0,1025.0,1629.0,2424.0,3151.0,,,...,,,,,,,,,,


In [3]:
# Collapse each row into a plain Python list holding only its non-missing
# values (one list per row).
data = df.apply(lambda row: row.dropna().tolist(), axis=1)

In [4]:
# Work on the first 2000 rows only.
sample = data.iloc[:2000]

In [5]:
from collections import Counter, defaultdict

def apriori_gen(frequent_sets, k):
    """Build candidate itemsets by joining itemsets that share a prefix.

    Two itemsets are joined when they agree on everything except their last
    item and the first one's last item compares less than the second's. The
    joined candidate is the sorted union of the two.

    Parameters
    ----------
    frequent_sets : iterable of sequence
        Itemsets of the previous level. Iterating a dict yields its keys, so
        a mapping of itemset -> support can be passed directly.
    k : int
        Size of the candidates being generated. The join itself does not need
        it; the parameter is kept so existing callers keep working.

    Returns
    -------
    list of tuple
        One sorted tuple per joinable pair, in the order a full pairwise scan
        of ``frequent_sets`` would produce them.
    """
    # Materialise once so the input can be walked twice even if it is a
    # one-shot iterable.
    itemsets = list(frequent_sets)

    # Only itemsets with an identical prefix can ever join, so bucket them by
    # prefix instead of comparing every itemset with every other one.
    # Buckets keep the input order, which keeps the output order unchanged.
    by_prefix = {}
    for itemset in itemsets:
        by_prefix.setdefault(tuple(itemset[:-1]), []).append(itemset)

    candidate_sets = []
    for itemset1 in itemsets:
        for itemset2 in by_prefix[tuple(itemset1[:-1])]:
            # The strict "<" emits each unordered pair once and never joins
            # an itemset with itself.
            if itemset1[-1] < itemset2[-1]:
                candidate_sets.append(tuple(sorted(set(itemset1).union(itemset2))))

    return candidate_sets

def frequent_1_itemsets(data, min_support):
    """Find the single items that occur in at least ``min_support`` transactions.

    Parameters
    ----------
    data : iterable of iterable
        Transactions, each an iterable of hashable items.
    min_support : int
        Minimum number of transactions an item must appear in to be kept.

    Returns
    -------
    dict
        Maps each frequent item, wrapped in a 1-tuple ``(item,)``, to the
        number of transactions that contain it.
    """
    item_counts = Counter(
        item
        for transaction in data
        # An item repeated inside one transaction must still count only once
        # for that transaction; dict.fromkeys drops the repeats while keeping
        # first-seen order.
        for item in dict.fromkeys(transaction)
    )
    return {
        (item,): count
        for item, count in item_counts.items()
        if count >= min_support
    }

def apriori_prune(candidates, data, min_support):
    """Count candidate itemsets over the transactions and keep the frequent ones.

    Parameters
    ----------
    candidates : iterable of tuple
        Candidate itemsets; each must be hashable because it becomes a key of
        the result.
    data : iterable of iterable
        Transactions, each an iterable of hashable items.
    min_support : int
        Minimum number of containing transactions for a candidate to be kept.

    Returns
    -------
    dict
        Maps every candidate contained in at least ``min_support``
        transactions to that number. Candidates that match no transaction
        never appear in the result.
    """
    # Build each candidate's item set once rather than once per transaction.
    # Keying by the candidate also drops repeated candidates, so a candidate
    # listed twice is not counted twice for the same transaction.
    candidate_items = {candidate: frozenset(candidate) for candidate in candidates}
    if not candidate_items:
        return {}

    support_counts = defaultdict(int)
    for transaction in data:
        # Convert once per transaction so every containment check below is a
        # plain set comparison.
        if not isinstance(transaction, (set, frozenset)):
            transaction = set(transaction)
        for candidate, items in candidate_items.items():
            if items <= transaction:
                support_counts[candidate] += 1

    # Keep only the candidates that reach the support threshold.
    return {
        candidate: support
        for candidate, support in support_counts.items()
        if support >= min_support
    }

def apriori(data, min_support):
    """Collect frequent itemsets of every size, level by level.

    Each transaction is first converted to a set. The first level comes from
    ``frequent_1_itemsets``; every following level is produced by feeding the
    previous level through ``apriori_gen`` and then ``apriori_prune``. The
    search stops as soon as a level comes back empty.

    Parameters
    ----------
    data : iterable of iterable
        Transactions, each an iterable of hashable items.
    min_support : int
        Support threshold passed unchanged to the helper functions.

    Returns
    -------
    dict
        Every itemset found at any level, mapped to the support value
        reported by the helpers.
    """
    transactions = list(map(set, data))

    all_frequent = {}
    level = frequent_1_itemsets(transactions, min_support)
    size = 2

    while level:
        # Record the current level before deriving the next, larger one.
        all_frequent.update(level)
        next_candidates = apriori_gen(level, size)
        level = apriori_prune(next_candidates, transactions, min_support)
        size += 1

    return all_frequent

In [6]:
# Frequent itemsets for every minimum support count from 3 to 10, keyed by
# that threshold. An itemset's support does not depend on the threshold, and
# every subset of a frequent itemset is at least as frequent, so the itemsets
# frequent at threshold i are exactly those frequent at i - 1 whose support is
# still >= i. Mine once at the lowest threshold and filter upwards instead of
# repeating the full search for each threshold.
d = {3: apriori(sample, 3)}
for i in range(4, 10 + 1):
    d[i] = {
        itemset: support
        for itemset, support in d[i - 1].items()
        if support >= i
    }

In [7]:
# Show the result of apriori(sample, 3): itemset tuple -> support count.
d[3]

{('39',): 1126,
 ('475',): 46,
 ('4792',): 3,
 ('147',): 47,
 ('10410',): 3,
 ('2998',): 7,
 ('48',): 912,
 ('32',): 252,
 ('1016',): 7,
 ('1172',): 4,
 ('232',): 8,
 ('14350',): 3,
 ('1020',): 17,
 ('2115',): 14,
 ('79',): 38,
 ('649',): 9,
 ('396',): 17,
 ('10579',): 9,
 ('911',): 4,
 ('2792',): 10,
 ('4070',): 6,
 ('1874',): 4,
 ('4146',): 4,
 ('3106',): 7,
 ('2208',): 7,
 ('14029',): 12,
 ('3964',): 15,
 ('4764',): 6,
 ('340',): 15,
 ('9082',): 3,
 ('1344',): 5,
 ('3220',): 3,
 ('65',): 99,
 ('772',): 22,
 ('816',): 8,
 ('2424',): 7,
 ('421',): 3,
 ('3151',): 6,
 ('1629',): 15,
 ('1025',): 8,
 ('175',): 9,
 ('806',): 11,
 ('5051',): 11,
 ('2625',): 8,
 ('4753',): 6,
 ('1319',): 6,
 ('1404',): 9,
 ('1987',): 20,
 ('1476',): 7,
 ('179',): 20,
 ('31',): 21,
 ('961',): 15,
 ('4064',): 6,
 ('2052',): 19,
 ('1486',): 29,
 ('10074',): 10,
 ('878',): 6,
 ('441',): 28,
 ('107',): 21,
 ('548',): 35,
 ('533',): 28,
 ('1897',): 10,
 ('13062',): 5,
 ('301',): 22,
 ('495',): 3,
 ('5994',): 3,
 (

In [8]:
# Show the frequent itemsets stored for a minimum support of 10: itemset tuple -> support count.
d[10]

{('39',): 1126,
 ('475',): 46,
 ('147',): 47,
 ('48',): 912,
 ('32',): 252,
 ('1020',): 17,
 ('2115',): 14,
 ('79',): 38,
 ('396',): 17,
 ('2792',): 10,
 ('14029',): 12,
 ('3964',): 15,
 ('340',): 15,
 ('65',): 99,
 ('772',): 22,
 ('1629',): 15,
 ('806',): 11,
 ('5051',): 11,
 ('1987',): 20,
 ('179',): 20,
 ('31',): 21,
 ('961',): 15,
 ('2052',): 19,
 ('1486',): 29,
 ('10074',): 10,
 ('441',): 28,
 ('107',): 21,
 ('548',): 35,
 ('533',): 28,
 ('1897',): 10,
 ('301',): 22,
 ('55',): 18,
 ('664',): 19,
 ('1052',): 10,
 ('186',): 19,
 ('237',): 69,
 ('1071',): 12,
 ('365',): 19,
 ('1066',): 17,
 ('1677',): 33,
 ('89',): 99,
 ('3510',): 21,
 ('13413',): 11,
 ('14098',): 52,
 ('2815',): 14,
 ('10451',): 10,
 ('76',): 27,
 ('12946',): 25,
 ('45',): 23,
 ('2212',): 12,
 ('2202',): 21,
 ('3866',): 12,
 ('8985',): 10,
 ('4145',): 11,
 ('30',): 14,
 ('12981',): 27,
 ('1067',): 16,
 ('13335',): 10,
 ('3366',): 17,
 ('262',): 26,
 ('101',): 67,
 ('2191',): 10,
 ('10653',): 13,
 ('885',): 22,
 ('20