In [53]:
import csv
import datetime
import math
import numpy as np
from itertools import chain, combinations

### Basic I/O

In [54]:
def read_file(filename):
    ds = []
    with open(filename, newline='') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',')
        for row in filereader:
            ds.append(tuple(sorted(row[:-1])))
        return ds

In [55]:
read = read_file('GroceryStore.csv')[:5]
print(read)

[('Butter', 'Cheese', 'Coffee Powder', 'Ghee', 'Lassi', 'Yougurt'), ('Coffee Powder', 'Ghee'), ('Butter', 'Cheese', 'Lassi', 'Tea Powder'), ('Bread', 'Butter', 'Cheese', 'Coffee Powder', 'Panner', 'Tea Powder'), ('Butter', 'Cheese', 'Coffee Powder', 'Sugar', 'Sweet', 'Yougurt')]


In [56]:
def write_file(data, filename):
    with open(filename, 'w', newline='') as csvfile:
        filewriter = csv.writer(csvfile, delimiter=',')
        for row in data:
            filewriter.writerow(row)

### Apriori Algorithm

In [57]:
def calculate_support_count(instance, data, is_set):
    count = 0
    if is_set:
        for row in data:
            if set(instance).issubset(set(row)): 
                count = count + 1
    else:
        for row in data:
            if instance in row:
                count = count + 1
    return count

In [58]:
def support_elimination(data, items, minimal_support):
    lgtn = len(data)
    dct = {}
    if isinstance(items, set): 
        for i in items:
            support_count = calculate_support_count(i, data, True)
            dct[i] = support_count / lgtn
    else: 
        for i in items:
            support_count = calculate_support_count(i, data, False)
            dct[i] = support_count / lgtn
    support_resistant = []
    for key in dct:
        if dct[key] >= minimal_support:
            support_resistant.append(key)
    return support_resistant

In [59]:
def find_unique_level_one(data):
    dct = {}
    for y in data:
        for x in y:
            dct[x] = x
    return tuple(sorted([i for i in dct]))

In [60]:
data = read_file('GroceryStore.csv')
unique = find_unique_level_one(data)
support_elimination(data, unique, 0.4)

['Bread',
 'Butter',
 'Cheese',
 'Coffee Powder',
 'Ghee',
 'Lassi',
 'Milk',
 'Panner',
 'Sugar',
 'Sweet',
 'Tea Powder',
 'Yougurt']

In [61]:
def generate_candidates_fk1_1(previous_step_f_itemset, step_one_f_itemset):
    lst = []
    if isinstance(previous_step_f_itemset[0], list) or isinstance(previous_step_f_itemset[0], tuple): 
        for i in previous_step_f_itemset:
            for k in step_one_f_itemset:
                if k not in i:
                    lst.append(tuple(sorted([*i, k])))
    else:
        for i in previous_step_f_itemset:
            for k in step_one_f_itemset:
                if k != i:
                    lst.append(tuple(sorted((i, k))))
    return set(lst)

In [62]:
generate_candidates_fk1_1(unique, unique)

{('Bread', 'Butter'),
 ('Bread', 'Cheese'),
 ('Bread', 'Coffee Powder'),
 ('Bread', 'Ghee'),
 ('Bread', 'Lassi'),
 ('Bread', 'Milk'),
 ('Bread', 'Panner'),
 ('Bread', 'Sugar'),
 ('Bread', 'Sweet'),
 ('Bread', 'Tea Powder'),
 ('Bread', 'Yougurt'),
 ('Butter', 'Cheese'),
 ('Butter', 'Coffee Powder'),
 ('Butter', 'Ghee'),
 ('Butter', 'Lassi'),
 ('Butter', 'Milk'),
 ('Butter', 'Panner'),
 ('Butter', 'Sugar'),
 ('Butter', 'Sweet'),
 ('Butter', 'Tea Powder'),
 ('Butter', 'Yougurt'),
 ('Cheese', 'Coffee Powder'),
 ('Cheese', 'Ghee'),
 ('Cheese', 'Lassi'),
 ('Cheese', 'Milk'),
 ('Cheese', 'Panner'),
 ('Cheese', 'Sugar'),
 ('Cheese', 'Sweet'),
 ('Cheese', 'Tea Powder'),
 ('Cheese', 'Yougurt'),
 ('Coffee Powder', 'Ghee'),
 ('Coffee Powder', 'Lassi'),
 ('Coffee Powder', 'Milk'),
 ('Coffee Powder', 'Panner'),
 ('Coffee Powder', 'Sugar'),
 ('Coffee Powder', 'Sweet'),
 ('Coffee Powder', 'Tea Powder'),
 ('Coffee Powder', 'Yougurt'),
 ('Ghee', 'Lassi'),
 ('Ghee', 'Milk'),
 ('Ghee', 'Panner'),
 ('Ghee'

In [63]:
def apriori_algorigth(filename, max_length, min_support):
    data = read_file(filename)
    one_unique = find_unique_level_one(data)
    el_one_unique = support_elimination(data, one_unique, min_support)
    lim = 1
    el_k_unique = el_one_unique
    while(lim < max_length):
        k_unique = generate_candidates_fk1_1(el_k_unique, el_one_unique)
        el_k_unique = support_elimination(data, k_unique, min_support)
        lim = lim + 1
    return el_k_unique

In [64]:
a = apriori_algorigth('GroceryStore.csv', max_length=4, min_support=0.048)

In [65]:
write_file(a, f'Result-{datetime.datetime.now()}.csv')

### Association Rule

In [69]:
def association_rule_props(itemset, data):
    T = len(data)
    support_count_dict = {}
    gen_dict = {}
    for item in itemset:
        if item not in support_count_dict:
            support_count_dict[item] = calculate_support_count(item, data, True)
        all_subsets = chain.from_iterable(combinations(item, i) for i in range(1, len(item)))
        for a in all_subsets:
            if a not in support_count_dict:
                support_count_dict[a] = calculate_support_count(a, data, True)
            diff = tuple(set(item).difference(set(a)))
            if not (a, diff) in gen_dict:
                gen_dict[(a, diff)] = {
                    'support': support_count_dict[item] / T,
                    'confidence': support_count_dict[item] / support_count_dict[a]
                }
    return gen_dict

In [70]:
def association_rule(min_support, min_confidence, data_file_name, frequent_file_name):
    data = read_file(data_file_name)
    frequent = read_file(frequent_file_name)
    characteristics = association_rule_props(frequent, data)
    return {k: v for k, v in characteristics.items() if v['support'] >= min_support and v['confidence'] >= min_confidence}

In [71]:
min_support = 0.096
min_confidence = 0.49
association_rule(min_support, min_confidence, 'GroceryStore.csv', 'Result-2021-03-15 15:47:31.656068.csv')

{(('Lassi', 'Panner'), ('Sweet',)): {'support': 0.10098994092288041,
  'confidence': 0.5066079295154186},
 (('Lassi', 'Sweet'), ('Panner',)): {'support': 0.10098994092288041,
  'confidence': 0.49107142857142855},
 (('Panner', 'Sweet'), ('Lassi',)): {'support': 0.10098994092288041,
  'confidence': 0.5049900199600799},
 (('Butter', 'Panner'), ('Ghee',)): {'support': 0.0971579115439885,
  'confidence': 0.4901329037454692},
 (('Butter', 'Milk'), ('Sugar',)): {'support': 0.09875459045186014,
  'confidence': 0.49658771577679645},
 (('Butter', 'Sweet'), ('Sugar',)): {'support': 0.0997125977965831,
  'confidence': 0.4911521824616595},
 (('Sugar', 'Sweet'), ('Butter',)): {'support': 0.0997125977965831,
  'confidence': 0.5006012024048097}}

### FP-Growth Algorithm

### Experiment on the Dataset