In [1]:
import csv
import datetime
import math
import numpy as np

### Basic I/O

In [2]:
def read_file(filename):
    """Load a comma-separated transaction file.

    Each CSV record becomes one transaction. The last field of every record
    is discarded (presumably an empty field produced by a trailing comma --
    verify against the data file) and the remaining fields are sorted.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        A list with one sorted list of field strings per record.
    """
    with open(filename, newline='') as handle:
        records = csv.reader(handle, delimiter=',')
        return [sorted(record[:-1]) for record in records]

# Smoke test: load the dataset and keep only the first five transactions.
# NOTE: `read` is reused by the write_file demo in a later cell.
read = read_file('GroceryStore.csv')[:5]
print(read)

[['Butter', 'Cheese', 'Coffee Powder', 'Ghee', 'Lassi', 'Yougurt'], ['Coffee Powder', 'Ghee'], ['Butter', 'Cheese', 'Lassi', 'Tea Powder'], ['Bread', 'Butter', 'Cheese', 'Coffee Powder', 'Panner', 'Tea Powder'], ['Butter', 'Cheese', 'Coffee Powder', 'Sugar', 'Sweet', 'Yougurt']]


In [3]:
def write_file(data, filename):
    """Write rows to a comma-separated file, replacing any existing file.

    Parameters:
        data: iterable of rows; each row is an iterable of field values.
        filename: path of the CSV file to create.
    """
    with open(filename, 'w', newline='') as handle:
        csv.writer(handle, delimiter=',').writerows(data)

# Round-trip check: write the five sample rows to a timestamped CSV file.
# NOTE(review): str(datetime.now()) contains a space and ':' characters, which
# are not allowed in Windows file names -- consider strftime('%Y%m%d-%H%M%S').
write_file(read, f'Result-{datetime.datetime.now()}.csv')

### Apriori Algorithm

In [4]:
def support_elimination(data, items, minimal_support):
    """Keep the candidates whose support is at least ``minimal_support``.

    The support of a candidate is the fraction of transactions in ``data``
    that contain it. How a candidate is matched depends on the candidate
    itself, not on the container it arrives in:

    * a tuple / list / set / frozenset is an itemset and matches a
      transaction when all of its members occur in that transaction;
    * anything else (e.g. a string) is a single item and matches when it is
      a member of the transaction.

    Parameters:
        data: sequence of transactions, each an iterable of hashable items.
        items: iterable of hashable candidates (single items or itemsets).
        minimal_support: inclusive threshold, as a fraction between 0 and 1.

    Returns:
        A list of the surviving candidates, in the order they were first
        seen in ``items``. Returns an empty list when ``data`` is empty,
        because support is undefined without transactions.
    """
    total = len(data)
    if total == 0:
        return []

    # Build each transaction's set once instead of once per candidate.
    transactions = [set(row) for row in data]

    support = {}
    for candidate in items:
        if candidate in support:
            continue  # duplicate candidate; already counted
        if isinstance(candidate, (tuple, list, set, frozenset)):
            wanted = set(candidate)
            hits = sum(1 for row in transactions if wanted.issubset(row))
        else:
            hits = sum(1 for row in transactions if candidate in row)
        support[candidate] = hits / total

    return [candidate for candidate, value in support.items()
            if value >= minimal_support]

In [6]:
def find_unique_level_one(data):
    """Return every distinct item found in the transactions.

    Parameters:
        data: iterable of transactions, each an iterable of hashable items.

    Returns:
        A tuple of the distinct items in ascending sorted order.
    """
    distinct = dict.fromkeys(item for transaction in data for item in transaction)
    return tuple(sorted(distinct))

In [7]:
# Level-1 pass: collect every distinct item in the dataset, then list the
# items whose support is at least 0.4 (the result is displayed, not stored).
data = read_file('GroceryStore.csv')
unique = find_unique_level_one(data)
support_elimination(data, unique, 0.4)

['Bread',
 'Butter',
 'Cheese',
 'Coffee Powder',
 'Ghee',
 'Lassi',
 'Milk',
 'Panner',
 'Sugar',
 'Sweet',
 'Tea Powder',
 'Yougurt']

In [8]:
def generate_candidates_fk1_1(previous_step_f_itemset, step_one_f_itemset):
    """Grow each previous-level itemset by one single item.

    Every entry of ``previous_step_f_itemset`` is combined with every entry
    of ``step_one_f_itemset`` that it does not already contain. Each result
    is stored as a sorted tuple, so the same combination reached through
    different paths collapses into one candidate.

    Parameters:
        previous_step_f_itemset: iterable of itemsets from the previous
            level. An entry may be a list/tuple of items or a bare single
            item (as at level one).
        step_one_f_itemset: iterable of single items used for extension.

    Returns:
        A set of sorted tuples, each one item longer than the entry it was
        built from. An empty input yields an empty set.
    """
    candidates = set()
    # Deciding per entry (instead of peeking at element 0) keeps empty and
    # non-indexable inputs such as sets working.
    for entry in previous_step_f_itemset:
        members = tuple(entry) if isinstance(entry, (list, tuple)) else (entry,)
        for single in step_one_f_itemset:
            if single not in members:
                candidates.add(tuple(sorted((*members, single))))
    return candidates

In [9]:
# Example: build all 2-item candidates by pairing the distinct items with
# themselves. NOTE: `unique` is the unfiltered item list, not the
# support-filtered one.
generate_candidates_fk1_1(unique, unique)

{('Bread', 'Butter'),
 ('Bread', 'Cheese'),
 ('Bread', 'Coffee Powder'),
 ('Bread', 'Ghee'),
 ('Bread', 'Lassi'),
 ('Bread', 'Milk'),
 ('Bread', 'Panner'),
 ('Bread', 'Sugar'),
 ('Bread', 'Sweet'),
 ('Bread', 'Tea Powder'),
 ('Bread', 'Yougurt'),
 ('Butter', 'Cheese'),
 ('Butter', 'Coffee Powder'),
 ('Butter', 'Ghee'),
 ('Butter', 'Lassi'),
 ('Butter', 'Milk'),
 ('Butter', 'Panner'),
 ('Butter', 'Sugar'),
 ('Butter', 'Sweet'),
 ('Butter', 'Tea Powder'),
 ('Butter', 'Yougurt'),
 ('Cheese', 'Coffee Powder'),
 ('Cheese', 'Ghee'),
 ('Cheese', 'Lassi'),
 ('Cheese', 'Milk'),
 ('Cheese', 'Panner'),
 ('Cheese', 'Sugar'),
 ('Cheese', 'Sweet'),
 ('Cheese', 'Tea Powder'),
 ('Cheese', 'Yougurt'),
 ('Coffee Powder', 'Ghee'),
 ('Coffee Powder', 'Lassi'),
 ('Coffee Powder', 'Milk'),
 ('Coffee Powder', 'Panner'),
 ('Coffee Powder', 'Sugar'),
 ('Coffee Powder', 'Sweet'),
 ('Coffee Powder', 'Tea Powder'),
 ('Coffee Powder', 'Yougurt'),
 ('Ghee', 'Lassi'),
 ('Ghee', 'Milk'),
 ('Ghee', 'Panner'),
 ('Ghee'

In [10]:
def apriori_algorigth(filename, max_length, min_support):
    """Find the frequent itemsets of size ``max_length`` in a transaction file.

    Starting from the frequent single items, each round extends the current
    frequent itemsets by one frequent single item and discards the
    candidates whose support is below ``min_support``.

    Parameters:
        filename: path of the CSV transaction file.
        max_length: size of the itemsets to return; with 1 the frequent
            single items themselves are returned.
        min_support: inclusive support threshold, as a fraction of the
            number of transactions.

    Returns:
        A list of the frequent itemsets of size ``max_length`` (tuples, or
        bare items when ``max_length`` is 1). The list is empty when no
        itemset of that size is frequent.
    """
    data = read_file(filename)
    frequent_items = support_elimination(
        data, find_unique_level_one(data), min_support)

    frequent_itemsets = frequent_items
    level = 1
    while level < max_length:
        if not frequent_itemsets:
            # No frequent itemsets at this size, so none can exist at a
            # larger size: stop rather than extend an empty list.
            break
        candidates = generate_candidates_fk1_1(frequent_itemsets, frequent_items)
        frequent_itemsets = support_elimination(data, candidates, min_support)
        level += 1
    return frequent_itemsets

In [11]:
# Mine the itemsets of size 4 whose support is at least 0.049.
a = apriori_algorigth('GroceryStore.csv', max_length=4, min_support=0.049)

In [12]:
# Persist the mined itemsets, one itemset per CSV row.
# NOTE(review): str(datetime.now()) contains a space and ':' characters, which
# are not allowed in Windows file names -- consider strftime('%Y%m%d-%H%M%S').
write_file(a, f'Result-{datetime.datetime.now()}.csv')

### Association Rule

In [None]:
def calculate_support(itemset, instance):
    """Placeholder: not implemented yet.

    Both arguments are currently ignored and the function returns None.
    """
    return None

In [None]:
def calculate_condifence(itemset, instance):
    """Placeholder: not implemented yet.

    Both arguments are currently ignored and the function returns None.
    The misspelled name ("condifence") is kept so existing calls still work.
    """
    return None

In [71]:
def generate_sum_pairs(number):
    """Return all ordered pairs of positive integers that add up to ``number``.

    Parameters:
        number: the integer total to split.

    Returns:
        A set of ``(first, second)`` tuples with ``first + second == number``
        and both parts at least 1; empty when ``number`` is below 2.
    """
    # Walking first over 1..number-1 already produces every pair together
    # with its mirror image.
    return {(first, number - first) for first in range(1, number)}

In [72]:
# Example: every ordered split of 10 into two positive parts.
generate_sum_pairs(10)

{(1, 9), (2, 8), (3, 7), (4, 6), (5, 5), (6, 4), (7, 3), (8, 2), (9, 1)}

In [73]:
def get_intermediate_list(item, xomb, pivot_idx, pivot_idx_end, is_main):
    """Collect a run of consecutive elements of ``item``, wrapping at the end.

    Positions ``pivot_idx`` .. ``pivot_idx_end - 1`` are read in order; a
    position at or beyond ``len(item)`` is reduced modulo the length.

    Parameters:
        item: indexable sequence to read from.
        xomb: pair of sizes; only ``xomb[1]`` is used, and only when
            ``is_main`` is true.
        pivot_idx: first position to read.
        pivot_idx_end: position one past the last one to read.
        is_main: selects the return shape (see below).

    Returns:
        When ``is_main`` is false, the list of collected elements.
        When ``is_main`` is true, a tuple ``(elements, next_start, next_end)``
        where ``next_start`` is one past the last (wrapped) position read and
        ``next_end`` is ``next_start + xomb[1]``. If nothing was read, the
        incoming ``pivot_idx`` and ``pivot_idx_end`` are returned unchanged.
    """
    size = len(item)
    collected = []
    last_position = None
    for position in range(pivot_idx, pivot_idx_end):
        wrapped = position % size if position >= size else position
        collected.append(item[wrapped])
        last_position = wrapped

    if not is_main:
        return collected
    if last_position is None:
        # Empty range: there is no last position to advance from.
        return (collected, pivot_idx, pivot_idx_end)
    return (collected, last_position + 1, last_position + xomb[1] + 1)

def generate_combinations(itemset):
    """Split every itemset into (left, right) parts in all cyclic ways.

    For each itemset, each starting position and each pair of part sizes
    from ``generate_sum_pairs(len(item))``, the left part is the run of
    elements starting at that position (wrapping around the end) and the
    right part is the run that follows it. Only splits that are contiguous
    in this cyclic order are produced.

    Parameters:
        itemset: iterable of itemsets, each an indexable sequence.

    Returns:
        A set of ``(left, right)`` pairs, both stored as tuples.
    """
    splits = set()
    for item in itemset:
        size_pairs = generate_sum_pairs(len(item))
        for start in range(len(item)):
            for sizes in size_pairs:
                # Left part, plus the position range where the right part begins.
                left, right_start, right_end = get_intermediate_list(
                    item, sizes, start, start + sizes[0], is_main=True)
                # Right part: the elements that follow the left part.
                right = get_intermediate_list(
                    item, sizes, right_start, right_end, is_main=False)
                splits.add((tuple(left), tuple(right)))
    return splits

In [74]:
# Example: all cyclic (left, right) splits of a single four-item itemset.
itemset = [['Bear', 'Cat', 'Dog', 'Fox']]
generate_combinations(itemset)

{(('Bear',), ('Cat', 'Dog', 'Fox')),
 (('Bear', 'Cat'), ('Dog', 'Fox')),
 (('Bear', 'Cat', 'Dog'), ('Fox',)),
 (('Cat',), ('Dog', 'Fox', 'Bear')),
 (('Cat', 'Dog'), ('Fox', 'Bear')),
 (('Cat', 'Dog', 'Fox'), ('Bear',)),
 (('Dog',), ('Fox', 'Bear', 'Cat')),
 (('Dog', 'Fox'), ('Bear', 'Cat')),
 (('Dog', 'Fox', 'Bear'), ('Cat',)),
 (('Fox',), ('Bear', 'Cat', 'Dog')),
 (('Fox', 'Bear'), ('Cat', 'Dog')),
 (('Fox', 'Bear', 'Cat'), ('Dog',))}

In [None]:
def association_rule(min_support, min_confidence, file_name):
    """Unfinished: currently only loads the transactions from ``file_name``.

    ``min_support`` and ``min_confidence`` are not used yet, and the function
    returns None.
    """
    transactions = read_file(file_name)  # TODO: derive rules from these
    return None

### FP-Growth Algorithm

### Experiment on the Dataset