# Eclat

In [1]:
from collections import Counter

import numpy as np

from eclat import Eclat

### Generate data

Generate lists of made-up items (with random names) that have some inherent association between them (determined by comparing the similarity of their factors).

In [3]:
# seed NumPy's global generator so that "Restart & Run All" regenerates the
# same item names and the same shopping lists
SEED = 42
np.random.seed(SEED)

# item codes: the odd numbers 3, 5, ..., 47
items = np.arange(3,49,2)

# item names: one random 4-letter uppercase name per item code.
# A name that was already drawn is discarded and redrawn, because names are
# later mapped back to their codes with names.index(), which only finds the
# first occurrence of a name.
names = []
while len(names) < len(items):
    name = "".join(chr(code) for code in np.random.randint(65,91,size=4))
    if name not in names:
        names.append(name)

# helper methods

def factorize(num):
    """Return the prime factors of ``num`` in ascending order, with multiplicity.

    Any value that is not greater than 1 yields an empty list.
    """
    primes = []
    remaining = num
    while remaining > 1:
        # The smallest divisor >= 2 of a number is always prime, and the search
        # cannot come up empty because `remaining` divides itself.
        smallest = next(d for d in range(2, remaining + 1) if remaining % d == 0)
        primes.append(smallest)
        remaining = remaining // smallest
    return primes

def compare_factores(a,b):
    """Return the fraction of elements that two lists have in common.

    The lists are compared as multisets: every element of ``a`` is paired with
    at most one equal element of ``b``, so repeated values only match as often
    as they occur on both sides. Element order is irrelevant and neither input
    is modified.

    Args:
        a: list of hashable values (e.g. the factors of one number).
        b: list of hashable values (e.g. the factors of another number).

    Returns:
        ``1 - unmatched / (len(a) + len(b))`` as a float: 1.0 when both lists
        hold exactly the same elements, 0.0 when they share none. Two empty
        lists are treated as identical and give 1.0.
    """
    total = len(a) + len(b)
    if total == 0:
        # nothing to compare; avoid 0/0 and report the lists as identical
        return 1.0
    # multiset intersection keeps, per value, the smaller of the two counts
    shared = sum((Counter(a) & Counter(b)).values())
    # every shared pair accounts for one element on each side
    unmatched = total - 2 * shared
    return 1 - unmatched / total

def similarity(a,b,minimum=0.05):
    """Score how alike two integers are by the overlap of their factor lists.

    The raw overlap reported by ``compare_factores`` is shifted up by
    ``minimum`` and rescaled by ``1 + minimum``, so a raw overlap of 0 maps to
    ``minimum / (1 + minimum)`` and a raw overlap of 1 maps to 1.
    """
    overlap = compare_factores(factorize(a), factorize(b))
    return (overlap + minimum) / (1 + minimum)

def generate_purchase():
    """Simulate one shopper and return the names of the items they bought.

    The basket starts with a single random item. Every other item is then
    visited once, in random order, and is added when a random draw (scaled by a
    per-shopper factor) falls below its mean similarity to the items already in
    the basket.
    """
    # per-shopper scale in [0.25, 1.25); a smaller value makes every candidate
    # more likely to pass the test below, so the basket grows larger
    scale = np.random.random() + 0.25

    # the basket holds indices into `items` / `names`, seeded with one random item
    basket = [np.random.randint(len(items))]

    # visit all item indices in a random order
    visit_order = np.arange(len(items))
    np.random.shuffle(visit_order)

    for candidate in visit_order:
        if candidate in basket:
            continue
        code = items[candidate]
        # mean similarity between the candidate and everything bought so far
        affinity = sum(similarity(code, items[owned]) for owned in basket) / len(basket)
        if np.random.random() * scale < affinity:
            basket.append(candidate)

    # translate indices into item names
    return [names[owned] for owned in basket]

# data generation: 1000 simulated shoppers, one shopping list (list of item names) each
people = [generate_purchase() for _ in range(1000)]

### Fit

In [9]:
# create the miner and fit it on the generated shopping lists (lists of item names)
# NOTE(review): the positional arguments 0.16, 4, 6 are presumably a minimum
# support and the minimum / maximum group size — confirm against Eclat.fit
eclat = Eclat()
eclat.fit(people,0.16,4,6)

### Observe rules

In [11]:
# show the last eight groups found (a mix of 4- and 5-item groups)
eclat.groups[-8:]

[{'KPJW', 'QCZK', 'RXPQ', 'WPAZ'},
 {'KPJW', 'QIXI', 'RXPQ', 'WPAZ'},
 {'KPJW', 'RXPQ', 'SOII', 'WPAZ'},
 {'NXOC', 'QCZK', 'QIXI', 'RURM', 'WPAZ'},
 {'NXOC', 'QCZK', 'QIXI', 'RURM', 'RXPQ'},
 {'NXOC', 'QIXI', 'RURM', 'RXPQ', 'WPAZ'},
 {'KPJW', 'NXOC', 'RURM', 'RXPQ', 'WPAZ'},
 {'NXOC', 'QCZK', 'QIXI', 'RXPQ', 'WPAZ'}]

**Making sense of data, based on the inherent associations**

In [12]:
# translate each of the last eight groups from item names back to item codes
for group in eclat.groups[-8:]:
    codes = []
    for member in group:
        codes.append(items[names.index(member)])
    print(codes)

[33, 27, 3, 15]
[33, 3, 9, 15]
[33, 39, 3, 15]
[21, 27, 45, 3, 9]
[21, 27, 45, 15, 9]
[21, 3, 45, 15, 9]
[21, 33, 3, 45, 15]
[27, 3, 45, 15, 9]


**Important note: the Eclat object received only the names, and had no prior knowledge of these built-in relationships**