# Apriori

In [1]:
import numpy as np

from apriori import Apriori

### Generate data

Generate lists of made-up items (with random names) that have some inherent association between them (determined by comparing the similarity of their prime factors).

In [2]:
# item codes: the integers 2..49
items = np.arange(2,50)

# item names: one random 4-letter uppercase label per item code.
# Names must be unique: a duplicate would make a name lookup resolve two
# different items to the same code, so redraw whenever a collision occurs.
names = []
for _ in items:
    name = "".join(chr(c) for c in np.random.randint(65,91,size=4))
    while name in names:
        name = "".join(chr(c) for c in np.random.randint(65,91,size=4))
    names.append(name)

# helper methods

def factorize(num):
    """Return the prime factors of an integer, in ascending order.

    Repeated factors appear once per occurrence, e.g. 12 -> [2, 2, 3].
    Any value of 1 or less yields an empty list.
    """
    primes = []
    divisor = 2
    while num > 1:
        if num % divisor == 0:
            # divisor is the smallest remaining factor: record it and keep
            # trying the same divisor, since it may divide num again
            primes.append(divisor)
            num //= divisor
        else:
            divisor += 1
    return primes

def compare_factores(a,b):
    """Return the fraction of entries that two factor lists have in common.

    Entries are matched one-to-one (a value repeated in one list needs the
    same number of repeats in the other to be fully matched), and the result
    is ``2 * matched / (len(a) + len(b))`` expressed as
    ``1 - unmatched / total``. Element order does not affect the result.

    Parameters:
        a, b: sequences of factors. Neither input is modified.

    Returns:
        A float between 0.0 (nothing shared) and 1.0 (identical contents).
        Two empty inputs share nothing and give 0.0 instead of dividing by zero.
    """
    before = len(a) + len(b)
    if before == 0:
        # no factors on either side: nothing to compare
        return 0.0

    # working copy of b; each matched entry is consumed so it cannot match twice
    unmatched_b = list(b)
    shared = 0
    for factor in a:
        try:
            unmatched_b.remove(factor)
        except ValueError:
            # factor has no remaining partner in b
            continue
        shared += 1

    # every matched pair removes one entry from each side
    after = before - 2*shared
    return 1-after/before

def similarity(a,b,minimum=0.05):
    """Return the factor similarity of two numbers, lifted by a floor.

    The raw score is compare_factores applied to the factorizations of a
    and b. It is then rescaled as (raw + minimum) / (1 + minimum), so a raw
    score of 0 maps to minimum / (1 + minimum) and a raw score of 1 stays 1.
    """
    raw = compare_factores(factorize(a), factorize(b))
    return (raw + minimum) / (1 + minimum)

def generate_purchase():
    """Simulate one shopper and return the names of the items they bought.

    The basket starts with one random item. Every other item is then
    considered once, in random order, and is added when a scaled random draw
    falls below its mean similarity to the items already in the basket.
    """
    # per-shopper scale (0.25 to 1.25) applied to every acceptance draw below;
    # a smaller value makes each candidate more likely to be accepted
    inflate = np.random.random() + 0.25

    # the basket holds indices into items, seeded with one random pick
    basket = [np.random.randint(len(items))]

    # visit all item indices in a random order
    candidates = np.arange(len(items))
    np.random.shuffle(candidates)

    for idx in candidates:
        if idx in basket:
            continue
        # mean similarity between the candidate and everything bought so far
        total = 0
        for bought in basket:
            total += similarity(items[idx], items[bought])
        chance = total / len(basket)
        if np.random.random()*inflate < chance:
            basket.append(idx)

    # translate indices to item names
    return [names[i] for i in basket]

# data generation
# simulate the shopping lists of 1000 shoppers
people = [generate_purchase() for _ in range(1000)]

# basket sizes across all shoppers: (largest, smallest, mean)
lists = np.array([len(basket) for basket in people])
lists.max(),lists.min(),lists.mean()

(40, 1, 19.002)

### Fit

In [3]:
# Create the Apriori object (imported from the local apriori module) and fit it
# on the generated shopping lists. Each entry of `people` is a list of item
# names only; the numeric item codes are not passed in.
apriori = Apriori()
apriori.fit(people)

### Observe rules

In [4]:
# Retrieve the rules and display them as the cell output.
# NOTE(review): the three positional thresholds are presumably minimum support,
# confidence and lift (matching the columns of the displayed table) — confirm
# against Apriori.get_rules.
df = apriori.get_rules(0.03,0.2,1.5)
df

Unnamed: 0,item_a,item_b,support,confidence(left),lift
0,EKCH,ONOT,0.108,0.7152317880794702,1.5057511327988846
1,EKCH,CECT,0.093,0.6158940397350994,1.6511904550538856
2,YCQB,JHEX,0.098,0.7205882352941176,1.5331664580725908
3,EFVK,WMUF,0.089,0.4178403755868544,1.5767561342900167
4,DSIN,PJWZ,0.202,0.5804597701149425,1.5155607574802676
5,MLFO,KMNG,0.066,0.532258064516129,1.5472618154538635
6,ENGQ,UGCZ,0.095,0.5026455026455027,1.5094459538903984
7,ENGQ,KYNI,0.07,0.3703703703703703,1.7146776406035664
8,KYNI,UGCZ,0.119,0.5509259259259259,1.6544322099877655


**Making sense of data, based on the inherent associations**

In [5]:
# translate the two item names of every rule back to their numeric codes
for rule in df.values:
    left_name, right_name = rule[0], rule[1]
    print(items[names.index(left_name)], items[names.index(right_name)])

11 22
11 33
19 38
25 5
27 9
13 39
49 35
49 7
7 35


Not surprisingly, the items in each rule share prime factors; in most cases one is a multiple of the other.<br>
Prime numbers are more common here than other numbers because the lists that are seeded with a prime number are shorter, which increases the overall propensity of prime numbers compared to numbers that appear in long lists of many different even numbers (or numbers with many factors).

In [16]:
# rules among the more commonly occurring numbers
df = apriori.get_rules(0.3,0.4,1.3)
for rule in df.values:
    left_name, right_name = rule[0], rule[1]
    print(items[names.index(left_name)], items[names.index(right_name)])

22 32
16 4
32 16
16 8
16 48
34 4
34 2
38 4
38 32
38 14
38 46
32 4
46 4
44 4
26 4
32 8
26 2


**Important note: the Apriori model received only the names, and had no prior knowledge of these built-in relationships**