# Apriori Algorithm for Frequent Item Set Mining and Association Rule Learning 

### Dataset Name : Grocery Dataset 
### Number of transactions : 7501

This dataset consists of the various transactions made at a grocery store by different customers.We are applying the apriori algorithm to find the frequent dataset and association rules so as to place the items frequently purchased together.

In [9]:
import csv
import itertools
from itertools import combinations 

In [10]:
DataFile = open('store_data.csv', 'r')
minsup = 0.01
f2 = "Rules.txt"
f1 = "FItems.txt"
minconf = 0.39

## Function to frequency of individual items
In this function we find the frequency values for each product and eliminate those which have support less than the specified value. 

In [11]:
def L1():

    #Get all 1-itemsets in the list items and their counts in the dictionary counts
    DataCaptured = csv.reader(DataFile, delimiter=',')
    data = list(DataCaptured)
    for e in data:
        e = sorted(e)
    count = {}
    for items in data:
        for item in items:
            if item not in count:
                count[(item)] = 1
            else:
                count[(item)] = count[(item)] + 1
    print("C1 Items", count)
    print("C1 Length : ", len(count))
    print()

    #Check for minSupport
    count2 = {k: v for k, v in count.items() if v >= minsup*7501}
    print("L1 Items : ", count2)
    print("L1 Length : ", len(count2))
    print()

    return count2, data

## Generates the various combinations of items
 This function finds the combinations of items.

In [12]:
def generateCk(Lk_1, flag, data):
    
    Ck = []

    if flag == 1:
        flag = 0
        Ck=list(itertools.combinations(Lk_1,2))
        print()

    else:
        for item in Lk_1:
            k=len(item)
        unique_list = []
        for i in Lk_1:
            for j in i:
                if j not in unique_list:
                    unique_list.append(j)
        Ck = list(itertools.combinations(unique_list,k+1))
        print()
        
    L = generateLk(set(Ck), data)
    return L, flag

## Check minSupport

This function checks the support value of the combinations generated in the previous function.

In [13]:
def generateLk(Ck, data):
    
    count = {}
    for itemset in Ck:
        #print(itemset)
        i = len(itemset)
        for transaction in data:
            if all(e in transaction for e in itemset):
                if itemset not in count:
                    count[itemset] = 1
                else:
                    count[itemset] = count[itemset] + 1
        
            
    count2 = {k: v for k, v in count.items() if v >= minsup*7501}
    
    
    print("C" + str(i) +" Items : ", count)
    print("C" + str(i) +" Length : ", len(count))
    print()
    
    print("L" + str(i) +" Items : ", count2)
    print("L" + str(i) + " Length : ", len(count2))
    print()
    return count2

## Generates association rules

This function generates association rules for the frequent itemset.

In [14]:
def rulegenerator(fitems):
    
    counter = 0 #number of rules
    for itemset in fitems.keys():
        if isinstance(itemset, str):
            continue
        length = len(itemset)

        union_support = fitems[tuple(itemset)]#the freq of all items together
        for i in range(1, length):

            lefts = map(list, itertools.combinations(itemset, i))
            for left in lefts:
                if len(left) == 1:
                    if ''.join(left) in fitems:
                        leftcount = fitems[''.join(left)]
                        
                        conf = union_support / leftcount
                else:
                    if tuple(left) in fitems:
                        leftcount = fitems[tuple(left)]
                        conf = union_support / leftcount
                if conf >= minconf:
                    fo = open(f2, "a+")
                    right = list(itemset[:])
                    for e in left:
                        right.remove(e)
                    fo.write(str(left) + ' (' + str(leftcount) + ')' + ' -> ' + str(right) + ' (' + str(fitems[''.join(right)]) + ')' + ' [' + str(conf) + ']' + '\n')
                    print(str(left) + ' -> ' + str(right) + ' (' + str(conf) + ')')
                    counter = counter + 1
                    
                    fo.close()
                    
    print(counter, "Rules generated")

## Apriori function

Calls the other functions to generate the required output.

In [15]:
def apriori():
    
    L, data = L1()
    flag = 1 #to generate C2
    FreqItems = dict(L)
    while(len(L) != 0):
        fo = open(f1, "a+")
        for k, v in L.items():
            fo.write(str(k) + ' >>> ' + str(v) + '\n\n')
        fo.close()

        L, flag = generateCk(L, flag, data)
        FreqItems.update(L)
        
    rulegenerator(FreqItems)

In [16]:
if __name__ == '__main__':
    apriori()

C1 Items {'shrimp': 536, 'almonds': 153, 'avocado': 250, 'vegetables mix': 193, 'green grapes': 68, 'whole weat flour': 70, 'yams': 86, 'cottage cheese': 239, 'energy drink': 200, 'tomato juice': 228, 'low fat yogurt': 574, 'green tea': 991, 'honey': 356, 'salad': 37, 'mineral water': 1788, 'salmon': 319, 'antioxydant juice': 67, 'frozen smoothie': 475, 'spinach': 53, 'olive oil': 494, 'burgers': 654, 'meatballs': 157, 'eggs': 1348, 'chutney': 31, 'turkey': 469, 'milk': 972, 'energy bar': 203, 'whole wheat rice': 439, 'whole wheat pasta': 221, 'french fries': 1282, 'soup': 379, 'light cream': 117, 'shallot': 58, 'frozen vegetables': 715, 'spaghetti': 1306, 'pet food': 49, 'cookies': 603, 'cooking oil': 383, 'champagne': 351, 'chocolate': 1230, 'chicken': 450, 'oil': 173, 'fresh tuna': 167, 'tomatoes': 513, 'black tea': 107, 'extra dark chocolate': 90, 'protein bar': 139, 'red wine': 211, 'pasta': 118, 'pepper': 199, 'shampoo': 37, 'rice': 141, 'sparkling water': 47, 'ham': 203, 'body s

C3 Items :  {('low fat yogurt', 'champagne', 'honey'): 1, ('spaghetti', 'eggs', 'ground beef'): 67, ('milk', 'green tea', 'grated cheese'): 8, ('shrimp', 'tomatoes', 'fresh bread'): 6, ('turkey', 'cake', 'salmon'): 3, ('spaghetti', 'olive oil', 'salmon'): 18, ('cake', 'grated cheese', 'cereals'): 1, ('cooking oil', 'red wine', 'cereals'): 1, ('shrimp', 'salmon', 'honey'): 6, ('turkey', 'chocolate', 'cake'): 10, ('chicken', 'avocado', 'salmon'): 3, ('green tea', 'ground beef', 'cereals'): 13, ('pancakes', 'chocolate', 'frozen smoothie'): 16, ('milk', 'frozen smoothie', 'cereals'): 9, ('shrimp', 'chocolate', 'herb & pepper'): 12, ('green tea', 'escalope', 'frozen smoothie'): 7, ('soup', 'frozen vegetables', 'frozen smoothie'): 4, ('turkey', 'cookies', 'herb & pepper'): 2, ('pancakes', 'chicken', 'burgers'): 9, ('chocolate', 'chicken', 'tomatoes'): 13, ('ground beef', 'avocado', 'grated cheese'): 6, ('eggs', 'avocado', 'cooking oil'): 2, ('tomatoes', 'cooking oil', 'honey'): 4, ('champagn

C4 Items :  {('french fries', 'ground beef', 'olive oil', 'pancakes'): 2, ('spaghetti', 'chocolate', 'french fries', 'olive oil'): 12, ('spaghetti', 'chocolate', 'ground beef', 'pancakes'): 14, ('spaghetti', 'eggs', 'ground beef', 'olive oil'): 11, ('eggs', 'chocolate', 'french fries', 'olive oil'): 5, ('spaghetti', 'chocolate', 'ground beef', 'milk'): 22, ('mineral water', 'chocolate', 'milk', 'pancakes'): 19, ('chocolate', 'frozen vegetables', 'milk', 'olive oil'): 15, ('eggs', 'chocolate', 'french fries', 'ground beef'): 9, ('chocolate', 'french fries', 'milk', 'olive oil'): 4, ('spaghetti', 'eggs', 'olive oil', 'pancakes'): 10, ('mineral water', 'spaghetti', 'eggs', 'frozen vegetables'): 22, ('mineral water', 'spaghetti', 'frozen vegetables', 'french fries'): 16, ('mineral water', 'eggs', 'chocolate', 'ground beef'): 30, ('mineral water', 'chocolate', 'olive oil', 'pancakes'): 13, ('eggs', 'chocolate', 'milk', 'olive oil'): 10, ('mineral water', 'eggs', 'ground beef', 'milk'): 25, 

<div class="alert alert-block alert-info">Thank You<br>Regards<br>Dimple K Sai & Shripriya Bhat<br>JSS Science and Technology University</div>