# Week 1 - Session 2: Apriori Algorithm

In [4]:
from csv import reader
from collections import defaultdict
from itertools import chain, combinations
from optparse import OptionParser
from utils import *

def apriori(itemSetList, minSup, minConf):
    """Mine frequent itemsets and association rules from in-memory transactions.

    Runs the level-wise Apriori loop: starting from the candidates produced by
    ``getItemSetFromList``, each pass keeps the candidates that
    ``getAboveMinSup`` accepts, joins them into the next level's candidates
    with ``getUnion``, and drops candidates rejected by ``pruning``. The loop
    stops at the first level for which ``getAboveMinSup`` returns something
    falsy.

    Args:
        itemSetList: The transactions; handed unchanged to
            ``getItemSetFromList`` and ``getAboveMinSup``.
        minSup: Support threshold, forwarded to ``getAboveMinSup``.
        minConf: Confidence threshold, forwarded to ``associationRule``.

    Returns:
        A ``(frequentByLevel, rules)`` tuple. ``frequentByLevel`` is a dict
        mapping the level number (1, 2, ...) to that level's result from
        ``getAboveMinSup``; ``rules`` is whatever ``associationRule`` returns,
        sorted in place in ascending order of each rule's element at index 2
        (presumably the confidence -- confirm against ``utils``).
    """
    # Support bookkeeping shared across all passes; getAboveMinSup receives it
    # on every call and associationRule reads it at the end.
    supportCounts = defaultdict(int)
    frequentByLevel = {}

    candidates = getItemSetFromList(itemSetList)
    frequent = getAboveMinSup(candidates, itemSetList, minSup, supportCounts)

    level = 1
    while frequent:
        frequentByLevel[level] = frequent
        level += 1
        # Self-join the current level to build candidates for the next one.
        candidates = getUnion(frequent, level)
        # Subset test against the level that was just stored.
        candidates = pruning(candidates, frequent, level - 1)
        # Rescan the transactions to keep only supported candidates.
        frequent = getAboveMinSup(candidates, itemSetList, minSup, supportCounts)

    rules = associationRule(frequentByLevel, supportCounts, minConf)
    rules.sort(key=lambda rule: rule[2])
    return frequentByLevel, rules

def aprioriFromFile(fname, minSup, minConf):
    """Mine frequent itemsets and association rules from a transaction file.

    Same level-wise procedure as the in-memory variant, except that both the
    initial candidates and the transactions come from ``getFromFile(fname)``.

    Args:
        fname: Path passed straight to ``getFromFile``.
        minSup: Support threshold, forwarded to ``getAboveMinSup``.
        minConf: Confidence threshold, forwarded to ``associationRule``.

    Returns:
        A ``(freqSetsBySize, rules)`` tuple. ``freqSetsBySize`` is a dict
        keyed by level number (1, 2, ...) holding each level's result from
        ``getAboveMinSup``; ``rules`` is the ``associationRule`` result sorted
        in place, ascending, by each rule's element at index 2 (presumably
        the confidence -- confirm against ``utils``).
    """
    candidates, transactions = getFromFile(fname)

    freqSetsBySize = {}
    # One accumulator is threaded through every support scan and later handed
    # to associationRule.
    supCounter = defaultdict(int)

    def keepFrequent(cands):
        # Scan the transactions and keep the candidates meeting minSup.
        return getAboveMinSup(cands, transactions, minSup, supCounter)

    size = 1
    survivors = keepFrequent(candidates)
    while survivors:
        freqSetsBySize[size] = survivors
        # Join this level with itself to propose the next level's candidates,
        # then discard the ones the subset test rejects.
        joined = getUnion(survivors, size + 1)
        candidates = pruning(joined, survivors, size)
        survivors = keepFrequent(candidates)
        size += 1

    rules = associationRule(freqSetsBySize, supCounter, minConf)
    rules.sort(key=lambda entry: entry[2])

    return freqSetsBySize, rules

In [5]:
# Toy transaction database: ten transactions (TID 1-10 in order), each
# expanded from a string of single-letter item labels into a list of items.
itemSetList = [
    list(basket)
    for basket in (
        "AB", "ABE", "ABD", "BE", "ABDE",
        "CD", "BCDF", "AD", "ABDF", "ABC",
    )
]
freqItemSet, rules = apriori(itemSetList, minSup=0.2, minConf=0.2)

In [6]:
# Display the frequent itemsets returned by apriori(), keyed by level (1, 2, ...).
freqItemSet

{1: {frozenset({'C'}),
  frozenset({'D'}),
  frozenset({'A'}),
  frozenset({'E'}),
  frozenset({'B'}),
  frozenset({'F'})},
 2: {frozenset({'A', 'D'}),
  frozenset({'B', 'C'}),
  frozenset({'D', 'F'}),
  frozenset({'C', 'D'}),
  frozenset({'B', 'F'}),
  frozenset({'A', 'B'}),
  frozenset({'B', 'E'}),
  frozenset({'B', 'D'}),
  frozenset({'A', 'E'})},
 3: {frozenset({'A', 'B', 'D'}),
  frozenset({'B', 'D', 'F'}),
  frozenset({'A', 'B', 'E'})}}

In [7]:
# Display the rules returned by apriori(); they were sorted ascending by their third field.
rules

[[{'B'}, {'C'}, 0.25],
 [{'B'}, {'F'}, 0.25],
 [{'B'}, {'D', 'F'}, 0.25],
 [{'B'}, {'A', 'E'}, 0.25],
 [{'A'}, {'E'}, 0.2857142857142857],
 [{'A'}, {'B', 'E'}, 0.2857142857142857],
 [{'D'}, {'F'}, 0.3333333333333333],
 [{'D'}, {'C'}, 0.3333333333333333],
 [{'D'}, {'B', 'F'}, 0.3333333333333333],
 [{'A', 'B'}, {'E'}, 0.3333333333333333],
 [{'B'}, {'E'}, 0.375],
 [{'B'}, {'A', 'D'}, 0.375],
 [{'A'}, {'B', 'D'}, 0.42857142857142855],
 [{'B'}, {'D'}, 0.5],
 [{'D'}, {'A', 'B'}, 0.5],
 [{'A', 'B'}, {'D'}, 0.5],
 [{'B', 'D'}, {'F'}, 0.5],
 [{'A'}, {'D'}, 0.5714285714285714],
 [{'D'}, {'A'}, 0.6666666666666666],
 [{'C'}, {'B'}, 0.6666666666666666],
 [{'C'}, {'D'}, 0.6666666666666666],
 [{'D'}, {'B'}, 0.6666666666666666],
 [{'E'}, {'A'}, 0.6666666666666666],
 [{'E'}, {'A', 'B'}, 0.6666666666666666],
 [{'B', 'E'}, {'A'}, 0.6666666666666666],
 [{'B'}, {'A'}, 0.75],
 [{'A', 'D'}, {'B'}, 0.75],
 [{'B', 'D'}, {'A'}, 0.75],
 [{'A'}, {'B'}, 0.8571428571428571],
 [{'F'}, {'D'}, 1.0],
 [{'F'}, {'B'}, 1.

In [8]:
# Run the file-based variant on the tesco2 CSV; the cell output is the
# (frequent itemsets, rules) tuple it returns.
aprioriFromFile(fname='../dataset/tesco2.csv', minSup=0.2, minConf=0.2)

({1: {frozenset({'apple'}),
   frozenset({'beer'}),
   frozenset({'rice'}),
   frozenset({'milk'}),
   frozenset({'mango'}),
   frozenset({'chicken'})},
  2: {frozenset({'milk', 'rice'}),
   frozenset({'beer', 'rice'}),
   frozenset({'beer', 'milk'}),
   frozenset({'apple', 'rice'}),
   frozenset({'apple', 'beer'}),
   frozenset({'beer', 'chicken'}),
   frozenset({'chicken', 'rice'})},
  3: {frozenset({'beer', 'milk', 'rice'}),
   frozenset({'apple', 'beer', 'rice'}),
   frozenset({'beer', 'chicken', 'rice'})}},
 [[{'beer'}, {'chicken'}, 0.3333333333333333],
  [{'beer'}, {'milk', 'rice'}, 0.3333333333333333],
  [{'beer'}, {'apple', 'rice'}, 0.3333333333333333],
  [{'beer'}, {'chicken', 'rice'}, 0.3333333333333333],
  [{'rice'}, {'milk'}, 0.5],
  [{'milk'}, {'rice'}, 0.5],
  [{'beer'}, {'milk'}, 0.5],
  [{'apple'}, {'rice'}, 0.5],
  [{'rice'}, {'apple'}, 0.5],
  [{'beer'}, {'apple'}, 0.5],
  [{'rice'}, {'chicken'}, 0.5],
  [{'rice'}, {'beer', 'milk'}, 0.5],
  [{'milk'}, {'beer', 'rice'}

In [12]:
# Run the file-based variant on the kaggle CSV with the same thresholds; the
# cell output is the (frequent itemsets, rules) tuple it returns.
aprioriFromFile(fname='../dataset/kaggle.csv', minSup=0.2, minConf=0.2)

({1: {frozenset({'Ghee'}),
   frozenset({'Yougurt'}),
   frozenset({'Tea Powder'}),
   frozenset({'Cheese'}),
   frozenset({'Butter'}),
   frozenset({'Bread'}),
   frozenset({'Coffee Powder'}),
   frozenset({'Panner'}),
   frozenset({'Sugar'}),
   frozenset({'Sweet'}),
   frozenset({'Lassi'}),
   frozenset({'Milk'})},
  2: {frozenset({'Coffee Powder', 'Lassi'}),
   frozenset({'Ghee', 'Milk'}),
   frozenset({'Lassi', 'Milk'}),
   frozenset({'Sugar', 'Yougurt'}),
   frozenset({'Cheese', 'Yougurt'}),
   frozenset({'Bread', 'Coffee Powder'}),
   frozenset({'Lassi', 'Sweet'}),
   frozenset({'Coffee Powder', 'Milk'}),
   frozenset({'Bread', 'Yougurt'}),
   frozenset({'Cheese', 'Coffee Powder'}),
   frozenset({'Coffee Powder', 'Ghee'}),
   frozenset({'Bread', 'Sweet'}),
   frozenset({'Milk', 'Sweet'}),
   frozenset({'Bread', 'Milk'}),
   frozenset({'Butter', 'Yougurt'}),
   frozenset({'Butter', 'Sweet'}),
   frozenset({'Ghee', 'Panner'}),
   frozenset({'Butter', 'Ghee'}),
   frozenset({'Bread