## Generating Association Rules Using the Apriori Algorithm and Brute Force

In [14]:
# connect your personal google drive to store dataset and trained model
#from google.colab import drive
# drive.mount('/content/gdrive/')

In [15]:
!pip install efficient-apriori

In [16]:
import time
import collections
import sys
import csv
from itertools import combinations
from efficient_apriori import apriori

### Define all Functions

In [17]:
def read_data(file_loc):
  """Read a comma-separated transaction file into a list of transactions.

  Args:
    file_loc: Path of the CSV file to read; every row is one transaction.

  Returns:
    A list with one entry per CSV row, each entry being the list of string
    fields found on that row.
  """
  # The context manager closes the file even when parsing raises, and
  # newline='' lets the csv reader do its own newline handling, as the csv
  # module documentation recommends.
  with open(file_loc, 'r', newline='') as file:
    return list(csv.reader(file, delimiter=','))

In [18]:
def print_itemcount_table(itemDict):
  """Print an ITEM / COUNT table with one row per entry of ``itemDict``.

  Args:
    itemDict: Mapping from an item key to its count. Keys are rendered with
      ``str`` and left-aligned in a 90-character column.
  """
  row_template = "{:<90} {:<10}"
  print (row_template.format('ITEM', 'COUNT'))
  for item_key in itemDict:
    print (row_template.format(str(item_key), itemDict[item_key]))

def print_itemsupport_table(itemDict):
  """Print an ITEM / Support table with one row per entry of ``itemDict``.

  Args:
    itemDict: Mapping from an item key to its support value. Keys are
      rendered with ``str`` and left-aligned in a 90-character column.
  """
  row_template = "{:<90} {:<10}"
  print (row_template.format('ITEM', 'Support'))
  for item_key in itemDict:
    print (row_template.format(str(item_key), itemDict[item_key]))

def print_list(plist):
  """Print every element of ``plist`` on its own line, in iteration order."""
  for entry in plist:
    print(entry)

def print_dict_set(itemDict):
  """Print each member of every value in ``itemDict`` and count the members.

  Args:
    itemDict: Mapping whose values are sized, iterable collections.

  Returns:
    The sum of ``len(value)`` over all values of ``itemDict``.
  """
  total = 0
  for members in itemDict.values():
    total += len(members)
    for member in members:
      print (member)
  return total

In [19]:
def apriori_prune(itemset, minSupport, transactions):
    """Keep only the candidate itemsets whose support reaches ``minSupport``.

    Support of a candidate is the number of transactions containing all of
    its items divided by the total number of transactions.

    Args:
        itemset: Iterable of candidate itemsets, each a sequence of items.
        minSupport: Threshold the support must reach (``>=``) to be kept.
        transactions: Sized collection of transactions, each an iterable of
            items.

    Returns:
        A tuple ``(frequent_itemset, itemcount, itemsupport)``:
        ``frequent_itemset`` is the list of surviving candidates in their
        original order; ``itemcount`` and ``itemsupport`` map
        ``tuple(candidate)`` to its transaction count and support.
    """
    frequent_itemset = []
    itemcount = {}
    itemsupport = {}

    total = len(transactions)
    if total == 0:
        # Support is undefined without transactions; nothing can be frequent,
        # and returning early avoids a division by zero below.
        return frequent_itemset, itemcount, itemsupport

    # Convert each transaction to a set once instead of once per candidate.
    transaction_sets = [set(trans) for trans in transactions]

    for item in itemset:
        wanted = set(item)
        count = sum(1 for trans in transaction_sets if wanted.issubset(trans))
        support = count / total
        if support >= minSupport:
            key = tuple(item)
            frequent_itemset.append(item)
            itemcount[key] = count
            itemsupport[key] = support
    return frequent_itemset, itemcount, itemsupport

In [20]:
def generate_candidate_set(frequent_itemset):
    """Join frequent k-itemsets that share a prefix into (k+1)-candidates.

    Two itemsets are joined when all of their items except the last are
    equal; the candidate is the earlier itemset followed by the last item of
    the later one.

    Args:
        frequent_itemset: List of itemsets, each itself a list of items.

    Returns:
        The list of joined candidates, ordered by the position of the pair
        that produced them.
    """
    candidates = []
    for position, left in enumerate(frequent_itemset):
        shared_prefix = left[:-1]
        for right in frequent_itemset[position + 1:]:
            if right[:-1] == shared_prefix:
                candidates.append(left + [right[-1]])
    return candidates


In [21]:
def generate_rules(transactions, frequent_itemset, itemcount, minConfidence):
    """Derive association rules from the frequent itemsets.

    For every frequent itemset with at least two items, each non-empty proper
    subset (in itemset order) is tried as antecedent and the remaining items
    form the consequent.

    Args:
        transactions: Sized collection of transactions; only its length is
            used, as the denominator of the rule support.
        frequent_itemset: List of frequent itemsets (sequences of items).
        itemcount: Mapping from ``tuple(itemset)`` to its transaction count.
            It must contain every itemset of two or more items and every
            antecedent derived from it.
        minConfidence: Threshold the confidence must reach (``>=``).

    Returns:
        A list of ``(antecedent, consequent, support, confidence)`` tuples,
        where antecedent and consequent are lists of items.

    Raises:
        KeyError: If a required itemset or antecedent is missing from
            ``itemcount``.
    """
    association_rules = []
    total = len(transactions)
    for itemset in frequent_itemset:
        if len(itemset) < 2:
            # A single item cannot be split into antecedent and consequent.
            continue
        itemset_count = itemcount[tuple(itemset)]
        # Support depends only on the itemset, so compute it once per itemset.
        support = itemset_count / total
        # Distinct items in itemset order; used to build the consequent
        # deterministically (a plain set difference has arbitrary order).
        ordered_items = list(dict.fromkeys(itemset))
        for size in range(1, len(itemset)):
            for subset in combinations(itemset, size):
                chosen = set(subset)
                antecedent = list(subset)
                consequent = [item for item in ordered_items if item not in chosen]
                confidence = itemset_count / itemcount[subset]
                if confidence >= minConfidence:
                    association_rules.append((antecedent, consequent, support, confidence))
    return association_rules

In [22]:
def cross_verify(filename, minConfidence, minSupport):
  """Run the efficient_apriori library on a file and print its results.

  The transactions are read with ``read_data`` and handed to the library's
  ``apriori`` function; the returned itemsets and rules are printed together
  with their total counts.

  Args:
    filename: Path of the CSV transaction file.
    minConfidence: Minimum confidence passed to the library.
    minSupport: Minimum support passed to the library.
  """
  transact = read_data(filename)
  itemsets, rules = apriori(transact, min_support=minSupport, min_confidence=minConfidence)

  print('\nCross Verification Results','\n_______\n')

  print('\nFrequent Itemset')
  total_itemsets = print_dict_set(itemsets)
  print('\nFrequent Itemset Total Count : ',total_itemsets)

  print('\nAssociation Rules')
  for rule in rules:
    print(rule)
  total_rules = len(rules)
  print('\nAssociation Rules - Total Count : ',total_rules)



In [23]:
def apriori_main(filename, minConfidence, minSupport):
  """Run the level-wise Apriori search on a CSV file and print every step.

  Prints the input transactions, the frequent itemsets found at each level,
  the final list of frequent itemsets, the association rules and the elapsed
  time.

  Args:
    filename: Path of the CSV transaction file (one transaction per row).
    minConfidence: Minimum confidence a rule must reach to be reported.
    minSupport: Minimum support an itemset must reach to be frequent.

  Returns:
    A tuple ``(master_frequent_itemset, association_rules_list)``: all
    frequent itemsets of every size, and the rules as
    ``(antecedent, consequent, support, confidence)`` tuples.
  """
  # Start the timer
  starttime = time.time()

  master_frequent_itemset = []

  # Print all transactions and collect the distinct items, in first-seen
  # order, as 1-item candidates. The companion set makes the "already seen"
  # test constant-time instead of a scan of the candidate list.
  itemset = []
  seen_items = set()
  print('\nAll Input transactions for Database','\n_______\n')
  transactions = read_data(filename)
  for trans in transactions:
    print(trans)
    for item in trans:
      if item not in seen_items:
        seen_items.add(item)
        itemset.append([item])

  # Level 1 - count the single items and drop the non frequent ones.
  # The per-level support dictionary is not needed here.
  frequent_itemset, itemcount, _ = apriori_prune(itemset, minSupport, transactions)
  master_frequent_itemset.extend(frequent_itemset)
  print('\nL1 - After removing the non frequent items','\n_______\n')
  print_itemcount_table(itemcount)

  # Further levels, as long as the previous level produced frequent itemsets.
  # The last pass finds nothing, so its table is printed with a header only.
  level = 2
  while frequent_itemset:
    candidate_set = generate_candidate_set(frequent_itemset)
    frequent_itemset, itemcount_level, _ = apriori_prune(candidate_set, minSupport, transactions)
    # itemcount accumulates the counts of every level for rule generation.
    itemcount.update(itemcount_level)
    master_frequent_itemset.extend(frequent_itemset)
    print('\nL',level,' - After removing the non frequent items','\n_______\n')
    print_itemcount_table(itemcount_level)
    level += 1

  print('\nFinal Frequent Itemset','\n_______\n')
  print_list(master_frequent_itemset)
  print('\nFrequent Itemset Total Count : ',len(master_frequent_itemset))

  # Generate Association rules
  association_rules_list = generate_rules(transactions,master_frequent_itemset, itemcount, minConfidence)
  print("\nAssociation Rules",'\n_______\n')
  for antecedent, consequent, support, confidence in association_rules_list:
    print(antecedent, "=>", consequent,"    ", "(Support:", support, "Confidence:", confidence, ")")
  print('\nAssociation Rules Total Count : ',len(association_rules_list))

  # End the timer
  endtime = time.time()
  print("\nTime taken by Apriori Algorithm:",endtime-starttime,"seconds")

  return master_frequent_itemset,association_rules_list



In [24]:
def count_occurrences(itemset, transactions):
    """Count the transactions that contain every item of ``itemset``.

    Args:
        itemset: Iterable of hashable items to look for.
        transactions: Iterable of transactions, each an iterable of items.

    Returns:
        The number of transactions of which ``itemset`` is a subset.
    """
    # Build the lookup set once rather than once per transaction.
    wanted = set(itemset)
    return sum(1 for transaction in transactions if wanted.issubset(transaction))

def generate_frequent_itemset_brute(transactions, minSupport):
    """Find frequent itemsets level by level and print each level.

    Level 1 candidates are the distinct items in first-seen order. Candidates
    of size k are the sorted unions of pairs of frequent (k-1)-itemsets that
    have exactly k items. A candidate is frequent when it occurs in at least
    one transaction and its support (count / number of transactions) reaches
    ``minSupport``.

    Args:
        transactions: List of transactions, each an iterable of items.
        minSupport: Threshold the support must reach (``>=``).

    Returns:
        A tuple ``(frequent_itemset, itemcount)``: the list of all frequent
        itemsets and a mapping from ``tuple(itemset)`` to its count.
    """
    frequent_itemset = []
    itemcount = {}
    num_transactions = len(transactions)
    # Convert every transaction to a set once for all subset tests.
    transaction_sets = [set(transaction) for transaction in transactions]

    # Level-1 candidates: distinct items in first-seen order.
    itemset = []
    seen_items = set()
    for trans in transactions:
        for item in trans:
            if item not in seen_items:
                seen_items.add(item)
                itemset.append([item])

    k = 2
    while itemset:
        frequent_itemset_k = []

        # Count each candidate exactly once.
        for item in itemset:
            wanted = set(item)
            count = sum(1 for trans in transaction_sets if wanted.issubset(trans))
            # count > 0: a candidate that never occurs is never frequent,
            # even when minSupport is zero or negative.
            if count > 0 and count / num_transactions >= minSupport:
                frequent_itemset_k.append(item)
                itemcount[tuple(item)] = count

        # If no frequent itemsets were found at this level, stop the process
        if not frequent_itemset_k:
            break

        # Append this level's frequent itemsets to the final list
        frequent_itemset.extend(frequent_itemset_k)

        # Generate the size-k candidates for the next iteration; the tuple
        # set gives a constant-time duplicate check while the list keeps the
        # order in which candidates were first produced.
        itemset = []
        seen_candidates = set()
        for itemset1 in frequent_itemset_k:
            for itemset2 in frequent_itemset_k:
                new_itemset = sorted(set(itemset1).union(itemset2))
                if len(new_itemset) == k:
                    candidate_key = tuple(new_itemset)
                    if candidate_key not in seen_candidates:
                        seen_candidates.add(candidate_key)
                        itemset.append(new_itemset)

        # Print the frequent itemsets of the level just processed (size k-1)
        print('\nFrequent itemset - k:',k-1,'\n_______\n')
        for i in frequent_itemset_k:
          print(i)

        k += 1

    return frequent_itemset, itemcount

def bruteforce_main(filename, minConfidence, minSupport):
  """Run the brute-force itemset search on a CSV file and print every step.

  Prints the input transactions, the frequent itemsets, the association
  rules and the elapsed time.

  Args:
    filename: Path of the CSV transaction file (one transaction per row).
    minConfidence: Minimum confidence a rule must reach to be reported.
    minSupport: Minimum support an itemset must reach to be frequent.

  Returns:
    A tuple ``(frequent_itemset, association_rules_list)``: the frequent
    itemsets that were found, and the rules as
    ``(antecedent, consequent, support, confidence)`` tuples.
  """
  # Start the timer
  starttime = time.time()

  # Print all transactions
  print('\nAll Input transactions for Database','\n_______\n')
  transactions = read_data(filename)
  for trans in transactions:
    print(trans)

  # Generate Frequent Itemset using Bruteforce
  frequent_itemset, itemcount = generate_frequent_itemset_brute(transactions, minSupport)
  print('\nFinal Frequent Itemset','\n_______\n')
  print_list(frequent_itemset)
  print('\nFrequent Itemset Total Count : ',len(frequent_itemset))

  # Generate Association rules
  association_rules_list = generate_rules(transactions, frequent_itemset, itemcount, minConfidence)
  print("\nAssociation Rules",'\n_______\n')
  for antecedent, consequent, support, confidence in association_rules_list:
    print(antecedent, "=>", consequent,"    ", "(Support:", support, "Confidence:", confidence, ")")
  print('\nAssociation Rules Total Count : ',len(association_rules_list))

  # End the timer
  endtime = time.time()
  print("\nTime taken by Brute Force Method:",endtime-starttime,"seconds")

  # Return the itemsets that were actually found (previously an always-empty
  # placeholder list was returned here).
  return frequent_itemset,association_rules_list


### Main Initiator Function

In [25]:
def apriori_initiator(inputfiles,outputfiles):
  """Run all three methods on every input file, writing each report to a file.

  For each input file the user is prompted for the minimum confidence and
  the minimum support. Standard output is then redirected to the output file
  at the same position in ``outputfiles`` while the Apriori implementation,
  the library cross verification and the brute-force method run.

  Args:
    inputfiles: Sequence of CSV transaction file paths.
    outputfiles: Sequence of report file paths, paired with ``inputfiles``
      by position; each file is opened in write mode.
  """
  for counter, filename in enumerate(inputfiles, start=1):
    # Prompt before redirecting so the prompts are not written to the report.
    minConfidence = float(input('Enter minimum Confidence:'))
    minSupport = float(input('Enter minimum Support:'))
    orig_stdout = sys.stdout

    with open(outputfiles[counter-1],"w") as report:
      sys.stdout = report
      try:
        # Record the user inputs in the report
        print('\nDatabase',counter,'\n_______\n')
        print('Minimum Confidence: ',minConfidence)
        print('Minimum Support: ',minSupport)

        # Start of the apriori main program
        print('\nApriori Algorithm output','\n______________________\n')
        apriori_main(filename, minConfidence, minSupport)

        # Cross verification using Apriori library
        print('\nCross Verification output','\n______________________\n')
        cross_verify(filename, minConfidence, minSupport)

        # Start of the brute force main program
        print('\nBrute Force Method output','\n______________________\n')
        bruteforce_main(filename, minConfidence, minSupport)
      finally:
        # Always restore stdout, even if one of the runs raises; otherwise
        # later output would keep going to a file that is being closed.
        sys.stdout = orig_stdout

### Commands to invoke the Initiator function
#### Input Files and Output Files are needed for the function to be invoked

In [26]:
# Command For Passing in the Input/Output file names to the initiator program
#outputfiles=['DB1Output.txt','DB2Output.txt', 'DB3Output.txt',
#             'DB4Output.txt','DB5Output.txt']
#inputfiles=['GroceryDB1.csv','GroceryDB2.csv','GroceryDB3.csv',
#            'GroceryDB4.csv','GroceryDB5.csv']
#
## Five input datasets
## Five output datasets - Each will have the printed transactions, frequent itemsets
## and generated rules for user chosen minimum support and minimum confidence
#apriori_initiator(inputfiles,outputfiles)


In [27]:
# Input/output file names passed to the initiator program. The two lists are
# paired by position: the report for inputfiles[i] is written to outputfiles[i].
# NOTE(review): 'FinalExamPut.txt' looks like a typo for 'FinalExamOutput.txt' - confirm.
outputfiles=['FinalExamPut.txt']
inputfiles=['FinalExam.csv']

# One input dataset and one output file. The output file receives the printed
# transactions, frequent itemsets and generated rules for the user-chosen
# minimum support and minimum confidence.
apriori_initiator(inputfiles,outputfiles)


Enter minimum Confidence:0.75
Enter minimum Support:0.5


These commands invoke the `apriori_initiator` function, which then prompts for the minimum confidence and the minimum support once for each input file.