
## Basic I/O : Implement basic I/O function that can read the data from the dataset and write the results to a file.


In [130]:
# Loading required libraries
import pandas as pd
import numpy as np
from itertools import chain, combinations
import time

### As part of Basic input, I have created two functions:
    Function 1(read_as_dataframe) - Reads the data from csv file and returns a dataframe
    Function 2(read_as_list) - Reads the data from csv or txt file and returns a list

In [77]:
# read the data from the dataset using read_csv from pandas, read stabilized with engine
def read_as_dataframe(filename):
    """Load a comma-separated transaction file into a pandas DataFrame.

    Parameters
    ----------
    filename : path of the file to read; the file is read without a header row.

    Returns
    -------
    pandas.DataFrame whose columns are labelled with the strings "0" .. "10".
    """
    # Eleven string labels ("0".."10"): the widest transaction this notebook
    # expects has at most 11 items.
    column_labels = [str(position) for position in range(11)]
    return pd.read_csv(filename, names=column_labels, engine='python')

In [78]:
# Reading data - works with either a txt or a csv file: each line is read, stripped of surrounding whitespace and stray commas, and split on commas.
# The function returns the transaction database as a list of item lists.
def read_as_list(file):
    """Read a comma-separated transaction file into a list of item lists.

    Parameters
    ----------
    file : path of a text/csv file with one transaction per line.

    Returns
    -------
    list of lists of str, one inner list per line of the file.
    """
    with open(file) as handle:
        raw_rows = handle.readlines()

    # Per line: trim surrounding whitespace, then leading/trailing commas,
    # and only then split into the individual items.
    return [row.strip().strip(",").split(',') for row in raw_rows]

#### Reading/Input demonstrated in using the function calls below:

In [79]:
# read the file into df by passing filename(parameter)
df = read_as_dataframe("GroceryStore.csv")
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,Lassi,Coffee Powder,Butter,Yougurt,Ghee,Cheese,,,,,
1,Ghee,Coffee Powder,,,,,,,,,
2,Lassi,Tea Powder,Butter,Cheese,,,,,,,
3,Cheese,Tea Powder,Panner,Coffee Powder,Butter,Bread,,,,,
4,Cheese,Yougurt,Coffee Powder,Sugar,Butter,Sweet,,,,,


In [80]:
# read the file to create a list
input_list = read_as_list('GroceryStore.csv')
input_list

[['Lassi', 'Coffee Powder', 'Butter', 'Yougurt', 'Ghee', 'Cheese'],
 ['Ghee', 'Coffee Powder'],
 ['Lassi', 'Tea Powder', 'Butter', 'Cheese'],
 ['Cheese', 'Tea Powder', 'Panner', 'Coffee Powder', 'Butter', 'Bread'],
 ['Cheese', 'Yougurt', 'Coffee Powder', 'Sugar', 'Butter', 'Sweet'],
 ['Sugar', 'Tea Powder', 'Ghee', 'Sweet', 'Panner', 'Milk'],
 ['Sweet', 'Coffee Powder'],
 ['Butter', 'Ghee', 'Panner'],
 ['Sweet', 'Tea Powder', 'Butter', 'Yougurt', 'Sugar', 'Cheese'],
 ['Panner', 'Ghee'],
 ['Milk', 'Panner', 'Tea Powder', 'Sweet', 'Bread'],
 ['Ghee',
  'Coffee Powder',
  'Milk',
  'Yougurt',
  'Lassi',
  'Sugar',
  'Butter',
  'Panner'],
 ['Butter', 'Coffee Powder', 'Panner', 'Sweet', 'Ghee', 'Lassi'],
 ['Bread',
  'Lassi',
  'Coffee Powder',
  'Tea Powder',
  'Sweet',
  'Ghee',
  'Sugar',
  'Panner'],
 ['Milk', 'Sweet', 'Butter', 'Sugar', 'Lassi', 'Panner'],
 ['Bread', 'Coffee Powder', 'Tea Powder'],
 ['Butter', 'Ghee', 'Milk', 'Cheese'],
 ['Bread', 'Coffee Powder'],
 ['Cheese', 'Tea Po

In [81]:
# Using the textbook example to demonstrate the functionalities
df_example = read_as_dataframe("ex.csv")
df_example.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,I1,I2,I5,,,,,,,,
1,I2,I4,,,,,,,,,
2,I2,I3,,,,,,,,,
3,I1,I2,I4,,,,,,,,
4,I1,I3,,,,,,,,,


In [82]:
input_list_example = read_as_list('ex.csv')
input_list_example

[['I1', 'I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3'],
 ['I1', 'I2', 'I4'],
 ['I1', 'I3'],
 ['I2', 'I3'],
 ['I1', 'I3'],
 ['I1', 'I2', 'I3', 'I5'],
 ['I1', 'I2', 'I3']]

### Basic Output: Writing a data frame to a file
    
     As part of output, 1. the function is created to write a dataframe into a text file
     2. Writing FP tree into a file

In [83]:
# writing data to a file
def writetoafile(filename,df):
    """Write a DataFrame to ``filename`` in CSV format (index column included).

    Parameters
    ----------
    filename : destination path; an existing file is overwritten.
    df : pandas.DataFrame to serialise.
    """
    # Let pandas open and close the file itself. The previous version opened
    # the path for writing, never used that handle, and then had ``to_csv``
    # open the very same path a second time.
    df.to_csv(filename)

In [84]:
# writing FP tree to a file
def write_tree(filename,fptree):
    """Write the string form of an FP tree to ``filename``.

    Parameters
    ----------
    filename : destination path; an existing file is overwritten.
    fptree : object whose ``str()`` representation is written.
    """
    # The context manager closes the file even if building the string or
    # writing it raises.
    with open(filename,"w") as tree_file:
        tree_file.write(str(fptree))

In [137]:
def write_list(filename,list_items):
    """Write every element of ``list_items`` to ``filename``, one per line.

    Parameters
    ----------
    filename : destination path; an existing file is overwritten.
    list_items : iterable of objects; each is written via ``str()``.
    """
    with open(filename, 'w') as filehandle:
        for list_item in list_items:
            # Wrap the item in a 1-tuple: with a bare ``% list_item`` a tuple
            # item (e.g. an itemset like ('a', 'b')) is taken as the tuple of
            # format arguments and raises TypeError.
            filehandle.write('%s\n' % (list_item,))

#### Writing as output demonstrated below:

In [None]:
# Writing outputs to file

In [139]:
# writing dataframe values into a file
writetoafile('output_file.txt',df)

In [140]:
# writing frequent itemlists into a file
# NOTE(review): `all_frequent_itemsets_fp` is not assigned anywhere above this cell, so on a
# fresh kernel (Restart & Run All) this raises NameError. It is presumably produced by the
# FP-Growth section later in the notebook - confirm, and move this cell after that section.
write_list("freq_itemsets_fp",all_frequent_itemsets_fp)

In [141]:
# write FP tree into a file
# NOTE(review): `fptree` is not assigned anywhere above this cell, so on a fresh kernel
# (Restart & Run All) this raises NameError. It is presumably built in the FP-Growth section
# later in the notebook - confirm, and move this cell after that section.
write_tree("fptree.txt",fptree)


## Frequent Itemset : Find all possible 2, 3, 4 and 5-itemsets given the parameter of minimum-support.

The frequent itemsets are generated using three different algorithms, each of which is implemented in this notebook:
    1. Brute-force approach - Creates all possible combinations and prunes using minimum support
    2. Apriori approach - Implements Apriori algorithm to generate frequent itemsets
    3. FP Growth approach - Implements FP Growth algorithm to generate frequent itemsets

### This task provides the implementation of Brute-Force algorithm to generate frequent itemsets - 2,3,4 and 5 itemsets by taking minimum support as input parameter

In [None]:
# I have used a modular approach in which the functions can be re-used; this improves scalability and supports literate coding,
# enhancing the readability of the implementation

In [85]:
# function to calculate the frequency of occurence of the item_sets

def freq(df,item_sets):
    """Count, for each candidate itemset, the transactions that contain it.

    Parameters
    ----------
    df : pandas.DataFrame with one transaction per row; every cell of a row
        is treated as an item of that transaction.
    item_sets : iterable of itemsets (tuples, frozensets, ...).

    Returns
    -------
    list of int: support counts in the iteration order of ``item_sets``.
    """
    # Build each transaction's item set once, not once per candidate.
    transactions = [set(row) for row in df.values.tolist()]

    count_list = []
    for candidate in item_sets:
        wanted = set(candidate)
        count_list.append(sum(1 for transaction in transactions if wanted.issubset(transaction)))
    return count_list

In [87]:
# function to generate frequent items given the item-set, support counts and minimum support

def generate_frequent_itemset(count,comb_list,min_sup):
    """Split candidate itemsets into frequent and infrequent ones.

    Parameters
    ----------
    count : sequence of support counts, aligned by position with ``comb_list``.
    comb_list : sequence of candidate itemsets.
    min_sup : minimum support count; a candidate is frequent when its count
        is greater than or equal to this value.

    Returns
    -------
    (frequent, infrequent) : two lists that keep the order of ``comb_list``.
    """
    frequent, infrequent = [], []
    for position, candidate in enumerate(comb_list):
        bucket = frequent if count[position] >= min_sup else infrequent
        bucket.append(candidate)
    return frequent, infrequent

In [88]:
# The brute-force algorithm implemented below takes the number of itemsets, a dataframe and the minimum support as input and
# returns the frequent itemsets as output

def brute_force_frequent_itemset(number_itemset,df,min_sup):
    """Find frequent itemsets by enumerating every combination of items.

    Parameters
    ----------
    number_itemset : largest itemset size to generate; sizes 1..number_itemset
        are enumerated.
    df : pandas.DataFrame of transactions (one per row, missing cells as NaN).
    min_sup : minimum support count an itemset needs to be kept.

    Returns
    -------
    list of lists of tuples: one inner list per itemset size that has at
    least one frequent itemset, in increasing size order.
    """
    # Stack the columns end to end (first column first) so distinct items keep
    # the same first-seen order as before. ``pd.concat`` replaces the chained
    # ``Series.append`` calls (removed in pandas 2.0) and covers every column
    # of ``df`` rather than a hard-coded "0".."10". ``dropna`` discards the
    # padding NaN without failing when there is none.
    stacked = pd.concat([df[column] for column in df.columns], ignore_index=True)
    list_comb = list(stacked.dropna().unique())

    all_freq_itemlist = list()

    # Generate and test every i-itemset built from the unique items
    for i in range(1,number_itemset+1):
        comb_list = list(combinations(list_comb,i))

        count = freq(df,comb_list)
        freq_list,infreq_list = generate_frequent_itemset(count,comb_list,min_sup)

        # Sizes without any frequent itemset are left out of the result
        if freq_list:
            all_freq_itemlist.append(freq_list)

    return all_freq_itemlist

### Illustration of Brute-force approach to generate frequent item-sets: 
### The two datasets are used to demonstrate the generation of outputs for all the algorithms.
        1. Grocery store provided as part of coursework
        2. Textbook example to demonstrate the working of different dataset

In [89]:
# Using a textbook example to generate frequent itemsets
# parameter 1 - number of itemsets. example - given 3, the frequent itemsets 1,2,3 are generated
freq_itemsets_example = brute_force_frequent_itemset(3,df_example,2)

In [90]:
freq_itemsets_example

[[('I1',), ('I2',), ('I4',), ('I3',), ('I5',)],
 [('I1', 'I2'),
  ('I1', 'I3'),
  ('I1', 'I5'),
  ('I2', 'I4'),
  ('I2', 'I3'),
  ('I2', 'I5')],
 [('I1', 'I2', 'I3'), ('I1', 'I2', 'I5')]]

In [91]:
freq_itemsets0 = brute_force_frequent_itemset(1,df,1250)

In [92]:
freq_itemsets0

[[('Lassi',),
  ('Ghee',),
  ('Cheese',),
  ('Sugar',),
  ('Sweet',),
  ('Butter',),
  ('Panner',),
  ('Milk',),
  ('Bread',),
  ('Coffee Powder',),
  ('Yougurt',),
  ('Tea Powder',)]]

In [93]:
# Using the dataset provided as part of coursework
# Upto 3-itemsets generation, min_sup is 2300
freq_itemsets1 = brute_force_frequent_itemset(3,df,2300)

In [94]:
freq_itemsets1

[[('Lassi',),
  ('Ghee',),
  ('Cheese',),
  ('Sugar',),
  ('Sweet',),
  ('Butter',),
  ('Panner',),
  ('Milk',),
  ('Bread',),
  ('Coffee Powder',),
  ('Yougurt',),
  ('Tea Powder',)],
 [('Lassi', 'Ghee'),
  ('Lassi', 'Cheese'),
  ('Lassi', 'Sugar'),
  ('Lassi', 'Sweet'),
  ('Lassi', 'Butter'),
  ('Lassi', 'Panner'),
  ('Lassi', 'Milk'),
  ('Lassi', 'Bread'),
  ('Lassi', 'Coffee Powder'),
  ('Lassi', 'Yougurt'),
  ('Lassi', 'Tea Powder'),
  ('Ghee', 'Cheese'),
  ('Ghee', 'Sugar'),
  ('Ghee', 'Sweet'),
  ('Ghee', 'Butter'),
  ('Ghee', 'Panner'),
  ('Ghee', 'Milk'),
  ('Ghee', 'Bread'),
  ('Ghee', 'Coffee Powder'),
  ('Ghee', 'Yougurt'),
  ('Ghee', 'Tea Powder'),
  ('Cheese', 'Sugar'),
  ('Cheese', 'Sweet'),
  ('Cheese', 'Butter'),
  ('Cheese', 'Panner'),
  ('Cheese', 'Milk'),
  ('Cheese', 'Bread'),
  ('Cheese', 'Coffee Powder'),
  ('Cheese', 'Yougurt'),
  ('Cheese', 'Tea Powder'),
  ('Sugar', 'Sweet'),
  ('Sugar', 'Butter'),
  ('Sugar', 'Panner'),
  ('Sugar', 'Milk'),
  ('Sugar', 'Bread

In [95]:
freq_itemsets2 = brute_force_frequent_itemset(5,df,280)

In [96]:
freq_itemsets2

[[('Lassi',),
  ('Ghee',),
  ('Cheese',),
  ('Sugar',),
  ('Sweet',),
  ('Butter',),
  ('Panner',),
  ('Milk',),
  ('Bread',),
  ('Coffee Powder',),
  ('Yougurt',),
  ('Tea Powder',)],
 [('Lassi', 'Ghee'),
  ('Lassi', 'Cheese'),
  ('Lassi', 'Sugar'),
  ('Lassi', 'Sweet'),
  ('Lassi', 'Butter'),
  ('Lassi', 'Panner'),
  ('Lassi', 'Milk'),
  ('Lassi', 'Bread'),
  ('Lassi', 'Coffee Powder'),
  ('Lassi', 'Yougurt'),
  ('Lassi', 'Tea Powder'),
  ('Ghee', 'Cheese'),
  ('Ghee', 'Sugar'),
  ('Ghee', 'Sweet'),
  ('Ghee', 'Butter'),
  ('Ghee', 'Panner'),
  ('Ghee', 'Milk'),
  ('Ghee', 'Bread'),
  ('Ghee', 'Coffee Powder'),
  ('Ghee', 'Yougurt'),
  ('Ghee', 'Tea Powder'),
  ('Cheese', 'Sugar'),
  ('Cheese', 'Sweet'),
  ('Cheese', 'Butter'),
  ('Cheese', 'Panner'),
  ('Cheese', 'Milk'),
  ('Cheese', 'Bread'),
  ('Cheese', 'Coffee Powder'),
  ('Cheese', 'Yougurt'),
  ('Cheese', 'Tea Powder'),
  ('Sugar', 'Sweet'),
  ('Sugar', 'Butter'),
  ('Sugar', 'Panner'),
  ('Sugar', 'Milk'),
  ('Sugar', 'Bread

In [70]:
freq_itemsets3 = brute_force_frequent_itemset(5,df,280)

In [71]:
freq_itemsets3

[[('Lassi',),
  ('Ghee',),
  ('Cheese',),
  ('Sugar',),
  ('Sweet',),
  ('Butter',),
  ('Panner',),
  ('Milk',),
  ('Bread',),
  ('Coffee Powder',),
  ('Yougurt',),
  ('Tea Powder',)],
 [('Lassi', 'Ghee'),
  ('Lassi', 'Cheese'),
  ('Lassi', 'Sugar'),
  ('Lassi', 'Sweet'),
  ('Lassi', 'Butter'),
  ('Lassi', 'Panner'),
  ('Lassi', 'Milk'),
  ('Lassi', 'Bread'),
  ('Lassi', 'Coffee Powder'),
  ('Lassi', 'Yougurt'),
  ('Lassi', 'Tea Powder'),
  ('Ghee', 'Cheese'),
  ('Ghee', 'Sugar'),
  ('Ghee', 'Sweet'),
  ('Ghee', 'Butter'),
  ('Ghee', 'Panner'),
  ('Ghee', 'Milk'),
  ('Ghee', 'Bread'),
  ('Ghee', 'Coffee Powder'),
  ('Ghee', 'Yougurt'),
  ('Ghee', 'Tea Powder'),
  ('Cheese', 'Sugar'),
  ('Cheese', 'Sweet'),
  ('Cheese', 'Butter'),
  ('Cheese', 'Panner'),
  ('Cheese', 'Milk'),
  ('Cheese', 'Bread'),
  ('Cheese', 'Coffee Powder'),
  ('Cheese', 'Yougurt'),
  ('Cheese', 'Tea Powder'),
  ('Sugar', 'Sweet'),
  ('Sugar', 'Butter'),
  ('Sugar', 'Panner'),
  ('Sugar', 'Milk'),
  ('Sugar', 'Bread

In [168]:
freq_itemsets_big = brute_force_frequent_itemset(12,df,50)

In [169]:
freq_itemsets_big

[[('Lassi',),
  ('Ghee',),
  ('Cheese',),
  ('Sugar',),
  ('Sweet',),
  ('Butter',),
  ('Panner',),
  ('Milk',),
  ('Bread',),
  ('Coffee Powder',),
  ('Yougurt',),
  ('Tea Powder',)],
 [('Lassi', 'Ghee'),
  ('Lassi', 'Cheese'),
  ('Lassi', 'Sugar'),
  ('Lassi', 'Sweet'),
  ('Lassi', 'Butter'),
  ('Lassi', 'Panner'),
  ('Lassi', 'Milk'),
  ('Lassi', 'Bread'),
  ('Lassi', 'Coffee Powder'),
  ('Lassi', 'Yougurt'),
  ('Lassi', 'Tea Powder'),
  ('Ghee', 'Cheese'),
  ('Ghee', 'Sugar'),
  ('Ghee', 'Sweet'),
  ('Ghee', 'Butter'),
  ('Ghee', 'Panner'),
  ('Ghee', 'Milk'),
  ('Ghee', 'Bread'),
  ('Ghee', 'Coffee Powder'),
  ('Ghee', 'Yougurt'),
  ('Ghee', 'Tea Powder'),
  ('Cheese', 'Sugar'),
  ('Cheese', 'Sweet'),
  ('Cheese', 'Butter'),
  ('Cheese', 'Panner'),
  ('Cheese', 'Milk'),
  ('Cheese', 'Bread'),
  ('Cheese', 'Coffee Powder'),
  ('Cheese', 'Yougurt'),
  ('Cheese', 'Tea Powder'),
  ('Sugar', 'Sweet'),
  ('Sugar', 'Butter'),
  ('Sugar', 'Panner'),
  ('Sugar', 'Milk'),
  ('Sugar', 'Bread


## Associated Rule : Find all interesting association rules from the frequent item-sets given the parameter of minimum-confidence.

The association rules are created using only the frequent item-sets generated by the algorithms mentioned above.
The association function takes a dataframe (the database of transactions), the frequent itemsets and the minimum confidence, and generates the
interesting association rules

In [148]:
# Frequency - the support count of each itemset is obtained by reusing the function created for the brute-force approach; the counts are used
# to calculate the confidence of each rule. The scan starts from the largest frequent itemsets and works down
# to the 1-itemsets, generating all association rules that reach the given minimum confidence percentage.

def associations(df,itemsets,min_confidence_percent,filename='test_association_values.txt'):
    """Print and save the association rules that reach a minimum confidence.

    For every itemset ``item`` in levels 1..end of ``itemsets`` and every
    itemset ``lower_item`` in an earlier level that is a subset of it, the
    confidence ``count(item) / count(lower_item)`` is computed. Rules meeting
    the threshold are printed and written to ``filename``.

    Parameters
    ----------
    df : pandas.DataFrame of transactions, passed to ``freq`` for the counts.
    itemsets : list of levels, each a list of itemsets. Assumed to be ordered
        by increasing itemset size, as produced by the frequent-itemset
        functions in this notebook - TODO confirm for other callers.
    min_confidence_percent : threshold in percent (e.g. 50 for 50%).
    filename : output text file; overwritten on every call.

    Returns
    -------
    None
    """
    confidence = float(min_confidence_percent)/100

    # Support count of every itemset, grouped by level exactly like `itemsets`
    count = [freq(df,level_itemsets) for level_itemsets in itemsets]

    # `with` guarantees the rule file is flushed and closed (it used to be
    # left open).
    with open(filename,'w') as file_association:
        # Largest itemsets first, down to level 1 (level 0 has no lower level)
        for level in range(len(itemsets)-1, 0, -1):
            for item, item_count in zip(itemsets[level], count[level]):
                # Compare against every lower level, nearest level first
                for lower_level in range(level-1, -1, -1):
                    for lower_item, lower_item_count in zip(itemsets[lower_level], count[lower_level]):
                        if not set(lower_item).issubset(set(item)):
                            continue
                        if lower_item_count == 0:
                            # An antecedent that never occurs has no defined confidence
                            continue
                        calc_confidence = item_count/lower_item_count
                        if calc_confidence >= confidence:
                            rule = "rule:"+str(lower_item)+"->"+str((set(item)-set(lower_item)))+"->"+str(calc_confidence*100)
                            print(rule)
                            file_association.write("\n"+rule)

### Illustration of Association generation from frequent item-sets: 

In [99]:
# Using textbook example to produce the Association rules
# parameter - frequent itemsets and minimum confidence
associations(df_example,freq_itemsets_example,50)

rule:('I1', 'I2')->{'I3'}->50.0
rule:('I1', 'I3')->{'I2'}->50.0
rule:('I2', 'I3')->{'I1'}->50.0
rule:('I1', 'I2')->{'I5'}->50.0
rule:('I1', 'I5')->{'I2'}->100.0
rule:('I2', 'I5')->{'I1'}->100.0
rule:('I5',)->{'I1', 'I2'}->100.0
rule:('I1',)->{'I2'}->66.66666666666666
rule:('I2',)->{'I1'}->57.14285714285714
rule:('I1',)->{'I3'}->66.66666666666666
rule:('I3',)->{'I1'}->66.66666666666666
rule:('I5',)->{'I1'}->100.0
rule:('I4',)->{'I2'}->100.0
rule:('I2',)->{'I3'}->57.14285714285714
rule:('I3',)->{'I2'}->66.66666666666666
rule:('I5',)->{'I2'}->100.0


In [34]:
# associations(df,freq_itemsets2,50)

In [149]:
# Computing Associations for minimum confidence of 50 percent for 1,2,3,4 and 5 frequent item-sets
associations(df,freq_itemsets2,50)

rule:('Lassi', 'Ghee', 'Cheese', 'Bread')->{'Coffee Powder'}->50.78534031413613
rule:('Lassi', 'Ghee', 'Cheese', 'Coffee Powder')->{'Bread'}->50.696864111498265
rule:('Lassi', 'Cheese', 'Bread', 'Coffee Powder')->{'Ghee'}->50.172413793103445
rule:('Lassi', 'Ghee', 'Butter', 'Milk')->{'Sugar'}->50.08695652173913
rule:('Lassi', 'Ghee', 'Butter', 'Yougurt')->{'Sugar'}->50.357142857142854
rule:('Ghee', 'Sugar', 'Panner', 'Milk')->{'Lassi'}->50.0
rule:('Lassi', 'Ghee', 'Sugar', 'Panner')->{'Bread'}->50.255536626916516
rule:('Lassi', 'Ghee', 'Sugar', 'Bread')->{'Panner'}->51.393728222996515
rule:('Lassi', 'Sugar', 'Panner', 'Bread')->{'Ghee'}->50.513698630136986
rule:('Ghee', 'Sugar', 'Panner', 'Bread')->{'Lassi'}->50.34129692832765
rule:('Ghee', 'Sugar', 'Milk', 'Bread')->{'Lassi'}->50.35460992907801
rule:('Lassi', 'Ghee', 'Sugar', 'Tea Powder')->{'Milk'}->51.056338028169016
rule:('Lassi', 'Ghee', 'Milk', 'Tea Powder')->{'Sugar'}->50.6993006993007
rule:('Ghee', 'Sugar', 'Milk', 'Tea Powder'

In [100]:
freq_itemsets4 = brute_force_frequent_itemset(5,df,5000)

In [101]:
freq_itemsets4

[[('Lassi',),
  ('Ghee',),
  ('Cheese',),
  ('Sugar',),
  ('Sweet',),
  ('Butter',),
  ('Panner',),
  ('Milk',),
  ('Bread',),
  ('Coffee Powder',),
  ('Yougurt',),
  ('Tea Powder',)]]

In [102]:
associations(df,freq_itemsets4,50)

In [103]:
freq_itemsets5 = brute_force_frequent_itemset(5,df,1250)

In [104]:
freq_itemsets5

[[('Lassi',),
  ('Ghee',),
  ('Cheese',),
  ('Sugar',),
  ('Sweet',),
  ('Butter',),
  ('Panner',),
  ('Milk',),
  ('Bread',),
  ('Coffee Powder',),
  ('Yougurt',),
  ('Tea Powder',)],
 [('Lassi', 'Ghee'),
  ('Lassi', 'Cheese'),
  ('Lassi', 'Sugar'),
  ('Lassi', 'Sweet'),
  ('Lassi', 'Butter'),
  ('Lassi', 'Panner'),
  ('Lassi', 'Milk'),
  ('Lassi', 'Bread'),
  ('Lassi', 'Coffee Powder'),
  ('Lassi', 'Yougurt'),
  ('Lassi', 'Tea Powder'),
  ('Ghee', 'Cheese'),
  ('Ghee', 'Sugar'),
  ('Ghee', 'Sweet'),
  ('Ghee', 'Butter'),
  ('Ghee', 'Panner'),
  ('Ghee', 'Milk'),
  ('Ghee', 'Bread'),
  ('Ghee', 'Coffee Powder'),
  ('Ghee', 'Yougurt'),
  ('Ghee', 'Tea Powder'),
  ('Cheese', 'Sugar'),
  ('Cheese', 'Sweet'),
  ('Cheese', 'Butter'),
  ('Cheese', 'Panner'),
  ('Cheese', 'Milk'),
  ('Cheese', 'Bread'),
  ('Cheese', 'Coffee Powder'),
  ('Cheese', 'Yougurt'),
  ('Cheese', 'Tea Powder'),
  ('Sugar', 'Sweet'),
  ('Sugar', 'Butter'),
  ('Sugar', 'Panner'),
  ('Sugar', 'Milk'),
  ('Sugar', 'Bread

In [105]:
associations(df,freq_itemsets5,50)

rule:('Lassi', 'Panner')->{'Sweet'}->50.66079295154186
rule:('Sweet', 'Panner')->{'Lassi'}->50.49900199600798


In [106]:
freq_itemsets6 = brute_force_frequent_itemset(5,df,2456)

In [107]:
freq_itemsets6

[[('Lassi',),
  ('Ghee',),
  ('Cheese',),
  ('Sugar',),
  ('Sweet',),
  ('Butter',),
  ('Panner',),
  ('Milk',),
  ('Bread',),
  ('Coffee Powder',),
  ('Yougurt',),
  ('Tea Powder',)],
 [('Lassi', 'Ghee'),
  ('Lassi', 'Cheese'),
  ('Lassi', 'Sugar'),
  ('Lassi', 'Sweet'),
  ('Lassi', 'Butter'),
  ('Lassi', 'Panner'),
  ('Lassi', 'Milk'),
  ('Lassi', 'Bread'),
  ('Lassi', 'Coffee Powder'),
  ('Lassi', 'Yougurt'),
  ('Ghee', 'Cheese'),
  ('Ghee', 'Sugar'),
  ('Ghee', 'Sweet'),
  ('Ghee', 'Butter'),
  ('Ghee', 'Panner'),
  ('Ghee', 'Milk'),
  ('Ghee', 'Bread'),
  ('Ghee', 'Coffee Powder'),
  ('Ghee', 'Yougurt'),
  ('Cheese', 'Sugar'),
  ('Cheese', 'Sweet'),
  ('Cheese', 'Butter'),
  ('Cheese', 'Panner'),
  ('Cheese', 'Milk'),
  ('Cheese', 'Bread'),
  ('Cheese', 'Coffee Powder'),
  ('Cheese', 'Yougurt'),
  ('Sugar', 'Sweet'),
  ('Sugar', 'Butter'),
  ('Sugar', 'Panner'),
  ('Sugar', 'Milk'),
  ('Sugar', 'Bread'),
  ('Sugar', 'Coffee Powder'),
  ('Sugar', 'Yougurt'),
  ('Sugar', 'Tea Powder

In [108]:
associations(df,freq_itemsets6,45)

rule:('Lassi',)->{'Ghee'}->46.226067746686304
rule:('Ghee',)->{'Lassi'}->45.57168784029038
rule:('Lassi',)->{'Cheese'}->45.72901325478645
rule:('Cheese',)->{'Lassi'}->45.36157779401023
rule:('Lassi',)->{'Sugar'}->46.078792341678934
rule:('Sugar',)->{'Lassi'}->45.658518788763224
rule:('Lassi',)->{'Sweet'}->47.42268041237113
rule:('Sweet',)->{'Lassi'}->46.98157942732081
rule:('Lassi',)->{'Butter'}->46.0419734904271
rule:('Butter',)->{'Lassi'}->45.63035942346288
rule:('Lassi',)->{'Panner'}->45.968335787923415
rule:('Panner',)->{'Lassi'}->45.867009551800145
rule:('Lassi',)->{'Milk'}->46.741531664212076
rule:('Milk',)->{'Lassi'}->45.946435034382915
rule:('Lassi',)->{'Bread'}->46.134020618556704
rule:('Bread',)->{'Lassi'}->45.69657184536835
rule:('Lassi',)->{'Coffee Powder'}->46.24447717231222
rule:('Coffee Powder',)->{'Lassi'}->45.59811218006898
rule:('Lassi',)->{'Yougurt'}->45.39764359351988
rule:('Cheese',)->{'Ghee'}->45.14243973703433
rule:('Ghee',)->{'Sugar'}->45.66243194192378
rule:('S

In [172]:
associations(df,freq_itemsets_big,55)

rule:('Lassi', 'Ghee', 'Cheese', 'Sugar', 'Panner', 'Coffee Powder')->{'Bread'}->55.00000000000001
rule:('Lassi', 'Ghee', 'Cheese', 'Butter', 'Milk', 'Yougurt')->{'Coffee Powder'}->56.36363636363636
rule:('Lassi', 'Ghee', 'Sugar', 'Panner', 'Milk', 'Coffee Powder')->{'Bread'}->55.3030303030303
rule:('Ghee', 'Sugar', 'Panner', 'Milk', 'Bread', 'Coffee Powder')->{'Lassi'}->57.48031496062992
rule:('Ghee', 'Sweet', 'Panner', 'Milk', 'Bread', 'Tea Powder')->{'Lassi'}->55.172413793103445
rule:('Ghee', 'Sweet', 'Panner', 'Bread', 'Coffee Powder', 'Tea Powder')->{'Lassi'}->57.407407407407405
rule:('Ghee', 'Sweet', 'Panner', 'Coffee Powder', 'Yougurt', 'Tea Powder')->{'Lassi'}->56.310679611650485
rule:('Lassi', 'Sweet', 'Butter', 'Milk', 'Coffee Powder', 'Tea Powder')->{'Sugar'}->57.85123966942148
rule:('Sugar', 'Sweet', 'Panner', 'Milk', 'Coffee Powder', 'Yougurt')->{'Lassi'}->56.14035087719298
rule:('Sugar', 'Butter', 'Panner', 'Milk', 'Bread', 'Coffee Powder')->{'Lassi'}->55.46218487394958
r

## Task 4
## Apriori Algorithm : Use Apriori algorithm for finding frequent itemsets.

The modular approach is again adopted here to split different functions as a module and each module can be combined to 
effectively implement the Apriori algorithm

In [155]:
# overriding frozen to unfreeze

class MyFrozenSet(frozenset):
    """frozenset subclass that displays as ``([item1, item2, ...])``.

    The formatting method was previously named ``repr``, which Python never
    calls implicitly, so instances still displayed with the default frozenset
    representation. It is now ``__repr__``; ``repr`` stays as an alias so any
    explicit ``obj.repr()`` call keeps working.
    """

    def __repr__(self):
        # Inside the method body `repr` resolves to the builtin, not the alias below
        return '([{}])'.format(', '.join(map(repr, self)))

    # Backward-compatible alias for the old method name
    repr = __repr__

In [156]:
# unfreeze and display frozensets(not needed)

def unfreeze(item_sets):
    """Wrap every itemset of every level in ``MyFrozenSet`` for display.

    Parameters
    ----------
    item_sets : iterable of levels, each an iterable of itemsets.

    Returns
    -------
    list of lists of MyFrozenSet, with the same nesting as the input.
    """
    return [[MyFrozenSet(member) for member in level] for level in item_sets]

In [15]:
# join the sets Lk * Lk

def join(itemset,n):
    """Join step (Lk * Lk): union every pair of itemsets and keep size-``n`` results.

    Parameters
    ----------
    itemset : iterable of frozensets.
    n : required size of the joined itemsets.

    Returns
    -------
    set of the pairwise unions whose length is exactly ``n``.
    """
    # All ordered pairs are visited (including an itemset with itself);
    # duplicates collapse because the result is a set.
    return {
        left.union(right)
        for left in itemset
        for right in itemset
        if len(left.union(right)) == n
    }

In [16]:
# Generate Candidate set by removing infrequent subsets

def candidate(infreq_itemset,joined_set):
    """Build the candidate set by dropping joins that contain an infrequent subset.

    Parameters
    ----------
    infreq_itemset : iterable of frozensets known to be infrequent.
    joined_set : iterable of frozensets produced by the join step.

    Returns
    -------
    set of the joined itemsets that contain none of the infrequent itemsets.
    """
    def has_infrequent_subset(joined):
        # True as soon as one infrequent itemset is fully contained in `joined`
        return any(frozenset.issubset(rejected, joined) for rejected in infreq_itemset)

    return {joined for joined in joined_set if not has_infrequent_subset(joined)}

In [17]:
# Prune the Candidate set to obtain frequent itemsets

def prune(count,itemset,sup_count=2):
    """Partition candidates into frequent and infrequent sets by support count.

    Parameters
    ----------
    count : sequence of support counts, aligned with the iteration order of
        ``itemset``.
    itemset : sized iterable of hashable candidates.
    sup_count : minimum support count (default 2); a candidate whose count is
        greater than or equal to it is frequent.

    Returns
    -------
    (frequent, infrequent) : two sets of candidates.
    """
    kept, dropped = set(), set()
    for position, member in enumerate(itemset):
        if count[position] >= sup_count:
            kept.add(member)
        else:
            dropped.add(member)
    return kept, dropped

In [18]:
def apriori(number_itemset,df,min_sup):
    """Find frequent itemsets with the Apriori algorithm.

    Parameters
    ----------
    number_itemset : largest itemset size to generate.
    df : pandas.DataFrame of transactions (one per row, missing cells as NaN).
    min_sup : minimum support count an itemset needs to be kept.

    Returns
    -------
    For ``number_itemset == 1``: the set of frequent 1-itemsets (frozensets).
    Otherwise: a list of lists of frozensets - the frequent 1-itemsets first,
    then one list per larger size that has at least one frequent itemset.
    NOTE(review): the two return shapes differ; kept as-is so existing callers
    are not affected.
    """
    # 1-itemsets: collect the distinct items from every column of `df`.
    # The earlier version scanned only columns "0".."4", so an item appearing
    # solely in a later column was never considered. It also relied on
    # ``Series.append``, which was removed in pandas 2.0.
    stacked = pd.concat([df[column] for column in df.columns], ignore_index=True)
    comb1_set = {frozenset([item]) for item in stacked.dropna().unique() if item}

    # Prune the 1-itemsets with the minimum support
    count1 = freq(df,comb1_set)
    freq_itemset,infreq_itemset = prune(count1,comb1_set,min_sup)

    if number_itemset == 1:
        return freq_itemset

    # Generate the larger itemsets level by level
    all_freq_itemset = [list(freq_itemset)]

    for i in range(2,number_itemset+1):
        # join the frequent itemsets of the previous level
        joined_set = join(freq_itemset,i)
        # drop joins that contain an itemset found infrequent at the previous level
        candidate_set = candidate(infreq_itemset,joined_set)
        # count the support of the remaining candidates
        count = freq(df,candidate_set)
        # keep only the candidates reaching the minimum support
        freq_itemset,infreq_itemset = prune(count,candidate_set,min_sup)

        if freq_itemset:
            all_freq_itemset.append(list(freq_itemset))

    return all_freq_itemset

### Illustration of Apriori algorithm to generate frequent item-sets: 

In [74]:
f_itemset_apriori_example = apriori(3,df_example,2)


In [75]:
f_itemset_apriori_example

[[frozenset({'I5'}),
  frozenset({'I3'}),
  frozenset({'I4'}),
  frozenset({'I1'}),
  frozenset({'I2'})],
 [frozenset({'I2', 'I5'}),
  frozenset({'I2', 'I3'}),
  frozenset({'I2', 'I4'}),
  frozenset({'I1', 'I3'}),
  frozenset({'I1', 'I5'}),
  frozenset({'I1', 'I2'})],
 [frozenset({'I1', 'I2', 'I3'}), frozenset({'I1', 'I2', 'I5'})]]

In [72]:
f_itemset_apriori = apriori(5,df,280)

In [73]:
f_itemset_apriori

[[frozenset({'Sugar'}),
  frozenset({'Milk'}),
  frozenset({'Butter'}),
  frozenset({'Bread'}),
  frozenset({'Lassi'}),
  frozenset({'Panner'}),
  frozenset({'Cheese'}),
  frozenset({'Yougurt'}),
  frozenset({'Coffee Powder'}),
  frozenset({'Ghee'}),
  frozenset({'Sweet'}),
  frozenset({'Tea Powder'})],
 [frozenset({'Sugar', 'Sweet'}),
  frozenset({'Butter', 'Coffee Powder'}),
  frozenset({'Panner', 'Tea Powder'}),
  frozenset({'Bread', 'Yougurt'}),
  frozenset({'Cheese', 'Yougurt'}),
  frozenset({'Coffee Powder', 'Yougurt'}),
  frozenset({'Bread', 'Milk'}),
  frozenset({'Bread', 'Sweet'}),
  frozenset({'Butter', 'Cheese'}),
  frozenset({'Lassi', 'Milk'}),
  frozenset({'Lassi', 'Sugar'}),
  frozenset({'Bread', 'Tea Powder'}),
  frozenset({'Milk', 'Sugar'}),
  frozenset({'Cheese', 'Tea Powder'}),
  frozenset({'Lassi', 'Tea Powder'}),
  frozenset({'Coffee Powder', 'Sugar'}),
  frozenset({'Tea Powder', 'Yougurt'}),
  frozenset({'Coffee Powder', 'Milk'}),
  frozenset({'Panner', 'Sugar'}),


In [109]:
associations(df,f_itemset_apriori,50)

rule:frozenset({'Coffee Powder', 'Bread', 'Ghee', 'Panner'})->{'Lassi'}->51.5358361774744
rule:frozenset({'Bread', 'Ghee', 'Lassi', 'Panner'})->{'Coffee Powder'}->50.249584026622294
rule:frozenset({'Coffee Powder', 'Panner', 'Ghee', 'Lassi'})->{'Bread'}->50.58626465661642
rule:frozenset({'Coffee Powder', 'Bread', 'Ghee', 'Lassi'})->{'Panner'}->50.92748735244519
rule:frozenset({'Panner', 'Lassi', 'Cheese', 'Yougurt'})->{'Sweet'}->51.286449399656945
rule:frozenset({'Sweet', 'Lassi', 'Cheese', 'Yougurt'})->{'Panner'}->51.02389078498294
rule:frozenset({'Sweet', 'Panner', 'Cheese', 'Yougurt'})->{'Lassi'}->50.42158516020236
rule:frozenset({'Sweet', 'Panner', 'Lassi', 'Yougurt'})->{'Cheese'}->50.85034013605442
rule:frozenset({'Coffee Powder', 'Lassi', 'Milk', 'Butter'})->{'Sugar'}->50.77989601386482
rule:frozenset({'Coffee Powder', 'Sugar', 'Lassi', 'Butter'})->{'Milk'}->51.493848857644984
rule:frozenset({'Coffee Powder', 'Panner', 'Ghee', 'Cheese'})->{'Bread'}->50.26642984014209
rule:frozens


## FP-Growth Algorithm: Use FP-Growth algorithm for finding frequent itemsets.

In [19]:
#class of FP TREE node
class TreeNode:
    """One node of an FP tree: an item name, a count, and links to related nodes."""

    def __init__(self, node_name,count,parentnode):
        # Item label and the count stored on this node
        self.name, self.count = node_name, count
        # Node directly above this one (None for a node without a parent)
        self.parent = parentnode
        # Child nodes, stored in a dict
        self.children = {}
        # Link to another node; starts out unset
        self.node_link = None

    def __str__(self, level=0):
        """Return this subtree as text: one name per line, one tab per depth level."""
        pieces = ["\t" * level + repr(self.name) + "\n"]
        pieces.extend(subtree.__str__(level + 1) for subtree in self.children.values())
        return "".join(pieces)

    def __repr__(self):
        """Fixed placeholder used when a node is displayed or nested in a container."""
        return '<tree node representation>'

    def increment_counter(self, count):
        """Add ``count`` to this node's count."""
        self.count += count

In [20]:
# Reading Data - flexible to use txt or csv file which reads the line and strips the unnecessary data, split and form a list.
# The FM creates a list of Transaction Database
def read_as_list(file):
    """Read a comma-separated transaction file into a list of item lists.

    NOTE(review): this repeats the ``read_as_list`` defined in the Basic I/O
    section; the later definition is the one in effect once this cell runs.

    Parameters
    ----------
    file : path of a text/csv file with one transaction per line.

    Returns
    -------
    list of lists of str, one inner list per line of the file.
    """
    list_out = list()

    with open(file) as f:
        row_lines = f.readlines()

    # A stray "1" in front of this `for` made the whole cell a SyntaxError
    for i in row_lines:
        # trim whitespace, then leading/trailing commas, before splitting
        single_line = i.strip().strip(",")
        list_out.append(single_line.split(','))

    return list_out

In [21]:
# To convert initial transaction into frozenset
# Creating a frozen dictionary of Database(transactions) and counting the occurences of transaction - to be used to generate frequent itemsets
def create_frozen_set(database_list):
    """Count how often each distinct transaction occurs.

    Parameters
    ----------
    database_list : iterable of transactions, each an iterable of hashable items.

    Returns
    -------
    dict mapping ``frozenset(transaction)`` to its number of occurrences;
    item order within a transaction is ignored.
    """
    occurrences = {}
    for Tx in database_list:
        key = frozenset(Tx)
        occurrences[key] = occurrences.get(key, 0) + 1
    return occurrences

In [22]:
#The FP Tree is created using ordered sets
def add_tree_nodes(item_set, fptree, header_table, count):
    """Insert an ordered itemset into the FP tree below ``fptree``.

    Parameters
    ----------
    item_set : non-empty list of items, already in insertion order.
    fptree : TreeNode under which the first item is inserted.
    header_table : dict mapping item -> [support, first node of that item];
        the node slot is filled in or extended through the node links.
    count : amount added to (or used to initialise) each node on the path.
    """
    head, remainder = item_set[0], item_set[1:]

    existing = fptree.children.get(head)
    if existing is not None:
        # Path already present: only bump its counter
        existing.increment_counter(count)
    else:
        created = TreeNode(head, count, fptree)
        fptree.children[head] = created

        first_of_item = header_table[head][1]
        if first_of_item is None:
            # First node for this item: the header table points straight at it
            header_table[head][1] = created
        else:
            # Otherwise append it to the item's chain of node links
            add_node_link(first_of_item, created)

    # Recurse one level down with the rest of the itemset
    if remainder:
        add_tree_nodes(remainder, fptree.children[head], header_table, count)

In [23]:
#The node link is added
def add_node_link(previous_node, next_node):
    """Append ``next_node`` to the end of the node-link chain starting at ``previous_node``.

    Parameters
    ----------
    previous_node : any node of the chain; the chain is followed to its end.
    next_node : node that becomes the new last element.
    """
    last = previous_node
    # Follow the links until the node whose link is still unset
    while last.node_link is not None:
        last = last.node_link
    last.node_link = next_node

In [24]:
# Generate Frequent Pattern tree
def generate_FP_tree(dict_frozen_set, min_sup):
    """Build an FP tree and its header table from counted transactions.

    Parameters
    ----------
    dict_frozen_set : dict mapping frozenset(transaction) -> occurrence count.
    min_sup : minimum support count; items below it are left out of the tree.

    Returns
    -------
    (Tree, header_table) where ``Tree`` is the root TreeNode (named 'Null')
    and ``header_table`` maps each frequent item to [support, first node].
    ``(None, None)`` is returned when no item reaches ``min_sup``.
    """
    # Header table, step 1: support of every item = sum of the counts of the
    # transactions it appears in (`get` supplies 0 for an item seen for the first time)
    header_table = {}
    for frozen_set in dict_frozen_set:
        for key_item in frozen_set:
            header_table[key_item] = header_table.get(key_item,0) + dict_frozen_set[frozen_set]

    # Prune with min_sup so only frequent 1-itemsets remain
    # (iterate over a copy of the keys because entries are deleted)
    for i in list(header_table):
        if header_table[i] < min_sup:
            del(header_table[i])

    # Keys that survived are the frequent items
    frequent_itemset = set(header_table.keys())

    if len(frequent_itemset) == 0:
        return None, None

    # Header table, step 2: add a slot for the first tree node of each item
    for j in header_table:
        header_table[j] = [header_table[j], None]

    Tree = TreeNode('Null',1,None)
    for item_set,count in dict_frozen_set.items():
        frequent_tx = {item: header_table[item][0] for item in item_set if item in frequent_itemset}
        if len(frequent_tx) > 0:
            # Order the transaction by descending support. Ties are broken by
            # the item's string form so that every transaction uses the same
            # global order. Sorting on support alone left equally frequent
            # items in frozenset iteration order, which can differ between
            # transactions and splits shared prefixes across separate branches.
            ordered_itemset = [v[0] for v in sorted(frequent_tx.items(), key=lambda p: (-p[1], str(p[0])))]
            # insert the ordered transaction into the tree
            add_tree_nodes(ordered_itemset, Tree, header_table, count)
    return Tree, header_table

#### Mining Frequent item-sets by using generated FP Tree

In [25]:
#FP Tree is traversed upwards
def traverse_fptree(leaf_Node, prefix_path):
    """Walk from ``leaf_Node`` up towards the root, recording node names.

    Parameters
    ----------
    leaf_Node : node to start from.
    prefix_path : list that receives the names, starting with ``leaf_Node``'s
        own name; the parent-less top node is not added.
    """
    current = leaf_Node
    while current.parent is not None:
        prefix_path.append(current.name)
        current = current.parent

In [26]:
#returns conditional pattern base(prefix paths)
def find_prefix_path(base_path, tree_node):
    """Collect the conditional pattern base (prefix paths) for one item.

    Parameters
    ----------
    base_path : the item being mined; not used by the body, kept for the
        existing call signature.
    tree_node : first node of that item; the remaining nodes are reached
        through ``node_link``.

    Returns
    -------
    dict mapping frozenset(ancestor item names) -> summed count of the nodes
    that have exactly that ancestor set.
    """
    Conditional_patterns_base = {}

    while tree_node is not None:
        prefix_path = []
        traverse_fptree(tree_node, prefix_path)
        # prefix_path[0] is the node itself; only its ancestors form the prefix
        if len(prefix_path) > 1:
            ancestors = frozenset(prefix_path[1:])
            # Accumulate rather than assign: the key is an unordered set, so
            # two nodes whose ancestors are the same items would otherwise
            # overwrite each other's count.
            Conditional_patterns_base[ancestors] = Conditional_patterns_base.get(ancestors, 0) + tree_node.count
        tree_node = tree_node.node_link

    return Conditional_patterns_base

In [27]:
#Conditional FP Tree and Conditional Pattern Base are recursively mined
def mining(fptree, header_table, min_sup, prefix, frequent_itemset):
    """Recursively collect frequent itemsets from a header table.

    Items of ``header_table`` are processed in ascending order of their
    support count (the first element of each header entry). For every item,
    ``prefix`` extended by that item is appended to ``frequent_itemset``.
    A conditional pattern base is then built from the item's linked nodes,
    a conditional FP tree is generated from it, and the function recurses
    on that tree when it has a header table.

    Parameters
    ----------
    fptree : any
        Not used inside this function; kept for the existing call signature.
    header_table : dict
        Maps item -> [support count, first linked tree node].
    min_sup : number
        Minimum support passed on to ``generate_FP_tree``.
    prefix : set
        Items already fixed on the current recursion branch.
    frequent_itemset : list
        Output list; found itemsets (sets) are appended in place.

    Returns
    -------
    None
    """
    # Least frequent items first; sorted() is stable, so ties keep the
    # header table's own order.
    items_by_support = sorted(header_table, key=lambda name: header_table[name][0])

    for item in items_by_support:
        # New set made of the current prefix plus this item.
        extended = prefix | {item}
        frequent_itemset.append(extended)

        # Conditional pattern base for this item, then its conditional tree.
        pattern_base = find_prefix_path(item, header_table[item][1])
        subtree, sub_header = generate_FP_tree(pattern_base, min_sup)

        # No frequent items left in the conditional tree: nothing to extend.
        if sub_header is None:
            continue
        mining(subtree, sub_header, min_sup, extended, frequent_itemset)

### Illustration of FP Growth algorithm to generate frequent item-sets: 

In [40]:
# Creating FP Tree and header table for the example dataset
# parameters - input list(frozenset), minimum support count value
fptree_example, header_table_example = generate_FP_tree(create_frozen_set(input_list_example), 2)

In [41]:
# Display of Tree showing it as object - tree node representation
fptree_example

<tree node representation>

In [42]:
# printing string values of a tree
str(fptree_example)

"'Null'\n\t'I2'\n\t\t'I1'\n\t\t\t'I5'\n\t\t\t'I4'\n\t\t\t'I3'\n\t\t\t\t'I5'\n\t\t'I4'\n\t\t'I3'\n\t'I1'\n\t\t'I3'\n"

In [44]:
# printing FP Tree
print(fptree_example)

'Null'
	'I2'
		'I1'
			'I5'
			'I4'
			'I3'
				'I5'
		'I4'
		'I3'
	'I1'
		'I3'



In [46]:
# Displaying header table
header_table_example

{'I5': [2, <tree node representation>],
 'I1': [6, <tree node representation>],
 'I2': [7, <tree node representation>],
 'I4': [2, <tree node representation>],
 'I3': [6, <tree node representation>]}

In [50]:
# function call to write FP tree into a file
write_tree("fptreeexample.txt",fptree_example)

In [51]:
# Mining to obtain frequent itemsets
all_frequent_itemsets_example = []
#call function to mine all frequent itemsets
mining(fptree_example, header_table_example, 2, set([]), all_frequent_itemsets_example)

In [52]:
# Display frequent itemsets
all_frequent_itemsets_example

[{'I5'},
 {'I1', 'I5'},
 {'I2', 'I5'},
 {'I1', 'I2', 'I5'},
 {'I4'},
 {'I2', 'I4'},
 {'I1'},
 {'I1', 'I2'},
 {'I3'},
 {'I2', 'I3'},
 {'I1', 'I2', 'I3'},
 {'I1', 'I3'},
 {'I2'}]

In [114]:
# Generate association rules from the FP-growth itemsets of the example (threshold 50).
# NOTE(review): this passes `df` (loaded from GroceryStore.csv) together with the
# example itemsets, while the other example calls pass `df_example`; the cell also
# shows no rules in its output -- confirm the intended arguments.
associations(df,[all_frequent_itemsets_example],50)

##### Testing on Dataset

In [115]:
fptree, header_table = generate_FP_tree(create_frozen_set(input_list), 200)

In [116]:
fptree

<tree node representation>

In [117]:
str(fptree)

"'Null'\n\t'Ghee'\n\t\t'Coffee Powder'\n\t\t\t'Yougurt'\n\t\t\t\t'Butter'\n\t\t\t\t\t'Cheese'\n\t\t\t\t\t\t'Lassi'\n\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t'Panner'\n\t\t\t\t\t\t\t'Lassi'\n\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t'Panner'\n\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t'Lassi'\n\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t'Lassi'\n\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t'Tea Powder'\n\t\t\t\t'Bread'\n\t\t\t\t\t'Sweet'\n\t\t\t\t\t\t'Sugar'\n\t\t\t\t\t\t\t'Cheese'\n\t\t\t\t\t\t\t\t'Lassi'\n\t\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t\t'Panner'\n\t\t\t\t\t\t\t\t\t'Lassi'\n\t\t\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t'Butter'\n\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t\t'Lassi'\n\t\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t\t'Panner'\n\t\t\t\t\t\t\t\t\t'Lassi'\n\t\t\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t\t\t'Tea Powder'\n\t\t\t\t\t\t\t\t'Cheese'\n\t\t\t\t\t\t\t\t\t'L

In [118]:
print(fptree)

'Null'
	'Ghee'
		'Coffee Powder'
			'Yougurt'
				'Butter'
					'Cheese'
						'Lassi'
							'Tea Powder'
						'Tea Powder'
						'Panner'
							'Lassi'
								'Tea Powder'
							'Tea Powder'
					'Panner'
						'Tea Powder'
						'Lassi'
							'Tea Powder'
					'Lassi'
						'Tea Powder'
					'Tea Powder'
				'Bread'
					'Sweet'
						'Sugar'
							'Cheese'
								'Lassi'
									'Tea Powder'
								'Panner'
									'Lassi'
										'Tea Powder'
									'Tea Powder'
								'Tea Powder'
							'Butter'
								'Tea Powder'
								'Lassi'
									'Tea Powder'
								'Panner'
									'Lassi'
										'Tea Powder'
									'Tea Powder'
								'Cheese'
									'Lassi'
									'Panner'
										'Tea Powder'
							'Lassi'
								'Tea Powder'
							'Panner'
								'Tea Powder'
								'Lassi'
									'Tea Powder'
						'Cheese'
							'Panner'
								'Lassi'
									'Tea Powder'
							'Tea Powder'
							'Lassi'
								'Tea Powder'
						'Panner'
							'Lassi

In [119]:
header_table

{'Ghee': [5510, <tree node representation>],
 'Yougurt': [5503, <tree node representation>],
 'Butter': [5481, <tree node representation>],
 'Coffee Powder': [5509, <tree node representation>],
 'Lassi': [5432, <tree node representation>],
 'Cheese': [5476, <tree node representation>],
 'Tea Powder': [5383, <tree node representation>],
 'Bread': [5484, <tree node representation>],
 'Panner': [5444, <tree node representation>],
 'Sugar': [5482, <tree node representation>],
 'Sweet': [5483, <tree node representation>],
 'Milk': [5526, <tree node representation>]}

In [120]:
# function call to write FP tree into a file
write_tree("fptree.txt",fptree)

In [121]:
# Mining to obtain frequent itemsets
all_frequent_itemsets_fp = []
#call function to mine all frequent itemsets
mining(fptree, header_table, 200, set([]), all_frequent_itemsets_fp)

In [122]:
all_frequent_itemsets_fp

[{'Tea Powder'},
 {'Cheese', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Sweet', 'Tea Powder'},
 {'Butter', 'Cheese', 'Ghee', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Coffee Powder', 'Ghee', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Milk', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Sugar', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Sweet', 'Tea Powder', 'Yougurt'},
 {'Cheese', 'Ghee', 'Panner', 'Sweet', 'Tea Powder'},
 {'Bread', 'Cheese', 'Ghee', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Tea Powder'},
 {'Butter', 'Cheese', 'Ghee', 'Lassi', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Tea Powder', 'Yougurt'},
 {'Cheese', 'Ghee', 'Lassi', 'Milk', 'Tea Powder'},
 {'Cheese', 'Coffee Powder', 'Ghee', 'Lassi', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Sugar', 'Tea Powder'},
 {'Bread', 'Cheese', 'Ghee', 'Lassi', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Panner', 'Tea Powder'},
 {'Cheese', 'Ghe

 
## Experiment on the Dataset : Apply your associated rule mining algorithms to the dataset and show some interesting rules.

In [53]:
associations(df_example,freq_itemsets_example,50)

rule:('I1', 'I2')->{'I3'}->50.0
rule:('I1', 'I3')->{'I2'}->50.0
rule:('I2', 'I3')->{'I1'}->50.0
rule:('I1', 'I2')->{'I5'}->50.0
rule:('I1', 'I5')->{'I2'}->100.0
rule:('I2', 'I5')->{'I1'}->100.0
rule:('I5',)->{'I1', 'I2'}->100.0
rule:('I1',)->{'I2'}->66.66666666666666
rule:('I2',)->{'I1'}->57.14285714285714
rule:('I1',)->{'I3'}->66.66666666666666
rule:('I3',)->{'I1'}->66.66666666666666
rule:('I5',)->{'I1'}->100.0
rule:('I4',)->{'I2'}->100.0
rule:('I2',)->{'I3'}->57.14285714285714
rule:('I3',)->{'I2'}->66.66666666666666
rule:('I5',)->{'I2'}->100.0


In [54]:
associations(df_example,freq_itemsets_example,70)

rule:('I1', 'I5')->{'I2'}->100.0
rule:('I2', 'I5')->{'I1'}->100.0
rule:('I5',)->{'I1', 'I2'}->100.0
rule:('I5',)->{'I1'}->100.0
rule:('I4',)->{'I2'}->100.0
rule:('I5',)->{'I2'}->100.0


In [55]:
associations(df_example,freq_itemsets_example,60)

rule:('I1', 'I5')->{'I2'}->100.0
rule:('I2', 'I5')->{'I1'}->100.0
rule:('I5',)->{'I1', 'I2'}->100.0
rule:('I1',)->{'I2'}->66.66666666666666
rule:('I1',)->{'I3'}->66.66666666666666
rule:('I3',)->{'I1'}->66.66666666666666
rule:('I5',)->{'I1'}->100.0
rule:('I4',)->{'I2'}->100.0
rule:('I3',)->{'I2'}->66.66666666666666
rule:('I5',)->{'I2'}->100.0


##### Testing on dataset

In [123]:
associations(df,freq_itemsets2,50)

rule:('Lassi', 'Ghee', 'Cheese', 'Bread')->{'Coffee Powder'}->50.78534031413613
rule:('Lassi', 'Ghee', 'Cheese', 'Coffee Powder')->{'Bread'}->50.696864111498265
rule:('Lassi', 'Cheese', 'Bread', 'Coffee Powder')->{'Ghee'}->50.172413793103445
rule:('Lassi', 'Ghee', 'Butter', 'Milk')->{'Sugar'}->50.08695652173913
rule:('Lassi', 'Ghee', 'Butter', 'Yougurt')->{'Sugar'}->50.357142857142854
rule:('Ghee', 'Sugar', 'Panner', 'Milk')->{'Lassi'}->50.0
rule:('Lassi', 'Ghee', 'Sugar', 'Panner')->{'Bread'}->50.255536626916516
rule:('Lassi', 'Ghee', 'Sugar', 'Bread')->{'Panner'}->51.393728222996515
rule:('Lassi', 'Sugar', 'Panner', 'Bread')->{'Ghee'}->50.513698630136986
rule:('Ghee', 'Sugar', 'Panner', 'Bread')->{'Lassi'}->50.34129692832765
rule:('Ghee', 'Sugar', 'Milk', 'Bread')->{'Lassi'}->50.35460992907801
rule:('Lassi', 'Ghee', 'Sugar', 'Tea Powder')->{'Milk'}->51.056338028169016
rule:('Lassi', 'Ghee', 'Milk', 'Tea Powder')->{'Sugar'}->50.6993006993007
rule:('Ghee', 'Sugar', 'Milk', 'Tea Powder'

In [124]:
associations(df,freq_itemsets4,50)

In [125]:
associations(df,freq_itemsets5,45)

rule:('Lassi', 'Sweet')->{'Panner'}->49.107142857142854
rule:('Lassi', 'Panner')->{'Sweet'}->50.66079295154186
rule:('Sweet', 'Panner')->{'Lassi'}->50.49900199600798
rule:('Lassi',)->{'Ghee'}->46.226067746686304
rule:('Ghee',)->{'Lassi'}->45.57168784029038
rule:('Lassi',)->{'Cheese'}->45.72901325478645
rule:('Cheese',)->{'Lassi'}->45.36157779401023
rule:('Lassi',)->{'Sugar'}->46.078792341678934
rule:('Sugar',)->{'Lassi'}->45.658518788763224
rule:('Lassi',)->{'Sweet'}->47.42268041237113
rule:('Sweet',)->{'Lassi'}->46.98157942732081
rule:('Lassi',)->{'Butter'}->46.0419734904271
rule:('Butter',)->{'Lassi'}->45.63035942346288
rule:('Lassi',)->{'Panner'}->45.968335787923415
rule:('Panner',)->{'Lassi'}->45.867009551800145
rule:('Lassi',)->{'Milk'}->46.741531664212076
rule:('Milk',)->{'Lassi'}->45.946435034382915
rule:('Lassi',)->{'Bread'}->46.134020618556704
rule:('Bread',)->{'Lassi'}->45.69657184536835
rule:('Lassi',)->{'Coffee Powder'}->46.24447717231222
rule:('Coffee Powder',)->{'Lassi'}-

In [128]:
associations(df,freq_itemsets6,45)

rule:('Lassi',)->{'Ghee'}->46.226067746686304
rule:('Ghee',)->{'Lassi'}->45.57168784029038
rule:('Lassi',)->{'Cheese'}->45.72901325478645
rule:('Cheese',)->{'Lassi'}->45.36157779401023
rule:('Lassi',)->{'Sugar'}->46.078792341678934
rule:('Sugar',)->{'Lassi'}->45.658518788763224
rule:('Lassi',)->{'Sweet'}->47.42268041237113
rule:('Sweet',)->{'Lassi'}->46.98157942732081
rule:('Lassi',)->{'Butter'}->46.0419734904271
rule:('Butter',)->{'Lassi'}->45.63035942346288
rule:('Lassi',)->{'Panner'}->45.968335787923415
rule:('Panner',)->{'Lassi'}->45.867009551800145
rule:('Lassi',)->{'Milk'}->46.741531664212076
rule:('Milk',)->{'Lassi'}->45.946435034382915
rule:('Lassi',)->{'Bread'}->46.134020618556704
rule:('Bread',)->{'Lassi'}->45.69657184536835
rule:('Lassi',)->{'Coffee Powder'}->46.24447717231222
rule:('Coffee Powder',)->{'Lassi'}->45.59811218006898
rule:('Lassi',)->{'Yougurt'}->45.39764359351988
rule:('Cheese',)->{'Ghee'}->45.14243973703433
rule:('Ghee',)->{'Sugar'}->45.66243194192378
rule:('S

##### A modular coding approach is used so that the functions can be reused across the different algorithms. Sufficient comments are added to make the code readable.


## Run-Time Performance

In [None]:
# Measuring run-time performance of generating frequent itemsets
#1. Brute force approach

In [132]:
# Time the brute-force frequent-itemset generation on `df`.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock and can jump if the system clock changes.
# NOTE(review): 5 and 200 are presumably the maximum itemset size and the minimum
# support count (200 matches the FP-growth run) -- confirm against the function.
start1 = time.perf_counter()
freq_itemsets1 = brute_force_frequent_itemset(5,df,200)
end1 = time.perf_counter()
print("The time taken by Brute force algorithm implemented: ")
print(end1-start1)

The time taken by Brute force algorithm implemented: 
16.434058904647827


In [None]:
#2. Apriori approach

In [133]:
# Time the Apriori frequent-itemset generation on `df`.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock and can jump if the system clock changes.
# NOTE(review): 5 and 200 are presumably the maximum itemset size and the minimum
# support count (200 matches the FP-growth run) -- confirm against the function.
start2 = time.perf_counter()
f_itemset_apriori = apriori(5,df,200)
end2 = time.perf_counter()
print("The time taken by Apriori algorithm implemented: ")
print(end2-start2)

The time taken by Apriori algorithm implemented: 
15.487321615219116


In [None]:
#3. FP Growth approach

In [134]:
# Time the full FP-growth pipeline: building the FP tree and header table from
# `input_list`, then mining every frequent itemset with minimum support 200.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock and can jump if the system clock changes.
start3 = time.perf_counter()
fptree, header_table = generate_FP_tree(create_frozen_set(input_list), 200)
all_frequent_itemsets_fp = []
mining(fptree, header_table, 200, set([]), all_frequent_itemsets_fp)
end3 = time.perf_counter()
print("The time taken by FP Growth algorithm implemented: ")
print(end3-start3)

The time taken by FP Growth algorithm implemented: 
0.6471080780029297


In [135]:
all_frequent_itemsets_fp

[{'Tea Powder'},
 {'Cheese', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Sweet', 'Tea Powder'},
 {'Butter', 'Cheese', 'Ghee', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Coffee Powder', 'Ghee', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Milk', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Sugar', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Sweet', 'Tea Powder', 'Yougurt'},
 {'Cheese', 'Ghee', 'Panner', 'Sweet', 'Tea Powder'},
 {'Bread', 'Cheese', 'Ghee', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Sweet', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Tea Powder'},
 {'Butter', 'Cheese', 'Ghee', 'Lassi', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Tea Powder', 'Yougurt'},
 {'Cheese', 'Ghee', 'Lassi', 'Milk', 'Tea Powder'},
 {'Cheese', 'Coffee Powder', 'Ghee', 'Lassi', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Sugar', 'Tea Powder'},
 {'Bread', 'Cheese', 'Ghee', 'Lassi', 'Tea Powder'},
 {'Cheese', 'Ghee', 'Lassi', 'Panner', 'Tea Powder'},
 {'Cheese', 'Ghe

From the above analysis, FP Growth runs faster than the other two algorithms because it computes the frequent itemsets recursively.
Brute Force is the slowest because it makes use of all combinations. Apriori is faster than Brute Force because it
makes use of only prior information (the frequent sets) instead of all the combinations. Apriori is still generally slower
than FP Growth because it scans the database at each step of pruning.