# Problem Statement

An organization wanted to mine association rules of frequently bought items from its stores and suggest some recommendations to its customers. 

As a data scientist, you are required to recognize patterns in the available data and to evaluate the efficacy of the methods used to obtain them. Your activities should include preparing the dataset for analysis, investigating relationships in the dataset with visualization, identifying frequent patterns, formulating association rules, and evaluating the quality of those rules.

### import required modules

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_excel("Dataset.xlsx")

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [4]:
# Collapse each row's non-empty cells into one comma-separated basket string.
df['Iteams'] = df.apply(lambda row: ','.join(row.dropna().astype(str)), axis=1)

In [5]:
df.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Iteams
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,...,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,burgers,meatballs,eggs,,,,,,,,...,,,,,,,,,,"burgers,meatballs,eggs"
2,chutney,,,,,,,,,,...,,,,,,,,,,chutney
3,turkey,avocado,,,,,,,,,...,,,,,,,,,,"turkey,avocado"
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,...,,,,,,,,,,"mineral water,milk,energy bar,whole wheat rice..."


In [6]:
# Drop the 20 raw item columns ('Unnamed: 0' ... 'Unnamed: 19'), leaving only 'Iteams'.
raw_item_columns = ['Unnamed: {}'.format(i) for i in range(20)]
df.drop(raw_item_columns, axis=1, inplace=True)

In [7]:
df.head()

Unnamed: 0,Iteams
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."


In [8]:
# Occurrence count of every item name, filled in by count_data()
items_count = {}
def count_data(x):
    """Tally the items of one comma-separated basket string into ``items_count``.

    ``x`` is split on ',' and each piece increments its entry in the
    module-level ``items_count`` dict (created on first sight). Returns None.
    """
    for name in x.split(","):
        items_count[name] = items_count.get(name, 0) + 1

df['Iteams'].apply(count_data)
print("items_count")
print(items_count)

items_count
{'shrimp': 536, 'almonds': 153, 'avocado': 250, 'vegetables mix': 193, 'green grapes': 68, 'whole weat flour': 70, 'yams': 86, 'cottage cheese': 239, 'energy drink': 200, 'tomato juice': 228, 'low fat yogurt': 574, 'green tea': 991, 'honey': 356, 'salad': 37, 'mineral water': 1788, 'salmon': 319, 'antioxydant juice': 67, 'frozen smoothie': 475, 'spinach': 53, 'olive oil': 494, 'burgers': 654, 'meatballs': 157, 'eggs': 1348, 'chutney': 31, 'turkey': 469, 'milk': 972, 'energy bar': 203, 'whole wheat rice': 439, 'whole wheat pasta': 221, 'french fries': 1282, 'soup': 379, 'light cream': 117, 'shallot': 58, 'frozen vegetables': 715, 'spaghetti': 1306, 'pet food': 49, 'cookies': 603, 'cooking oil': 383, 'champagne': 351, 'chocolate': 1230, 'chicken': 450, 'oil': 173, 'fresh tuna': 167, 'tomatoes': 513, 'black tea': 107, 'extra dark chocolate': 90, 'protein bar': 139, 'red wine': 211, 'pasta': 118, 'pepper': 199, 'shampoo': 37, 'rice': 141, 'sparkling water': 47, 'ham': 203, 'bod

In [9]:
# Give every distinct item name a 1-based integer id, in the order the names
# appear in items_count.
dictionary = {item: number for number, item in enumerate(items_count, start=1)}
count = len(dictionary) + 1  # next unused id
print(dictionary)

{'shrimp': 1, 'almonds': 2, 'avocado': 3, 'vegetables mix': 4, 'green grapes': 5, 'whole weat flour': 6, 'yams': 7, 'cottage cheese': 8, 'energy drink': 9, 'tomato juice': 10, 'low fat yogurt': 11, 'green tea': 12, 'honey': 13, 'salad': 14, 'mineral water': 15, 'salmon': 16, 'antioxydant juice': 17, 'frozen smoothie': 18, 'spinach': 19, 'olive oil': 20, 'burgers': 21, 'meatballs': 22, 'eggs': 23, 'chutney': 24, 'turkey': 25, 'milk': 26, 'energy bar': 27, 'whole wheat rice': 28, 'whole wheat pasta': 29, 'french fries': 30, 'soup': 31, 'light cream': 32, 'shallot': 33, 'frozen vegetables': 34, 'spaghetti': 35, 'pet food': 36, 'cookies': 37, 'cooking oil': 38, 'champagne': 39, 'chocolate': 40, 'chicken': 41, 'oil': 42, 'fresh tuna': 43, 'tomatoes': 44, 'black tea': 45, 'extra dark chocolate': 46, 'protein bar': 47, 'red wine': 48, 'pasta': 49, 'pepper': 50, 'shampoo': 51, 'rice': 52, 'sparkling water': 53, 'ham': 54, 'body spray': 55, 'pancakes': 56, 'grated cheese': 57, 'white wine': 58,

In [10]:
# Split every basket string on ',' into a list of item names (one list per row).
comma_splitted_df = df.Iteams.apply(lambda x: x.split(','))

In [11]:
comma_splitted_df

0       [shrimp, almonds, avocado, vegetables mix, gre...
1                              [burgers, meatballs, eggs]
2                                               [chutney]
3                                       [turkey, avocado]
4       [mineral water, milk, energy bar, whole wheat ...
                              ...                        
7496                    [butter, light mayo, fresh bread]
7497    [burgers, frozen vegetables, eggs, french frie...
7498                                            [chicken]
7499                                [escalope, green tea]
7500    [eggs, frozen smoothie, yogurt cake, low fat y...
Name: Iteams, Length: 7501, dtype: object

In [12]:
# Translate each basket's item names to their integer ids, sorted ascending.
numbered_col = [
    sorted(dictionary[name] for name in basket)
    for basket in comma_splitted_df
]
numbered_col

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
 [21, 22, 23],
 [24],
 [3, 25],
 [12, 15, 26, 27, 28],
 [11],
 [29, 30],
 [31, 32, 33],
 [12, 34, 35],
 [30],
 [23, 36],
 [37],
 [15, 21, 23, 25, 38],
 [35, 37, 39],
 [15, 16],
 [15],
 [1, 11, 13, 38, 40, 41, 42],
 [23, 25],
 [15, 16, 23, 25, 35, 41, 43, 44, 45, 46],
 [13, 22, 26, 30, 47],
 [1, 23, 40, 48, 49, 50, 51],
 [52, 53],
 [12, 15, 35, 54, 55, 56],
 [1, 3, 13, 21, 49, 57, 58, 59],
 [23],
 [3, 26, 31, 35, 60, 61],
 [15, 16, 18, 26, 27, 35, 45, 62, 63],
 [53],
 [15, 23, 30, 40, 41],
 [7, 15, 34, 35],
 [32, 64, 65, 66],
 [3, 15, 23, 40],
 [25, 30, 67],
 [34, 40, 68],
 [37],
 [16, 35, 63, 69],
 [27, 30],
 [15, 48, 62],
 [8, 15, 70],
 [12, 39, 69],
 [35],
 [13, 15, 18, 28, 34, 35, 43, 63],
 [35],
 [22, 31, 53, 71],
 [63],
 [3, 12, 30, 31, 55, 56, 71, 72],
 [15, 41, 73, 74],
 [15, 75],
 [3, 76],
 [12, 21, 45],
 [12, 35, 40, 58, 72],
 [15, 23, 43],
 [35, 76],
 [35, 40],
 [30, 39, 63],
 [65, 77],
 [15, 20, 25, 26

### Create dataframe from numbered items:

In [13]:
# Rebind df to a one-column frame whose 'items' column holds the id lists
# built in numbered_col.
dict_data = {'items': numbered_col}
df = pd.DataFrame(dict_data)

In [14]:
df.head()

Unnamed: 0,items
0,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
1,"[21, 22, 23]"
2,[24]
3,"[3, 25]"
4,"[12, 15, 26, 27, 28]"


In [15]:
# Flatten every basket's ids into one long list, row by row.
items = [item_id for basket in df['items'] for item_id in basket]
items

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 3,
 25,
 12,
 15,
 26,
 27,
 28,
 11,
 29,
 30,
 31,
 32,
 33,
 12,
 34,
 35,
 30,
 23,
 36,
 37,
 15,
 21,
 23,
 25,
 38,
 35,
 37,
 39,
 15,
 16,
 15,
 1,
 11,
 13,
 38,
 40,
 41,
 42,
 23,
 25,
 15,
 16,
 23,
 25,
 35,
 41,
 43,
 44,
 45,
 46,
 13,
 22,
 26,
 30,
 47,
 1,
 23,
 40,
 48,
 49,
 50,
 51,
 52,
 53,
 12,
 15,
 35,
 54,
 55,
 56,
 1,
 3,
 13,
 21,
 49,
 57,
 58,
 59,
 23,
 3,
 26,
 31,
 35,
 60,
 61,
 15,
 16,
 18,
 26,
 27,
 35,
 45,
 62,
 63,
 53,
 15,
 23,
 30,
 40,
 41,
 7,
 15,
 34,
 35,
 32,
 64,
 65,
 66,
 3,
 15,
 23,
 40,
 25,
 30,
 67,
 34,
 40,
 68,
 37,
 16,
 35,
 63,
 69,
 27,
 30,
 15,
 48,
 62,
 8,
 15,
 70,
 12,
 39,
 69,
 35,
 13,
 15,
 18,
 28,
 34,
 35,
 43,
 63,
 35,
 22,
 31,
 53,
 71,
 63,
 3,
 12,
 30,
 31,
 55,
 56,
 71,
 72,
 15,
 41,
 73,
 74,
 15,
 75,
 3,
 76,
 12,
 21,
 45,
 12,
 35,
 40,
 58,
 72,
 15,
 23,
 43,
 35,
 76,
 35,
 40,


### Creating First Candidate(C1):

In [16]:
from collections import Counter

# Get the unique elements from the list
unique_items = set(items)

# Convert to list
unique_list = list(unique_items)

# Count all ids in one pass; calling items.count() once per unique id would
# rescan the whole list every time.
occurrences = Counter(items)
count_unique = [(value, occurrences[value]) for value in unique_list]
count_unique

[(1, 536),
 (2, 153),
 (3, 250),
 (4, 193),
 (5, 68),
 (6, 70),
 (7, 86),
 (8, 239),
 (9, 200),
 (10, 228),
 (11, 574),
 (12, 991),
 (13, 356),
 (14, 37),
 (15, 1788),
 (16, 319),
 (17, 67),
 (18, 475),
 (19, 53),
 (20, 494),
 (21, 654),
 (22, 157),
 (23, 1348),
 (24, 31),
 (25, 469),
 (26, 972),
 (27, 203),
 (28, 439),
 (29, 221),
 (30, 1282),
 (31, 379),
 (32, 117),
 (33, 58),
 (34, 715),
 (35, 1306),
 (36, 49),
 (37, 603),
 (38, 383),
 (39, 351),
 (40, 1230),
 (41, 450),
 (42, 173),
 (43, 167),
 (44, 513),
 (45, 107),
 (46, 90),
 (47, 139),
 (48, 211),
 (49, 118),
 (50, 199),
 (51, 37),
 (52, 141),
 (53, 47),
 (54, 203),
 (55, 86),
 (56, 713),
 (57, 393),
 (58, 124),
 (59, 61),
 (60, 149),
 (61, 323),
 (62, 737),
 (63, 595),
 (64, 371),
 (65, 106),
 (66, 82),
 (67, 160),
 (68, 58),
 (69, 45),
 (70, 608),
 (71, 243),
 (72, 253),
 (73, 193),
 (74, 63),
 (75, 65),
 (76, 181),
 (77, 204),
 (78, 101),
 (79, 47),
 (80, 79),
 (81, 36),
 (82, 205),
 (83, 131),
 (84, 226),
 (85, 36),
 (86, 1

In [17]:
# creating data frame
condidate1_df = pd.DataFrame(count_unique, columns=['itemset', 'sup'])

In [18]:
condidate1_df.head()

Unnamed: 0,itemset,sup
0,1,536
1,2,153
2,3,250
3,4,193
4,5,68


### Creating First Frequent itemset (L1):

In [19]:
def filter_sup(candidate, minimum_sup=2):
    """Keep the candidate rows whose support is strictly above a threshold.

    Parameters
    ----------
    candidate : pandas.DataFrame
        Frame with a numeric 'sup' column (one row per itemset).
    minimum_sup : int, default 2
        Threshold. A row is kept when ``sup > minimum_sup``; the comparison
        is strict, so a support equal to the threshold is dropped.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, with their original index labels.
    """
    return candidate[candidate['sup'] > minimum_sup]

#filter
freq_itemset1 = filter_sup(condidate1_df)
freq_itemset1.head()

Unnamed: 0,itemset,sup
0,1,536
1,2,153
2,3,250
3,4,193
4,5,68


In [20]:
freq_itemset1.shape

(119, 2)

### Creating Second Candidate(C2):

In [40]:
def self_join(prev_freq_itemset):
    """Apriori join step: build (k+1)-item candidates from frequent k-itemsets.

    Parameters
    ----------
    prev_freq_itemset : pandas.DataFrame
        Frame with an 'itemset' column. Each entry is either a single integer
        item id (for L1) or a set-like collection of ids (for L2 and beyond).
        The frame's index labels are irrelevant; rows are read positionally.

    Returns
    -------
    list of set
        Distinct unions of two itemsets that are exactly one item larger than
        the itemsets joined, in order of first appearance.
    """
    from itertools import combinations

    def _as_set(itemset):
        # L1 stores bare ids (Python or NumPy integers); wrap them so that
        # every level can be joined with the same set union.
        if isinstance(itemset, (int, np.integer)):
            return {itemset}
        return set(itemset)

    itemsets = [_as_set(entry) for entry in prev_freq_itemset['itemset'].tolist()]

    self_join_candidate = []
    seen = set()  # frozenset copies of accepted candidates, for O(1) de-duplication
    for itemset_i, itemset_j in combinations(itemsets, 2):
        union_candidate = itemset_i | itemset_j
        # Two k-itemsets that share fewer than k-1 items produce a union larger
        # than k+1 (e.g. {1, 2} | {3, 4}); that is not a valid next-level candidate.
        if len(union_candidate) != len(itemset_i) + 1:
            continue
        key = frozenset(union_candidate)
        if key not in seen:
            seen.add(key)
            self_join_candidate.append(union_candidate)
    return self_join_candidate

In [22]:
candidate2_list = self_join(freq_itemset1)

In [23]:
candidate2_list

[{1, 2},
 {1, 3},
 {1, 4},
 {1, 5},
 {1, 6},
 {1, 7},
 {1, 8},
 {1, 9},
 {1, 10},
 {1, 11},
 {1, 12},
 {1, 13},
 {1, 14},
 {1, 15},
 {1, 16},
 {1, 17},
 {1, 18},
 {1, 19},
 {1, 20},
 {1, 21},
 {1, 22},
 {1, 23},
 {1, 24},
 {1, 25},
 {1, 26},
 {1, 27},
 {1, 28},
 {1, 29},
 {1, 30},
 {1, 31},
 {1, 32},
 {1, 33},
 {1, 34},
 {1, 35},
 {1, 36},
 {1, 37},
 {1, 38},
 {1, 39},
 {1, 40},
 {1, 41},
 {1, 42},
 {1, 43},
 {1, 44},
 {1, 45},
 {1, 46},
 {1, 47},
 {1, 48},
 {1, 49},
 {1, 50},
 {1, 51},
 {1, 52},
 {1, 53},
 {1, 54},
 {1, 55},
 {1, 56},
 {1, 57},
 {1, 58},
 {1, 59},
 {1, 60},
 {1, 61},
 {1, 62},
 {1, 63},
 {1, 64},
 {1, 65},
 {1, 66},
 {1, 67},
 {1, 68},
 {1, 69},
 {1, 70},
 {1, 71},
 {1, 72},
 {1, 73},
 {1, 74},
 {1, 75},
 {1, 76},
 {1, 77},
 {1, 78},
 {1, 79},
 {1, 80},
 {1, 81},
 {1, 82},
 {1, 83},
 {1, 84},
 {1, 85},
 {1, 86},
 {1, 87},
 {1, 88},
 {1, 89},
 {1, 90},
 {1, 91},
 {1, 92},
 {1, 93},
 {1, 94},
 {1, 95},
 {1, 96},
 {1, 97},
 {1, 98},
 {1, 99},
 {1, 100},
 {1, 101},
 {1, 1

In [24]:
# Pair every second-level candidate (C2) with an initial support of 0.
count_candidates = [(candidate, 0) for candidate in candidate2_list]
count_candidates

[({1, 2}, 0),
 ({1, 3}, 0),
 ({1, 4}, 0),
 ({1, 5}, 0),
 ({1, 6}, 0),
 ({1, 7}, 0),
 ({1, 8}, 0),
 ({1, 9}, 0),
 ({1, 10}, 0),
 ({1, 11}, 0),
 ({1, 12}, 0),
 ({1, 13}, 0),
 ({1, 14}, 0),
 ({1, 15}, 0),
 ({1, 16}, 0),
 ({1, 17}, 0),
 ({1, 18}, 0),
 ({1, 19}, 0),
 ({1, 20}, 0),
 ({1, 21}, 0),
 ({1, 22}, 0),
 ({1, 23}, 0),
 ({1, 24}, 0),
 ({1, 25}, 0),
 ({1, 26}, 0),
 ({1, 27}, 0),
 ({1, 28}, 0),
 ({1, 29}, 0),
 ({1, 30}, 0),
 ({1, 31}, 0),
 ({1, 32}, 0),
 ({1, 33}, 0),
 ({1, 34}, 0),
 ({1, 35}, 0),
 ({1, 36}, 0),
 ({1, 37}, 0),
 ({1, 38}, 0),
 ({1, 39}, 0),
 ({1, 40}, 0),
 ({1, 41}, 0),
 ({1, 42}, 0),
 ({1, 43}, 0),
 ({1, 44}, 0),
 ({1, 45}, 0),
 ({1, 46}, 0),
 ({1, 47}, 0),
 ({1, 48}, 0),
 ({1, 49}, 0),
 ({1, 50}, 0),
 ({1, 51}, 0),
 ({1, 52}, 0),
 ({1, 53}, 0),
 ({1, 54}, 0),
 ({1, 55}, 0),
 ({1, 56}, 0),
 ({1, 57}, 0),
 ({1, 58}, 0),
 ({1, 59}, 0),
 ({1, 60}, 0),
 ({1, 61}, 0),
 ({1, 62}, 0),
 ({1, 63}, 0),
 ({1, 64}, 0),
 ({1, 65}, 0),
 ({1, 66}, 0),
 ({1, 67}, 0),
 ({1, 68}, 0),
 ({

In [25]:
initial_df_candidate = pd.DataFrame(count_candidates, columns=['itemset','sup'])
initial_df_candidate.head()

Unnamed: 0,itemset,sup
0,"{1, 2}",0
1,"{1, 3}",0
2,"{1, 4}",0
3,"{1, 5}",0
4,"{1, 6}",0


In [26]:
# Count, for every candidate itemset, how many transactions of database D contain it.
def count_support(database_dataframe, prev_candidate_list):
    """Compute the support count of each candidate itemset.

    Parameters
    ----------
    database_dataframe : pandas.DataFrame
        Transaction database with an 'items' column; each entry is an
        iterable of item ids. Rows are read in order, whatever the index.
    prev_candidate_list : list
        Candidate itemsets (set-like collections of item ids).

    Returns
    -------
    pandas.DataFrame
        Columns 'itemset' (the candidates, in input order) and 'sup' (the
        number of transactions that contain the whole candidate).

    The function prints the database and the zero-initialised candidate frame
    before counting. It touches no state outside its arguments.
    """
    df_candidate = pd.DataFrame(
        [(candidate, 0) for candidate in prev_candidate_list],
        columns=['itemset', 'sup'],
    )
    print('Database D dataframe\n', database_dataframe)
    print('(Initial) Dataframe from Candidate with All zeros sup\n', df_candidate)

    candidate_keys = [frozenset(candidate) for candidate in prev_candidate_list]
    support = [0] * len(candidate_keys)

    for transaction in database_dataframe['items']:
        basket = set(transaction)  # built once per transaction, not once per candidate
        for position, candidate in enumerate(candidate_keys):
            if candidate <= basket:
                support[position] += 1

    # Write the counts back in one assignment instead of one .loc update per hit.
    df_candidate['sup'] = support
    return df_candidate

In [27]:
count_candidate2_df = count_support(df, candidate2_list)

Database D dataframe
                                                   items
0     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
1                                          [21, 22, 23]
2                                                  [24]
3                                               [3, 25]
4                                  [12, 15, 26, 27, 28]
...                                                 ...
7496                                       [61, 77, 84]
7497                           [12, 21, 23, 30, 34, 66]
7498                                               [41]
7499                                           [12, 63]
7500                                   [11, 18, 23, 82]

[7501 rows x 1 columns]
(Initial) Dataframe from Candidate with All zeros sup
          itemset  sup
0         {1, 2}    0
1         {1, 3}    0
2         {1, 4}    0
3         {1, 5}    0
4         {1, 6}    0
...          ...  ...
7016  {116, 118}    0
7017  {116, 119}    0
7018  {117, 118}    0
7019  

In [28]:
count_candidate2_df

Unnamed: 0,itemset,sup
0,"{1, 2}",17
1,"{1, 3}",23
2,"{1, 4}",20
3,"{1, 5}",8
4,"{1, 6}",7
...,...,...
7016,"{116, 118}",0
7017,"{116, 119}",0
7018,"{117, 118}",0
7019,"{117, 119}",1


### Creating Second Frequent itemset (L2):

In [29]:
#Filter the itemset based on minimum support (occurences of items)
freq_itemset2 = filter_sup(count_candidate2_df)
freq_itemset2

Unnamed: 0,itemset,sup
0,"{1, 2}",17
1,"{1, 3}",23
2,"{1, 4}",20
3,"{1, 5}",8
4,"{1, 6}",7
...,...,...
6869,"{104, 102}",3
6881,"{116, 102}",5
6885,"{104, 103}",3
6895,"{114, 103}",3


In [38]:
freq_itemset2_reset = freq_itemset2.reset_index(drop=True)
freq_itemset2_reset.head()

Unnamed: 0,itemset,sup
0,"{1, 2}",17
1,"{1, 3}",23
2,"{1, 4}",20
3,"{1, 5}",8
4,"{1, 6}",7


## Creating the Third Candidate (C3) - Using the Candidate Forming Technique
* Self join
* Pruning

## Self Join

In [None]:
pd.set_option("display.max_rows",50)
print(freq_itemset2_reset)
self_join_result = self_join(freq_itemset2_reset)
print('self join result')
print(self_join_result)

         itemset  sup
0         {1, 2}   17
1         {1, 3}   23
2         {1, 4}   20
3         {1, 5}    8
4         {1, 6}    7
...          ...  ...
3803  {104, 102}    3
3804  {116, 102}    5
3805  {104, 103}    3
3806  {114, 103}    3
3807  {104, 114}    4

[3808 rows x 2 columns]


## Pruning

In [None]:
def get_subset(candidate):
    """Return the subsets obtained by leaving out one position of ``candidate``.

    ``candidate`` is an indexable sequence of items. Entry i of the returned
    list is the set of all elements except the one at position i. The result
    is also printed.
    """
    final = [
        {member for position, member in enumerate(candidate) if position != skipped}
        for skipped in range(len(candidate))
    ]
    print('Subset from {} : {}'.format(candidate, final))
    return final

def pruning(candidate_set, prev_freq_itemset, verbose=False):
    """Apriori prune step: keep candidates whose every (k-1)-subset is frequent.

    Parameters
    ----------
    candidate_set : iterable
        Candidate k-itemsets (set-like collections of item ids).
    prev_freq_itemset : pandas.DataFrame
        Frame with an 'itemset' column holding the frequent (k-1)-itemsets,
        either as set-like collections or, for L1, as bare integer ids.
    verbose : bool, default False
        When true, print one line for each candidate that is discarded.

    Returns
    -------
    list
        The distinct candidates that survive, in input order. A candidate is
        discarded as soon as one of its (k-1)-subsets is missing from
        ``prev_freq_itemset``.
    """
    from itertools import combinations

    def _as_key(itemset):
        # L1 stores bare ids; wrap them so they compare equal to 1-item subsets.
        if isinstance(itemset, (int, np.integer)):
            return frozenset([itemset])
        return frozenset(itemset)

    # Hash the frequent itemsets once so each subset check is a set lookup.
    frequent = {_as_key(itemset) for itemset in prev_freq_itemset['itemset']}

    kept = []
    seen = set()
    for value in candidate_set:
        key = frozenset(value)
        if key in seen:
            continue
        seen.add(key)

        # A 1-item candidate has only the empty subset, which is trivially frequent.
        subsets = combinations(key, len(key) - 1) if len(key) > 1 else ()
        missing = next(
            (set(subset) for subset in subsets if frozenset(subset) not in frequent),
            None,
        )
        if missing is None:
            kept.append(value)
        elif verbose:
            print('Pruned {}: subset {} is not frequent'.format(value, missing))

    return kept

In [None]:
freq_itemset2_reset

In [None]:
self_join_result

In [None]:
for i in range(len(self_join_result)):
    get_subset(list(self_join_result[i]))

In [None]:
freq_itemset2_reset

In [None]:
# Spot-check one candidate: is each of its leave-one-out subsets present in L2?
# NOTE(review): `subset` was never assigned before this loop; the first joined
# candidate is assumed to be the intended sample - confirm.
subset = get_subset(list(self_join_result[0])) if self_join_result else []
for item in subset:
    print(item)
    check = item == freq_itemset2_reset['itemset']
    print('Check', any(check))

In [None]:
self_join_result

In [None]:
candidate3_list = pruning(self_join_result, freq_itemset2_reset)

In [None]:
candidate3_list

## Creating the Third Frequent Itemset (L3)

In [None]:
#Then check the newest candidate value
candidate3_list

In [None]:
count_candidate3_df = count_support(df, candidate3_list)

In [None]:
count_candidate3_df

In [None]:
freq_itemset3 = filter_sup(count_candidate3_df)

In [None]:
freq_itemset3

## All Frequent Itemset

In [None]:
#Let'see each frequent itemset (L)
freq_itemset1

In [None]:
freq_itemset2

In [None]:
freq_itemset3

In [None]:
frequent_itemset = pd.concat([freq_itemset1, freq_itemset2, freq_itemset3], axis=0)

In [None]:
frequent_itemset

In [None]:
#Reset the index
frequent_itemset_final = frequent_itemset.reset_index(drop=True)

## Final Output of Freq. Itemset (L1-L3)

In [None]:
frequent_itemset_final