In [253]:
# Import libraries
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules


In [254]:
# Read groceries.csv file
df= pd.read_csv("groceries.csv")


In [255]:
# Head of the dataframe
df.head()

Unnamed: 0,Item(s),Item 1,Item 2,Item 3,Item 4,Item 5,Item 6,Item 7,Item 8,Item 9,...,Item 23,Item 24,Item 25,Item 26,Item 27,Item 28,Item 29,Item 30,Item 31,Item 32
0,4,citrus fruit,semi-finished bread,margarine,ready soups,,,,,,...,,,,,,,,,,
1,3,tropical fruit,yogurt,coffee,,,,,,,...,,,,,,,,,,
2,1,whole milk,,,,,,,,,...,,,,,,,,,,
3,4,pip fruit,yogurt,cream cheese,meat spreads,,,,,,...,,,,,,,,,,
4,4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,,...,,,,,,,,,,


In [256]:
# List the columns in dataframe
df.columns

Index(['Item(s)', 'Item 1', 'Item 2', 'Item 3', 'Item 4', 'Item 5', 'Item 6',
       'Item 7', 'Item 8', 'Item 9', 'Item 10', 'Item 11', 'Item 12',
       'Item 13', 'Item 14', 'Item 15', 'Item 16', 'Item 17', 'Item 18',
       'Item 19', 'Item 20', 'Item 21', 'Item 22', 'Item 23', 'Item 24',
       'Item 25', 'Item 26', 'Item 27', 'Item 28', 'Item 29', 'Item 30',
       'Item 31', 'Item 32'],
      dtype='object')

In [257]:
# Number of rows in dataframe
df.shape[0]

9835

### Data Cleaning
The dataframe contains a lot of NaN values. However its difficult to work with them. 
Hence its important to remove them 

In [258]:
df= df.fillna(0)

We are considering only a subset of the entire dataset as in the later stage, the number of two frequent itemset or three frequent dataset will be huge and will take a lot of time to compute the result. 
For demonstration purpose considering say 300 rows

In [259]:
df=df[0:300]

In [260]:
df.head()

Unnamed: 0,Item(s),Item 1,Item 2,Item 3,Item 4,Item 5,Item 6,Item 7,Item 8,Item 9,...,Item 23,Item 24,Item 25,Item 26,Item 27,Item 28,Item 29,Item 30,Item 31,Item 32
0,4,citrus fruit,semi-finished bread,margarine,ready soups,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,3,tropical fruit,yogurt,coffee,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,whole milk,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,pip fruit,yogurt,cream cheese,meat spreads,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,other vegetables,whole milk,condensed milk,long life bakery product,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Introduction
Now lets find the co orccurance pattern of the dataframe
We use association rule mining for this. 
Data matrices used for it are Support, Confidence and Lift
1. Support is the frequency of items brought. Support =  freq(A,B)/N
2. Confidence: how often items occur together. Confidence =  freq(A,B)/freq(A)
3. Lift is strength of the rule. Lift= support/(supp(A) * supp(B)
if denominator is more then randomness is more rather than occurance of association

Now, for Apriori algorithm we use the items that are most used to find the association rules. Important thing to note here is that subset of the most used itemset should also be a frequent itemset. 


### Single Itemset

In [263]:
# Now lets count the number of unique single itemsets
#Approach: For each item we are creating a dictionary to get their count
Itemset_single= list()
Itemset_single_count={}
i=0
while(i<df.shape[0]):
    j=0
    while(j<df.shape[1]):
        if df.iloc[i][j] != 0 and df.iloc[i][j] not in Itemset_single:
            Itemset_single.append(df.iloc[i][j])
            Itemset_single_count[df.iloc[i][j]]=0
        j = j +1
    i = i + 1

In [262]:
print(len(Itemset_single))
print(Itemset_single_count)

152
{4: 0, 'citrus fruit': 0, 'semi-finished bread': 0, 'margarine': 0, 'ready soups': 0, 3: 0, 'tropical fruit': 0, 'yogurt': 0, 'coffee': 0, 1: 0, 'whole milk': 0, 'pip fruit': 0, 'cream cheese': 0, 'meat spreads': 0, 'other vegetables': 0, 'condensed milk': 0, 'long life bakery product': 0, 5: 0, 'butter': 0, 'rice': 0, 'abrasive cleaner': 0, 'rolls/buns': 0, 'UHT-milk': 0, 'bottled beer': 0, 'liquor (appetizer)': 0, 'potted plants': 0, 2: 0, 'cereals': 0, 'white bread': 0, 'bottled water': 0, 'chocolate': 0, 9: 0, 'curd': 0, 'flour': 0, 'dishes': 0, 'beef': 0, 'frankfurter': 0, 'soda': 0, 'chicken': 0, 'sugar': 0, 'fruit/vegetable juice': 0, 'newspapers': 0, 'packaged fruit/vegetables': 0, 'specialty bar': 0, 'butter milk': 0, 'pastry': 0, 'processed cheese': 0, 'detergent': 0, 11: 0, 'root vegetables': 0, 'frozen dessert': 0, 'sweet spreads': 0, 'salty snack': 0, 'waffles': 0, 'candy': 0, 'bathroom cleaner': 0, 'canned beer': 0, 'sausage': 0, 6: 0, 'brown bread': 0, 'shopping bags

In [266]:
# Counting the itemset
i=0
while(i<df.shape[0]):
    j=0
    while(j<df.shape[1]):
        if df.iloc[i][j] != 0 :
            Itemset_single_count[df.iloc[i][j]]=Itemset_single_count[df.iloc[i][j]]+1
        j=j +1
    i = i +1

In [267]:
print(Itemset_single_count)
#After updating the count, we can see that count for each item set

{4: 68, 'citrus fruit': 64, 'semi-finished bread': 12, 'margarine': 36, 'ready soups': 2, 3: 74, 'tropical fruit': 58, 'yogurt': 72, 'coffee': 30, 1: 150, 'whole milk': 166, 'pip fruit': 26, 'cream cheese': 26, 'meat spreads': 6, 'other vegetables': 120, 'condensed milk': 6, 'long life bakery product': 12, 5: 54, 'butter': 28, 'rice': 6, 'abrasive cleaner': 6, 'rolls/buns': 136, 'UHT-milk': 12, 'bottled beer': 32, 'liquor (appetizer)': 10, 'potted plants': 6, 2: 98, 'cereals': 10, 'white bread': 10, 'bottled water': 74, 'chocolate': 22, 9: 18, 'curd': 44, 'flour': 14, 'dishes': 14, 'beef': 30, 'frankfurter': 44, 'soda': 94, 'chicken': 10, 'sugar': 36, 'fruit/vegetable juice': 42, 'newspapers': 44, 'packaged fruit/vegetables': 14, 'specialty bar': 18, 'butter milk': 20, 'pastry': 40, 'processed cheese': 8, 'detergent': 22, 11: 12, 'root vegetables': 62, 'frozen dessert': 6, 'sweet spreads': 2, 'salty snack': 14, 'waffles': 22, 'candy': 12, 'bathroom cleaner': 6, 'canned beer': 48, 'saus

### Frequent Item set
Frequent dataset is the one whose value is greater than a threshold value. 
For our dataset, we are manually setting a threshold, which is 5.

In [268]:
Thresold_value= 5
Itemset_single_count1=Itemset_single_count
print("Before with no thresold:",len(Itemset_single_count1))
for i in Itemset_single_count1.keys():
    if Itemset_single_count1[i]< Thresold_value:
        Itemset_single_count1[i]=-1
Itemset_single_count_updated={}
for i in Itemset_single_count1.keys():
    if Itemset_single_count1[i]!=-1:# Thresold_value:
        Itemset_single_count_updated[i]=Itemset_single_count1[i]
print("After with thresold 5:",len(Itemset_single_count_updated))

Before with no thresold: 152
After with thresold 5: 103


In [269]:
Itemset_single_df= pd.DataFrame()
Itemset_single_df['Itemset']=Itemset_single_count_updated.keys()
Itemset_single_df['Count']=Itemset_single_count_updated.values()

### Final single itemset

In [270]:
Itemset_single_df

Unnamed: 0,Itemset,Count
0,4,68
1,citrus fruit,64
2,semi-finished bread,12
3,margarine,36
4,3,74
...,...,...
98,popcorn,6
99,cake bar,8
100,12,6
101,meat,10


### Generating Two frequent itemset

In [271]:
import itertools
Itemset_double_df=pd.DataFrame(list(itertools.combinations(Itemset_single_df['Itemset'], 2)))

In [272]:
Itemset_double_df=Itemset_double_df.rename(columns={0:'Itemset1',1:'Itemset2'})

In [273]:
Itemset_double_df.shape

(5253, 2)

In [274]:
Itemset_double_df

Unnamed: 0,Itemset1,Itemset2
0,4,citrus fruit
1,4,semi-finished bread
2,4,margarine
3,4,3
4,4,tropical fruit
...,...,...
5248,cake bar,meat
5249,cake bar,tea
5250,12,meat
5251,12,tea


In [275]:
# Now lets count the number of unique two frequent itemsets
#Approach: For each item we are creating a dictionary to get their count
Itemset_double_count={}
i=0
while(i<len(Itemset_double_df)):
    Itemset_double_count[i]=0
    i=i+1

In [276]:
Itemset_double_count

{0: 0,
 1: 0,
 2: 0,
 3: 0,
 4: 0,
 5: 0,
 6: 0,
 7: 0,
 8: 0,
 9: 0,
 10: 0,
 11: 0,
 12: 0,
 13: 0,
 14: 0,
 15: 0,
 16: 0,
 17: 0,
 18: 0,
 19: 0,
 20: 0,
 21: 0,
 22: 0,
 23: 0,
 24: 0,
 25: 0,
 26: 0,
 27: 0,
 28: 0,
 29: 0,
 30: 0,
 31: 0,
 32: 0,
 33: 0,
 34: 0,
 35: 0,
 36: 0,
 37: 0,
 38: 0,
 39: 0,
 40: 0,
 41: 0,
 42: 0,
 43: 0,
 44: 0,
 45: 0,
 46: 0,
 47: 0,
 48: 0,
 49: 0,
 50: 0,
 51: 0,
 52: 0,
 53: 0,
 54: 0,
 55: 0,
 56: 0,
 57: 0,
 58: 0,
 59: 0,
 60: 0,
 61: 0,
 62: 0,
 63: 0,
 64: 0,
 65: 0,
 66: 0,
 67: 0,
 68: 0,
 69: 0,
 70: 0,
 71: 0,
 72: 0,
 73: 0,
 74: 0,
 75: 0,
 76: 0,
 77: 0,
 78: 0,
 79: 0,
 80: 0,
 81: 0,
 82: 0,
 83: 0,
 84: 0,
 85: 0,
 86: 0,
 87: 0,
 88: 0,
 89: 0,
 90: 0,
 91: 0,
 92: 0,
 93: 0,
 94: 0,
 95: 0,
 96: 0,
 97: 0,
 98: 0,
 99: 0,
 100: 0,
 101: 0,
 102: 0,
 103: 0,
 104: 0,
 105: 0,
 106: 0,
 107: 0,
 108: 0,
 109: 0,
 110: 0,
 111: 0,
 112: 0,
 113: 0,
 114: 0,
 115: 0,
 116: 0,
 117: 0,
 118: 0,
 119: 0,
 120: 0,
 121: 0,
 122: 0,
 12

In [277]:
# Counting the itemset
for i in Itemset_double_count.keys():
    j=0
    while(j<len(df)):
            if set(Itemset_double_df.loc[i]).issubset(set(df.loc[j])):
                Itemset_double_count[i]= Itemset_double_count[i]+1
            j=j+1


In [278]:
Itemset_double_count
#After updating the count, we can see the actual count

{0: 5,
 1: 3,
 2: 2,
 3: 0,
 4: 0,
 5: 6,
 6: 1,
 7: 0,
 8: 10,
 9: 2,
 10: 4,
 11: 1,
 12: 7,
 13: 2,
 14: 2,
 15: 0,
 16: 3,
 17: 0,
 18: 0,
 19: 7,
 20: 2,
 21: 4,
 22: 0,
 23: 0,
 24: 0,
 25: 1,
 26: 0,
 27: 4,
 28: 2,
 29: 0,
 30: 2,
 31: 0,
 32: 0,
 33: 1,
 34: 1,
 35: 6,
 36: 0,
 37: 4,
 38: 1,
 39: 4,
 40: 2,
 41: 1,
 42: 1,
 43: 2,
 44: 1,
 45: 1,
 46: 0,
 47: 3,
 48: 0,
 49: 0,
 50: 1,
 51: 0,
 52: 0,
 53: 1,
 54: 5,
 55: 0,
 56: 0,
 57: 2,
 58: 2,
 59: 0,
 60: 0,
 61: 0,
 62: 1,
 63: 1,
 64: 0,
 65: 0,
 66: 1,
 67: 1,
 68: 1,
 69: 3,
 70: 0,
 71: 0,
 72: 0,
 73: 0,
 74: 2,
 75: 1,
 76: 1,
 77: 0,
 78: 0,
 79: 1,
 80: 0,
 81: 0,
 82: 2,
 83: 0,
 84: 0,
 85: 0,
 86: 0,
 87: 0,
 88: 0,
 89: 1,
 90: 0,
 91: 1,
 92: 0,
 93: 0,
 94: 0,
 95: 0,
 96: 0,
 97: 0,
 98: 1,
 99: 0,
 100: 2,
 101: 0,
 102: 1,
 103: 5,
 104: 3,
 105: 7,
 106: 6,
 107: 1,
 108: 0,
 109: 12,
 110: 2,
 111: 5,
 112: 0,
 113: 8,
 114: 0,
 115: 0,
 116: 3,
 117: 3,
 118: 2,
 119: 0,
 120: 9,
 121: 2,
 122: 0,
 

### Frequent Item set
Randomly selecting 5 as the threshold for two frequent dataset

In [279]:
Thresold_value_double= 5
Itemset_double_count1=Itemset_double_count
print("Before with no thresold:",len(Itemset_double_count1))
for i in Itemset_double_count1.keys():
    if Itemset_double_count1[i]< Thresold_value_double:
        Itemset_double_count1[i]=-1
Itemset_double_count_updated={}
for i in Itemset_double_count1.keys():
    if Itemset_double_count1[i]!=-1:# Thresold_value:
        Itemset_double_count_updated[i]=Itemset_double_count1[i]
print("After with thresold 5:",len(Itemset_double_count_updated))

Before with no thresold: 5253
After with thresold 5: 162


In [280]:
Itemset_double_count_updated

{0: 5,
 5: 6,
 8: 10,
 12: 7,
 19: 7,
 35: 6,
 54: 5,
 103: 5,
 105: 7,
 106: 6,
 109: 12,
 111: 5,
 113: 8,
 120: 9,
 128: 5,
 131: 5,
 138: 6,
 140: 5,
 148: 5,
 308: 10,
 312: 5,
 319: 6,
 402: 5,
 406: 12,
 410: 5,
 417: 9,
 433: 6,
 452: 5,
 500: 6,
 503: 13,
 504: 5,
 507: 11,
 514: 5,
 522: 5,
 529: 5,
 532: 5,
 542: 7,
 565: 5,
 599: 19,
 603: 11,
 606: 5,
 610: 8,
 618: 9,
 621: 8,
 626: 9,
 638: 6,
 645: 7,
 647: 5,
 710: 6,
 792: 5,
 815: 6,
 833: 13,
 882: 8,
 883: 5,
 885: 26,
 888: 10,
 889: 6,
 892: 24,
 894: 7,
 898: 5,
 900: 12,
 902: 6,
 903: 9,
 906: 6,
 907: 8,
 908: 16,
 910: 10,
 911: 8,
 912: 5,
 916: 7,
 919: 5,
 920: 18,
 923: 6,
 927: 11,
 928: 8,
 929: 8,
 931: 6,
 932: 6,
 933: 6,
 935: 5,
 936: 5,
 937: 8,
 939: 5,
 940: 10,
 942: 6,
 943: 9,
 948: 5,
 955: 8,
 977: 6,
 1250: 8,
 1254: 17,
 1256: 7,
 1262: 11,
 1265: 6,
 1270: 10,
 1272: 6,
 1273: 6,
 1281: 5,
 1282: 15,
 1285: 5,
 1289: 5,
 1290: 6,
 1292: 7,
 1295: 5,
 1298: 5,
 1299: 5,
 1300: 6,
 1302: 

In [281]:
# Now creating dataframe for Two frequent itemset
Itemset_double_df1= pd.DataFrame()
for i in Itemset_double_count_updated.keys():
    Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
    

  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.

  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.

  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.append(Itemset_double_df.loc[ [i] ,: ])
  Itemset_double_df1=Itemset_double_df1.

### Final Two Frequent Itemset

In [282]:
Itemset_double_df1

Unnamed: 0,Itemset1,Itemset2
0,4,citrus fruit
5,4,yogurt
8,4,whole milk
12,4,other vegetables
19,4,rolls/buns
...,...,...
4127,sausage,shopping bags
4163,sausage,sliced cheese
4173,6,shopping bags
4182,6,berries


### Fk-1 * F1

In [283]:
temp=[]
for i in Itemset_double_count_updated.keys():
    j=0
    while(j<len(Itemset_single_df)):
        if(Itemset_double_df1.loc[i].Itemset1!=Itemset_single_df.loc[j].Itemset):
            if Itemset_double_df1.loc[i].Itemset2!= Itemset_single_df.loc[j].Itemset:
                temp.append([Itemset_double_df1.loc[i].Itemset1,Itemset_double_df1.loc[i].Itemset2,Itemset_single_df.loc[j].Itemset])
        j = j +1
                

In [284]:
Itemset_triple_df= pd.DataFrame(temp)

In [285]:
Itemset_triple_df

Unnamed: 0,0,1,2
0,4,citrus fruit,semi-finished bread
1,4,citrus fruit,margarine
2,4,citrus fruit,3
3,4,citrus fruit,tropical fruit
4,4,citrus fruit,yogurt
...,...,...,...
16357,hygiene articles,napkins,popcorn
16358,hygiene articles,napkins,cake bar
16359,hygiene articles,napkins,12
16360,hygiene articles,napkins,meat


In [286]:
Itemset_triple_df.shape

(16362, 3)

In [287]:
# Now lets count the number of unique three frequent itemsets
#Approach: For each item we are creating a dictionary to get their count
Itemset_triple_count={}
i=0
while(i<len(Itemset_triple_df)):
    Itemset_triple_count[i]=0
    i=i+1

In [232]:
Itemset_triple_count

{0: 0,
 1: 0,
 2: 0,
 3: 0,
 4: 0,
 5: 0,
 6: 0,
 7: 0,
 8: 0,
 9: 0,
 10: 0,
 11: 0,
 12: 0,
 13: 0,
 14: 0,
 15: 0,
 16: 0,
 17: 0,
 18: 0,
 19: 0,
 20: 0,
 21: 0,
 22: 0,
 23: 0,
 24: 0,
 25: 0,
 26: 0,
 27: 0,
 28: 0,
 29: 0,
 30: 0,
 31: 0,
 32: 0,
 33: 0,
 34: 0,
 35: 0,
 36: 0,
 37: 0,
 38: 0,
 39: 0,
 40: 0,
 41: 0,
 42: 0,
 43: 0,
 44: 0,
 45: 0,
 46: 0,
 47: 0,
 48: 0,
 49: 0,
 50: 0,
 51: 0,
 52: 0,
 53: 0,
 54: 0,
 55: 0,
 56: 0,
 57: 0,
 58: 0,
 59: 0,
 60: 0,
 61: 0,
 62: 0,
 63: 0,
 64: 0,
 65: 0,
 66: 0,
 67: 0,
 68: 0,
 69: 0,
 70: 0,
 71: 0,
 72: 0,
 73: 0,
 74: 0,
 75: 0,
 76: 0,
 77: 0,
 78: 0,
 79: 0,
 80: 0,
 81: 0,
 82: 0,
 83: 0,
 84: 0,
 85: 0,
 86: 0,
 87: 0,
 88: 0,
 89: 0,
 90: 0,
 91: 0,
 92: 0,
 93: 0,
 94: 0,
 95: 0,
 96: 0,
 97: 0,
 98: 0,
 99: 0,
 100: 0,
 101: 0,
 102: 0,
 103: 0,
 104: 0,
 105: 0,
 106: 0,
 107: 0,
 108: 0,
 109: 0,
 110: 0,
 111: 0,
 112: 0,
 113: 0,
 114: 0,
 115: 0,
 116: 0,
 117: 0,
 118: 0,
 119: 0,
 120: 0,
 121: 0,
 122: 0,
 12

In [288]:
# Counting the itemset
for i in Itemset_triple_count.keys():
    j=0
    while(j<len(df)):
            if set(Itemset_triple_df.loc[i]).issubset(set(df.loc[j])):
                Itemset_triple_count[i]= Itemset_triple_count[i]+1
            j=j+1

In [237]:
Itemset_triple_count
#After updating the count, we can see the actual count

{0: 1,
 1: 1,
 2: 0,
 3: 0,
 4: 3,
 5: 0,
 6: 0,
 7: 0,
 8: 0,
 9: 1,
 10: 1,
 11: 0,
 12: 0,
 13: 0,
 14: 0,
 15: 1,
 16: 0,
 17: 0,
 18: 0,
 19: 0,
 20: 0,
 21: 1,
 22: 0,
 23: 0,
 24: 0,
 25: 0,
 26: 0,
 27: 1,
 28: 0,
 29: 0,
 30: 0,
 31: 0,
 32: 0,
 33: 0,
 34: 1,
 35: 0,
 36: 1,
 37: 0,
 38: 0,
 39: 0,
 40: 0,
 41: 0,
 42: 0,
 43: 0,
 44: 0,
 45: 0,
 46: 0,
 47: 0,
 48: 0,
 49: 0,
 50: 0,
 51: 0,
 52: 0,
 53: 0,
 54: 0,
 55: 0,
 56: 1,
 57: 0,
 58: 0,
 59: 0,
 60: 0,
 61: 0,
 62: 0,
 63: 0,
 64: 0,
 65: 0,
 66: 0,
 67: 0,
 68: 0,
 69: 0,
 70: 0,
 71: 0,
 72: 0,
 73: 0,
 74: 0,
 75: 3,
 76: 0,
 77: 0,
 78: 0,
 79: 0,
 80: 0,
 81: 0,
 82: 1,
 83: 1,
 84: 2,
 85: 1,
 86: 0,
 87: 0,
 88: 0,
 89: 0,
 90: 0,
 91: 0,
 92: 0,
 93: 0,
 94: 0,
 95: 0,
 96: 2,
 97: 0,
 98: 0,
 99: 1,
 100: 0,
 101: 0,
 102: 1,
 103: 0,
 104: 0,
 105: 0,
 106: 0,
 107: 0,
 108: 0,
 109: 1,
 110: 1,
 111: 1,
 112: 1,
 113: 0,
 114: 0,
 115: 0,
 116: 0,
 117: 0,
 118: 0,
 119: 0,
 120: 0,
 121: 0,
 122: 0,
 12

### Frequent Item set
Randomly selecting 5 as the threshold for two frequent dataset

In [289]:
Thresold_value_triple= 5
Itemset_triple_count1=Itemset_triple_count
print("Before with no thresold:",len(Itemset_triple_count1))
for i in Itemset_triple_count1.keys():
    if Itemset_triple_count1[i]< Thresold_value_triple:
        Itemset_triple_count1[i]=-1
Itemset_triple_count_updated={}
for i in Itemset_triple_count1.keys():
    if Itemset_triple_count1[i]!=-1:# Thresold_value:
        Itemset_triple_count_updated[i]=Itemset_triple_count1[i]
print("After with thresold 5:",len(Itemset_triple_count_updated))


Before with no thresold: 16362
After with thresold 5: 72


### Final Fk-1 * F1  Itemset

In [293]:
Itemset_triple_count_updated

{1028: 6,
 1230: 5,
 1321: 6,
 1325: 5,
 1937: 5,
 2129: 5,
 2835: 5,
 2934: 5,
 2940: 5,
 3139: 5,
 3843: 5,
 3849: 6,
 3856: 6,
 3864: 5,
 3867: 7,
 3893: 5,
 3947: 6,
 4149: 6,
 4250: 5,
 4351: 7,
 4755: 5,
 5263: 5,
 5459: 5,
 5460: 6,
 5463: 5,
 5472: 8,
 5488: 7,
 5490: 6,
 5500: 8,
 5520: 5,
 5758: 6,
 5760: 5,
 5763: 6,
 5769: 8,
 5783: 5,
 5791: 6,
 5793: 5,
 5810: 7,
 6066: 5,
 6079: 5,
 6268: 7,
 6577: 7,
 6584: 6,
 6678: 6,
 6685: 5,
 7183: 8,
 7392: 7,
 7581: 5,
 8395: 5,
 8897: 5,
 9091: 5,
 9099: 8,
 9116: 5,
 9311: 5,
 9503: 7,
 9604: 6,
 9907: 8,
 10816: 5,
 11927: 5,
 11931: 5,
 12230: 6,
 12274: 5,
 12331: 5,
 12937: 7,
 12963: 5,
 13020: 5,
 13790: 5,
 15069: 5,
 15619: 5,
 15710: 5,
 15978: 5,
 16208: 5}

In [295]:
Itemset_triple_count_updated.keys()

dict_keys([1028, 1230, 1321, 1325, 1937, 2129, 2835, 2934, 2940, 3139, 3843, 3849, 3856, 3864, 3867, 3893, 3947, 4149, 4250, 4351, 4755, 5263, 5459, 5460, 5463, 5472, 5488, 5490, 5500, 5520, 5758, 5760, 5763, 5769, 5783, 5791, 5793, 5810, 6066, 6079, 6268, 6577, 6584, 6678, 6685, 7183, 7392, 7581, 8395, 8897, 9091, 9099, 9116, 9311, 9503, 9604, 9907, 10816, 11927, 11931, 12230, 12274, 12331, 12937, 12963, 13020, 13790, 15069, 15619, 15710, 15978, 16208])

In [296]:
Itemset_triple_count_updated.values()

dict_values([6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 5, 7, 5, 6, 6, 5, 7, 5, 5, 5, 6, 5, 8, 7, 6, 8, 5, 6, 5, 6, 8, 5, 6, 5, 7, 5, 5, 7, 7, 6, 6, 5, 8, 7, 5, 5, 5, 5, 8, 5, 5, 7, 6, 8, 5, 5, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 5])