### Apriori is a popular algorithm for extracting **frequent itemsets** with applications in **association rule** learning. 
An **itemset** is considered as "frequent" if it meets a user-specified support threshold. For instance, if the support threshold is set to 0.5 (50%), a frequent itemset is defined as a set of items that occur together in at least 50% of all transactions in the database.

Apriori Algo explained [here](http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/apriori/) (code below)

In [None]:
pip install mlxtend
pip install apriori

In [9]:
dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
           ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [12]:
from mlxtend.frequent_patterns import apriori
apriori(df, min_support=0.6, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Kidney Beans, Yogurt)"


---

### Another sample Association Rule Mining code using [Apriori Algorithm in Python](https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/)

---

---

### Now let's apply this algo on Fabby data

In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [4]:
te = TransactionEncoder()

# File created by combining transaction summary,transaction details and products 
dir_path = "//Users/subashnadar/Google Drive/Fabby/"
fulldata=pd.read_csv(dir_path+'AprioriData.csv')

In [5]:
#filtering the dataset for a 'Customer'
dataset=fulldata.loc[fulldata['customer_id'] == 100001]
dataset= dataset['product_name.y']
print(dataset)

37                                          Melkesjokolade
149                                         Melkesjokolade
232                          Melkesjokolade,Melkesjokolade
556                                         Melkesjokolade
683                    Melkesjokolade,Havre Fras,Dove Soap
715                               Melkesjokolade,Tine Milk
783                                         Melkesjokolade
1050               Melkesjokolade,Trixie Wicker Cat Basket
1195       Melkesjokolade,Melkesjokolade,Crest,Harringtons
1261     Melkesjokolade,Tine Milk,Uriage,Dove Soap,Dove...
1422                                        Melkesjokolade
1505     Melkesjokolade,Tine Milk,Tine Milk,Tine Milk,C...
1544                           Melkesjokolade,Sine Moistur
1546                                 Melkesjokolade,Uriage
1563     Melkesjokolade,Fiske Boller,Tine Milk,Crest,Ha...
1698                    Melkesjokolade,Tine Milk,Dove Soap
1702         Melkesjokolade,Melkesjokolade,Crest,Coca Co

In [6]:
#convert List of transactions List of lists
def extractAsLists(lst): 
    res = [] 
    for el in lst: 
        sub = el.split(',') 
        res.append(sub) 
      
    return(res) 
                 
print(extractAsLists(dataset)) 
dataset = extractAsLists(dataset)

te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

[['Melkesjokolade'], ['Melkesjokolade'], ['Melkesjokolade', 'Melkesjokolade'], ['Melkesjokolade'], ['Melkesjokolade', 'Havre Fras', 'Dove Soap'], ['Melkesjokolade', 'Tine Milk'], ['Melkesjokolade'], ['Melkesjokolade', 'Trixie Wicker Cat Basket'], ['Melkesjokolade', 'Melkesjokolade', 'Crest', 'Harringtons'], ['Melkesjokolade', 'Tine Milk', 'Uriage', 'Dove Soap', 'Dove Soap', 'Crest', 'Harringtons'], ['Melkesjokolade'], ['Melkesjokolade', 'Tine Milk', 'Tine Milk', 'Tine Milk', 'Crest'], ['Melkesjokolade', 'Sine Moistur'], ['Melkesjokolade', 'Uriage'], ['Melkesjokolade', 'Fiske Boller', 'Tine Milk', 'Crest', 'Harringtons'], ['Melkesjokolade', 'Tine Milk', 'Dove Soap'], ['Melkesjokolade', 'Melkesjokolade', 'Crest', 'Coca Cola'], ['Melkesjokolade', 'Corn Flakes', 'Tine Milk', 'Uriage', 'Trixie Wicker Cat Basket'], ['Melkesjokolade', 'Havre Fras'], ['Melkesjokolade', 'Sine Moistur'], ['Melkesjokolade'], ['Melkesjokolade', 'Corn Flakes', 'Tine Milk'], ['Melkesjokolade', 'Fiske Boller'], ['Mel

In [7]:
# df.to_csv(dir_path+'AprioriDatasampledata.csv',encoding='utf-8',index=False)
from mlxtend.frequent_patterns import apriori

frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
print(frequent_itemsets) 


# result = frequent_itemsets[ (frequent_itemsets['length'] == 3) &
#                    (frequent_itemsets['support'] >= 0.1) ]
# print(frequent_itemsets)
# result.to_csv(dir_path+'Aprioriresult.csv',encoding='utf-8',index=False)

     support                                    itemsets  length
0   0.152047                               (Corn Flakes)       1
1   0.157895                                 (Corn ahoy)       1
2   0.210526                                     (Crest)       1
3   0.105263                                 (Dove Soap)       1
4   0.122807                              (Fiske Boller)       1
5   0.175439                               (Harringtons)       1
6   1.000000                            (Melkesjokolade)       1
7   0.204678                                 (Tine Milk)       1
8   0.157895                  (Trixie Wicker Cat Basket)       1
9   0.128655                                    (Uriage)       1
10  0.152047               (Melkesjokolade, Corn Flakes)       2
11  0.157895                 (Melkesjokolade, Corn ahoy)       2
12  0.210526                     (Melkesjokolade, Crest)       2
13  0.105263                 (Melkesjokolade, Dove Soap)       2
14  0.122807             

In [8]:
result = frequent_itemsets[ (frequent_itemsets['length'] == 3) &
                   (frequent_itemsets['support'] >= 0.1) ]
print(frequent_itemsets)
result.to_csv(dir_path+'Aprioriresult.csv',encoding='utf-8',index=False)

     support                                    itemsets  length
0   0.152047                               (Corn Flakes)       1
1   0.157895                                 (Corn ahoy)       1
2   0.210526                                     (Crest)       1
3   0.105263                                 (Dove Soap)       1
4   0.122807                              (Fiske Boller)       1
5   0.175439                               (Harringtons)       1
6   1.000000                            (Melkesjokolade)       1
7   0.204678                                 (Tine Milk)       1
8   0.157895                  (Trixie Wicker Cat Basket)       1
9   0.128655                                    (Uriage)       1
10  0.152047               (Melkesjokolade, Corn Flakes)       2
11  0.157895                 (Melkesjokolade, Corn ahoy)       2
12  0.210526                     (Melkesjokolade, Crest)       2
13  0.105263                 (Melkesjokolade, Dove Soap)       2
14  0.122807             