# Python-ML-Models :: Apriori - implementation from scratch

---

## Information

**Goal**: Understand the inner works of the *apriori* algorithm.

## Implementation


### Libraries

In [64]:
from __future__ import division
import pandas as pd
import numpy as np
from itertools import combinations

import warnings
warnings.filterwarnings("ignore")

### Defining Apriori class

In [65]:
class Apriori(object):
    def __init__(self, minSupport, sparse_matrix=False):
        self.minSupport = minSupport
        self.sparse_matrix = sparse_matrix
        
    def __trans_from_sparse(self, dta):
        nsp = []
        for i in range(dta.shape[0]):
            nsp.append(dta.ix[i,1:][lambda x: x ==1].index.tolist())
        return nsp
        
    def __simple_support(self, products, num_trsc):
        return { k:(v/num_trsc) for k,v in list(products.items())}
    
    def __support(self, dta, k, b):
        products = {k:0 for k in dta}
        for tr in dta:
            for e in b:
                total = 0
                for v in tr:
                    if (v in e): total += 1
                if(total == k): products[tr] += 1
        return self.__simple_support(products,len(b))
    
    def __filter_minSupport(self, support):
        return dict(filter(lambda x: x[1] >= self.minSupport, support.items()))
    
    def __compItemSet(self, dta, k, base):
        comb = list( combinations(dta.keys(), k) )
        com_supp = self.__support(comb,k,base)
        fcspp = self.__filter_minSupport(com_supp)
        if(len(fcspp) == 0):
            return dta
        else:
            k += 1
            self.__compItemSet(fcspp,k,base)
            return fcspp
                
    def freqItemSet(self, base):
        num_transactions = base.shape[0]
        
        if(self.sparse_matrix):
            base = self.__trans_from_sparse(base)
            products = {k:0 for k in np.unique(np.hstack(base))}
            for p in products.keys():
                for t in base:
                    if (p in t):
                        products[p] += 1
            
        else:
            base = np.array([ i.split(",") for i in base])
            products = {k:0 for k in np.unique(np.hstack(base))}
            for p in products.keys():
                for t in base:
                    if (p in t):
                        products[p] += 1
                        
        sup = self.__simple_support(products,num_transactions)
        sup = self.__filter_minSupport(sup)
            
        k = 2
        sup = self.__compItemSet(sup,k,base) 
                
        return sup

### Loading data

#### Sparse data

In [66]:
data_s = pd.read_csv("transactions_sparse.csv")
data_s.head()

Unnamed: 0,transaction_ID,milk,bread,butter,beer,diapers
0,1,1,1,0,0,0
1,2,0,0,1,0,0
2,3,0,0,0,1,1
3,4,1,1,1,0,0
4,5,0,1,0,0,0


#### Non-Sparse data

In [67]:
data_nsp = pd.read_csv("transactions.csv",sep=';')
data_nsp.head()

Unnamed: 0,transaction_ID,products
0,1,"milk,bread"
1,2,butter
2,3,"beer,diapers"
3,4,"milk,bread,butter"
4,5,bread


### Applying Apriori

#### to sparse-data

In [68]:
apr = Apriori(0.2, sparse_matrix=True) #set minimum support = 0.2 and sparse_marix = True

res = apr.freqItemSet(data_s)
res

{('beer', 'diapers'): 0.2,
 ('bread', 'butter'): 0.2,
 ('bread', 'milk'): 0.4,
 ('butter', 'milk'): 0.2}

#### to non-sparse data

In [69]:
apr = Apriori(0.2) #set minimum support = 0.2 and let sparse_marix as its default value (= False)

res = apr.freqItemSet(data_nsp.ix[:,1])
res

{('beer', 'diapers'): 0.2,
 ('bread', 'butter'): 0.2,
 ('bread', 'milk'): 0.4,
 ('butter', 'milk'): 0.2}

In [70]:
apr = Apriori(0.4) #set minimum support = 0.4 and let sparse_marix as its default value (= False)

res = apr.freqItemSet(data_nsp.ix[:,1])
res

{('bread', 'milk'): 0.4}