In [6]:
import pandas as pd

# Load the tab-separated dataset. The separator is passed by keyword because
# pandas 2.0 made every read_csv argument after the path keyword-only;
# read_csv already returns a DataFrame, so no extra wrapping is needed.
df = pd.read_csv('banco.tsv', sep='\t')
df

Unnamed: 0,CS_SEXO,NU_IDADE_N,CS_RACA,CS_ESCOL_N,SG_UF,ID_MN_RESI,CS_ZONA,SURTO_SG,NOSOCOMIAL,FEBRE,...,DOR_ABD,FADIGA,PERD_OLFT,PERD_PALA,TOMO_RES,TP_TES_AN,RES_AN,RES_IGG,RES_IGM,RES_IGA
0,M,79,1,2,SP,MAIRIPORA,1,2,2,1,...,9,9,9,9,1,1,5,9,9,4
1,M,3,4,9,MG,BELO HORIZONTE,1,2,2,1,...,2,2,2,2,6,1,4,4,4,4
2,M,78,1,2,RS,ALVORADA,1,2,2,2,...,2,2,2,2,6,2,1,4,4,4
3,M,58,4,2,SP,OSASCO,1,2,2,1,...,9,9,9,9,9,1,4,4,4,4
4,F,73,1,1,SP,OSASCO,1,1,2,1,...,2,2,2,2,9,1,4,4,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1576,M,72,1,2,RS,ALVORADA,1,2,2,2,...,2,2,2,2,9,2,2,4,4,4
1577,F,55,4,3,SP,OSASCO,1,1,2,2,...,2,2,2,2,9,1,4,4,4,4
1578,M,64,9,9,SP,ITANHAEM,1,2,2,2,...,2,2,2,2,1,1,4,4,4,4
1579,F,87,4,0,MG,TURMALINA,3,2,2,2,...,2,1,2,2,6,2,2,2,2,4


In [7]:
from mlxtend.frequent_patterns import apriori

class Apriori:
    """Thin wrapper around ``mlxtend.frequent_patterns.apriori``.

    Stores a transactions DataFrame and a minimum-support threshold, can
    convert a 1/0 encoded frame to booleans, runs the frequent-itemset search
    and filters its result.
    """

    # Default minimum support, used when the constructor gets no threshold.
    threshold = 0.5
    # Transactions DataFrame; replaced per instance by the constructor.
    df = None

    def __init__(self, df, threshold=None, transform_bol=False):
        """Apriori constructor.

        :param pandas.DataFrame df: transactions dataset (1 or 0).
        :param float threshold: min_support to use; the class default (0.5)
            is kept when None.
        :param bool transform_bol: when True, convert the dataset to booleans
            (a cell is True only where it equals 1) before storing it.
        :raises TypeError: if df is not a pandas.DataFrame.
        """

        self._validate_df(df)

        self.df = df
        if threshold is not None:
            self.threshold = threshold

        if transform_bol:
            self._transform_bol()

    def _validate_df(self, df=None):
        """Validate that df is a pandas.DataFrame.

        :param pandas.DataFrame df: transactions dataset (1 or 0).
        :raises TypeError: if df is None or any other non-DataFrame object.
        :return: None
        """

        # TypeError is an Exception subclass, so callers that catch the
        # generic Exception raised previously keep working.
        if not isinstance(df, pd.DataFrame):
            raise TypeError("df must be a valid pandas.DataFrame.")

    def _transform_bol(self):
        """Transform the (1 or 0) dataset to (True or False).

        Every cell equal to 1 becomes True and everything else becomes False.
        The result is a new DataFrame bound to ``self.df``; the frame passed
        to the constructor is not modified.

        :return: None
        """

        # DataFrame.eq builds a fresh boolean frame in one vectorized pass
        # instead of rewriting the caller's columns one by one.
        self.df = self.df.eq(1)

    def _apriori(self, use_colnames=False, max_len=None, count=True):
        """Call the mlxtend.frequent_patterns apriori function.

        :param bool use_colnames: Flag to use column names in the final DataFrame.
        :param int max_len: Maximum length of itemsets generated.
        :param bool count: Flag to add a 'length' column with the size of
            each itemset.
        :return: apriori DataFrame.
        :rtype: pandas.DataFrame
        """

        apriori_df = apriori(
            self.df,
            min_support=self.threshold,
            use_colnames=use_colnames,
            max_len=max_len,
        )
        if count:
            apriori_df['length'] = apriori_df['itemsets'].apply(len)

        return apriori_df

    def run(self, use_colnames=False, max_len=None, count=True):
        """Apriori runner function.

        :param bool use_colnames: Flag to use column names in the final DataFrame.
        :param int max_len: Maximum length of itemsets generated.
        :param bool count: Flag to add a 'length' column with the size of
            each itemset.
        :return: apriori DataFrame.
        :rtype: pandas.DataFrame
        """

        return self._apriori(
            use_colnames=use_colnames,
            max_len=max_len,
            count=count,
        )

    def filter(self, apriori_df, length, threshold):
        """Filter an Apriori DataFrame by itemset length and support.

        :param pandas.DataFrame apriori_df: Apriori DataFrame (must contain
            the 'length' column produced with count=True).
        :param int length: Exact length of the itemsets to keep.
        :param float threshold: Minimum support required (inclusive).
        :raises Exception: if apriori_df has no 'length' column.
        :return: filtered apriori DataFrame.
        :rtype: pandas.DataFrame
        """

        if 'length' not in apriori_df.columns:
            raise Exception("apriori_df has no length. Please run the Apriori with count=True.")

        has_length = apriori_df['length'] == length
        has_support = apriori_df['support'] >= threshold
        return apriori_df[has_length & has_support]

In [8]:
# Running Apriori

# ID is not relevant to apriori. Drop it into a new frame (errors='ignore'
# covers the case where the column is absent) so that `df`, which an earlier
# cell loaded and displayed, is left untouched.
transactions = df.drop(columns=['ID'], errors='ignore')

apriori_runner = Apriori(transactions, threshold=0.4, transform_bol=True)
apriori_df = apriori_runner.run(use_colnames=True)
apriori_df

Unnamed: 0,support,itemsets,length
0,0.440860,(CS_RACA),1
1,0.942441,(CS_ZONA),1
2,0.580645,(FEBRE),1
3,0.691335,(TOSSE),1
4,0.743201,(DISPNEIA),1
...,...,...,...
206,0.478178,"(AMOSTRA, HOSPITAL, SATURACAO, CS_ZONA, DESC_R...",5
207,0.401012,"(AMOSTRA, HOSPITAL, TP_TES_AN, CS_ZONA, DESC_R...",5
208,0.403542,"(AMOSTRA, TOSSE, DISPNEIA, HOSPITAL, DESC_RESP)",5
209,0.449715,"(AMOSTRA, DISPNEIA, HOSPITAL, SATURACAO, DESC_...",5


In [None]:
# Show only pairs (itemsets of length 2) with support greater than or equal to 0.41
apriori_runner.filter(apriori_df, length=2, threshold=0.41)