In [1]:
import pandas as pd

# Load the raw records. `sep` is passed by keyword: pandas 2.x rejects
# positional arguments after the file path.
df = pd.read_csv('banco.tsv', sep='\t')
df_dummy = pd.get_dummies(df)

# Columns kept for the association analysis.
OUTCOME_COLUMN = "EVOLUCAO"
SYMPTOM_COLUMNS = ["FEBRE", "TOSSE", "DISPNEIA", "FADIGA", "DESC_RESP", "PERD_OLFT", "PERD_PALA"]

# Recoding applied to the raw codes so every kept column ends up as 0/1.
# Values not listed in a mapping (including missing values) are left as-is.
# NOTE(review): code 9 is folded into 1 here, exactly as in the previous
# cell-by-cell recoding — confirm against the data dictionary that 9 is not
# an "unknown/ignored" marker.
SYMPTOM_RECODE = {9: 1, 2: 0}
OUTCOME_RECODE = {9: 1, 2: 0, 3: 0}

aux = df_dummy[[OUTCOME_COLUMN] + SYMPTOM_COLUMNS]

# One mapping per column; DataFrame.replace returns a new frame, so `aux`
# and `df_dummy` are not modified and re-running the cell is idempotent.
recode_by_column = {column: SYMPTOM_RECODE for column in SYMPTOM_COLUMNS}
recode_by_column[OUTCOME_COLUMN] = OUTCOME_RECODE
new_df = aux.replace(recode_by_column)

new_df


Unnamed: 0,EVOLUCAO,FEBRE,TOSSE,DISPNEIA,FADIGA,DESC_RESP,PERD_OLFT,PERD_PALA
0,0,1,1,1,1,1,1,1
1,1,1,1,1,0,1,0,0
2,0,0,1,1,0,1,0,0
3,1,1,1,1,1,1,1,1
4,1,1,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...
1576,0,0,1,1,0,1,0,0
1577,1,0,1,0,0,1,0,0
1578,0,0,1,1,0,1,0,0
1579,1,0,1,0,1,0,0,0


In [2]:
from mlxtend.frequent_patterns import apriori, association_rules

class Apriori:
    """Small convenience wrapper around ``mlxtend.frequent_patterns.apriori``.

    Holds a transactions DataFrame and a minimum-support threshold, runs
    the frequent-itemset search and offers a helper to filter the result
    by itemset length and support.
    """

    # Default minimum support, used when no threshold is given to __init__.
    threshold = 0.5
    # Transactions DataFrame; assigned per instance in __init__.
    df = None

    def __init__(self, df, threshold=None, transform_bol=False):
        """Apriori Constructor.

        :param pandas.DataFrame df: transactions dataset (1 or 0).
        :param float threshold: min_support to use; when None the class
            default (0.5) is kept.
        :param bool transform_bol: when True, store a boolean version of
            ``df`` (True where the value equals 1) instead of ``df`` itself.
        :raises TypeError: if ``df`` is None.
        :return: Apriori instance.
        :rtype: Apriori
        """

        self._validate_df(df)

        self.df = df
        if threshold is not None:
            self.threshold = threshold

        if transform_bol:
            self._transform_bol()

    def _validate_df(self, df=None):
        """Validate that a DataFrame was supplied.

        Only the presence of ``df`` is checked, not its type or contents.

        :param pandas.DataFrame df: transactions dataset (1 or 0).
        :raises TypeError: if ``df`` is None.
        :return:
        :rtype: None
        """

        if df is None:
            raise TypeError("df must be a valid pandas.DataFrame.")

    def _transform_bol(self):
        """Replace ``self.df`` with a (True or False) version of itself.

        A cell becomes True when it equals 1 and False otherwise (missing
        values therefore become False). The comparison builds a new
        DataFrame, so the frame passed in by the caller is not modified.

        :return:
        :rtype: None
        """

        self.df = self.df.eq(1)

    def _apriori(self, use_colnames=False, max_len=None, count=True):
        """Call apriori mlxtend.frequent_patterns function.

        :param bool use_colnames: Flag to use columns name in final DataFrame.
        :param int max_len: Maximum length of itemsets generated.
        :param bool count: Flag to add a 'length' column holding the number
            of items in each itemset.
        :return: apriori DataFrame.
        :rtype: pandas.DataFrame
        """

        apriori_df = apriori(
            self.df,
            min_support=self.threshold,
            use_colnames=use_colnames,
            max_len=max_len,
        )
        if count:
            apriori_df['length'] = apriori_df['itemsets'].apply(len)

        return apriori_df

    def run(self, use_colnames=False, max_len=None, count=True):
        """Apriori Runner Function.

        Public entry point; forwards its arguments unchanged to
        :meth:`_apriori`.

        :param bool use_colnames: Flag to use columns name in final DataFrame.
        :param int max_len: Maximum length of itemsets generated.
        :param bool count: Flag to count length of the itemsets.
        :return: apriori DataFrame.
        :rtype: pandas.DataFrame
        """

        return self._apriori(
            use_colnames=use_colnames,
            max_len=max_len,
            count=count,
        )

    def filter(self, apriori_df, length, threshold):
        """Filter Apriori DataFrame by length and threshold.

        :param pandas.DataFrame apriori_df: Apriori DataFrame; must contain
            the 'length' and 'support' columns.
        :param int length: Exact itemset length to keep.
        :param float threshold: Minimum support required (inclusive).
        :raises ValueError: if ``apriori_df`` has no 'length' column.
        :return: apriori filtered DataFrame.
        :rtype: pandas.DataFrame
        """

        if 'length' not in apriori_df.columns:
            raise ValueError("apriori_df has no length. Please run the Apriori with count=True.")

        has_length = apriori_df['length'] == length
        has_support = apriori_df['support'] >= threshold
        return apriori_df[has_length & has_support]

In [16]:
# Running Apriori

# Thresholds for this run, named so they can be tuned in one place.
MIN_SUPPORT = 0.4       # passed to Apriori as min_support
MIN_CONFIDENCE = 0.4    # passed to association_rules as min_threshold

# Mine the frequent itemsets; transform_bol=True makes the runner work on a
# True/False version of the 0/1 data.
apriori_runner = Apriori(new_df, threshold=MIN_SUPPORT, transform_bol=True)
apriori_df = apriori_runner.run(use_colnames=True)

# Derive the rules and keep only the columns shown in the report.
rules = association_rules(apriori_df, metric='confidence', min_threshold=MIN_CONFIDENCE)
table = rules[['antecedents', 'consequents', 'support', 'confidence']]
table

Unnamed: 0,antecedents,consequents,support,confidence
0,(EVOLUCAO),(TOSSE),0.470588,0.752275
1,(TOSSE),(EVOLUCAO),0.470588,0.672087
2,(EVOLUCAO),(DISPNEIA),0.44592,0.712841
3,(DISPNEIA),(EVOLUCAO),0.44592,0.597458
4,(TOSSE),(FEBRE),0.472486,0.674797
5,(FEBRE),(TOSSE),0.472486,0.79893
6,(FEBRE),(DISPNEIA),0.457306,0.773262
7,(DISPNEIA),(FEBRE),0.457306,0.612712
8,(DESC_RESP),(FEBRE),0.40544,0.620523
9,(FEBRE),(DESC_RESP),0.40544,0.685561


In [17]:
# Keep only the itemsets with exactly 2 items and support of at least 0.5.
apriori_runner.filter(apriori_df, length=2, threshold=0.5)

Unnamed: 0,support,itemsets,length
10,0.540164,"(TOSSE, DISPNEIA)",2
12,0.552815,"(DESC_RESP, DISPNEIA)",2
