# Association rule mining

In [None]:
import pandas as pd

In [None]:
# Load the raw transaction lines from disk and preview the first 20 rows.
data = pd.read_csv(filepath_or_buffer="Online Retail.csv")
data.head(n=20)

In [None]:
# Summarise the frame (column dtypes and non-null counts) to spot missing values.
data.info()

In [None]:
# A line without a Description cannot be turned into an item, so discard it.
data = data.dropna(subset=["Description"])

In [None]:
# Re-check the summary now that rows with a missing Description are gone.
data.info()

In [None]:
# Descriptive statistics of the number of lines recorded per invoice.
(
    data
    .groupby("InvoiceNo")
    .size()
    .describe()
)

### Data preprocessing

In [None]:
# Collapse the line-level data into one row per (InvoiceNo, Country) basket,
# with every item description of that basket joined into a single
# comma-separated string.
# NOTE(review): a description that itself contains a comma cannot be told
# apart from two items once joined this way - confirm the data has none.
data_by_invoice = (
    data
    .groupby(["InvoiceNo", "Country"])["Description"]
    .apply(",".join)
    .reset_index()
)
data_by_invoice

In [None]:
# Count the baskets (one row per invoice) recorded for each country.
data_by_invoice['Country'].value_counts()

#### Get country data

In [None]:
# Restrict the baskets to those whose Country is France and preview them.
data_by_invoice_france = data_by_invoice.loc[data_by_invoice["Country"].eq("France")]
data_by_invoice_france.head()

#### Dummy encoding <br>
<img src="dummy.png" width="500">

In [None]:
# One-hot encode the baskets: splitting the joined description string on ","
# yields one 0/1 indicator column per distinct item.
dummy_data_by_invoice_france = (
    data_by_invoice_france["Description"].str.get_dummies(sep=",")
)
# Put the indicator columns next to the original basket columns; both frames
# share the same row index, so rows line up one-to-one.
data_france = pd.concat(
    objs=[data_by_invoice_france, dummy_data_by_invoice_france],
    axis="columns",
)
data_france.head(10)

In [None]:
# List the column labels: the basket columns followed by the item indicator columns.
data_france.columns

In [None]:
# Alternative encoding built straight from the line-level data: a 0/1
# invoice-by-item matrix for the French invoices.
# Counting lines per (InvoiceNo, Description) pair and unstacking gives a
# single matrix with one column per item. A pivot_table call without
# `values=` would instead aggregate every remaining column and return
# MultiIndex columns holding one redundant copy of the matrix per column.
(
    data.loc[data["Country"] == "France"]
    .groupby(["InvoiceNo", "Description"])
    .size()
    .unstack(fill_value=0)   # item absent from an invoice -> count 0
    .gt(0)                   # any number of lines -> item is present
    .astype(int)
)

### Extract rules with Mlxtend (machine learning extensions) 

Mlxtend is a Python library of useful tools for day-to-day data science tasks.

http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

### Find frequent itemsets and rules

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

# Mine frequent itemsets from the item indicator columns only; InvoiceNo,
# Country and Description describe the basket and are not items.
# The 0/1 indicators are cast to bool because mlxtend's apriori expects a
# boolean frame and emits a deprecation warning for other dtypes.
# Itemsets must appear in at least 5% of the baskets (min_support=0.05);
# use_colnames=True reports item names instead of column positions.
frequent_itemsets = apriori(
    data_france.drop(columns=['InvoiceNo', 'Country', 'Description']).astype(bool),
    min_support=0.05,
    use_colnames=True,
)

frequent_itemsets

In [None]:
# Derive association rules from the frequent itemsets, keeping the rules
# that meet the 0.5 threshold on the confidence metric.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)
rules

### Add antecedent length column

In [None]:
# Record how many items make up the antecedent (left-hand side) of each rule.
rules["antecedent_len"] = rules["antecedents"].map(len)
rules

### Filter rules

In [None]:
# Rules whose antecedent is exactly the single item below, with lift above
# 0.7 and support above 0.05.
rules.loc[
    rules["lift"].gt(0.7)
    & rules["support"].gt(0.05)
    & (rules["antecedents"] == {"PLASTERS IN TIN WOODLAND ANIMALS"})
]

In [None]:
# Show the rules whose consequent is exactly the single item POSTAGE.
rules[rules['consequents'] == {'POSTAGE'}]