![](../images/itam_logo.png)

M. Sc. Liliana Millán Núñez liliana.millan@itam.mx

Noviembre 2020

Necesitarás instalar en tu pyenv el paquete `mlxtend` con `pip install mlxtend`

### Ejemplo 1

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Five toy market-basket transactions: each inner list holds the items bought
# together in one purchase. Note that the last basket lists 'Onion' twice.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [None]:
# Display the raw list of transactions.
dataset

In [None]:
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# Using TransactionEncoder: first learn the set of distinct items from the
# transactions, then encode every transaction against that item vocabulary.
te = TransactionEncoder()
te.fit(dataset)
txs_formatted = te.transform(dataset)

In [None]:
# Inspect the encoded transactions produced by the TransactionEncoder.
txs_formatted

In [None]:
# Wrap the encoded transactions in a DataFrame whose columns are the item
# names learned by the encoder (te.columns_), then display it.
df = pd.DataFrame(txs_formatted, columns=te.columns_)
df

In [None]:
from mlxtend.frequent_patterns import apriori

In [None]:
# Frequent itemsets with a minimum support of 0.6; use_colnames=True asks for
# the itemsets to be reported with the DataFrame's column names.
apriori_df = apriori(df, min_support=0.6, use_colnames=True)
apriori_df

#### Visualización de las reglas

In [None]:
from mlxtend.frequent_patterns import association_rules

In [None]:
# Derive association rules from the frequent itemsets, filtering on the
# confidence metric with a threshold of 0.7.
rules_df = association_rules(apriori_df, metric="confidence", min_threshold=0.7)
rules_df

In [None]:
# Show the rules ordered from highest to lowest lift (rules_df itself is not modified).
rules_df.sort_values(by="lift", ascending=False)

In [None]:
# Scatter every rule by support (x) against lift (y), colouring by confidence.
a = sns.scatterplot(data=rules_df, x="support", y="lift", hue="confidence")
# Fix both axis ranges in a single call.
a.set(xlim=(0, 1), ylim=(0, 1.30))
a.legend(loc="lower right")
a.set_title("Association rules")

### Ejemplo 2

Utilizando un subset de las transacciones de *retail online* de UK que puedes encontrar [aquí](https://www.dropbox.com/s/y543ol0k30vn7vv/subset_retail_online_uk.csv?dl=0)

In [None]:
import os

# Location of the retail CSV. The default is the original author's local path;
# set the RETAIL_CSV_PATH environment variable to point at your own copy of
# subset_retail_online_uk.csv so the notebook runs on any machine without
# editing this cell.
ruta_csv = os.environ.get(
    "RETAIL_CSV_PATH",
    "/home/silil/Documents/itam/mineria_datos_licenciatura/data/subset_retail_online_uk.csv",
)
transacciones = pd.read_csv(ruta_csv)

In [None]:
# Peek at the first rows of the loaded transactions.
transacciones.head()

In [None]:
# Lower-case every column name in place; rename applies the callable to each
# column label.
transacciones.rename(columns=str.lower, inplace=True)

In [None]:
# (rows, columns) of the transactions table.
transacciones.shape

¿Cuántos tickets diferentes hay? 

In [None]:
# One row per invoice with the number of non-null stock codes it contains;
# the first element of the resulting shape is the number of distinct tickets.
(
    transacciones
    .groupby(['invoiceno'], as_index=False)['stockcode']
    .count()
    .rename(columns={'stockcode': 'count'})
    .shape
)

In [None]:
# Same per-invoice count as above, sorted so the invoices with the most
# line items come first; show the top five.
(
    transacciones
    .groupby(['invoiceno'], as_index=False)['stockcode']
    .count()
    .rename(columns={'stockcode': 'count'})
    .sort_values(by="count", ascending=False)
    .head()
)

¿Cuántos productos diferentes hay?

In [None]:
# Number of distinct stock codes.
transacciones.stockcode.nunique()

In [None]:
# If we look at the product descriptions instead, how many distinct ones are there?
transacciones.description.nunique()

In [None]:
# Build one basket per invoice: the sorted, de-duplicated stock codes that
# appear on that invoice (np.unique both sorts and removes repeats).
txs = (
    transacciones
    .groupby(['invoiceno'])['stockcode']
    .apply(lambda codigos: list(np.unique(codigos)))
)

In [None]:
# First few baskets, indexed by invoice number.
txs.head()

In [None]:
# Drop the invoice index and keep only the baskets as a plain list of lists.
txs_list = txs.values.tolist()

In [None]:
# Check the container type of the converted baskets.
type(txs_list)

In [None]:
# Preview the first three baskets.
txs_list[:3]

In [None]:
# Using TransactionEncoder: learn the set of distinct stock codes from the
# baskets, then encode every basket against that vocabulary.
te = TransactionEncoder()
te.fit(txs_list)
txs_formatted = te.transform(txs_list)

In [None]:
# Check the type of the encoder output.
type(txs_formatted)

In [None]:
# Dimensions of the encoded baskets.
txs_formatted.shape

In [None]:
# Inspect the encoded baskets.
txs_formatted

In [None]:
# Wrap the encoded baskets in a DataFrame whose columns are the stock codes
# learned by the encoder (te.columns_), then display it.
df = pd.DataFrame(txs_formatted, columns=te.columns_)
df

In [None]:
# Number of non-null invoice entries per stock code, sorted so the most
# frequently sold products come first; show the top five.
(
    transacciones
    .groupby(['stockcode'], as_index=False)['invoiceno']
    .count()
    .rename(columns={'invoiceno': 'count'})
    .sort_values(by="count", ascending=False)
    .head()
)

In [None]:
# Approximate support of the best-selling product: the number of transaction
# rows for the most frequent stock code divided by the number of tickets.
# The count is computed from the data instead of being typed in by hand, so the
# cell stays correct if the input file changes.
# NOTE(review): the original hard-coded 2313 here, presumably the top count
# shown by the previous cell; confirm against that output.
ventas_top = int(transacciones.groupby('stockcode')['invoiceno'].count().max())
ventas_top / df.shape[0]

In [None]:
from mlxtend.frequent_patterns import apriori

In [None]:
# Frequent itemsets with a minimum support of 0.05, labelled with the stock
# codes used as column names, listed from highest to lowest support.
apriori_df = apriori(df, min_support=0.05, use_colnames=True)
apriori_df.sort_values(by='support', ascending=False)

In [None]:
# Derive association rules from the frequent itemsets, filtering on the
# confidence metric with a threshold of 0.4, and show them by decreasing lift.
rules_df = association_rules(apriori_df, metric="confidence", min_threshold=0.4)
rules_df.sort_values(by="lift", ascending=False)

In [None]:
# Map every stock code to the first description recorded for it. Built once,
# so each rule is an index lookup instead of a full scan of the table.
descripcion_por_codigo = (
    transacciones
    .drop_duplicates(subset='stockcode')
    .set_index('stockcode')['description']
)


def describir_items(itemset):
    """Return the product description(s) for the stock codes in *itemset*.

    association_rules yields frozensets. A single-item set gives that item's
    description unchanged; a multi-item set gives every item's description
    joined with ", " in sorted stock-code order, instead of keeping only one
    arbitrary member of the set.
    """
    descripciones = [descripcion_por_codigo[codigo] for codigo in sorted(itemset)]
    if len(descripciones) == 1:
        return descripciones[0]
    return ", ".join(map(str, descripciones))


antecedentes = [describir_items(element) for element in rules_df.antecedents]

In [None]:
# Map every stock code to the first description recorded for it. Built once,
# so each rule is an index lookup instead of a full scan of the table.
descripcion_por_codigo = (
    transacciones
    .drop_duplicates(subset='stockcode')
    .set_index('stockcode')['description']
)


def describir_items(itemset):
    """Return the product description(s) for the stock codes in *itemset*.

    association_rules yields frozensets. A single-item set gives that item's
    description unchanged; a multi-item set gives every item's description
    joined with ", " in sorted stock-code order, instead of keeping only one
    arbitrary member of the set.
    """
    descripciones = [descripcion_por_codigo[codigo] for codigo in sorted(itemset)]
    if len(descripciones) == 1:
        return descripciones[0]
    return ", ".join(map(str, descripciones))


consecuentes = [describir_items(element) for element in rules_df.consequents]

In [None]:
# Attach the human-readable descriptions as two new columns; both lists were
# built by iterating rules_df, so they line up with its rows.
rules_df['antecedentes'] = antecedentes
rules_df['consecuentes'] = consecuentes

In [None]:
# Display the rules together with the description columns.
rules_df

In [None]:
# Scatter every rule by support (x) against confidence (y), colouring by lift.
# The y-range is left to matplotlib's autoscaling.
a = sns.scatterplot(data=rules_df, x="support", y="confidence", hue="lift")
a.legend(loc="lower right")
a.set_title("Association rules")