# Imports

In [92]:
%matplotlib inline
import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori,fpgrowth,association_rules
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [93]:
with open("./data/W8_HW_dataset.csv", "r") as f:
    data = f.read()
    data = data.split("\n")
    data = [i.split(",") for i in data]
data

[['471', '1601', '4056', '4448', '5576', '5832'],
 ['1487'],
 ['2302', '3733', '4639'],
 ['1968', '3885', '4056', '4311', '4839', '5398', '5576', '5832'],
 ['1096', '1528', '2852', '4243', '4519', '4682'],
 ['404', '1230', '1543', '4396', '5576', '5793'],
 ['404', '1096', '1230', '3885', '4396', '4519'],
 ['241', '1487', '2302', '2653', '3136', '3277', '3377', '3733'],
 ['86', '111', '1096', '1313', '1648', '1882', '1997', '4519', '5408'],
 ['772', '4056', '4311', '5576', '5832'],
 ['3885'],
 ['111', '179', '241', '277', '1313', '1475', '1577', '1594', '4094', '4582'],
 ['1737', '1882', '1997', '3885'],
 ['2229', '4115', '4330', '4789', '5161'],
 ['1487', '2653', '3136', '3277', '3377', '3392', '3772', '4398'],
 ['209', '4700', '5276'],
 ['404', '471', '1528', '1601', '3830', '4448'],
 ['471', '1601', '4633'],
 ['11', '1307', '3733', '4056', '4311', '5519', '5576', '5832'],
 ['1531', '1834', '3885', '4243', '4370', '4682'],
 ['686', '1096', '3070', '4519', '5444'],
 ['4056', '4311', '5

# Preprocessing

In [94]:
encoder = TransactionEncoder()
encoded_arr = encoder.fit_transform(data)
df = pd.DataFrame(encoded_arr, columns=encoder.columns_)
df

Unnamed: 0,1096,11,111,1129,1152,1216,1230,1253,1256,127,...,5793,5812,5832,5909,5992,686,709,772,86,994
0,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522656,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
522657,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
522658,True,False,False,False,True,False,False,False,False,False,...,False,False,True,False,False,True,False,False,False,False
522659,False,False,False,False,False,False,False,False,False,False,...,False,False,False,True,False,False,False,False,False,False


# Model

## Apriori

### frequent_itemsets

In [95]:
frequent_itemssets = apriori(df, min_support=0.1, use_colnames=True)
frequent_itemssets

Unnamed: 0,support,itemsets
0,0.150122,(1096)
1,0.103304,(111)
2,0.123353,(1997)
3,0.11416,(3136)
4,0.14593,(3885)
5,0.132298,(4056)
6,0.103962,(4311)
7,0.13878,(4519)
8,0.156507,(5576)
9,0.141822,(5832)


### frequently shopped together

In [96]:
frequent_itemssets['length'] = frequent_itemssets["itemsets"].apply(lambda x: len(x))
frequent_itemssets

Unnamed: 0,support,itemsets,length
0,0.150122,(1096),1
1,0.103304,(111),1
2,0.123353,(1997),1
3,0.11416,(3136),1
4,0.14593,(3885),1
5,0.132298,(4056),1
6,0.103962,(4311),1
7,0.13878,(4519),1
8,0.156507,(5576),1
9,0.141822,(5832),1


In [97]:
frequent_itemssets[frequent_itemssets["length"] > 1]["itemsets"]

10          (1096, 4519)
11          (5576, 4056)
12          (5832, 4056)
13          (5832, 5576)
14    (5832, 5576, 4056)
Name: itemsets, dtype: object

### 5576 as antecendents

In [98]:
association = association_rules(frequent_itemssets, metric="confidence", min_threshold=0.1)

In [99]:
association[association["antecedents"].apply(lambda x: "5576" in list(x))]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
2,(5576),(4056),0.156507,0.132298,0.121784,0.778142,5.881736,0.101079,3.911066
7,(5576),(5832),0.156507,0.141822,0.122427,0.782249,5.5157,0.100231,3.941103
8,"(5832, 5576)",(4056),0.122427,0.132298,0.1134,0.926267,7.001372,0.097204,11.768231
10,"(5576, 4056)",(5832),0.121784,0.141822,0.1134,0.931157,6.565658,0.096129,12.465706
12,(5576),"(5832, 4056)",0.156507,0.121821,0.1134,0.724572,5.947851,0.094335,3.188418


## Frequency Pattern

### frequent_itemsets

In [100]:
frequent_itemssets_fp = fpgrowth(df, min_support=0.1, use_colnames=True)
frequent_itemssets_fp

Unnamed: 0,support,itemsets
0,0.156507,(5576)
1,0.141822,(5832)
2,0.132298,(4056)
3,0.14593,(3885)
4,0.103962,(4311)
5,0.150122,(1096)
6,0.13878,(4519)
7,0.11416,(3136)
8,0.123353,(1997)
9,0.103304,(111)


### frequently bought together

In [101]:
frequent_itemssets_fp['length'] = frequent_itemssets_fp["itemsets"].apply(lambda x: len(x))
frequent_itemssets_fp

Unnamed: 0,support,itemsets,length
0,0.156507,(5576),1
1,0.141822,(5832),1
2,0.132298,(4056),1
3,0.14593,(3885),1
4,0.103962,(4311),1
5,0.150122,(1096),1
6,0.13878,(4519),1
7,0.11416,(3136),1
8,0.123353,(1997),1
9,0.103304,(111),1


In [102]:
frequent_itemssets_fp[frequent_itemssets_fp["length"] > 1]["itemsets"]

10          (5832, 5576)
11          (5832, 4056)
12          (5576, 4056)
13    (5832, 5576, 4056)
14          (1096, 4519)
Name: itemsets, dtype: object

### 5576 as antecendents

In [103]:
association_fp = association_rules(frequent_itemssets_fp, metric="confidence", min_threshold=0.1)
association_fp

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(5832),(5576),0.141822,0.156507,0.122427,0.863245,5.5157,0.100231,6.167893
1,(5576),(5832),0.156507,0.141822,0.122427,0.782249,5.5157,0.100231,3.941103
2,(5832),(4056),0.141822,0.132298,0.121821,0.858968,6.492676,0.103058,6.152517
3,(4056),(5832),0.132298,0.141822,0.121821,0.920806,6.492676,0.103058,10.836452
4,(5576),(4056),0.156507,0.132298,0.121784,0.778142,5.881736,0.101079,3.911066
5,(4056),(5576),0.132298,0.156507,0.121784,0.920532,5.881736,0.101079,10.614199
6,"(5832, 5576)",(4056),0.122427,0.132298,0.1134,0.926267,7.001372,0.097204,11.768231
7,"(5832, 4056)",(5576),0.121821,0.156507,0.1134,0.930879,5.947851,0.094335,12.203148
8,"(5576, 4056)",(5832),0.121784,0.141822,0.1134,0.931157,6.565658,0.096129,12.465706
9,(5832),"(5576, 4056)",0.141822,0.121784,0.1134,0.799595,6.565658,0.096129,4.38221


In [104]:
association_fp[association_fp["antecedents"].apply(lambda x: "5576" in list(x))]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1,(5576),(5832),0.156507,0.141822,0.122427,0.782249,5.5157,0.100231,3.941103
4,(5576),(4056),0.156507,0.132298,0.121784,0.778142,5.881736,0.101079,3.911066
6,"(5832, 5576)",(4056),0.122427,0.132298,0.1134,0.926267,7.001372,0.097204,11.768231
8,"(5576, 4056)",(5832),0.121784,0.141822,0.1134,0.931157,6.565658,0.096129,12.465706
10,(5576),"(5832, 4056)",0.156507,0.121821,0.1134,0.724572,5.947851,0.094335,3.188418
