# Prepare libraries

In [1]:
import collections
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import pandas as pd
from apyori import apriori

%matplotlib inline

# Read data

In [2]:
# First attempt: load the CSV with read_csv's default header handling
# (the preview below shows the first line of the file used as column names).
store_data = pd.read_csv('./toy_data.csv')

In [3]:
# Preview the first rows to check how the file was parsed.
store_data.head()

Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,


##### The first line of the file is a transaction, not a header row, so the file must be re-read with `header=None`.

In [4]:
# Reload with header=None so the first line of the file is kept as a data row
# instead of being used as the column names.
store_data = pd.read_csv('./toy_data.csv', header=None)

In [5]:
# Preview again: the columns are now numbered and the first line appears as row 0.
store_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [None]:
# Print the frame's (rows, columns) tuple.
print(store_data.shape)

(7501, 20)


##### Convert the data set into a 2-dimensional list (one list of items per transaction) so that it can be passed to the Apriori library.

In [None]:
# Build one list of item names per row of store_data, skipping the NaN
# padding cells of shorter rows.
#
# Fix: the original appended the row's list inside the per-column loop, so
# every transaction was added once per column. That multiplied every item
# count (and the amount of work done by apriori) by the number of columns.
# Each row is now added exactly once.
transactions = [
    [str(cell) for cell in row if pd.notna(cell)]
    for row in store_data.itertuples(index=False, name=None)
]

In [None]:
# Tally how many times each item name occurs across all transactions.
occurrences = collections.Counter(
    str(entry) for basket in transactions for entry in basket
)

# Re-key the tally alphabetically by item name.
item_count = collections.OrderedDict(sorted(occurrences.items()))

print('Therer are',len(item_count),'items.')

# List every item with its count, in name order.
for name, total in item_count.items():
    print(name,":", total)

In [None]:
# Bar chart of the per-item counts, one bar per entry of item_count,
# drawn in item_count's own (name-sorted) order.
bar_positions = range(len(item_count))
bar_labels = list(item_count.keys())
bar_heights = list(item_count.values())

plt.figure(num=None, figsize=(20, 5), dpi=120)
plt.bar(bar_positions, bar_heights, align='center')
# Rotate the item names so the labels stay readable on a wide axis.
plt.xticks(bar_positions, bar_labels, rotation=90)

plt.show()

# Apriori

**Support**

ex) for item B

support(B) = (transactions containing (B)) / (total transactions)

**Confidence**

ex) The likelihood that an item B is also bought if item A is bought.

confidence(A &rarr; B) = (transactions containing both (A and B)) / (transactions containing A)

**Lift**

ex) The increase in the ratio of sales of B when A is sold.

lift(A &rarr; B) = (confidence (A &rarr; B)) / (support (B))

##### Parameters of apriori

https://github.com/ymoch/apyori

The default values for parameters are

min_support = 0.1

min_confidence = 0.0 (for the `apriori()` function; 0.5 is the default of apyori's command-line tool)

min_lift = 0.0

max_length = None (specifies the maximum number of items in a rule; apyori has no `min_length` parameter, so passing `min_length` has no effect)

In [None]:
# Smallest itemset size (number of distinct items) to keep as a rule.
MIN_RULE_ITEMS = 2

# Mine association rules from the transactions with the chosen thresholds.
association_rules = apriori(
    transactions,
    min_support=0.0045,
    min_confidence=0.2,
    min_lift=3,
)

# NOTE(review): apyori's apriori() appears to read only min_support,
# min_confidence, min_lift and max_length, so the original min_length=2
# keyword was presumably ignored — confirm against the apyori docs.
# The minimum size is therefore enforced here explicitly.
# Materialising the list also consumes the association_rules generator.
association_results = [
    record for record in association_rules
    if len(record.items) >= MIN_RULE_ITEMS
]

In [None]:
# Print how many records are in association_results.
print(len(association_results))

In [None]:
# Print one summary per mined record: the rule, how often each antecedent
# item occurs, and the record's support / confidence / lift.
#
# Fix: the original printed "items[0] -> items[1]" from record.items, which is
# an unordered set. The printed direction (and, for itemsets of three or more,
# the printed items) could therefore disagree with the confidence and lift,
# which belong to ordered_statistics[0]. The rule is now taken from that same
# ordered statistic.
# Only the first ordered statistic of each record is shown, as before.
for cnt, record in enumerate(association_results, start=1):
    rule = record.ordered_statistics[0]
    # Sort the names so the output is stable from run to run.
    antecedent = sorted(rule.items_base)
    consequent = sorted(rule.items_add)

    print(cnt)
    print("Rule:\t" + ", ".join(antecedent) + " -> " + ", ".join(consequent))
    for name in antecedent:
        print(name,'\'s tractions:',item_count[name])
    print("Support:\t",record.support)
    print("Confidence:\t", rule.confidence)
    print("Lift:\t", rule.lift)
    print('==============================================')