## Association Rules
    - Apriori Algorithm
    - Contains three major components
        1. Support
        2. Confidence
        3. Lift
        
        Example: Suppose Order Transactions data
        
        Support(item1): 
            Transactions containing item1 / Total Transactions
        
        Confidence(item1 -> item2):
            likelyhood that item2 is also bought if item1 is bought
            = (Transactions containing item1 and item 2) / (Transactions containing item1)

        Lift(item1 -> item2):
            refers to increase in ratio of item2 when item1 is sold.
            = Confidence(item1 -> item2) / (support(item2))
        

In [2]:
with open('../data/processed/grocery.txt', 'r') as f:
    data = f.readlines()
data = [x.replace('\n', '').split(',') for x in data]
data[1:4]

[['coke', 'beer'], ['milk', 'beer'], ['coke', 'beer', 'eggs', 'bread', 'milk']]

In [3]:
merged_data = sum(data, [])
unique_items = list(set(merged_data))
print('Items in Transactions:', ', '.join(unique_items))

Items in Transactions: bread, beer, milk, coke, eggs


In [7]:
def cal_support(item: str, data: list) -> tuple:
    """
    Function to calculate support
    Arguemts:
        item -> string
        data -> list of items bought per txn
    Returns
        support -> Tuple containing item and support_value
    """
    
    total_transactions = len(data)
    item_transactions = 0

    for each in data:
        if item in each:
            item_transactions += 1
    
    support_value = item_transactions / total_transactions
    support = (item, support_value)
    return support

In [9]:
def cal_confidence(item1: str, item2: str, data: list) -> tuple:
    """
    Function to calculate confidence
    Arguments:
        item1 -> string
        item2 -> string
        data -> list of items bought per txn
    Returns
        confidence -> Tuple containing item1, item2, confidence value
            of item1 -> item2
    """

    # confidence = count(item1 & item2)/count(item1)
    item1_count, item12_count = 0, 0
    for each in data:
        if item1 in each and item2 in each:
            item12_count += 1
        
        if item1 in each:
            item1_count += 1
    
    conf_val = 0
    if item12_count > 0:
        conf_val = item12_count / item1_count

    return (item1, item2, conf_val)

In [15]:
def cal_lift(item1: str, item2: str, data: list) -> tuple:
    """
    Function to calculate lift
    Arguments:
        item1 -> string
        item2 -> string
        data -> list of items bought per txn
    Returns
        lift -> Tuple containing item1, item2, lift value
            of item1 -> item2
    """

    conf_item1_item2 = cal_confidence(item1, item2, data)[2]
    supp_item2 = cal_support(item2, data)[1]

    lift = conf_item1_item2/supp_item2

    return (item1, item2, lift)

In [18]:
def cal_aripori(item1: str, item2: str, data: list) -> tuple:
    """
    Function to calculate lift
    Arguments:
        item1 -> string
        item2 -> string
        data -> list of items bought per txn
    Returns
        metrics -> entire metrics of item1 and item2
    """

    supp_item1 = cal_support(item1, data)
    supp_item2 = cal_support(item2, data)

    confidence_item12 = cal_confidence(item1, item2, data)
    lift_item12 = cal_lift(item1, item2, data)

    output_str = '''
        Item 1, 2: {}, {}
        Support Item1 : {}
        Support Item2 : {}
        Confidence Item1 -> Item2 : {}
        Lift Item1 -> Item2
    '''.format(
        item1,
        item2,
        supp_item1[1],
        supp_item1[1],
        confidence_item12[2],
        lift_item12[2]
    )
    print (output_str)

In [22]:
from itertools import combinations
all_combinations = list(combinations(unique_items, 2))

for item12 in all_combinations:
    print("-"*20)
    cal_aripori(item12[0], item12[1], data)



--------------------

        Item 1, 2: bread, beer
        Support Item1 : 0.714
        Support Item2 : 0.714
        Confidence Item1 -> Item2 : 0.7240896358543417
        Lift Item1 -> Item2
    
--------------------

        Item 1, 2: bread, milk
        Support Item1 : 0.714
        Support Item2 : 0.714
        Confidence Item1 -> Item2 : 0.7324929971988795
        Lift Item1 -> Item2
    
--------------------

        Item 1, 2: bread, coke
        Support Item1 : 0.714
        Support Item2 : 0.714
        Confidence Item1 -> Item2 : 0.7366946778711485
        Lift Item1 -> Item2
    
--------------------

        Item 1, 2: bread, eggs
        Support Item1 : 0.714
        Support Item2 : 0.714
        Confidence Item1 -> Item2 : 0.7296918767507002
        Lift Item1 -> Item2
    
--------------------

        Item 1, 2: beer, milk
        Support Item1 : 0.715
        Support Item2 : 0.715
        Confidence Item1 -> Item2 : 0.7118881118881119
        Lift Item1 -> Item2
 