# Solutions to Session 10 Exercises
**AIEP Send-off Python 2**

***Mark Edward M. Gonzales***

In [1]:
import itertools

### A. Input Parsing

In [2]:
num_baskets, support_threshold, eval_threshold = input().split(',')
num_baskets = int(num_baskets)
support_threshold = int(support_threshold)
eval_threshold = float(eval_threshold)

10,3,0.6


In [3]:
baskets = []
for _ in range(num_baskets):
    baskets.append(input().split(','))

lettuce,ham
grapes,lettuce,ham
cherry
lettuce,ham,cheese
grapes,lettuce
grapes,lettuce,ham,cherry
cheese
grapes,lettuce,cherry
ham,cheese
grapes,lettuce,ham,cheese


In [4]:
items = set()

for basket in baskets:
    for item in basket:
        items.add(item)

In [5]:
items = list(items)
items

['grapes', 'cheese', 'lettuce', 'cherry', 'ham']

In [6]:
baskets

[['lettuce', 'ham'],
 ['grapes', 'lettuce', 'ham'],
 ['cherry'],
 ['lettuce', 'ham', 'cheese'],
 ['grapes', 'lettuce'],
 ['grapes', 'lettuce', 'ham', 'cherry'],
 ['cheese'],
 ['grapes', 'lettuce', 'cherry'],
 ['ham', 'cheese'],
 ['grapes', 'lettuce', 'ham', 'cheese']]

### B. Frequent Itemsets

In [7]:
def get_support(itemset, baskets):
    ctr = 0
    for basket in baskets:
        if set(itemset).issubset(set(basket)):
            ctr += 1

    return ctr

def get_subsets(itemset, size):
    return list(itertools.combinations(itemset, size))

In [8]:
def get_frequent_itemsets(items, baskets, support_threshold):
    master_set = set(items)
    frequent_itemsets = []

    new_frequent_found = True
    size = 1

    while new_frequent_found:
        subsets = get_subsets(master_set, size)
        master_set = set()

        new_frequent_found = False
        for itemset in subsets:
            support = get_support(itemset, baskets)
            if support >= support_threshold:
                if len(itemset) > 1:
                    frequent_itemsets.append((list(itemset), support))
               
                master_set = master_set.union(set(itemset))
                new_frequent_found = True

        size += 1

    return sorted(frequent_itemsets, key = lambda x: x[1], reverse = True)

get_frequent_itemsets(items, baskets, support_threshold)

[(['lettuce', 'grapes'], 5),
 (['lettuce', 'ham'], 5),
 (['cheese', 'ham'], 3),
 (['grapes', 'ham'], 3),
 (['lettuce', 'grapes', 'ham'], 3)]

### C. Metrics

In [9]:
def get_confidence(antecedent, consequent, baskets):
    return get_support(list(set(antecedent).union(set(consequent))), baskets) / get_support(antecedent, baskets)

def get_interest(antecedent, consequent, baskets):
    return abs(get_confidence(antecedent, consequent, baskets) - get_support(consequent, baskets) / len(baskets))

def get_lift(antecedent, consequent, baskets):
    return get_confidence(antecedent, consequent, baskets) / (get_support(consequent, baskets) / len(baskets))

def get_conviction(antecedent, consequent, baskets):
    return (1 - get_support(consequent, baskets) / len(baskets)) / (1 - get_confidence(antecedent, consequent, baskets))

get_confidence(['grapes', 'lettuce'], ['cherry'], baskets)
get_interest(['grapes', 'lettuce'], ['cherry'], baskets)
get_lift(['grapes', 'lettuce'], ['cherry'], baskets)
get_conviction(['grapes', 'lettuce'], ['cherry'], baskets)

1.1666666666666667

### D. Association Rules

In [10]:
def derive_rules(itemset):
    rules = []
    for size in range(1, len(itemset)):
        subsets = get_subsets(itemset, size)
        for antecedent in subsets:
            rules.append((list(antecedent), list(set(itemset).difference(set(antecedent)))))

    return rules

def rule_to_string(rule):
    antecedent = ", ".join(rule[0])
    consequent = ", ".join(rule[1])

    return f"{{{antecedent}}} -> {{{consequent}}}"

In [11]:
def get_useful_association_rules(frequent_itemsets, baskets, eval_threshold, metric):
    useful_association_rules = []

    for itemset in frequent_itemsets:
        rules = derive_rules(itemset[0])
        for rule in rules:
            if metric.lower() == 'confidence':
                evaluation = get_confidence(rule[0], rule[1], baskets)
            elif metric.lower() == 'interest':
                evaluation = get_interest(rule[0], rule[1], baskets)
            elif metric.lower() == 'lift':
                evaluation = get_lift(rule[0], rule[1], baskets)
            elif metric.lower() == 'conviction':
                evaluation = get_conviction(rule[0], rule[1], baskets)
            else:
                raise Exception("Invalid metric")

            if evaluation >= eval_threshold:
                useful_association_rules.append((rule_to_string(rule), evaluation))

    return sorted(useful_association_rules, key = lambda x: x[1], reverse = True)

get_useful_association_rules(get_frequent_itemsets(items, baskets, support_threshold), baskets, eval_threshold, "confidence")

[('{grapes} -> {lettuce}', 1.0),
 ('{grapes, ham} -> {lettuce}', 1.0),
 ('{ham} -> {lettuce}', 0.8333333333333334),
 ('{cheese} -> {ham}', 0.75),
 ('{lettuce} -> {grapes}', 0.7142857142857143),
 ('{lettuce} -> {ham}', 0.7142857142857143),
 ('{grapes} -> {ham}', 0.6),
 ('{grapes} -> {lettuce, ham}', 0.6),
 ('{lettuce, grapes} -> {ham}', 0.6),
 ('{lettuce, ham} -> {grapes}', 0.6)]