In [50]:
import numpy as np
import template as tmp
import itertools as itr

Integrantes:
    - Juan Manuel Jimenez
    - Ian Chona Ramírez
    - Camilo Andres Franco

1. Write functions sup(D,X,Y=None), conf(D,X,Y), lift(D,X,Y=None), leverage(D,X,Y),
jaccard(D,X,Y), conviction(D,X,Y), oddsRatio(D,X,Y), imp(D,X,Y) that compute
for any rule X −→ Y the respective metric given the dataset D, where D is a list
of lists of item IDs. Note that, for the support and lift, the conclusion is optional.
This is to allow the calculation of support and lift of patterns (itemsets). Add a
function getRuleMetric(D,X,Y,metric) that computes the metric metric (given as a
string name in {sup, conf , lift, leverage, jaccard, conviction, oddsratio, imp}) for the rule
X −→ Y in the data D.


In [51]:
shopData = tmp.readTransactionalDatabase('shop.dat')

In [52]:
def sup(D, X, Y=None):
    support = 0
    for transaction in D:
        hasPremise = False
        hasConclusion = False
        for itemSet in transaction:
            if itemSet == X:
                hasPremise = True
            if Y is not None:
                if itemSet == Y:
                    hasConclusion = True
        if Y is not None:
            if hasPremise and hasConclusion:
                support += 1
        else:
            if hasPremise:
                support += 1 
    return support

# support = sup(shopData[0:100], 25, 52)
# print(support)

In [53]:
def conf(D, X, Y):
    if sup(D, X, None) == 0:
        return 0;
    return sup(D, X, Y)/sup(D, X, None)

In [54]:
def lift(D, X, Y):
    return conf(D, X, Y)/(sup(D, Y, None)/len(D))

In [55]:
def leverage(D, X, Y):
    return (sup(D, X, Y)/len(D)) - (sup(D, X, None)/len(D)) - (sup(D, Y, None)/len(D))

In [56]:
def jaccard(D, X, Y):
    return (sup(D, X, Y)/len(D)) / ((sup(D, X, None)/len(D)) + (sup(D, Y, None)/len(D)) - (sup(D, X, Y)/len(D)))

In [57]:
def conviction(D, X, Y):
    sup_X = sup(D, X, None)
    rsup_Y_comp = (len(D) - sup(D, Y, None))/len(D)
    
    sup_X_Y = sup(D, X, Y)
    
    sup_X_Y_comp = sup_X - sup_X_Y
    
    if sup_X == 0:
        return 1
    conf = sup_X_Y_comp/sup_X
    
    if rsup_Y_comp == 0:
        return np.inf
    
    return conf/rsup_Y_comp

In [58]:
def oddsRatio(D, X, Y):
    sup_X_Y = sup(D, X, Y)
    
    sup_X = sup(D, X, None)
    sup_X_Y_comp = sup_X - sup_X_Y
    
    sup_Y = sup(D, Y, None)
    sup_X_comp_Y = sup_Y - sup_X_Y
    
    sup_Y_comp = len(D) - sup(D, Y, None)
    sup_X_comp_Y_comp = sup_Y_comp - sup_X_Y_comp
    
    if (sup_X_Y_comp*sup_X_comp_Y) == 0:
        return np.inf
    return (sup_X_Y*sup_X_comp_Y_comp)/(sup_X_Y_comp*sup_X_comp_Y)

In [59]:
def imp(D, X, Y):
    conf_X_Y = conf(D, X, Y)
    
    n_digits = len(str(X))
    digits_X = [d for d in str(X)]
    
    maxItem = 0
    maxConf = 0
    for k in range(1,n_digits):
        for i in itr.combinations(digits_X,k):
            itemSet = ''.join(i)
            conf_item = conf(D, int(itemSet), Y)
            if conf_item > maxConf:
                maxItem = int(itemSet)
                maxConf = conf_item
    return conf_X_Y - maxConf

In [60]:
def getRuleMetric(D, X, Y, metric):
    ruleMetrics = {
        "sup": sup(D, X, Y),
        "conf": conf(D, X, Y),
        "lift": lift(D, X, Y),
        "leverage": leverage(D, X, Y),
        "jaccard": jaccard(D, X, Y),
        "conviction": conviction(D, X, Y),
        "oddsratio": oddsRatio(D, X, Y),
        "imp": imp(D, X, Y),
    }
    return ruleMetrics.get(metric, 'Invalid metric')

#"sup, conf , lift, leverage, jaccard, conviction, oddsratio, imp"
premise = 25
conclusion = 52

print('Support: ', str(getRuleMetric(shopData, premise, conclusion, "sup")))
print('Confidence: ', str(getRuleMetric(shopData, premise, conclusion, "conf")))
print('Lift: ', str(getRuleMetric(shopData, premise, conclusion, "lift")))
print('Leverage: ', str(getRuleMetric(shopData, premise, conclusion, "leverage")))
print('Jaccard: ', str(getRuleMetric(shopData, premise, conclusion, "jaccard")))
print('Conviction: ', str(getRuleMetric(shopData, premise, conclusion, "conviction")))
print('OddsRatio: ', str(getRuleMetric(shopData, premise, conclusion, "oddsratio")))
print('Improvement: ', str(getRuleMetric(shopData, premise, conclusion, "imp")))

Support:  439
Confidence:  0.3146953405017921
Lift:  15.86965912767484
Leverage:  -0.02939
Jaccard:  0.1493705341953045
Conviction:  0.6991691844253628
OddsRatio:  28.867162263966875
Improvement:  0.1958246124185826
