# Unsupervised Machine Learning
## Association

In [26]:
import matplotlib.pyplot as plt
import mlxtend.frequent_patterns
import mlxtend.preprocessing
import numpy
import pandas

In [27]:
# Toy market-basket dataset: ten transactions, each an inner list of the
# item names that were bought together in that transaction.
example = [
    ['milk', 'bread', 'apples', 'cereal', 'jelly', 'cookies', 'salad', 'tomatoes'],
    ['beer', 'milk', 'chips', 'salsa', 'grapes', 'wine', 'potatoes', 'eggs', 'carrots'],
    ['diapers', 'baby formula', 'milk', 'bread', 'chicken', 'asparagus', 'cookies'],
    ['milk', 'cookies', 'chicken', 'asparagus', 'broccoli', 'cereal', 'orange juice'],
    ['steak', 'asparagus', 'broccoli', 'chips', 'salsa', 'ketchup', 'potatoes', 'salad'],
    ['beer', 'salsa', 'asparagus', 'wine', 'cheese', 'crackers', 'strawberries', 'cookies'],
    ['chocolate cake', 'strawberries', 'wine', 'cheese', 'beer', 'milk', 'orange juice'],
    ['chicken', 'peas', 'broccoli', 'milk', 'bread', 'eggs', 'potatoes', 'ketchup', 'crackers'],
    ['eggs', 'bread', 'cheese', 'turkey', 'salad', 'tomatoes', 'wine', 'steak', 'carrots'],
    ['bread', 'milk', 'tomatoes', 'cereal', 'chicken', 'turkey', 'chips', 'salsa', 'diapers']
]

In [28]:
# Total number of transactions in the dataset.
N = len(example)

# Number of transactions that contain milk (item x).
f_x = len([basket for basket in example if 'milk' in basket])

# Number of transactions that contain bread (item y).
f_y = len([basket for basket in example if 'bread' in basket])

# Number of transactions that contain both milk and bread.
f_x_y = len([
    basket for basket in example
    if 'milk' in basket and 'bread' in basket
])

# Report the four counts, one per line.
print("".join([
    "N = {}\n".format(N),
    "MILK Freq(x) = {}\n".format(f_x),
    "BREAD Freq(y) = {}\n".format(f_y),
    "BOTH Freq(x, y) = {}".format(f_x_y),
]))

N = 10
MILK Freq(x) = 7
BREAD Freq(y) = 5
BOTH Freq(x, y) = 4


### Support
Support is simply the probability that a given item set appears in the data, i.e. the fraction of all transactions that contain every item in the set.

In [29]:
# supp(x, y): share of all transactions that contain both milk and bread.
support = f_x_y / N
print(
    "Probability that BREAD & MILK are bought together = {}".format(
        round(support, 4)
    )
)

Probability that BREAD & MILK are bought together = 0.4


### Confidence

Confidence is the conditional probability that product B is purchased given that product A is purchased: $\text{conf}(A \Rightarrow B) = \text{supp}(A, B) / \text{supp}(A)$.


In [30]:
# conf(x -> y): joint support divided by the support of the antecedent (milk).
confidence = support / (f_x / N)
print(
    "Probability that BREAD is bought given the purchase of MILK = {}".format(
        round(confidence, 4)
    )
)

Probability that BREAD is bought given the purchase of MILK = 0.5714


### Lift & Leverage

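If product A is bought by an individual, can we say anything about whether they will or will not purchase product B with some level of confidence?

Lift is the confidence of the rule divided by the support of the second item. When lift is greater than 1, the second item is more likely to be purchased if the first item is purchased; a lift of exactly 1 means the two items are independent. Leverage expresses the same comparison as a difference: the observed joint support minus the joint support expected under independence, so a value greater than 0 indicates a positive association.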

In [31]:
# lift (x -> y): confidence of the rule relative to the baseline support of y.
lift = confidence / (f_y / N)

# leverage (x -> y): observed joint support minus the joint support that
# would be expected if x and y were independent.
leverage = support - ((f_x / N) * (f_y / N))

# Show both metrics, followed by the interpretation.
print("Lift = {}".format(round(lift, 4)))
print("Leverage = {}".format(round(leverage, 4)))
print(
    "Shows that BREAD is likely to be bought given the purchase of MILK"
)

Lift = 1.1429
Leverage = 0.05
Shows that BREAD is likely to be bought given the purchase of MILK


### Conviction

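Conviction is the ratio of the expected frequency that X occurs without Y if X and Y were independent, to the observed frequency of incorrect predictions (X occurring without Y): $\text{conv}(X \Rightarrow Y) = \dfrac{1 - \text{supp}(Y)}{1 - \text{conf}(X \Rightarrow Y)}$.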

In [32]:
# conviction: x -> y
# (1 - supp(y)) / (1 - conf(x -> y)): compares how often the rule would be
# wrong if x and y were independent with how often it is actually wrong.
# NOTE: the denominator is zero when confidence == 1, which would raise
# ZeroDivisionError here.
conviction = (1 - (f_y / N)) / (1 - confidence)
conv = format(round(conviction, 4))
# Fill the placeholder with str.format; concatenating onto the template
# left a literal "{}" in the printed line.
print("Conviction = {}".format(conv))
print("We can conclude by saying that MILK => BREAD would be incorrect " + conv + " times as often if MILK and BREAD were independent")

Conviction = {} 1.1667
We can conclude by saying that MILK => BREAD would be incorrect 1.1667 times as often if MILK and BEAD were independent
