# 2.1 Association rules from frequent itemsets

In [68]:
import pandas as pd

In [69]:
import itertools

In [70]:
df = pd.read_csv('../datasets/online_retail.csv', usecols=['InvoiceNo', 'Description'])
df = df[df['InvoiceNo'].map(lambda invoiceno: not invoiceno.startswith(('C', 'A')))]
df = df.groupby(by='InvoiceNo').agg({'Description': lambda x: list(set(x))})
df

Unnamed: 0_level_0,Description
InvoiceNo,Unnamed: 1_level_1
536365,"[KNITTED UNION FLAG HOT WATER BOTTLE, CREAM CU..."
536366,"[HAND WARMER RED POLKA DOT, HAND WARMER UNION ..."
536367,"[ASSORTED COLOUR BIRD ORNAMENT, HOME BUILDING ..."
536368,"[BLUE COAT RACK PARIS FASHION, YELLOW COAT RAC..."
536369,[BATH BUILDING BLOCK WORD]
...,...
581583,"[6 CHOCOLATE LOVE HEART T-LIGHTS, LUNCH BAG RE..."
581584,"[RED FLOCK LOVE HEART PHOTO FRAME, 6 CHOCOLATE..."
581585,"[ALARM CLOCK BAKELIKE RED , SET 6 SCHOOL MILK ..."
581586,"[DOORMAT RED RETROSPOT, SET OF 3 HANGING OWLS ..."


In [71]:
descriptions = list(set(itertools.chain.from_iterable(df['Description'])))
d = len(descriptions)
d, descriptions[:10]

(4207,
 [nan,
  'WRAP  PINK FLOCK',
  'CHERRY BLOSSOM LUGGAGE TAG',
  'TRAY, BREAKFAST IN BED',
  'SET OF TEA COFFEE SUGAR TINS PANTRY',
  'SET 10 CARD KRAFT REINDEER 17084',
  'LARGE MINT DIAMANTE HAIRSLIDE',
  'HAPPY BIRTHDAY CARD STRIPEY TEDDY',
  'PSYCHEDELIC TILE HOOK',
  'GLASS AND BEADS BRACELET IVORY'])

In [72]:
vecs = []
for row in df.iloc[:]['Description']:
    vec = [0 for _ in range(d)]
    for item in row:
        vec[(descriptions.index(item))] = 1
    vecs.append(vec)

In [73]:
# TODO Use sparse matrix

In [74]:
assoc_df = pd.DataFrame(data=vecs, columns=descriptions)
assoc_df

Unnamed: 0,NaN,WRAP PINK FLOCK,CHERRY BLOSSOM LUGGAGE TAG,"TRAY, BREAKFAST IN BED",SET OF TEA COFFEE SUGAR TINS PANTRY,SET 10 CARD KRAFT REINDEER 17084,LARGE MINT DIAMANTE HAIRSLIDE,HAPPY BIRTHDAY CARD STRIPEY TEDDY,PSYCHEDELIC TILE HOOK,GLASS AND BEADS BRACELET IVORY,...,HANGING HEART BONHEUR,S/12 MINI RABBIT EASTER,MADRAS NOTEBOOK MEDIUM,ORANGE TV TRAY TABLE,FAIRY DREAMS INCENSE,VINTAGE UNION JACK MEMOBOARD,PAPER LANTERN 5 POINT STUDDED STAR,NURSE'S BAG SOFT TOY,VINTAGE BLUE KITCHEN CABINET,SET/3 TALL GLASS CANDLE HOLDER PINK
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22056,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22057,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22058,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22059,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [75]:
from mlxtend.frequent_patterns import fpgrowth

In [76]:
fi = fpgrowth(assoc_df, 0.01)
fi

Unnamed: 0,support,itemsets
0,0.102443,(3233)
1,0.020806,(3385)
2,0.019446,(2435)
3,0.017225,(3014)
4,0.013916,(1858)
...,...,...
1467,0.012375,"(1299, 2492)"
1468,0.010562,"(1299, 2110)"
1469,0.010516,"(1299, 2527)"
1470,0.010698,"(2464, 595)"


In [77]:
from mlxtend.frequent_patterns import association_rules

In [78]:
rules = association_rules(fi)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(897, 844)",(3629),0.023344,0.094828,0.018721,0.801942,8.456805,0.016507,4.570231
1,"(208, 3842)",(3916),0.018811,0.04687,0.01514,0.804819,17.171294,0.014258,4.88332
2,"(208, 1819)",(3916),0.01718,0.04687,0.014777,0.860158,18.351985,0.013972,6.815778
3,"(208, 3842, 1819)",(3916),0.012284,0.04687,0.011106,0.904059,19.288633,0.01053,9.934547
4,"(208, 1819, 3580)",(3916),0.011921,0.04687,0.01097,0.920152,19.631988,0.010411,11.936818


In [79]:
for _,row in rules[['antecedents', 'consequents']].iterrows():
    f = lambda i: descriptions[i]
    print("{", "".join(map(f, row['antecedents'])), "} ⟹", "".join(map(f, row['consequents'])))

{ JUMBO BAG PINK POLKADOTJUMBO STORAGE BAG SUKI } ⟹ JUMBO BAG RED RETROSPOT
{ STRAWBERRY CHARLOTTE BAGCHARLOTTE BAG SUKI DESIGN } ⟹ RED RETROSPOT CHARLOTTE BAG
{ STRAWBERRY CHARLOTTE BAGCHARLOTTE BAG PINK POLKADOT } ⟹ RED RETROSPOT CHARLOTTE BAG
{ STRAWBERRY CHARLOTTE BAGCHARLOTTE BAG SUKI DESIGNCHARLOTTE BAG PINK POLKADOT } ⟹ RED RETROSPOT CHARLOTTE BAG
{ STRAWBERRY CHARLOTTE BAGCHARLOTTE BAG PINK POLKADOTWOODLAND CHARLOTTE BAG } ⟹ RED RETROSPOT CHARLOTTE BAG
{ STRAWBERRY CHARLOTTE BAGCHARLOTTE BAG SUKI DESIGNCHARLOTTE BAG PINK POLKADOT } ⟹ WOODLAND CHARLOTTE BAG
{ STRAWBERRY CHARLOTTE BAGCHARLOTTE BAG PINK POLKADOTWOODLAND CHARLOTTE BAG } ⟹ CHARLOTTE BAG SUKI DESIGN
{ STRAWBERRY CHARLOTTE BAGLUNCH BAG  BLACK SKULL. } ⟹ RED RETROSPOT CHARLOTTE BAG
{ STRAWBERRY CHARLOTTE BAGWOODLAND CHARLOTTE BAG } ⟹ RED RETROSPOT CHARLOTTE BAG
{ STRAWBERRY CHARLOTTE BAGCHARLOTTE BAG SUKI DESIGNWOODLAND CHARLOTTE BAG } ⟹ RED RETROSPOT CHARLOTTE BAG
{ JUMBO BAG PINK POLKADOTJUMBO SHOPPER VINTAGE RED PAI