# Association
## Recommender System
Apriori
To run the Apriori algorithm used below, you must first install the mlxtend library:

        pip install mlxtend
        from mlxtend.frequent_patterns import apriori, association_rules

In [1]:
# Import Libraries
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# List the column names of the Faceplate file.
# nrows=0 parses only the header row, so the data rows are not loaded
# just to inspect the column labels.
pd.read_csv('Faceplate.csv', nrows=0).columns

Index(['Transaction', 'Red', 'White', 'Blue', 'Orange', 'Green', 'Yellow'], dtype='object')

In [3]:
# Load the Faceplate data and index the rows by the 'Transaction' column
df_faceplate = pd.read_csv('Faceplate.csv').set_index('Transaction')

# Preview the first five transactions
df_faceplate.head(5)

Unnamed: 0_level_0,Red,White,Blue,Orange,Green,Yellow
Transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,1,0,0,1,0
2,0,1,0,1,0,0
3,0,1,1,0,0,0
4,1,1,0,1,0,0
5,1,0,1,0,0,0


In [4]:
# Mine frequent itemsets with Apriori, keeping those with support >= 0.2.
# The 0/1 purchase flags are cast to bool first: mlxtend expects a boolean
# one-hot frame and emits a DeprecationWarning (and takes a slower path)
# when given other dtypes.
itemset = apriori(df_faceplate.astype(bool), use_colnames=True, min_support=0.2)

# Preview the first five itemsets
itemset.head()

Unnamed: 0,support,itemsets
0,0.6,(Red)
1,0.7,(White)
2,0.6,(Blue)
3,0.2,(Orange)
4,0.2,(Green)


In [5]:
# Derive association rules from the frequent itemsets, keeping only
# rules whose confidence is at least 0.5
metrics = association_rules(
    itemset,
    metric='confidence',
    min_threshold=0.5,
)
metrics

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(White),(Red),0.7,0.6,0.4,0.571429,0.952381,-0.02,0.933333
1,(Red),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9
2,(Blue),(Red),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
3,(Red),(Blue),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
4,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf
5,(White),(Blue),0.7,0.6,0.4,0.571429,0.952381,-0.02,0.933333
6,(Blue),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9
7,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf
8,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf
9,"(White, Blue)",(Red),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8


In [6]:
# Rank the rules from highest to lowest lift; higher lift marks the
# stronger cross-selling opportunities
metrics.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
12,"(White, Red)",(Green),0.4,0.2,0.2,0.5,2.5,0.12,1.6
15,(Green),"(White, Red)",0.2,0.4,0.2,1.0,2.5,0.12,inf
4,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf
13,"(White, Green)",(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf
7,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf
8,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf
14,"(Red, Green)",(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf
2,(Blue),(Red),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
3,(Red),(Blue),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
0,(White),(Red),0.7,0.6,0.4,0.571429,0.952381,-0.02,0.933333


In [7]:
# Show the five rules with the best cross-selling opportunity (highest lift)
metrics.sort_values(by='lift', ascending=False).head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
12,"(White, Red)",(Green),0.4,0.2,0.2,0.5,2.5,0.12,1.6
15,(Green),"(White, Red)",0.2,0.4,0.2,1.0,2.5,0.12,inf
4,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf
13,"(White, Green)",(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf
7,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf


# Association with a Book dataset

In [8]:
# Load the Charles Book Club data and preview the first five rows
df_books = pd.read_csv('CharlesBookClub.csv')
df_books.head(n=5)

Unnamed: 0,Seq#,ID#,Gender,M,R,F,FirstPurch,ChildBks,YouthBks,CookBks,...,ItalCook,ItalAtlas,ItalArt,Florence,Related Purchase,Mcode,Rcode,Fcode,Yes_Florence,No_Florence
0,1,25,1,297,14,2,22,0,1,1,...,0,0,0,0,0,5,4,2,0,1
1,2,29,0,128,8,2,10,0,0,0,...,0,0,0,0,0,4,3,2,0,1
2,3,46,1,138,22,7,56,2,1,2,...,1,0,0,0,2,4,4,3,0,1
3,4,47,1,228,2,1,2,0,0,0,...,0,0,0,0,0,5,1,1,0,1
4,5,51,1,257,10,1,10,0,0,0,...,0,0,0,0,0,5,3,1,0,1


In [9]:
# Show the full column list before trimming it down.
print(df_books.columns)

# Columns to remove so that only the book-category columns remain.
non_book_cols = ['Seq#', 'ID#', 'Gender', 'M', 'R', 'F', 'FirstPurch', 'Florence', 'Related Purchase',
                 'Mcode', 'Rcode', 'Fcode', 'Yes_Florence', 'No_Florence']

# Reassign instead of dropping in place, and ignore columns that are already
# gone, so re-running this cell does not raise a KeyError.
df_books = df_books.drop(columns=non_book_cols, errors='ignore')

Index(['Seq#', 'ID#', 'Gender', 'M', 'R', 'F', 'FirstPurch', 'ChildBks',
       'YouthBks', 'CookBks', 'DoItYBks', 'RefBks', 'ArtBks', 'GeogBks',
       'ItalCook', 'ItalAtlas', 'ItalArt', 'Florence', 'Related Purchase',
       'Mcode', 'Rcode', 'Fcode', 'Yes_Florence', 'No_Florence'],
      dtype='object')


In [16]:
# Binarize the purchase counts: every value above 0 becomes 1,
# all other values are left as they are
df_books = df_books.mask(df_books > 0, 1)

In [18]:
# Preview the first five rows of the binarized book data
df_books.head(n=5)

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt
0,0,1,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,1,1,1,0,1,0,1,1,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0


In [19]:
# Mine frequent itemsets from the book purchases with a minimum support of
# 200/4000 = 0.05.
# The frame is cast to bool because mlxtend expects a boolean one-hot frame
# and emits a DeprecationWarning for integer input; any non-zero value is
# treated as "purchased".
book_itemset = apriori(df_books.astype(bool), use_colnames=True, min_support=200 / 4000)

In [32]:
# Preview the first five frequent itemsets
book_itemset.head(n=5)

Unnamed: 0,support,itemsets
0,0.394,(ChildBks)
1,0.23825,(YouthBks)
2,0.4155,(CookBks)
3,0.25475,(DoItYBks)
4,0.20475,(RefBks)


In [33]:
# Build rules with confidence >= 0.5, then rank them from highest to lowest lift
book_rules = association_rules(book_itemset, metric='confidence', min_threshold=.5)
book_rules.sort_values(by='lift', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
64,"(RefBks, YouthBks)","(CookBks, ChildBks)",0.08125,0.2420,0.05525,0.680000,2.809917,0.035588,2.368750
73,"(DoItYBks, RefBks)","(CookBks, ChildBks)",0.09250,0.2420,0.06125,0.662162,2.736207,0.038865,2.243680
60,"(DoItYBks, YouthBks)","(CookBks, ChildBks)",0.10325,0.2420,0.06700,0.648910,2.681448,0.042014,2.158993
80,"(RefBks, GeogBks)","(CookBks, ChildBks)",0.08175,0.2420,0.05025,0.614679,2.539995,0.030467,1.967190
69,"(GeogBks, YouthBks)","(CookBks, ChildBks)",0.10450,0.2420,0.06325,0.605263,2.501087,0.037961,1.920267
...,...,...,...,...,...,...,...,...,...
2,(ChildBks),(CookBks),0.39400,0.4155,0.24200,0.614213,1.478251,0.078293,1.515086
6,(ItalCook),(ChildBks),0.10750,0.3940,0.06025,0.560465,1.422500,0.017895,1.378730
11,(GeogBks),(CookBks),0.26675,0.4155,0.15625,0.585754,1.409758,0.045415,1.410999
5,(GeogBks),(ChildBks),0.26675,0.3940,0.14625,0.548266,1.391538,0.041150,1.341498
