# Playground for Recommender System Slides

- Stephen W. Thomas
- Used for MMA 869, MMAI 869, and GMMA 869

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns 

from sklearn.metrics import silhouette_score, silhouette_samples
import sklearn.metrics
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.mixture import GaussianMixture
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score, log_loss
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

import itertools

import scipy

from IPython.core.interactiveshell import InteractiveShell
# Display the result of every top-level expression in a cell, not only the
# last one. Any bare expression added to a cell will therefore be rendered.
InteractiveShell.ast_node_interactivity = "all"

# Example: converting a list of transactions into a one-hot encoded DataFrame

In [12]:
# Toy market-basket data: each inner list holds the items of one transaction.
dataset = [
    ['a', 'd'],
    ['a', 'd', 'e'],
    ['b', 'e'],
]

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# One-hot encode the transactions: one row per transaction and one boolean
# column per distinct item (column labels come from the fitted encoder).
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,a,b,d,e
0,True,False,True,False
1,True,False,True,True
2,False,True,False,True


# Mine Frequent Itemsets and Association Rules

In [13]:
from mlxtend.frequent_patterns import apriori

%time frequent_itemsets = apriori(df, min_support=0.001, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))

Wall time: 7.5 ms


In [14]:
# The five itemsets with the highest support.
frequent_itemsets.sort_values('support', ascending=False).head(5)

Unnamed: 0,support,itemsets,length
0,0.666667,(a),1
2,0.666667,(d),1
3,0.666667,(e),1
4,0.666667,"(a, d)",2
1,0.333333,(b),1


In [15]:
# First ten itemsets, in the order they were generated.
frequent_itemsets.iloc[:10]

Unnamed: 0,support,itemsets,length
0,0.666667,(a),1
1,0.333333,(b),1
2,0.666667,(d),1
3,0.666667,(e),1
4,0.666667,"(a, d)",2
5,0.333333,"(a, e)",2
6,0.333333,"(b, e)",2
7,0.333333,"(d, e)",2
8,0.333333,"(a, d, e)",3


In [16]:
# Last ten itemsets, in the order they were generated.
frequent_itemsets.iloc[-10:]

Unnamed: 0,support,itemsets,length
0,0.666667,(a),1
1,0.333333,(b),1
2,0.666667,(d),1
3,0.666667,(e),1
4,0.666667,"(a, d)",2
5,0.333333,"(a, e)",2
6,0.333333,"(b, e)",2
7,0.333333,"(d, e)",2
8,0.333333,"(a, d, e)",3


In [17]:
# Two-item itemsets whose support is at least 0.02.
frequent_itemsets.loc[
    frequent_itemsets['length'].eq(2)
    & frequent_itemsets['support'].ge(0.02)
]

Unnamed: 0,support,itemsets,length
4,0.666667,"(a, d)",2
5,0.333333,"(a, e)",2
6,0.333333,"(b, e)",2
7,0.333333,"(d, e)",2


In [18]:
# Look up the row for one specific itemset.
# NOTE(review): 'pastry' and 'soda' are not items in the toy dataset defined
# earlier in this notebook, so this selects no rows for that data; the item
# names presumably target a different dataset - confirm.
frequent_itemsets.loc[frequent_itemsets['itemsets'] == {'pastry', 'soda'}]

Unnamed: 0,support,itemsets,length


In [19]:
from mlxtend.frequent_patterns import association_rules
%time rules = association_rules(frequent_itemsets, min_threshold=0.1)

Wall time: 4.99 ms


In [22]:
# Number of items on the left-hand side of each rule; later cells filter on it.
rules["antecedent_len"] = rules["antecedents"].apply(len)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
0,(a),(d),0.666667,0.666667,0.666667,1.0,1.5,0.222222,inf,1
1,(d),(a),0.666667,0.666667,0.666667,1.0,1.5,0.222222,inf,1
2,(a),(e),0.666667,0.666667,0.333333,0.5,0.75,-0.111111,0.666667,1
3,(e),(a),0.666667,0.666667,0.333333,0.5,0.75,-0.111111,0.666667,1
4,(b),(e),0.333333,0.666667,0.333333,1.0,1.5,0.111111,inf,1
5,(e),(b),0.666667,0.333333,0.333333,0.5,1.5,0.111111,1.333333,1
6,(d),(e),0.666667,0.666667,0.333333,0.5,0.75,-0.111111,0.666667,1
7,(e),(d),0.666667,0.666667,0.333333,0.5,0.75,-0.111111,0.666667,1
8,"(a, d)",(e),0.666667,0.666667,0.333333,0.5,0.75,-0.111111,0.666667,2
9,"(a, e)",(d),0.333333,0.666667,0.333333,1.0,1.5,0.111111,inf,2


In [21]:
# Rules with a multi-item antecedent that are both reliable (confidence > 0.75)
# and better than chance (lift > 1.2), most frequent first.
rules.loc[
    rules['antecedent_len'].ge(2)
    & rules['confidence'].gt(0.75)
    & rules['lift'].gt(1.2)
].sort_values("support", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
9,"(a, e)",(d),0.333333,0.666667,0.333333,1.0,1.5,0.111111,inf,2
10,"(d, e)",(a),0.333333,0.666667,0.333333,1.0,1.5,0.111111,inf,2


In [36]:
# Rules whose antecedent contains 'Eggs' and has at least two items.
# NOTE(review): the toy dataset defined earlier in this notebook only contains
# the items a, b, d and e, so on that data this selects no rows; the saved
# output appears to come from a different dataset - confirm before re-running.
rules.loc[
    rules['antecedents'].apply(lambda items: 'Eggs' in items)
    & rules['antecedent_len'].ge(2)
]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
10,"(Kidney Beans, Eggs)",(Onion),0.8,0.6,0.6,0.75,1.25,0.12,1.6,2
11,"(Onion, Eggs)",(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf,2
