In [1]:
import pandas as pd
from apyori import apriori

In [2]:
# Load the raw records from lab2.csv; the path is relative, so the file is
# looked up in the current working directory.
df = pd.read_csv('lab2.csv')

In [3]:
# Preview the first rows of the raw data to check the available columns.
df.head()

Unnamed: 0,ACCOUNT,SERVICE,VISIT
0,500026,CKING,1
1,500026,SVG,2
2,500026,ATM,3
3,500026,ATM,4
4,500075,CKING,1


In [4]:
# Group all services used by the same account into one list per account.
#
# groupby(..., sort=False) keeps accounts in order of first appearance, and
# rows inside each group stay in their original order (repeated services
# included), so the result matches a row-by-row accumulation without the cost
# of iterating over the frame with df.iterrows().
# Note: rows with a missing ACCOUNT are skipped by groupby.
transactions = (
    df.groupby('ACCOUNT', sort=False)['SERVICE']
      .apply(list)
      .reset_index()
      .rename(columns={'ACCOUNT': 'Account', 'SERVICE': 'Services'})
)

# The last service of every transaction in the dataset appears twice.
# Optionally remove duplicated services used by the same account (not required):
# transactions['Services'] = transactions['Services'].apply(lambda x: set(x))

# one hot encoding
# services = pd.get_dummies(df['SERVICE'], drop_first=False)
# df.drop('SERVICE', axis = 1, inplace = True)
# df.drop('VISIT', axis = 1, inplace = True)
# df = pd.concat([df, services], axis = 1)

In [5]:
# Preview the per-account service lists for the first ten accounts.
transactions.head(10)

Unnamed: 0,Account,Services
0,500026,"[CKING, SVG, ATM, ATM]"
1,500075,"[CKING, MMDA, SVG, ATM, TRUST, TRUST]"
2,500129,"[CKING, SVG, IRA, ATM, ATM]"
3,500256,"[CKING, SVG, CKCRD, CKCRD]"
4,500341,"[CKING, SVG, CKCRD, CKCRD]"
5,500350,"[CKING, CD, CD]"
6,500458,"[SVG, ATM, ATM]"
7,500595,"[CKING, SVG, CD, TRUST, TRUST]"
8,500743,"[CKING, SVG, CCRD, HMEQLC, MTG, CKCRD, CKCRD]"
9,500744,"[CKING, CD, CD]"


In [6]:
# Run the apriori algorithm over the per-account service lists, keeping only
# itemsets whose support is at least 5%.
association_rules = apriori(transactions['Services'], min_support=0.05)

# Materialise the records into a list so they can be iterated more than once.
association_results = [record for record in association_rules]

In [7]:
# View the raw apriori output as a dataframe: one row per itemset, with its
# support and the nested ordered_statistics entries belonging to that itemset.
pd.DataFrame(association_results)

Unnamed: 0,items,support,ordered_statistics
0,(ATM),0.384558,"[((), (ATM), 0.3845576273307471, 1.0)]"
1,(AUTO),0.092854,"[((), (AUTO), 0.09285446126892755, 1.0)]"
2,(CCRD),0.154799,"[((), (CCRD), 0.154799149042673, 1.0)]"
3,(CD),0.245276,"[((), (CD), 0.24527593542735576, 1.0)]"
4,(CKCRD),0.113002,"[((), (CKCRD), 0.11300212739331748, 1.0)]"
5,(CKING),0.85784,"[((), (CKING), 0.8578400700788387, 1.0)]"
6,(HMEQLC),0.164685,"[((), (HMEQLC), 0.164685270929796, 1.0)]"
7,(IRA),0.108372,"[((), (IRA), 0.10837191840820924, 1.0)]"
8,(MMDA),0.174446,"[((), (MMDA), 0.17444625203353772, 1.0)]"
9,(MTG),0.074334,"[((), (MTG), 0.07433362532849455, 1.0)]"


In [8]:
# For a better view, unpack ordered_statistics into one row per rule.
# Adapted from https://stackoverflow.com/questions/52688220/python-apyori-sorting-by-lift
#
# Every record in association_results carries the itemset (items), its support
# and a list of ordered_statistics; each statistic splits the itemset into an
# antecedent (items_base) and a consequent (items_add) with its own confidence
# and lift. Support is a property of the itemset, so it is repeated on every
# rule derived from the same record.
rule_rows = [
    {
        'Items': set(record.items),
        'Antecedent': set(stat.items_base),
        'Consequent': set(stat.items_add),
        'Support': record.support,
        'Confidence': stat.confidence,
        'Lift': stat.lift,
    }
    for record in association_results
    for stat in record.ordered_statistics
]

# Build the frame in one step from the collected rows; the explicit column
# list fixes the column order and keeps the columns present even when no
# rules were found.
results = pd.DataFrame(
    rule_rows,
    columns=['Items', 'Antecedent', 'Consequent', 'Support', 'Confidence', 'Lift'],
)

# Reverse the row order so the rules generated last are shown first.
# Rebinding (instead of inplace=True) keeps this cell safe to re-run.
results = results.sort_index(ascending=False)
results.head(50)


Unnamed: 0,Items,Antecedent,Consequent,Support,Confidence,Lift
207,"{CKING, SVG, HMEQLC, ATM}","{CKING, SVG, HMEQLC}",{ATM},0.060944,0.546577,1.421313
206,"{CKING, SVG, HMEQLC, ATM}","{SVG, HMEQLC, ATM}",{CKING},0.060944,1.0,1.165718
205,"{CKING, SVG, HMEQLC, ATM}","{CKING, SVG, ATM}",{HMEQLC},0.060944,0.245217,1.489001
204,"{CKING, SVG, HMEQLC, ATM}","{CKING, HMEQLC, ATM}",{SVG},0.060944,0.714076,1.154163
203,"{CKING, SVG, HMEQLC, ATM}","{SVG, HMEQLC}","{CKING, ATM}",0.060944,0.546577,1.510268
202,"{CKING, SVG, HMEQLC, ATM}","{CKING, SVG}","{HMEQLC, ATM}",0.060944,0.112497,1.31813
201,"{CKING, SVG, HMEQLC, ATM}","{CKING, HMEQLC}","{SVG, ATM}",0.060944,0.370061,1.440407
200,"{CKING, SVG, HMEQLC, ATM}","{SVG, ATM}","{CKING, HMEQLC}",0.060944,0.237214,1.440407
199,"{CKING, SVG, HMEQLC, ATM}","{HMEQLC, ATM}","{CKING, SVG}",0.060944,0.714076,1.31813
198,"{CKING, SVG, HMEQLC, ATM}","{CKING, ATM}","{SVG, HMEQLC}",0.060944,0.168396,1.510268


In [9]:
# Rank the rules by lift, strongest association first, and show the top 50.
results = results.sort_values(by='Lift', ascending=False)
results.head(50)

Unnamed: 0,Items,Antecedent,Consequent,Support,Confidence,Lift
134,"{CCRD, CKCRD, CKING}","{CCRD, CKING}",{CKCRD},0.055813,0.375737,3.325045
131,"{CCRD, CKCRD, CKING}",{CKCRD},"{CCRD, CKING}",0.055813,0.493909,3.325045
33,"{CCRD, CKCRD}",{CCRD},{CKCRD},0.055813,0.36055,3.190645
130,"{CCRD, CKCRD, CKING}",{CCRD},"{CKING, CKCRD}",0.055813,0.36055,3.190645
34,"{CCRD, CKCRD}",{CKCRD},{CCRD},0.055813,0.493909,3.190645
135,"{CCRD, CKCRD, CKING}","{CKING, CKCRD}",{CCRD},0.055813,0.493909,3.190645
203,"{CKING, SVG, HMEQLC, ATM}","{SVG, HMEQLC}","{CKING, ATM}",0.060944,0.546577,1.510268
198,"{CKING, SVG, HMEQLC, ATM}","{CKING, ATM}","{SVG, HMEQLC}",0.060944,0.168396,1.510268
196,"{CKING, SVG, HMEQLC, ATM}",{HMEQLC},"{CKING, SVG, ATM}",0.060944,0.370061,1.489001
205,"{CKING, SVG, HMEQLC, ATM}","{CKING, SVG, ATM}",{HMEQLC},0.060944,0.245217,1.489001
