In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [2]:
# Load the raw transactions; each row holds one comma-separated record in the 'data' column.
data = pd.read_csv('./transaction_data.csv')

In [3]:
# Preview the first rows to check the layout of the loaded file.
data.head()

Unnamed: 0,data
0,"Reject,Reject,Cash loans,NO CAR,HOUSE,NO CHILD..."
1,"Accept,Accept,Cash loans,NO CAR,HOUSE,NO CHILD..."
2,"Accept,Accept,Cash loans,CAR,HOUSE,CHILDREN,mo..."
3,"Accept,Accept,Revolving loans,NO CAR,HOUSE,CHI..."
4,"Accept,Accept,Cash loans,NO CAR,HOUSE,NO CHILD..."


In [4]:
# Split every comma-separated record into its list of items (one list per transaction).
items_list = [record.split(",") for record in data['data']]

In [5]:
# One-hot encode the transactions into a boolean frame with one column per item;
# te.columns_ supplies the item labels used as column names.
te = TransactionEncoder()
te.fit(items_list)
te_ary = te.transform(items_list)
encode_data = pd.DataFrame(data=te_ary, columns=te.columns_)
encode_data

Unnamed: 0,Academic degree,Accept,Advertising,Agriculture,Bank,Business Entity Type 1,Business Entity Type 2,Business Entity Type 3,CAR,CHILDREN,...,WEDNESDAY,WORK PHONE,Widow,With parents,Working,XNA,less than credit loan average,less than income average,more than credit loan average,more than income average
0,False,False,False,False,False,False,False,True,False,False,...,True,False,False,False,True,False,True,False,False,True
1,False,True,False,False,False,False,False,False,False,False,...,True,True,False,False,False,False,True,True,False,False
2,False,True,False,False,False,False,False,True,True,True,...,False,False,False,False,False,False,False,False,True,True
3,False,True,False,False,False,False,False,False,False,True,...,False,False,False,False,True,False,True,True,False,False
4,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49645,False,True,False,False,False,False,False,False,True,True,...,False,True,False,False,True,False,True,True,False,False
49646,False,False,False,False,False,False,False,False,False,True,...,False,True,False,False,True,False,False,True,True,False
49647,False,False,False,False,False,False,False,True,False,False,...,False,False,False,False,True,False,True,False,False,True
49648,False,False,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,True,False,False,True


In [6]:
# Mine the itemsets with support >= 0.2, labelled by item name rather than column index,
# and record how many items each itemset contains.
frequent_itemsets = apriori(encode_data, use_colnames=True, min_support=0.2)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].map(len)

In [7]:
# Show the frequent itemsets with the largest ones (most items) first.
frequent_itemsets.sort_values('length', ascending=False)

Unnamed: 0,support,itemsets,length
999,0.204451,"(Secondary / secondary special, House / apartm...",6
987,0.200242,"(Secondary / secondary special, House / apartm...",6
976,0.227029,"(Secondary / secondary special, House / apartm...",6
977,0.236455,"(House / apartment, NO WORK PHONE, Cash loans,...",6
979,0.251641,"(Secondary / secondary special, House / apartm...",6
...,...,...,...
18,0.619053,(less than credit loan average),1
19,0.621793,(less than income average),1
20,0.380947,(more than credit loan average),1
21,0.378207,(more than income average),1


In [8]:
# Show the frequent itemsets with the highest-support ones first.
frequent_itemsets.sort_values('support', ascending=False)

Unnamed: 0,support,itemsets,length
4,0.918087,(Cash loans),1
8,0.874220,(House / apartment),1
52,0.803907,"(Cash loans, House / apartment)",2
13,0.783323,(NO WORK PHONE),1
15,0.744592,(Secondary / secondary special),1
...,...,...,...
823,0.200524,"(House / apartment, less than credit loan aver...",5
934,0.200524,"(Secondary / secondary special, House / apartm...",5
151,0.200504,"(Cash loans, Accept, more than credit loan ave...",3
987,0.200242,"(Secondary / secondary special, House / apartm...",6


In [9]:
# Derive association rules from the frequent itemsets, keeping rules with confidence >= 0.5.
ar = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.5)

In [10]:
# Discard the metrics that are not used below, leaving the support/confidence columns.
# errors='ignore' keeps this cell safe to re-run (the columns are already gone the
# second time) and tolerant of mlxtend versions that do not emit every listed metric.
ar = ar.drop(columns=['lift', 'leverage', 'conviction', 'zhangs_metric'], errors='ignore')

In [11]:
# Display the remaining rule table (antecedents, consequents, supports, confidence).
ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence
0,(Accept),(Cash loans),0.500000,0.918087,0.450393,0.900785
1,(Accept),(HOUSE),0.500000,0.688721,0.346667,0.693333
2,(HOUSE),(Accept),0.688721,0.500000,0.346667,0.503348
3,(House / apartment),(Accept),0.874220,0.500000,0.445780,0.509918
4,(Accept),(House / apartment),0.500000,0.874220,0.445780,0.891561
...,...,...,...,...,...,...
7315,"(NO CHILDREN, Secondary / secondary special, H...","(House / apartment, NO CAR, NO WORK PHONE)",0.359557,0.460000,0.204451,0.568620
7316,"(NO CAR, NO CHILDREN, NO WORK PHONE)","(House / apartment, Secondary / secondary spec...",0.385176,0.481853,0.204451,0.530799
7317,"(NO CAR, NO WORK PHONE, HOUSE)","(House / apartment, NO CHILDREN, Secondary / s...",0.377946,0.456375,0.204451,0.540954
7318,"(NO CHILDREN, NO WORK PHONE, HOUSE)","(House / apartment, NO CAR, Secondary / second...",0.394220,0.452085,0.204451,0.518623


In [12]:
# Keep only the rules whose consequent is exactly {'Accept'}.
# The comparison runs on the 'consequents' column alone instead of a row-wise
# DataFrame.apply with a redundant `True if ... else False`; astype(bool) guarantees
# a boolean mask even if `ar` has no rows.
ar_Accept = ar[ar['consequents'].map(lambda itemset: itemset == frozenset({'Accept'})).astype(bool)]

In [13]:
# Show the three 'Accept' rules with the highest confidence.
ar_Accept.sort_values('confidence', ascending=False).head(3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence
1544,"(House / apartment, Married, NO WORK PHONE)",(Accept),0.432931,0.5,0.233615,0.539614
16,(more than credit loan average),(Accept),0.380947,0.5,0.205398,0.539177
269,"(Married, NO CHILDREN)",(Accept),0.389366,0.5,0.209587,0.538279


In [14]:
# Keep only the rules whose consequent is exactly {'Reject'}.
# The comparison runs on the 'consequents' column alone instead of a row-wise
# DataFrame.apply with a redundant `True if ... else False`; astype(bool) guarantees
# a boolean mask even if `ar` has no rows.
ar_Reject = ar[ar['consequents'].map(lambda itemset: itemset == frozenset({'Reject'})).astype(bool)]

In [15]:
# Show the three 'Reject' rules with the highest confidence.
ar_Reject.sort_values('confidence', ascending=False).head(3)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence
2868,"(Cash loans, Working, Secondary / secondary sp...",(Reject),0.399839,0.5,0.233031,0.582813
1318,"(Working, Secondary / secondary special)",(Reject),0.43428,0.5,0.249184,0.573787
1328,"(less than credit loan average, Working)",(Reject),0.36141,0.5,0.206626,0.571723


In [16]:
# Load the tabular dataset (one column per attribute, including 'Target') used to
# cross-check the mined rules against raw counts.
mydataset = pd.read_csv('mydataset.csv')

In [17]:
# Target distribution among rows that satisfy all three conditions of the
# highest-confidence 'Accept' rule (Married, House / apartment, NO WORK PHONE).
mydataset.loc[
    mydataset['Family status'].eq('Married')
    & mydataset['Housing type'].eq('House / apartment')
    & mydataset['Flag work phone'].eq('NO WORK PHONE'),
    'Target',
].value_counts()

Accept    11599
Reject     9896
Name: Target, dtype: int64

In [24]:
# Target distribution among rows that fail at least one of the three conditions
# (not Married, or not House / apartment, or not NO WORK PHONE).
mydataset.loc[
    mydataset['Family status'].ne('Married')
    | mydataset['Housing type'].ne('House / apartment')
    | mydataset['Flag work phone'].ne('NO WORK PHONE'),
    'Target',
].value_counts()

Reject    14929
Accept    13226
Name: Target, dtype: int64

In [18]:
# Preview the first rows of the tabular dataset.
mydataset.head()

Unnamed: 0,Target,Contract type,Flag own car,Flag own house,Children,Income,Credit loan,Income type,Education type,Family status,Housing type,Flag work phone,Weekday appr process start,Organization type
0,Reject,Cash loans,NO CAR,HOUSE,NO CHILDREN,more than income average,less than credit loan average,Working,Secondary / secondary special,Single / not married,House / apartment,NO WORK PHONE,WEDNESDAY,Business Entity Type 3
1,Accept,Cash loans,NO CAR,HOUSE,NO CHILDREN,less than income average,less than credit loan average,State servant,Secondary / secondary special,Married,House / apartment,WORK PHONE,WEDNESDAY,Other
2,Accept,Cash loans,CAR,HOUSE,CHILDREN,more than income average,more than credit loan average,Commercial associate,Higher education,Married,House / apartment,NO WORK PHONE,SUNDAY,Business Entity Type 3
3,Accept,Revolving loans,NO CAR,HOUSE,CHILDREN,less than income average,less than credit loan average,Working,Secondary / secondary special,Married,House / apartment,NO WORK PHONE,MONDAY,Construction
4,Accept,Cash loans,NO CAR,HOUSE,NO CHILDREN,less than income average,less than credit loan average,Pensioner,Secondary / secondary special,Married,House / apartment,NO WORK PHONE,FRIDAY,XNA


In [46]:
# Report the (rows, columns) size of the tabular dataset.
mydataset.shape

(49650, 14)

In [47]:
# Target distribution among rows that are Working, Cash loans and
# Secondary / secondary special at the same time.
# NOTE(review): this looks like the antecedent of the top 'Reject' rule — confirm
# against the untruncated rule table.
mydataset.loc[
    mydataset['Income type'].eq('Working')
    & mydataset['Contract type'].eq('Cash loans')
    & mydataset['Education type'].eq('Secondary / secondary special'),
    'Target',
].value_counts()

Reject    11570
Accept     8282
Name: Target, dtype: int64

In [25]:
# Target distribution among rows that fail at least one of the three conditions
# (not Working, or not Cash loans, or not Secondary / secondary special).
mydataset.loc[
    mydataset['Income type'].ne('Working')
    | mydataset['Contract type'].ne('Cash loans')
    | mydataset['Education type'].ne('Secondary / secondary special'),
    'Target',
].value_counts()

Accept    16543
Reject    13255
Name: Target, dtype: int64