In [438]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import fpgrowth, association_rules
from mlxtend.preprocessing import TransactionEncoder
import matplotlib.pyplot as plt

In [439]:
data = pd.read_csv('./Dataset/House_Price_Dataset.csv')
data.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,Seller,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Latitude,Longitude,RegionName,PropertyCount
0,Abbotsford,68 Studley St,2,h,,SS,Jellis,03/09/2016,2.5,3067.0,...,1.0,1.0,126.0,,,Yarra City Council,-37.8014,144.9958,Northern Metropolitan,4019.0
1,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,03/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra City Council,-37.7996,144.9984,Northern Metropolitan,4019.0
2,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,04/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra City Council,-37.8079,144.9934,Northern Metropolitan,4019.0
3,Abbotsford,18/659 Victoria St,3,u,,VB,Rounds,04/02/2016,2.5,3067.0,...,2.0,1.0,0.0,,,Yarra City Council,-37.8114,145.0116,Northern Metropolitan,4019.0
4,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,04/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra City Council,-37.8093,144.9944,Northern Metropolitan,4019.0


In [441]:
data = data[~(data['CouncilArea'].isnull())]
data = data[~(data['Seller'].isnull())]
data = data[~(data['Method'].isnull())]

In [443]:
council_areas = np.unique(data["CouncilArea"])

In [444]:
council_areas

array(['Banyule City Council', 'Bayside City Council',
       'Boroondara City Council', 'Brimbank City Council',
       'Cardinia Shire Council', 'Casey City Council',
       'Darebin City Council', 'Frankston City Council',
       'Glen Eira City Council', 'Greater Dandenong City Council',
       'Hobsons Bay City Council', 'Hume City Council',
       'Kingston City Council', 'Knox City Council',
       'Macedon Ranges Shire Council', 'Manningham City Council',
       'Maribyrnong City Council', 'Maroondah City Council',
       'Melbourne City Council', 'Melton City Council',
       'Mitchell Shire Council', 'Monash City Council',
       'Moonee Valley City Council', 'Moorabool Shire Council',
       'Moreland City Council', 'Nillumbik Shire Council',
       'Port Phillip City Council', 'Stonnington City Council',
       'Whitehorse City Council', 'Whittlesea City Council',
       'Wyndham City Council', 'Yarra City Council',
       'Yarra Ranges Shire Council'], dtype=object)

In [375]:
np.unique(data["Seller"]).size

388

In [376]:
seller_gr = data.groupby(['Seller']).size().sort_values(ascending=False).reset_index(name='counts')
seller_gr.head()

Unnamed: 0,Seller,counts
0,Jellis,3359
1,Nelson,3236
2,Barry,3235
3,hockingstuart,2623
4,Marshall,2027


In [377]:
field = "CouncilArea"
cut = data[["Method", "Seller", field]]
cut = cut[(~cut['Method'].isnull()) & (~cut['Seller'].isnull()) & (~cut[field].isnull())]
cut.head()

Unnamed: 0,Method,Seller,CouncilArea
0,SS,Jellis,Yarra City Council
1,S,Biggin,Yarra City Council
2,S,Biggin,Yarra City Council
3,VB,Rounds,Yarra City Council
4,SP,Biggin,Yarra City Council


In [378]:
print(np.unique(cut["Method"]))

['PI' 'PN' 'S' 'SA' 'SN' 'SP' 'SS' 'VB' 'W']


In [379]:
cut["Method"] = cut["Method"].apply(lambda x: 'S' if x.startswith('S') else 'N')

In [380]:
methods = [frozenset(x) for x in np.unique(cut["Method"])]
methods

[frozenset({'N'}), frozenset({'S'})]

In [381]:
np_cut = cut.to_numpy()

In [382]:
te = TransactionEncoder()
te_ary = te.fit(np_cut).transform(np_cut)
df = pd.DataFrame(te_ary, columns=te.columns_)
df.head()

Unnamed: 0,@Realty,A,AIME,ASL,Abercromby's,Ace,Airport,Alex,Alexkarbon,Allan,...,hockingstuart/Marshall,hockingstuart/Sweeney,hockingstuart/Village,hockingstuart/hockingstuart,iHomes,iOne,iProperty,iSell,iTRAK,voglwalpole
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## Association Rule Part

In [417]:
res = fpgrowth(df, min_support=1/10**7, use_colnames=True, max_len=3)

In [418]:
res.head()

Unnamed: 0,support,itemsets
0,0.757904,(S)
1,0.096345,(Jellis)
2,0.034028,(Yarra City Council)
3,0.025736,(Biggin)
4,0.242096,(N)


In [419]:
res[res['itemsets'].map(len) == 3].sort_values(by=['support'], ascending=False)

Unnamed: 0,support,itemsets
427,0.021777,"(S, Boroondara City Council, Jellis)"
786,0.020285,"(Marshall, Boroondara City Council, S)"
1185,0.017100,"(Nelson, S, Darebin City Council)"
2624,0.014604,"(Nelson, S, Moreland City Council)"
802,0.014059,"(Nelson, S, Moonee Valley City Council)"
...,...,...
2824,0.000029,"(S, Property, Glen Eira City Council)"
2817,0.000029,"(Moreland City Council, N, Kelly)"
2816,0.000029,"(Boroondara City Council, N, Kelly)"
2815,0.000029,"(S, Manningham City Council, Kelly)"


In [420]:
ar = association_rules(res, metric="support", min_threshold=1/10**7)

In [421]:
ar.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(S),(Jellis),0.757904,0.096345,0.071269,0.094034,0.976015,-0.001751,0.997449
1,(Jellis),(S),0.096345,0.757904,0.071269,0.739726,0.976015,-0.001751,0.930157
2,(Jellis),(N),0.096345,0.242096,0.025076,0.260274,1.075088,0.001751,1.024574
3,(N),(Jellis),0.242096,0.096345,0.025076,0.103579,1.075088,0.001751,1.00807
4,(Boroondara City Council),(Jellis),0.10544,0.096345,0.030327,0.287619,2.985311,0.020168,1.2685


In [422]:
ar = ar[(ar['antecedents'].map(len) == 2) 
       & ((ar['consequents'].map(len) == 1) 
            & (ar['consequents'].isin(methods)))
  ].sort_values(by=['antecedent support', 'lift'], ascending=False)

In [423]:
ar.head(100)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
7,"(Boroondara City Council, Jellis)",(N),0.030327,0.242096,0.008550,0.281930,1.164540,0.001208,1.055474
14,"(Boroondara City Council, Jellis)",(S),0.030327,0.757904,0.021777,0.718070,0.947441,-0.001208,0.858708
1562,"(Marshall, Boroondara City Council)",(S),0.026740,0.757904,0.020285,0.758584,1.000896,0.000018,1.002814
1556,"(Marshall, Boroondara City Council)",(N),0.026740,0.242096,0.006456,0.241416,0.997194,-0.000018,0.999105
3257,"(Nelson, Darebin City Council)",(S),0.021662,0.757904,0.017100,0.789404,1.041561,0.000682,1.149573
...,...,...,...,...,...,...,...,...,...
6350,"(HAR, Whittlesea City Council)",(N),0.005566,0.242096,0.001090,0.195876,0.809087,-0.000257,0.942522
3327,"(Darebin City Council, McGrath)",(N),0.005451,0.242096,0.001348,0.247368,1.021780,0.000029,1.007006
3321,"(Darebin City Council, McGrath)",(S),0.005451,0.757904,0.004103,0.752632,0.993043,-0.000029,0.978684
11787,"(Nelson, Brimbank City Council)",(S),0.005021,0.757904,0.004074,0.811429,1.070621,0.000269,1.283840


In [424]:
ar = ar[ar['antecedents'].map({'Boroondara City Council'}.intersection).astype(bool)]
ar['consequents_unpacked'] = (ar.apply(lambda x: 0 if ", ".join(list(x.consequents)) == "N" else 1, axis=1))
ar['ratio'] = (ar.apply(lambda x: 0 if x.consequents_unpacked == "N" else x.support / find_opposite_support(ar, x.antecedents), axis=1))
ar = ar[~(ar['consequents_unpacked'] == 0)].sort_values(['ratio'], ascending=[False])

In [435]:
ar.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,consequents_unpacked,ratio
8966,"(Boroondara City Council, J)",(S),0.000603,0.757904,0.000574,0.952381,1.256598,0.000117,5.084008,1,20.0
10106,"(Boroondara City Council, Abercromby's)",(S),0.000287,0.757904,0.000258,0.9,1.187485,4.1e-05,2.420956,1,9.0
11320,"(Boroondara City Council, Domain)",(S),0.000258,0.757904,0.00023,0.888889,1.172825,3.4e-05,2.17886,1,8.0
1122,"(Philip, Boroondara City Council)",(S),0.000201,0.757904,0.000172,0.857143,1.130938,2e-05,1.694669,1,6.0
381,"(Nelson, Boroondara City Council)",(S),0.003213,0.757904,0.00264,0.821429,1.083816,0.000204,1.355735,1,4.6
2690,"(Boroondara City Council, Thomson)",(S),0.000287,0.757904,0.00023,0.8,1.055542,1.2e-05,1.210478,1,4.0
1242,"(Boroondara City Council, LITTLE)",(S),0.000545,0.757904,0.00043,0.789474,1.041653,1.7e-05,1.149954,1,3.75
9796,"(Garvey, Boroondara City Council)",(S),0.000402,0.757904,0.000316,0.785714,1.036693,1.1e-05,1.129779,1,3.666667
5370,"(Boroondara City Council, Woodards)",(S),0.001836,0.757904,0.001435,0.78125,1.030803,4.3e-05,1.106723,1,3.571429
1562,"(Marshall, Boroondara City Council)",(S),0.02674,0.757904,0.020285,0.758584,1.000896,1.8e-05,1.002814,1,3.142222


## Functional Implementation

In [None]:
def find_opposite_support(df, antecedents):
    item = df[(df['antecedents'] == antecedents) & (df['consequents_unpacked'] == 0)]
    if not item.empty: 
        return item.iloc[0].support
    else: 
        return 1

In [460]:
def select_seller(data, council_area, metric, min_support_association_rule=1/10**7):
    st = frozenset({council_area})
    res = fpgrowth(data, min_support=1/10**7, use_colnames=True, max_len=3)
    res[res['itemsets'].map(len) == 3].sort_values(by=['support'], ascending=False)
    ar = association_rules(res, metric="support", min_threshold=min_support)
    ar = ar[(ar['antecedents'].map(len) == 2) 
       & ((ar['consequents'].map(len) == 1) 
            & (ar['consequents'].isin(methods)))
    ].sort_values(by=['antecedent support', 'lift'], ascending=False)
    ar['consequents_unpacked'] = (ar.apply(lambda x: 0 if ", ".join(list(x.consequents)) == "N" else 1, axis=1))
    ar = ar[ar['antecedents'].map(st.intersection).astype(bool)]
    if metric == 'ratio':
        ar['ratio'] = (ar.apply(lambda x: 0 if x.consequents_unpacked == "N" else x.support / find_opposite_support(ar, x.antecedents), axis=1))
        ar = ar[~(ar['consequents_unpacked'] == 0)].sort_values(['ratio'], ascending=[False])
    elif metric == 'interest':
        ar = ar[(ar['consequents_unpacked'] == 1)].sort_values(['lift'], ascending=[False])
    else: 
        return ''
    if not ar.empty:
        an = ar.iloc[0].antecedents
        res = an.difference(st)
        if len(res) == 0:
            return ''
        else:
            return list(res)[0]
    else:
        return ''

In [474]:
ratio = []
for x in council_areas:
    ratio.append(select_seller(df, x, 'ratio'))

In [475]:
interest = []
for x in council_areas:
    interest.append(select_seller(df, x, 'interest', 0.0005))

In [476]:
for a, b in zip(ratio, interest):
    print(f'{a} - {b}')

William - William
Thomson - Nick
J - J
Ray - Ray
Ray - 
Harcourts - O'Brien
Biggin - Brad
Harcourts - Harcourts
C21 - C21
Del - C21
Barlow - Barlow
Ray - Ray
C21 - C21
Ray - Ray
Raine - Raine
McGrath - Ray
Jellis - Barry
hockingstuart - hockingstuart
Brad - Caine
hockingstuart - hockingstuart
LJ - 
Biggin - Biggin
Moonee - Moonee
Ryder - 
Rendina - Collins
Morrison - Barry
Pride - Chisholm
Biggin - Biggin
hockingstuart - hockingstuart
RW - RW
LJ - LJ
Woodards - Marshall
Barry - Fletchers
