# Association Mining (Q1, Q2, Q3, Q4, Q6a-Q6d)

#### Import libraries (MLXTEND)

In [22]:
import csv

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

#### Get association mining data and convert the numeric codes to answer labels

In [23]:
def getAMData(filename, output="AMData.csv"):
    """Recode the association-mining survey columns to text labels and save them.

    Reads the columns Q1, Q2, Q3, Q4, Q6a, Q6b, Q6c and Q6d from ``filename``,
    replaces the answer codes listed in the tables below with string labels,
    and writes the result (including the DataFrame index as the first column)
    to ``output``. Values without an entry in a column's table are left
    unchanged.

    Q2 and Q3 share the same issue labels; only their special codes carry a
    question prefix. The Q3 codes "100", "-99" and "-98" used to be written
    with Q2 labels ("Q2O", "Q2R", "Q2DK"), which made those Q3 answers
    indistinguishable from Q2 answers in the exported rows. They now receive
    Q3 labels, in line with the existing "Q3O" / "Q3DK" labels.

    Args:
        filename: Path of the CSV file to read.
        output: Path of the CSV file to write. Defaults to "AMData.csv".

    Returns:
        None. The recoded table is only written to ``output``.
    """
    # Issue labels shared by Q2 and Q3, keyed by the numeric answer code.
    issues = {
        1: "Economy/Jobs",
        2: "Industrial relations",
        4: "Housing affordability",
        5: "Health care",
        6: "Education",
        7: "Defense/National security",
        8: "Terrorism",
        11: "Environment/Global warming",
        12: "Water management",
        13: "Immigration",
        14: "Indigenous affairs",
        15: "Taxation",
        16: "Better government",
        17: "Law/Crime/Justice system",
        18: "Ageing population",
        19: "Values/Moral/Respect",
        20: "Poverty/Social exclusion/Inequality",
        26: "Trade balance/Oversea job loss",
        27: "Rural/Farming issues",
        28: "Social service",
        29: "Infrastructure/Planning/Innovation",
        30: "Alcohol/Drug",
        32: "Foreign influence/Position in the world",
        33: "Family/Community",
        34: "The budget",
        35: "Racism/Bigotry/Intolerance",
        36: "Over population",
        37: "Refugees",
    }
    # Label suffixes shared by Q6a-Q6d; the column name is prepended below.
    q6_suffixes = {1: "VD", 2: "D", 3: "N", 4: "A", 5: "VA", -98: "DK", -99: "R"}

    recodes = {
        "Q1": {1: "Q1VS", 2: "Q1S", 3: "Q1N", 4: "Q1D", 5: "Q1VD", -98: "Q1DK"},
        "Q4": {
            1: "Liberal",
            3: "Labor",
            4: "Greens",
            101: "Other",
            -98: "UDV",
            -99: "UDV",
            97: "UDV",
        },
        # Q2 codes are matched as numbers and Q3 codes as strings, as before.
        # NOTE(review): presumably Q3 contains blank (' ') entries and is
        # therefore read as text - confirm against the input data.
        "Q2": {**issues, 96: "Q2O", 100: "Q2O", -99: "Q2R", -98: "Q2DK", " ": "Q2DK"},
        "Q3": {
            **{str(code): label for code, label in issues.items()},
            "96": "Q3O",
            "100": "Q3O",
            "-99": "Q3R",
            "-98": "Q3DK",
            " ": "Q3DK",
        },
    }
    for question in ("Q6a", "Q6b", "Q6c", "Q6d"):
        recodes[question] = {
            code: question + suffix for code, suffix in q6_suffixes.items()
        }

    am = pd.read_csv(
        filename, usecols=["Q1", "Q2", "Q3", "Q4", "Q6a", "Q6b", "Q6c", "Q6d"]
    )
    for question, mapping in recodes.items():
        am[question] = am[question].replace(mapping)

    am.to_csv(output)

# Recode the preprocessed survey file; the result is written to AMData.csv.
getAMData("preprocessing_data.csv")

#### Read the data and store in basket

In [24]:
def readfile(filename):
    """Read a CSV file into a list of rows, dropping each row's first column.

    Every line is stripped of surrounding whitespace and parsed with the
    standard ``csv`` reader, so a quoted field that contains a comma (as
    ``DataFrame.to_csv`` writes it) stays a single item instead of being
    split in two. The first field of each row is discarded; for a file
    written by ``DataFrame.to_csv`` that field is the index column.

    The header line is not treated specially and is returned as the first
    row. Blank lines yield an empty list.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one list of strings per line of the file.
    """
    with open(filename, 'r') as f:
        # Strip each line first to keep the previous whitespace handling.
        return [fields[1:] for fields in csv.reader(line.strip() for line in f)]

# Load all rows and drop the first one, which is the CSV header line.
basket = readfile("AMData.csv")[1:]
# Preview the first two transactions.
basket[:2]

[['Q1S',
  'Values/Moral/Respect',
  'Better government',
  'Labor',
  'Q6aN',
  'Q6bA',
  'Q6cA',
  'Q6dA'],
 ['Q1S',
  'Environment/Global warming',
  'Water management',
  'Liberal',
  'Q6aD',
  'Q6bD',
  'Q6cD',
  'Q6dD']]

#### Encoding the values using TransactionEncoder

In [25]:
# Learn the set of distinct items, then encode every transaction as a
# boolean row with one column per item.
oht = TransactionEncoder()
oht.fit(basket)
basket_oht = oht.transform(basket)
df = pd.DataFrame(data=basket_oht, columns=oht.columns_)
df.head()

Unnamed: 0,Ageing population,Alcohol/Drug,Better government,Defense/National security,Economy/Jobs,Education,Environment/Global warming,Family/Community,Foreign influence/Position in the world,Greens,...,Refugees,Rural/Farming issues,Social service,Taxation,Terrorism,The budget,Trade balance/Oversea job loss,UDV,Values/Moral/Respect,Water management
0,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
1,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


#### Set minimum support for Apriori algorithm

In [26]:
# Mine frequent itemsets from the one-hot encoded transactions.
# min_support=0.02 keeps itemsets with a support of at least 0.02;
# use_colnames=True reports item names instead of column indices.
frequent_itemsets = apriori(df, min_support=0.02, use_colnames=True)
frequent_itemsets.head()

Unnamed: 0,support,itemsets
0,0.033023,(Alcohol/Drug)
1,0.246047,(Better government)
2,0.295349,(Economy/Jobs)
3,0.045116,(Education)
4,0.242791,(Environment/Global warming)


#### See shape of itemsets

In [27]:
# (rows, columns): one row per frequent itemset found above.
frequent_itemsets.shape

(1075, 2)

#### Generate association rules

In [28]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence is at least 0.4.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.4)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Alcohol/Drug),(Q1S),0.033023,0.528837,0.021395,0.647887,1.225117,0.003931,1.338102
1,(Better government),(Q1D),0.246047,0.329767,0.103256,0.419660,1.272593,0.022118,1.154896
2,(Better government),(Q1S),0.246047,0.528837,0.106512,0.432892,0.818574,-0.023607,0.830817
3,(Q6dVD),(Better government),0.125116,0.246047,0.052093,0.416357,1.692188,0.021309,1.291806
4,(Economy/Jobs),(Q1S),0.295349,0.528837,0.173023,0.585827,1.107764,0.016832,1.137598
...,...,...,...,...,...,...,...,...,...
1273,"(Q6bN, Q6aN, Q1S)","(Q6cN, Q6dN)",0.103256,0.162791,0.043256,0.418919,2.573359,0.026447,1.440779
1274,"(Q6cN, Q6dN, Q1S)","(Q6bN, Q6aN)",0.107442,0.155814,0.043256,0.402597,2.583834,0.026515,1.413094
1275,"(Q6cN, Q6dN, Q6aN)","(Q6bN, Q1S)",0.089767,0.160000,0.043256,0.481865,3.011658,0.028893,1.621200
1276,"(Q6cN, Q6aN, Q1S)","(Q6bN, Q6dN)",0.098605,0.123721,0.043256,0.438679,3.545716,0.031056,1.561102


#### Get association rules with lift >= 1, support >= 0.1 and confidence >= 0.5

In [18]:
# Keep only the rules that meet all three thresholds at once.
rules.query("lift >= 1 and support >= 0.1 and confidence >= 0.5")

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
4,(Economy/Jobs),(Q1S),0.295349,0.528837,0.173023,0.585827,1.107764,0.016832,1.137598
21,(Liberal),(Q1S),0.28093,0.528837,0.183256,0.652318,1.233495,0.034689,1.355154
37,(Q6aA),(Q1S),0.222791,0.528837,0.131628,0.590814,1.117195,0.013808,1.151464
39,(Q6aN),(Q1S),0.313953,0.528837,0.190698,0.607407,1.148572,0.024667,1.200132
41,(Q6bA),(Q1S),0.182791,0.528837,0.109767,0.600509,1.135527,0.013101,1.179407
43,(Q6bN),(Q1S),0.256279,0.528837,0.16,0.624319,1.180551,0.02447,1.254158
45,(Q6cA),(Q1S),0.250698,0.528837,0.168372,0.671614,1.269983,0.035794,1.434784
47,(Q6cN),(Q1S),0.32186,0.528837,0.2,0.621387,1.175007,0.029788,1.244445
48,(Q6dA),(Q1S),0.176744,0.528837,0.115814,0.655263,1.239064,0.022345,1.366732
50,(Q6dN),(Q1S),0.291163,0.528837,0.18093,0.621406,1.175042,0.026953,1.244506
