In [1]:
import weka.core.jvm as jvm
from weka.core.converters import Loader
import weka.core.converters as converters
from weka.filters import Filter
from weka.associations import Associator
import arff
import pandas as pd
import numpy as np

In [2]:
# Bring up the JVM; the weka.* wrappers used below need it running.
jvm.start()

# Attribute information (integer code -> meaning):
#   Class:        {0, 1}
#   head_shape:   {1, 2, 3}    -> {round, square, octagon}
#   body_shape:   {1, 2, 3}    -> {round, square, octagon}
#   is_smiling:   {1, 2}       -> {yes, no}
#   holding:      {1, 2, 3}    -> {sword, flag, balloon}
#   jacket_color: {1, 2, 3, 4} -> {red, yellow, green, blue}
#   has_tie:      {1, 2}       -> {yes, no}

# Column order as stored in the CSV: class label first, then the attributes.
column_names = ['Class', 'head_shape', 'body_shape', 'is_smiling', 'holding', 'jacket_color', 'has_tie']
# Same columns with the class label rotated to the end; the later cells call
# class_is_last() on the dataset built from this ordering.
attr_names = column_names[1:] + column_names[:1]

# names= supplies the header, so every row of the file is read as data.
train_set = pd.read_csv('monks-1.csv', names=column_names)[attr_names]

# Dump the reordered rows to ARFF for the Weka loader.
data = train_set.values.tolist()
arff.dump('monk-1.arff', data, names=attr_names)
train_set.head()

DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['/Users/mniu/Applications/anaconda2/lib/python2.7/site-packages/javabridge/jars/rhino-1.7R4.jar', '/Users/mniu/Applications/anaconda2/lib/python2.7/site-packages/javabridge/jars/runnablequeue.jar', '/Users/mniu/Applications/anaconda2/lib/python2.7/site-packages/javabridge/jars/cpython.jar', '/Users/mniu/Applications/anaconda2/lib/python2.7/site-packages/weka/lib/python-weka-wrapper.jar', '/Users/mniu/Applications/anaconda2/lib/python2.7/site-packages/weka/lib/weka.jar']
DEBUG:weka.core.jvm:MaxHeapSize=default
DEBUG:javabridge.jutil:Creating JVM object
DEBUG:javabridge.jutil:Launching VM in non-python thread
DEBUG:javabridge.jutil:Attaching to VM in monitor thread
DEBUG:javabridge.jutil:Signalling caller


Unnamed: 0,head_shape,body_shape,is_smiling,holding,jacket_color,has_tie,Class
0,1,1,1,1,3,1,1
1,1,1,1,1,3,2,1
2,1,1,1,3,2,1,1
3,1,1,1,3,3,2,1
4,1,1,2,1,2,1,1


In [3]:
# Read the ARFF file written by the previous cell back in as a Weka dataset.
loader = Loader(classname="weka.core.converters.ArffLoader")
arff_data = loader.load_file('monk-1.arff')

# The values were written as numbers; run NumericToNominal over the attribute
# range first-last so that every column, class included, becomes nominal.
nominal = Filter(
    classname="weka.filters.unsupervised.attribute.NumericToNominal",
    options=["-R", "first-last"])
nominal.inputformat(arff_data)  # must be set before filter() is called
nominal_data = nominal.filter(arff_data)

# Class was moved to the last column when the ARFF file was produced.
nominal_data.class_is_last()

In [4]:
# Apriori settings consumed by the next cell: rule_number is passed as the -N
# option (number of rules requested) and conf_min as the -C option.
rule_number, conf_min = 1000, 0.3

In [5]:
# Mine class association rules with Apriori. Option meanings, per the Weka
# Apriori documentation: -N rules requested, -T 0 ranks rules by confidence,
# -C minimum metric score, -D support decrement per iteration, -U / -M upper
# and lower bound for minimum support, -S significance level (-1.0 disables
# the test), -A mines class association rules, -c -1 takes the last attribute
# as the class.
associator = Associator(classname="weka.associations.Apriori",
                        options=['-N', str(rule_number), '-T', '0', '-C', str(conf_min), '-D', '0.05',
                                 '-U', '1.0', '-M', '0.1', '-S', '-1.0', '-A', '-c', '-1'])
associator.build_associations(nominal_data)

# The textual report lists the rules after a 'Best rules found:' header, one
# per line, each prefixed with a running number: '  1. <rule text>'.
Rule_report = str(associator).split('\n')

# Split every candidate line on its first '. ' instead of slicing at fixed
# columns: fixed slices only work while the number prefix is exactly three
# characters wide. Lines without a numeric prefix (blank separators, trailing
# text) are skipped, and a report without the header yields an empty list
# rather than leaving best_rules undefined.
best_rules = []
if 'Best rules found:' in Rule_report:
    header_at = Rule_report.index('Best rules found:')
    for report_line in Rule_report[header_at + 1:]:
        number, separator, rule_text = report_line.partition('. ')
        if separator and number.strip().isdigit():
            best_rules.append(rule_text)

# Number of rules actually reported; can be smaller than rule_number.
actual_rule_number = len(best_rules)

In [6]:
# Show the first mined rule (its numeric prefix has already been removed).
best_rules[0]

'jacket_color=1 29 ==> Class=1 29    conf:(1)'

In [7]:
# Show how many rules Apriori reported; this can be fewer than rule_number.
actual_rule_number

67

In [8]:
# Turn every rule string into a token list with a fixed layout: one slot per
# attribute in attr_names order, followed by the remaining rule tokens
# (support, '==>', class item, support, padding, confidence).
rule_list_fit = []
for rule_text in best_rules[:actual_rule_number]:
    a = rule_text.split(' ')

    # Rewrite the trailing 'conf:(x)' token with exactly two decimals, e.g.
    # 'conf:(1)' -> 'conf:(1.00)' and 'conf:(0.9)' -> 'conf:(0.90)', so that the
    # value always occupies the same character positions. Parsing the number
    # replaces the earlier padding that was keyed on token lengths 8 and 10.
    conf_token = a[-1]
    if conf_token.startswith('conf:(') and conf_token.endswith(')'):
        try:
            a[-1] = 'conf:(%.2f)' % float(conf_token[len('conf:('):-1])
        except ValueError:
            pass  # not a plain number: keep the token exactly as reported

    # Attribute names that occur as 'name=value' items in this rule. Comparing
    # whole names avoids false hits when one name is a substring of another.
    present = set(token.partition('=')[0] for token in a if '=' in token)

    # Insert a 'name= ' placeholder at the attribute's own position for every
    # attribute the rule does not mention; this relies on the rule listing its
    # items in attr_names order.
    for position, name in enumerate(attr_names):
        if name not in present:
            a.insert(position, name + '= ')

    rule_list_fit.append(a)

In [9]:
# Flatten each aligned token list into one table row:
#   one value per antecedent attribute, antecedent support, '-->',
#   class label, consequent support, confidence.
support_at = len(attr_names) - 1  # first token after the antecedent slots

rule_table = []
for tokens in rule_list_fit[:actual_rule_number]:
    # Last character of 'name=value' is the value (' ' for a placeholder).
    row = [token[-1:] for token in tokens[:support_at]]
    row.append(tokens[support_at])
    row.append('-->')
    # Class label: last character of the 'Class=<label>' token. The offset
    # used to be derived from the length of an unrelated token of the second
    # rule, which only worked by accident and failed with fewer than two rules.
    row.append(tokens[support_at + 2][-1:])
    row.append(tokens[support_at + 3])
    # Confidence: the text between 'conf:(' and the closing parenthesis.
    row.append(tokens[-1][len('conf:('):-1])
    rule_table.append(row)

# Header: antecedent attributes, 'Support', an unnamed column for the arrow,
# 'Class', then 'Support' again (for the consequent) and 'Confidence'.
attribute_columns = train_set.columns.values.tolist()
column_names = (attribute_columns[:6] + ['Support', ''] +
                attribute_columns[6:] + ['Support', 'Confidence'])
df = pd.DataFrame(rule_table, columns=column_names)
df.to_csv('monk-1_rules.csv')
df.head()

Unnamed: 0,head_shape,body_shape,is_smiling,holding,jacket_color,has_tie,Support,Unnamed: 8,Class,Support.1,Confidence
0,,,,,1.0,,29,-->,1,29,1.0
1,3.0,3.0,,,,,17,-->,1,17,1.0
2,,,1.0,,1.0,,17,-->,1,17,1.0
3,,,,,1.0,1.0,16,-->,1,16,1.0
4,2.0,2.0,,,,,15,-->,1,15,1.0
