In [1]:
import jpype
import jpype.imports
import numpy as np
import tempfile
import os
from sklearn.base import BaseEstimator, ClassifierMixin

In [2]:
# Internal configuration for the Java bridge.
# jar_path is put on the JVM classpath when the JVM is started. The default is
# the original hard-coded location; set the LORD_JAR_PATH environment variable
# to use a jar stored elsewhere without editing the notebook.
jar_path = os.environ.get("LORD_JAR_PATH") or "/LORD-master/lord.jar"
# Fully qualified name of the Java learner class that LocalRuleClassifier instantiates.
concrete_class_name = "rl.eg.Lord"

In [3]:
class InfoBase:
    """Read-only view of the data a Java learner exposes, plus a support-count helper.

    On construction the learner's selector n-lists, constructing selectors,
    selector-id records and class ids are fetched through its getters, and the
    Java classes ``rl.RuleSearcher`` and ``rl.INlist`` are resolved via JPype.
    """

    def __init__(self, learner):
        """Capture the learner and the values returned by its getters.

        Parameters
        ----------
        learner : Java learner object
            Must provide ``getSelectorNlists``, ``getConstructingSelectors``,
            ``getSelectorIDRecords`` and ``getClassIDs``.
        """
        self.learner = learner
        self.selector_nlists = learner.getSelectorNlists()
        # Copied into Python lists; the other two values stay as returned by Java.
        self.constructing_selectors = list(learner.getConstructingSelectors())
        self.selector_id_records = learner.getSelectorIDRecords()
        self.class_ids = list(learner.getClassIDs())
        self.RuleSearcher = jpype.JClass("rl.RuleSearcher")
        self.INlist = jpype.JClass("rl.INlist")

    def support_count(self, selector_ids):
        """Return ``supportCount()`` of the n-list computed for ``selector_ids``.

        Each id is used as an index into ``selector_nlists``; the selected
        n-lists are packed into a Java ``INlist[]`` and handed to
        ``RuleSearcher.calculate_nlist_direct``.
        """
        picked_nlists = [self.selector_nlists[sid] for sid in selector_ids]
        java_nlists = jpype.JArray(self.INlist)(picked_nlists)
        combined = self.RuleSearcher.calculate_nlist_direct(java_nlists)
        return combined.supportCount()

In [4]:
# Mixins go to the left of BaseEstimator, as scikit-learn documents, so that
# ClassifierMixin's overrides take precedence in the method resolution order.
class LocalRuleClassifier(ClassifierMixin, BaseEstimator):
    """scikit-learn style classifier backed by a Java rule learner reached through JPype.

    The Java class named by the module-level ``concrete_class_name`` is
    instantiated once per estimator. Training data is handed to it as the path
    of a CSV file; predictions are requested one row at a time.

    Parameters
    ----------
    metric : str, default="MESTIMATE"
        Name of a member of the Java enum
        ``evaluations.HeuristicMetricFactory$METRIC_TYPES``; resolved with
        ``getattr`` when fitting.
    metric_arg : float, default=0.1
        Converted with ``float()`` and passed to the learner's ``learning`` call.

    Attributes
    ----------
    classes_ : numpy.ndarray
        Sorted unique labels of ``y`` when labels were given, otherwise the
        string form of each Java class id.
    class_id_to_label_ : dict
        Maps each id returned by the learner's ``getClassIDs()`` to a label.
    train_file_ : str
        Path of the CSV file the learner was trained on.

    Notes
    -----
    NOTE(review): the same Java learner object is reused if ``fit`` is called
    more than once; whether the Java side resets its state cannot be seen from
    this file - confirm.
    """

    def __init__(self, metric="MESTIMATE", metric_arg=0.1):
        self.metric = metric
        self.metric_arg = metric_arg

        # Fallback start without extra JVM options, used only when nothing
        # else has started the JVM yet.
        if not jpype.isJVMStarted():
            jpype.startJVM(classpath=[jar_path])

        self.RuleLearnerClass = jpype.JClass(concrete_class_name)
        self.METRIC_TYPES = jpype.JClass("evaluations.HeuristicMetricFactory$METRIC_TYPES")
        self.IntHolder = jpype.JClass("rl.IntHolder")
        self.learner = self.RuleLearnerClass()

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path``; a failure to delete is ignored so the original error surfaces."""
        try:
            os.remove(path)
        except OSError:
            pass

    def _write_temp_csv(self, X, y):
        """Write one comma-joined line per sample (features, then label) to a new temp file.

        Returns the path of the file. The file is removed again if writing
        fails, so a failed call leaves nothing behind.

        NOTE(review): no header row is written and values are not quoted;
        whether the Java reader expects a header cannot be determined from
        this file - confirm.
        """
        fd, path = tempfile.mkstemp(suffix=".csv", text=True)
        try:
            with os.fdopen(fd, 'w') as tmp:
                for xi, yi in zip(X, y):
                    tmp.write(",".join(map(str, list(xi) + [str(yi)])) + "\n")
        except BaseException:
            self._remove_quietly(path)
            raise
        return path

    def fit(self, X, y):
        """Train on in-memory data by writing it to a temp CSV and calling ``fit_csv``.

        Parameters
        ----------
        X : iterable of rows
            Each row is an iterable of feature values; values are written with ``str``.
        y : sequence of labels
            One label per row of ``X``.

        Returns
        -------
        self

        On success the temp file is kept because ``train_file_`` refers to it;
        if training raises, the file is removed before the error propagates.
        """
        file_path = self._write_temp_csv(X, y)
        try:
            return self.fit_csv(file_path, y)
        except BaseException:
            self._remove_quietly(file_path)
            raise

    def fit_csv(self, file_path, y=None):
        """Train the Java learner on the CSV file at ``file_path``.

        Parameters
        ----------
        file_path : str
            Path passed to the learner's ``fetch_information``.
        y : sequence of labels, optional
            When given, its sorted unique values become ``classes_`` and are
            paired with the learner's class ids by position. When omitted,
            each class id is labelled with its own string form.

        Returns
        -------
        self

        Raises
        ------
        AttributeError
            If ``metric`` is not a member of the Java metric enum.
        """
        metric_enum = getattr(self.METRIC_TYPES, self.metric)
        self.learner.fetch_information(file_path)
        self.learner.learning(metric_enum, float(self.metric_arg))

        class_ids = list(self.learner.getClassIDs())
        self.train_file_ = file_path

        if y is not None:
            unique_labels = list(np.unique(y))
            # Assumes the i-th id from getClassIDs() belongs to the i-th sorted
            # unique label - TODO confirm against the Java learner.
            self.class_id_to_label_ = {cid: unique_labels[i] for i, cid in enumerate(class_ids)}
            self.classes_ = np.array(unique_labels)
        else:
            self.class_id_to_label_ = {cid: str(cid) for cid in class_ids}
            self.classes_ = np.array([str(cid) for cid in class_ids])

        return self

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Each row is converted to strings and extended with a trailing ``"?"``
        in the class position before being passed to the learner. The class id
        the learner stores in the ``IntHolder`` is translated through
        ``class_id_to_label_``; an id missing from that mapping falls back to
        ``classes_[0]``.

        Returns
        -------
        numpy.ndarray
            One label per input row.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If the estimator has not been fitted yet.
        """
        # Imported here so this method does not depend on edits to the import cell.
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, "class_id_to_label_")

        results = []
        for row in X:
            row_str = list(map(str, row)) + ["?"]
            holder = self.IntHolder(0)
            self.learner.predict(row_str, holder)

            class_id = holder.value
            label = self.class_id_to_label_.get(class_id, self.classes_[0])
            results.append(label)

        return np.array(results)

    def get_info_base(self):
        """Return a new ``InfoBase`` built from the current Java learner."""
        return InfoBase(self.learner)

    def set_params(self, **params):
        """Set each keyword as an attribute of the same name and return ``self``.

        Overrides the inherited implementation; keys are not validated.
        """
        for key, value in params.items():
            setattr(self, key, value)
        return self

    def get_params(self, deep=True):
        """Return the two constructor parameters as a dict; ``deep`` is accepted but unused."""
        return {
            "metric": self.metric,
            "metric_arg": self.metric_arg
        }

In [5]:
# Start the JVM with the LORD jar on the classpath.
# "-ea" enables Java assertions; "--enable-native-access=ALL-UNNAMED" is passed
# straight through to the JVM as a launch option.
# The guard makes the cell safe to re-run and safe to run after a
# LocalRuleClassifier has already started the JVM: calling startJVM while a
# JVM is running raises an error.
if not jpype.isJVMStarted():
    jpype.startJVM(
        jpype.getDefaultJVMPath(),
        "-ea",
        "--enable-native-access=ALL-UNNAMED",
        classpath=[jar_path]
    )

In [6]:
# Toy example: four training rows with four categorical attributes each,
# and one label per row.
X = np.array(
    [
        ["sunny", "hot", "high", "FALSE"],
        ["sunny", "hot", "high", "TRUE"],
        ["overcast", "hot", "high", "FALSE"],
        ["rainy", "mild", "high", "FALSE"],
    ]
)
y = ["no", "no", "yes", "yes"]

# A single query row, identical to the third training row.
X_test = np.array([["overcast", "hot", "high", "FALSE"]])

clf = LocalRuleClassifier(metric="MESTIMATE", metric_arg=0.1)
clf.fit(X, y)

print(clf.predict(X_test))

	SearchRuleThread 3 founds 0 rules, finished in 0 ms
	SearchRuleThread 0 founds 1 rules, finished in 1 ms
	SearchRuleThread 1 founds 1 rules, finished in 0 ms
	SearchRuleThread 2 founds 1 rules, finished in 0 ms
	FilterRuleThread 1 finished in 0 ms
	FilterRuleThread 0 finished in 0 ms
	FilterRuleThread 2 finished in 0 ms
	FilterRuleThread 3 finished in 0 ms
['yes']


In [7]:
import pandas as pd

# Read the CSV and split it column-wise: every column except the last is a
# feature, the last column is the label.
# NOTE(review): absolute, user-specific path - the cell only runs on a machine
# that has this exact location.
csv_file = "/Users/ndzaitsev/Desktop/JKU/BT/LORD-master/data/inputs/vote/vote_test_01.csv"
df = pd.read_csv(csv_file)

X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [8]:
# Fresh estimator trained on the in-memory arrays X and y.
clf = LocalRuleClassifier(metric="MESTIMATE", metric_arg=0.1)

clf.fit(X, y)

	SearchRuleThread 0 founds 2 rules, finished in 2 ms
	SearchRuleThread 1 founds 3 rules, finished in 2 ms
	SearchRuleThread 3 founds 4 rules, finished in 3 ms
	SearchRuleThread 2 founds 4 rules, finished in 2 ms
	FilterRuleThread 0 finished in 0 ms
	FilterRuleThread 3 finished in 0 ms
	FilterRuleThread 2 finished in 0 ms
	FilterRuleThread 1 finished in 0 ms


In [9]:
# Four query records with sixteen attribute values each; some entries are "?".
X_test = [
    ["y", "y", "y", "n", "n", "n", "y", "y", "y", "n", "y", "n", "n", "y", "n", "y"],
    ["y", "y", "n", "y", "y", "y", "n", "n", "n", "y", "n", "y", "y", "y", "n", "y"],
    ["?", "?", "?", "n", "n", "n", "y", "y", "y", "y", "n", "n", "y", "n", "y", "y"],
    ["n", "?", "n", "y", "y", "y", "n", "n", "n", "n", "y", "y", "y", "y", "n", "y"],
]

print(clf.predict(X_test))

['republican' 'democrat' 'republican' 'democrat']


In [10]:
# Reads the CSV again and rebinds csv_file, df, X and y; csv_file is the path
# handed to fit_csv afterwards.
# NOTE(review): absolute, user-specific path - the cell only runs on a machine
# that has this exact location.
csv_file = "/Users/ndzaitsev/Desktop/JKU/BT/LORD-master/data/inputs/vote/vote_test_01.csv"
df = pd.read_csv(csv_file)

X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [11]:
# Fresh estimator trained directly from the CSV path. No labels are passed, so
# the fitted classes are the string forms of the learner's class ids.
clf = LocalRuleClassifier(metric="MESTIMATE", metric_arg=0.1)

clf.fit_csv(csv_file)

	SearchRuleThread 1 founds 4 rules, finished in 2 ms
	SearchRuleThread 0 founds 4 rules, finished in 4 ms
	SearchRuleThread 2 founds 0 rules, finished in 0 ms
	SearchRuleThread 3 founds 0 rules, finished in 0 ms
	FilterRuleThread 1 finished in 0 ms
	FilterRuleThread 0 finished in 0 ms
	FilterRuleThread 3 finished in 0 ms
	FilterRuleThread 2 finished in 0 ms
