In [5]:
from collections import defaultdict

import pandas as pd
import wittgenstein3 as lw

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset and wrap features/target in pandas objects.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
# The target Series is created without a name.
y = pd.Series(iris.target)

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
X_train

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
22,4.6,3.6,1.0,0.2
15,5.7,4.4,1.5,0.4
65,6.7,3.1,4.4,1.4
11,4.8,3.4,1.6,0.2
42,4.4,3.2,1.3,0.2
...,...,...,...,...
71,6.1,2.8,4.0,1.3
106,4.9,2.5,4.5,1.7
14,5.8,4.0,1.2,0.2
92,5.8,2.6,4.0,1.2


In [6]:
ripper_clf = lw.RIPPER(k=2)

In [10]:
# Fit on a single frame holding features and target side by side.
# y_train is an unnamed Series, so pd.concat(axis=1) labels its column 0 -- hence class_feat=0.
# pos_class=2 selects the iris target encoded as 2 as the positive class
# (presumably virginica -- confirm against iris.target_names).
ripper_clf.fit(pd.concat([X_train,y_train], axis=1), class_feat = 0, pos_class = 2)

In [11]:
ripper_clf.score(X_test, y_test)

0.9666666666666667

In [12]:
ripper_clf.ruleset_.out_pretty()

[[petalwidth(cm)=2.1] V
[petalwidth(cm)=1.8] V
[petalwidth(cm)=1.9] V
[petalwidth(cm)=2.3] V
[petalwidth(cm)=2.0] V
[petalwidth(cm)=2.5] V
[petallength(cm)=5.6] V
[sepallength(cm)=7.7]]


In [155]:
ripper_clf.replace_rule_at(0, 'petal width (cm)=1.7-2.1')

In [157]:
ripper_clf.remove_rule_at(2)

In [158]:
ripper_clf.ruleset_[0]

<Rule [petalwidth(cm)=1.7-2.1]>

In [161]:
ripper_clf._ensure_has_bin_transformer()

In [162]:
ripper_clf.bin_transformer_

{'sepal length (cm)': ['<4.89', '4.89-5.0', '5.0-5.2', '5.2-5.5', '5.5-5.75', '5.75-6.0', '6.0-6.3', '6.3-6.5', '6.5-6.9', '>6.9'], 'sepal width (cm)': ['<2.5', '2.5-2.7', '2.7-2.8', '2.8-3.0', '3.0-3.1', '3.1-3.23', '3.23-3.4', '3.4-3.61', '>3.61'], 'petal length (cm)': ['<1.4', '1.4-1.5', '1.5-1.7', '1.7-3.96', '3.96-4.25', '4.25-4.6', '4.6-4.93', '4.93-5.3', '5.3-5.7', '>5.7'], 'petal width (cm)': ['<0.2', '0.2-0.4', '0.4-1.1', '1.1-1.3', '1.3-1.5', '1.5-1.7', '1.7-1.9', '1.9-2.1', '>2.1']}

In [183]:
# Arguments bundled for preprocess.preprocess_prediction_data; most values are
# read straight off the fitted classifier.
preprocess_params = dict(
    X=X_test,
    class_feat=0,
    pos_class=2,
    bin_transformer_=ripper_clf.bin_transformer_,
    user_requested_feature_names=None,
    selected_features_=ripper_clf.selected_features_,
    trainset_features_=ripper_clf.trainset_features_,
    verbosity=ripper_clf.verbosity,
)

In [164]:
ripper_clf.selected_features_

['petal width (cm)', 'petal length (cm)']

In [184]:
X_df = preprocess.preprocess_prediction_data(preprocess_params)

In [185]:
X_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
73,6.0-6.3,2.7-2.8,4.6-4.93,1.1-1.3
18,5.5-5.75,>3.61,1.5-1.7,0.2-0.4
118,>6.9,2.5-2.7,>5.7,>2.1
78,5.75-6.0,2.8-3.0,4.25-4.6,1.3-1.5
76,6.5-6.9,2.7-2.8,4.6-4.93,1.3-1.5
31,5.2-5.5,3.23-3.4,1.4-1.5,0.2-0.4
64,5.5-5.75,2.8-3.0,1.7-3.96,1.1-1.3
141,6.5-6.9,3.0-3.1,4.93-5.3,>2.1
68,6.0-6.3,<2.5,4.25-4.6,1.3-1.5
82,5.75-6.0,2.5-2.7,1.7-3.96,1.1-1.3


In [167]:
ripper_clf.out_model()

[[petalwidth(cm)=1.7-2.1] V
[petalwidth(cm)=>2.1] V
[petallength(cm)=>5.7]]


In [168]:
ruleset = ripper_clf.ruleset_

In [169]:
from wittgenstein import discretize

In [170]:
ripper_clf.bin_transformer_.construct_from_ruleset(ripper_clf.ruleset_)

{'petal width (cm)': [('1.7', '2.1')]}

In [191]:
ripper_clf.bin_transformer_

{'sepal length (cm)': ['<4.89', '4.89-5.0', '5.0-5.2', '5.2-5.5', '5.5-5.75', '5.75-6.0', '6.0-6.3', '6.3-6.5', '6.5-6.9', '>6.9'], 'sepal width (cm)': ['<2.5', '2.5-2.7', '2.7-2.8', '2.8-3.0', '3.0-3.1', '3.1-3.23', '3.23-3.4', '3.4-3.61', '>3.61'], 'petal length (cm)': ['<1.4', '1.4-1.5', '1.5-1.7', '1.7-3.96', '3.96-4.25', '4.25-4.6', '4.6-4.93', '4.93-5.3', '5.3-5.7', '>5.7'], 'petal width (cm)': ['<0.2', '0.2-0.4', '0.4-1.1', '1.1-1.3', '1.3-1.5', '1.5-1.7', '1.7-1.9', '1.9-2.1', '>2.1']}

In [172]:
# Maps feature name -> list of (floor, ceil) string pairs found in the ruleset.
# Uses collections.defaultdict directly rather than reaching for the copy that
# happens to be imported inside wittgenstein.discretize.
discrete = defaultdict(list)

In [173]:
# Walk every condition in the ruleset and record, per feature, the (floor, ceil)
# pair parsed from its value. Conditions for which find_floor_ceil returns a
# falsy result (None) are printed but not recorded.
# NOTE: `discrete` is created in another cell and is appended to here, so
# re-running this cell without recreating it adds the same ranges again.
for cond in ruleset.get_conds():
    print(cond)
    # Original call, kept for reference: floor_ceil = self.find_floor_ceil(cond.val)
    # The notebook-level find_floor_ceil is called instead because find_floor_ceil
    # is not an attribute of bin_transformer - Niccolò
    floor_ceil = find_floor_ceil(cond.val) # notebook-level helper
    if floor_ceil:
        discrete[cond.feature].append(floor_ceil)
        print(floor_ceil)
# Order each feature's ranges by the numeric value of their floor.
for feat, ranges in discrete.items():
    ranges.sort(key=lambda x: float(x[0]))
    print(ranges)

petal width (cm)=1.7-2.1
('1.7', '2.1')
petal width (cm)=>2.1
petal length (cm)=>5.7
[('1.7', '2.1')]


In [174]:
def find_floor_ceil(value):
    """Split a "min-max" range string into its two bounds.

    The separator is the first dash that is not the leading character (a
    leading dash is the minus sign of a negative floor). A dash directly after
    the separator is accepted as the minus sign of a negative ceil; any other
    later dash makes the pattern invalid.

    Parameters
    ----------
    value : str
        Candidate range such as "1.7-2.1", "-3--1" or ">2.1".

    Returns
    -------
    tuple of (str, str) or None
        (floor, ceil) as the original substrings when both parse with float();
        None for anything else, including values that are not strings.

    Notes
    -----
    Bounds written in scientific notation with a negative exponent
    (e.g. "1e-05-2e-05") are not recognised, because the exponent's dash is
    taken as the separator.
    """
    # Condition values may be raw numbers rather than range strings; those are
    # simply "not a range" and must not raise inside enumerate().
    if not isinstance(value, str):
        return None

    def _parses_as_float(text):
        # Local check so this cell does not depend on a helper defined elsewhere.
        try:
            float(text)
        except ValueError:
            return False
        return True

    split_idx = 0  # 0 doubles as "no separator found yet": index 0 is never a separator
    for i, char in enumerate(value):
        if char != "-" or i == 0:
            continue
        if not split_idx:
            split_idx = i
        # A dash after the separator that is not the ceil's own minus sign
        elif i > split_idx + 1:
            return None

    floor = value[:split_idx]
    ceil = value[split_idx + 1:]
    if _parses_as_float(floor) and _parses_as_float(ceil):
        return (floor, ceil)
    return None

In [175]:
ruleset.get_conds()

[<Cond petal width (cm)=1.7-2.1>,
 <Cond petal width (cm)=>2.1>,
 <Cond petal length (cm)=>5.7>]

In [176]:
def is_valid_decimal(s):
    """Return True if float(s) succeeds, False otherwise.

    Anything float() accepts counts as valid, which includes strings such as
    "nan" and "inf" as well as surrounding whitespace.

    Only conversion failures are treated as "not a decimal". Unrelated
    exceptions such as KeyboardInterrupt are allowed to propagate instead of
    being silently turned into False.
    """
    try:
        float(s)
    except (TypeError, ValueError, OverflowError):
        return False
    return True

In [177]:
discrete

defaultdict(list, {'petal width (cm)': [('1.7', '2.1')]})

In [194]:
ripper_clf.predict(X_test, give_reasons = False)

TypeError: float() argument must be a string or a number, not 'tuple'

In [134]:
from wittgenstein import preprocess

In [193]:
preprocess._upgrade_bin_transformer_ifdepr(ripper_clf)

In [192]:
ripper_clf.bin_transformer_ = ripper_clf.bin_transformer_.construct_from_ruleset(ruleset)

In [188]:
ruleset

<Ruleset [[petalwidth(cm)=1.7-2.1] V [petalwidth(cm)=>2.1] V [petallength(cm)=>5.7]]>

In [1]:
git clone https://github.com/maryami66/uci_dataset

SyntaxError: invalid syntax (<ipython-input-1-380dd165f01d>, line 1)

In [None]:
import numpy as np
import