In [1]:
import wittgenstein4 as lw
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import random
# Seed Python's built-in `random` module. This does not seed NumPy's RNG.
random.seed(25)

# Load iris into pandas objects: the feature frame takes sklearn's column
# names; the target Series is left unnamed.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.Series(iris.target)

# Hold out 20% of the rows for testing; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
# Build the rule learner with k=2. n_discretize_bins is not passed; the
# trailing comment is a disabled experiment with n_discretize_bins=3.
# NOTE(review): k is presumably the number of optimization passes -- confirm
# against the wittgenstein4 documentation.
ripper_clf = lw.RIPPER(k=2)#, n_discretize_bins = 3)

In [4]:
# Fit on one frame holding features and target. The target Series is unnamed,
# so pd.concat(axis=1) labels its column 0 -- presumably what class_feat=0
# selects. pos_class=2 marks target value 2 as the positive class.
ripper_clf.fit(pd.concat([X_train,y_train], axis=1), class_feat = 0, pos_class = 2)

In [5]:
# Evaluate on the held-out split.
# NOTE(review): y_test still holds the raw 0/1/2 labels while the model was
# fit with pos_class=2 -- confirm score() binarises the labels internally.
ripper_clf.score(X_test, y_test)

0.9666666666666667

In [6]:
# Keep a reference (not a copy) to the fitted ruleset so it can be re-attached later.
x=ripper_clf.ruleset_

In [7]:
# Print the saved ruleset in readable form.
x.out_pretty()

[[petallength(cm)=>4.83] V
[petalwidth(cm)=>1.53]]


In [36]:
# Re-attach the saved ruleset to the classifier. Execution count 36 shows this
# cell was run out of order relative to its position in the notebook.
ripper_clf.ruleset_ = x

In [37]:
# Show the ruleset again after re-attaching it.
x.out_pretty()

[[petallength(cm)=>4.83] V
[petalwidth(cm)=>1.53]]


In [32]:
# Predict the positive class for the held-out rows.
# NOTE(review): the recorded output is False for every row, which does not
# match the 0.967 score recorded earlier -- it was likely produced in a
# different kernel state (execution count 32). Re-run on a fresh kernel.
ripper_clf.predict(X_test)

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False]

In [13]:
# Save the classifier's selected_features_ so it can be re-assigned later, and display it.
f = ripper_clf.selected_features_
f

['petal length (cm)', 'petal width (cm)']

In [15]:
# Save the classifier's trainset_features_ so it can be re-assigned later, and display it.
t = ripper_clf.trainset_features_
t

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [19]:
# Manually re-attach state to the classifier: the class column label, the
# positive class, and the two feature lists saved in `f` and `t`.
# NOTE(review): these attributes are presumably set by fit(); assigning them
# by hand suggests a later fit/fit_rules call cleared or changed them -- confirm.
ripper_clf.class_feat=0
ripper_clf.pos_class=2
ripper_clf.selected_features_ = f
ripper_clf.trainset_features_ = t

In [33]:
# Inspect the bin labels the classifier currently holds for each feature.
ripper_clf.bin_transformer_

{'sepal length (cm)': ['<4.6', '4.6-4.89', '4.89-5.0', '5.0-5.1', '5.1-5.2', '5.2-5.4', '5.4-5.5', '5.5-5.66', '5.66-5.75', '5.75-5.85', '5.85-6.0', '6.0-6.14', '6.14-6.3', '6.3-6.4', '6.4-6.5', '6.5-6.7', '6.7-6.9', '6.9-7.21', '>7.21'], 'sepal width (cm)': ['<2.3', '2.3-2.5', '2.5-2.6', '2.6-2.7', '2.7-2.8', '2.8-2.9', '2.9-3.0', '3.05-3.1', '3.1-3.2', '3.23-3.4', '3.4-3.5', '3.5-3.61', '3.61-3.81', '>3.81'], 'petal length (cm)': ['<1.3', '1.3-1.4', '1.4-1.5', '1.5-1.7', '1.7-3.3', '3.3-3.96', '3.96-4.1', '4.1-4.25', '4.25-4.44', '4.44-4.6', '4.6-4.8', '4.8-4.93', '4.93-5.1', '5.1-5.3', '5.3-5.52', '5.52-5.7', '5.7-6.1', '>6.1'], 'petal width (cm)': ['<0.2', '0.2-0.3', '0.3-0.4', '0.4-1.0', '1.0-1.1', '1.1-1.3', '1.3-1.4', '1.4-1.5', '1.5-1.7', '1.7-1.8', '1.8-1.9', '1.9-2.0', '2.0-2.1', '2.1-2.31', '>2.31']}

In [7]:
# Inspect the configured number of discretization bins.
# NOTE(review): the recorded value is 3 although the visible constructor call
# does not pass n_discretize_bins; execution count 7 suggests this output comes
# from an earlier kernel state -- confirm.
ripper_clf.n_discretize_bins

3

In [11]:
# Upper bound for the bin-count sweep: the largest number of distinct values
# found in any training column, never below 2 and never above 20.
train_values = np.asarray(X_train)  # convert once instead of once per column
maximum_discr_bins = 2
for i in range(train_values.shape[1]):  # every column, not a hard-coded 4
    n = len(np.unique(train_values[:, i]))  # number of distinct values in column i
    maximum_discr_bins = max(maximum_discr_bins, n)

maximum_discr_bins = min(maximum_discr_bins, 20)
maximum_discr_bins

20

In [21]:
# Accumulators for the bin-count sweep: one ruleset and one `dl` value per fit.
rulesets, min_dls = [], []

In [23]:
# Sweep the number of discretization bins from 2 to maximum_discr_bins,
# refitting the rules each time and recording the resulting ruleset together
# with the classifier's `dl` value.

# Start from empty accumulators so that re-running this cell does not append
# a second set of results and break the index <-> bin-count alignment.
rulesets = []
min_dls = []

for i in range(2, maximum_discr_bins + 1):
    ripper_clf.n_discretize_bins = i
    ripper_clf.fit_rules(
        # A fresh frame is built for every fit, as in the original cell.
        # NOTE(review): could be hoisted if fit_rules never modifies its input -- confirm.
        pd.concat([X_train, y_train], axis=1),
        n_discretize_bins=i,
        class_feat=0,
        pos_class=2,
        initial_model=None,
        cn_optimize=True,
    )

    rulesets.append(ripper_clf.ruleset_)
    min_dls.append(ripper_clf.dl)

In [29]:
# Display the `dl` value recorded for each bin count (index 0 corresponds to 2 bins).
min_dls

[70.17301445623596,
 48.179882599379084,
 55.9368403106656,
 61.55314745759908,
 48.309708407840034,
 50.42490859120039,
 58.100697405028725,
 63.60563724193384,
 55.39855163483989,
 66.15829876486833,
 71.66057802256182,
 73.94247787944343,
 67.52467134004337,
 74.97663768245464,
 78.74021190960957,
 73.58934601977755,
 83.70975013620873,
 77.64616212663545,
 72.30236220672327]

In [25]:
# Pick the sweep entry with the smallest recorded `dl` and put its ruleset and
# bin count back on the classifier. Index k corresponds to k + 2 bins.
# NOTE(review): only ruleset_ and n_discretize_bins are restored here; other
# fitted state (e.g. bin_transformer_) presumably still comes from the last
# fit of the sweep -- confirm this is intended.
best_idx = np.argmin(min_dls)
ripper_clf.ruleset_ = rulesets[best_idx]
ripper_clf.n_discretize_bins = range(2, maximum_discr_bins + 1)[best_idx]

In [26]:
# Print the ruleset currently attached to the classifier.
ripper_clf.ruleset_.out_pretty()

[[petallength(cm)=>4.83 ^ petalwidth(cm)=>1.53] V
[petalwidth(cm)=>1.53] V
[petallength(cm)=>4.83]]


In [32]:
# Same display as the previous cell, recorded at a later execution count (32).
ripper_clf.ruleset_.out_pretty()

[[petallength(cm)=>4.83 ^ petalwidth(cm)=>1.53] V
[petalwidth(cm)=>1.53] V
[petallength(cm)=>4.83]]


In [183]:
# Arguments for preprocess.preprocess_prediction_data: the test features plus
# the fitted state read off the classifier.
preprocess_params = {
    "X": X_test,
    "class_feat": 0,
    "pos_class": 2,
    "bin_transformer_": ripper_clf.bin_transformer_,
    "user_requested_feature_names": None,
    "selected_features_": ripper_clf.selected_features_,
    "trainset_features_": ripper_clf.trainset_features_,
    "verbosity": ripper_clf.verbosity,
}

In [164]:
# Display the features used by the current ruleset.
ripper_clf.selected_features_

['petal width (cm)', 'petal length (cm)']

In [184]:
# Run the library's prediction-time preprocessing on the parameters built above.
# NOTE(review): `preprocess` is only imported in a later cell of this file
# (`from wittgenstein import preprocess`), so this fails on a fresh
# top-to-bottom run.
X_df = preprocess.preprocess_prediction_data(preprocess_params)

In [185]:
# Display the preprocessed frame; the recorded output shows binned (range-label) values.
X_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
73,6.0-6.3,2.7-2.8,4.6-4.93,1.1-1.3
18,5.5-5.75,>3.61,1.5-1.7,0.2-0.4
118,>6.9,2.5-2.7,>5.7,>2.1
78,5.75-6.0,2.8-3.0,4.25-4.6,1.3-1.5
76,6.5-6.9,2.7-2.8,4.6-4.93,1.3-1.5
31,5.2-5.5,3.23-3.4,1.4-1.5,0.2-0.4
64,5.5-5.75,2.8-3.0,1.7-3.96,1.1-1.3
141,6.5-6.9,3.0-3.1,4.93-5.3,>2.1
68,6.0-6.3,<2.5,4.25-4.6,1.3-1.5
82,5.75-6.0,2.5-2.7,1.7-3.96,1.1-1.3


In [167]:
# Print the classifier's current model (its ruleset).
ripper_clf.out_model()

[[petalwidth(cm)=1.7-2.1] V
[petalwidth(cm)=>2.1] V
[petallength(cm)=>5.7]]


In [168]:
# Alias the classifier's current ruleset for the bin-reconstruction experiments below.
ruleset = ripper_clf.ruleset_

In [169]:
from wittgenstein import discretize

In [170]:
# Ask the bin transformer to rebuild bin ranges from the ruleset's conditions.
# The recorded result is a dict mapping a feature name to (floor, ceil) string tuples.
ripper_clf.bin_transformer_.construct_from_ruleset(ripper_clf.ruleset_)

{'petal width (cm)': [('1.7', '2.1')]}

In [26]:
# Inspect the bin transformer. The recorded value is None, at execution
# count 26, i.e. a different kernel state from the neighbouring cells.
ripper_clf.bin_transformer_

None

In [172]:
# Feature name -> list of (floor, ceil) ranges. Import defaultdict from the
# standard library rather than reaching for it through the `discretize`
# module's namespace, which only works while that module happens to import it.
from collections import defaultdict

discrete = defaultdict(list)

In [173]:
# Walk the ruleset's conditions and record the (floor, ceil) pair of every
# condition whose value parses as a "floor-ceil" range.
for cond in ruleset.get_conds():
    print(cond)
    # Uses the notebook-level find_floor_ceil: per the original author's note,
    # it is not available as an attribute of bin_transformer here.
    floor_ceil = find_floor_ceil(cond.val)
    if not floor_ceil:
        # No (floor, ceil) pair recognised for this value: nothing to record.
        continue
    discrete[cond.feature].append(floor_ceil)
    print(floor_ceil)

# Order each feature's ranges by numeric floor and show them.
for feat in discrete:
    ranges = discrete[feat]
    ranges.sort(key=lambda bounds: float(bounds[0]))
    print(ranges)

petal width (cm)=1.7-2.1
('1.7', '2.1')
petal width (cm)=>2.1
petal length (cm)=>5.7
[('1.7', '2.1')]


In [174]:
def find_floor_ceil(value):
    """Split a "floor-ceil" range label into its two numeric strings.

    The separator is the first dash that is not the leading character, so a
    leading minus sign belongs to the floor ("-1.5-2.0"). A dash directly
    after the separator is read as the ceil's minus sign ("-1.5--0.5"). Any
    other additional dash makes the label invalid; this includes a minus sign
    inside an exponent, so "1e-5-2e-5" is not recognised.

    Parameters
    ----------
    value : object
        Condition value to inspect. Anything that is not a string is treated
        as "not a range" instead of raising.

    Returns
    -------
    tuple of (str, str) or None
        (floor, ceil) exactly as they appear in `value`, or None when `value`
        is not a string, contains no separator, or either side is rejected by
        `is_valid_decimal`.
    """
    # Non-string values (e.g. an integer category) cannot be range labels.
    if not isinstance(value, str):
        return None

    split_idx = None
    for i, char in enumerate(value):
        # A dash at position 0 is the floor's minus sign, never the separator.
        if char != "-" or i == 0:
            continue
        if split_idx is None:
            split_idx = i
        # A dash right after the separator is the ceil's minus sign; one any
        # further along means the label is not a simple two-number range.
        elif i > split_idx + 1:
            return None

    # No separator at all (e.g. ">2.1" or "<0.2").
    if split_idx is None:
        return None

    floor = value[:split_idx]
    ceil = value[split_idx + 1:]
    if is_valid_decimal(floor) and is_valid_decimal(ceil):
        return (floor, ceil)
    return None

In [175]:
# List the individual conditions contained in the ruleset.
ruleset.get_conds()

[<Cond petal width (cm)=1.7-2.1>,
 <Cond petal width (cm)=>2.1>,
 <Cond petal length (cm)=>5.7>]

In [176]:
def is_valid_decimal(s):
    """Return True if `s` can be converted with float(), otherwise False.

    Only the conversion failures float() itself signals are treated as
    "invalid": TypeError (unsupported type such as None or a tuple),
    ValueError (unparseable text such as "" or "abc") and OverflowError (an
    int too large for a float). Unrelated exceptions such as
    KeyboardInterrupt are no longer swallowed.
    """
    try:
        float(s)
    except (TypeError, ValueError, OverflowError):
        return False
    return True

In [177]:
# Display the ranges collected per feature.
discrete

defaultdict(list, {'petal width (cm)': [('1.7', '2.1')]})

In [194]:
# Predict on the held-out rows without returning reasons.
# NOTE(review): the recorded run raised "TypeError: float() argument must be a
# string or a number, not 'tuple'". A possible cause is that bin_transformer_
# had been overwritten with the dict of (floor, ceil) tuples returned by
# construct_from_ruleset (see the assignment cell in this file) -- confirm.
ripper_clf.predict(X_test, give_reasons = False)

TypeError: float() argument must be a string or a number, not 'tuple'

In [134]:
from wittgenstein import preprocess

In [193]:
# Calls an underscore-prefixed (private) helper of wittgenstein.preprocess on the classifier.
# NOTE(review): private API -- its behaviour may change between versions; the
# name suggests it upgrades a deprecated bin_transformer_ format, confirm.
preprocess._upgrade_bin_transformer_ifdepr(ripper_clf)

In [192]:
# Replace the classifier's bin transformer with whatever construct_from_ruleset returns.
# NOTE(review): an earlier recorded call returned a plain dict of (floor, ceil)
# tuples. If that is what gets stored here, a second run of this cell would
# fail because the stored value would no longer have construct_from_ruleset --
# confirm the return type.
ripper_clf.bin_transformer_ = ripper_clf.bin_transformer_.construct_from_ruleset(ruleset)

In [188]:
# Display the ruleset's repr.
ruleset

<Ruleset [[petalwidth(cm)=1.7-2.1] V [petalwidth(cm)=>2.1] V [petallength(cm)=>5.7]]>

In [1]:
git clone https://github.com/maryami66/uci_dataset

SyntaxError: invalid syntax (<ipython-input-1-380dd165f01d>, line 1)

In [None]:
import numpy as np
import