# LCE

In [1]:
from lce import LCEClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


# Build a seeded train/test partition of the iris dataset
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    random_state=0,
)

# Fit an LCEClassifier, leaving every option other than n_jobs and the seed
# at its default value
clf = LCEClassifier(n_jobs=-1, random_state=0)
clf.fit(X_train, y_train)

# Predict the held-out rows and report the accuracy as a percentage
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy_pct = 100 * accuracy
print("Accuracy: {:.1f}%".format(accuracy_pct))

Accuracy: 97.4%


# PGBM (can also be run in a local project)

In [2]:
from pgbm.sklearn import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
# Load the California housing data as a (features, target) pair of arrays.
X, y = fetch_california_housing(return_X_y=True)

# Hold out 10% of the rows for testing. The split is seeded so that re-running
# the notebook evaluates the model on the same rows every time, matching the
# seeded splits used by the other examples in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=0
)

# Fit PGBM's HistGradientBoostingRegressor with its default settings.
model = HistGradientBoostingRegressor().fit(X_train, y_train)

# With return_std=True, predict() returns two arrays: the predictions and a
# second array that is used below as their per-sample standard deviation.
yhat_test, yhat_test_std = model.predict(X_test, return_std=True)

# Draw n_estimates=1000 samples from the distribution described by the
# predictions and standard deviations above.
yhat_dist = model.sample(yhat_test, yhat_test_std, n_estimates=1000)

ModuleNotFoundError: No module named 'pgbm'

# scikit_obliquetree

In [2]:

from sklearn.ensemble import BaggingRegressor
from sklearn.model_selection import cross_val_score

from scikit_obliquetree.HHCART import HouseHolderCART
from scikit_obliquetree.segmentor import MSE, MeanSegmentor


In [3]:

import pandas as pd
import numpy as np

data_url = "http://lib.stat.cmu.edu/datasets/boston"
# The separator is a regular expression (one or more whitespace characters).
# It is written as a raw string because "\s" is not a valid Python string
# escape: the plain literal "\s+" triggers an invalid-escape warning.
# The first 22 lines of the file are skipped and no header row is assumed.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Features: every even-indexed row, joined column-wise with the first two
# columns of the odd-indexed row that follows it.
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
# Target: the third column of the odd-indexed rows.
y = raw_df.values[1::2, 2]

# Bag 100 depth-3 HouseHolderCART trees. random_state seeds the ensemble so
# the cross-validation scores printed below are reproducible across runs.
reg = BaggingRegressor(
    HouseHolderCART(MSE(), MeanSegmentor(), max_depth=3),
    n_estimators=100,
    n_jobs=-1,
    random_state=0,
)
print('CV Score', cross_val_score(reg, X, y))

CV Score [ 0.6841347   0.81646544  0.79149047  0.4900638  -0.06652007]


# stree

In [4]:
import time
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from stree import Stree

random_state = 1
X, y = load_iris(return_X_y=True)
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.2, random_state=random_state
)


def fit_and_report(description, **stree_params):
    """Train one Stree on the iris split and print timing, tree and accuracy.

    Parameters
    ----------
    description : str
        Text appended to the "Predicting with ..." banner line.
    **stree_params
        Extra keyword arguments forwarded to the ``Stree`` constructor
        (``random_state`` is always taken from the notebook-level value).

    Returns
    -------
    Stree
        The fitted classifier.
    """
    print(f"Predicting with {description}")
    model = Stree(random_state=random_state, **stree_params)
    # Start a fresh timer for every model: reusing a single start time would
    # make a later run's "Took ..." figure include all the earlier runs.
    start = time.perf_counter()
    model.fit(Xtrain, ytrain)
    elapsed = time.perf_counter() - start
    print(f"Took {elapsed:.2f} seconds to train")
    print(model)
    print(f"Classifier's accuracy (train): {model.score(Xtrain, ytrain):.4f}")
    print(f"Classifier's accuracy (test) : {model.score(Xtest, ytest):.4f}")
    return model


clf = fit_and_report("max_features=sqrt(n_features)", max_features="auto")
print("=" * 40)
# No max_features argument: the constructor default is used for this run.
clf = fit_and_report("max_features=n_features")

Predicting with max_features=sqrt(n_features)
Took 0.02 seconds to train
root feaures=(2, 3) impurity=0.9976 counts=(array([0, 1, 2], dtype=int64), array([39, 37, 44], dtype=int64))
root - Down(2), <cgaf> - Leaf class=2 belief= 0.955556 impurity=0.2623 counts=(array([1, 2], dtype=int64), array([ 2, 43], dtype=int64))
root - Up(2) feaures=(0, 1) impurity=0.6857 counts=(array([0, 1, 2], dtype=int64), array([39, 35,  1], dtype=int64))
root - Up(2) - Down(3), <cgaf> - Leaf class=1 belief= 0.972222 impurity=0.1831 counts=(array([1, 2], dtype=int64), array([35,  1], dtype=int64))
root - Up(2) - Up(3), <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0], dtype=int64), array([39], dtype=int64))

Classifier's accuracy (train): 0.9750
Classifier's accuracy (test) : 0.9667
Predicting with max_features=n_features
Took 0.03 seconds to train
root feaures=(0, 1, 2, 3) impurity=0.9976 counts=(array([0, 1, 2], dtype=int64), array([39, 37, 44], dtype=int64))
root - Down(2) feaures=(

# SLSDT

In [6]:
from sklearn.datasets import load_iris
# Imported here so the cell does not depend on an import left over from an
# earlier cell of the notebook.
from sklearn.model_selection import train_test_split

from slsdt.slsdt import SLSDT

X, y = load_iris(return_X_y=True)

# Split train and test data: 33% of the rows are held out, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

clf = SLSDT()
clf.fit(X_train, y_train)

results = clf.predict(X_test)

# Accuracy = number of test rows whose prediction equals the label, divided
# by the number of test rows.
print(f"Accuracy: {sum(results == y_test) / len(y_test)}")




Accuracy: 0.98


In [7]:
from sklearn import datasets
from slsdt.slsdt import SLSDT

iris = datasets.load_iris()

# Keep only the rows labelled 0 (Iris-setosa) or 1 (Iris-versicolor)
mark = iris.target != 2

# Restrict the features to the first two columns (the sepal measurements),
# then apply the class mask to features and labels alike
X = iris.data[:, :2][mark]
y = iris.target[mark]

# Fit on the two-class subset and show the learned tree
clf = SLSDT()
clf.fit(X, y)
clf.print_tree()

# Predict on the training rows and compare element-wise with the labels
result = clf.predict(X)
print(result)
print(result == y)

 Weights: [ 1.76737913 -1.10653544 -5.88734548]
  Predict: [ 0. 50.]
  Predict: [50.  0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.]
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  