# Function Generation for the Training of λ-Nets

## Specitication of Experiment Settings

In [27]:
import torch
import torch.nn as nn


class SDT(nn.Module):
    """Fast implementation of soft decision tree in PyTorch.
    Parameters
    ----------
    input_dim : int
      The number of input dimensions.
    output_dim : int
      The number of output dimensions. For example, for a multi-class
      classification problem with `K` classes, it is set to `K`.
    depth : int, default=5
      The depth of the soft decision tree. Since the soft decision tree is
      a full binary tree, setting `depth` to a large value will drastically
      increases the training and evaluating cost.
    lamda : float, default=1e-3
      The coefficient of the regularization term in the training loss. Please
      refer to the paper on the formulation of the regularization term.
    use_cuda : bool, default=False
      When set to `True`, use GPU to fit the model. Training a soft decision
      tree using CPU could be faster considering the inherent data forwarding
      process.
    Attributes
    ----------
    internal_node_num_ : int
      The number of internal nodes in the tree. Given the tree depth `d`, it
      equals to :math:`2^d - 1`.
    leaf_node_num_ : int
      The number of leaf nodes in the tree. Given the tree depth `d`, it equals
      to :math:`2^d`.
    penalty_list : list
      A list storing the layer-wise coefficients of the regularization term.
    inner_nodes : torch.nn.Sequential
      A container that simulates all internal nodes in the soft decision tree.
      The sigmoid activation function is concatenated to simulate the
      probabilistic routing mechanism.
    leaf_nodes : torch.nn.Linear
      A `nn.Linear` module that simulates all leaf nodes in the tree.
    """

    def __init__(
            self,
            input_dim,
            output_dim,
            depth=5,
            lamda=1e-3,
            use_cuda=False):
        super(SDT, self).__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim

        self.depth = depth
        self.lamda = lamda
        self.device = torch.device("cuda" if use_cuda else "cpu")

        self._validate_parameters()

        self.internal_node_num_ = 2 ** self.depth - 1
        self.leaf_node_num_ = 2 ** self.depth

        # Different penalty coefficients for nodes in different layers
        self.penalty_list = [
            self.lamda * (2 ** (-depth)) for depth in range(0, self.depth)
        ]

        # Initialize internal nodes and leaf nodes, the input dimension on
        # internal nodes is added by 1, serving as the bias.
        self.inner_nodes = nn.Sequential(
            nn.Linear(self.input_dim + 1, self.internal_node_num_, bias=False),
            nn.Sigmoid(),
        )

        self.leaf_nodes = nn.Linear(self.leaf_node_num_,
                                    self.output_dim,
                                    bias=False)

    def forward(self, X, is_training_data=False):
        _mu, _penalty = self._forward(X)
        y_pred = self.leaf_nodes(_mu)

        # When `X` is the training data, the model also returns the penalty
        # to compute the training loss.
        if is_training_data:
            return y_pred, _penalty
        else:
            return y_pred

    def _forward(self, X):
        """Implementation on the data forwarding process."""

        batch_size = X.size()[0]
        X = self._data_augment(X)

        path_prob = self.inner_nodes(X)
        path_prob = torch.unsqueeze(path_prob, dim=2)
        path_prob = torch.cat((path_prob, 1 - path_prob), dim=2)

        _mu = X.data.new(batch_size, 1, 1).fill_(1.0)
        _penalty = torch.tensor(0.0).to(self.device)

        # Iterate through internal odes in each layer to compute the final path
        # probabilities and the regularization term.
        begin_idx = 0
        end_idx = 1

        for layer_idx in range(0, self.depth):
            _path_prob = path_prob[:, begin_idx:end_idx, :]

            # Extract internal nodes in the current layer to compute the
            # regularization term
            _penalty = _penalty + self._cal_penalty(layer_idx, _mu, _path_prob)
            _mu = _mu.view(batch_size, -1, 1).repeat(1, 1, 2)

            _mu = _mu * _path_prob  # update path probabilities

            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (layer_idx + 1)

        mu = _mu.view(batch_size, self.leaf_node_num_)

        return mu, _penalty

    def _cal_penalty(self, layer_idx, _mu, _path_prob):
        """
        Compute the regularization term for internal nodes in different layers.
        """

        penalty = torch.tensor(0.0).to(self.device)

        batch_size = _mu.size()[0]
        _mu = _mu.view(batch_size, 2 ** layer_idx)
        _path_prob = _path_prob.view(batch_size, 2 ** (layer_idx + 1))

        for node in range(0, 2 ** (layer_idx + 1)):
            alpha = torch.sum(
                _path_prob[:, node] * _mu[:, node // 2], dim=0
            ) / torch.sum(_mu[:, node // 2], dim=0)

            coeff = self.penalty_list[layer_idx]

            penalty -= 0.5 * coeff * (torch.log(alpha) + torch.log(1 - alpha))

        return penalty

    def _data_augment(self, X):
        """Add a constant input `1` onto the front of each sample."""
        batch_size = X.size()[0]
        X = X.view(batch_size, -1)
        bias = torch.ones(batch_size, 1).to(self.device)
        X = torch.cat((bias, X), 1)

        return X

    def _validate_parameters(self):

        if not self.depth > 0:
            msg = ("The tree depth should be strictly positive, but got {}"
                   "instead.")
            raise ValueError(msg.format(self.depth))

        if not self.lamda >= 0:
            msg = (
                "The coefficient of the regularization term should not be"
                " negative, but got {} instead."
            )
            raise ValueError(msg.format(self.lamda))


In [399]:
np.set_printoptions(suppress=True)

In [403]:
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
X, y = load_breast_cancer(return_X_y=True)

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)



NameError: name 'MinMaxScaler' is not defined

In [401]:
X_train.shape

(381, 30)

In [402]:
X_train[:10]

array([[  15.46    ,   11.89    ,  102.5     ,  736.9     ,    0.1257  ,
           0.1555  ,    0.2032  ,    0.1097  ,    0.1966  ,    0.07069 ,
           0.4209  ,    0.6583  ,    2.805   ,   44.64    ,    0.005393,
           0.02321 ,    0.04303 ,    0.0132  ,    0.01792 ,    0.004168,
          18.79    ,   17.04    ,  125.      , 1102.      ,    0.1531  ,
           0.3583  ,    0.583   ,    0.1827  ,    0.3216  ,    0.101   ],
       [  12.85    ,   21.37    ,   82.63    ,  514.5     ,    0.07551 ,
           0.08316 ,    0.06126 ,    0.01867 ,    0.158   ,    0.06114 ,
           0.4993  ,    1.798   ,    2.552   ,   41.24    ,    0.006011,
           0.0448  ,    0.05175 ,    0.01341 ,    0.02669 ,    0.007731,
          14.4     ,   27.01    ,   91.63    ,  645.8     ,    0.09402 ,
           0.1936  ,    0.1838  ,    0.05601 ,    0.2488  ,    0.08151 ],
       [  19.21    ,   18.57    ,  125.5     , 1152.      ,    0.1053  ,
           0.1267  ,    0.1323  ,    0.08994 ,   

In [387]:
y_test.shape

(188,)

In [397]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms



def onehot_coding(target, device, output_dim):
    """Convert the class labels into one-hot encoded vectors."""
    target_onehot = torch.FloatTensor(target.size()[0], output_dim).to(device)
    target_onehot.data.zero_()
    #print(target_onehot)
    target_onehot.scatter_(1, target.view(-1, 1), 1.0)
    return target_onehot


if __name__ == "__main__":

    # Parameters
    input_dim = X_train.shape[1]#28 * 28    # the number of input dimensions
    output_dim = 2#10        # the number of outputs (i.e., # classes on MNIST)
    depth = 5              # tree depth
    lamda = 1e-3           # coefficient of the regularization term
    lr = 1e-2              # learning rate
    weight_decaly = 5e-4   # weight decay
    batch_size = 128       # batch size
    epochs = 10          # the number of training epochs
    log_interval = 100     # the number of batches to wait before printing logs
    use_cuda = False       # whether to use GPU

    # Model and Optimizer
    tree = SDT(input_dim, output_dim, depth, lamda, use_cuda)

    optimizer = torch.optim.Adam(tree.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decaly)

    # Load data
    data_dir = "../Dataset/mnist"

    transformer = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    
    X_train = torch.FloatTensor(X_train)
    y_train = torch.LongTensor(y_train)#torch.LongTensor(y.reshape(-1,1))
    
    X_test = torch.FloatTensor(X_test)
    y_test = torch.LongTensor(y_test)    
    #print('X', X.dtype)
    #print('y', y.dtype)    
    #print('X', X)
    #print('y', y)
    
    
    # Utils
    best_testing_acc = 0.0
    testing_acc_list = []
    training_loss_list = []
    criterion = nn.CrossEntropyLoss()
    device = torch.device("cuda" if use_cuda else "cpu")

    for epoch in range(epochs):

        # Training
        tree.train()
        
        batch_size = X_train.shape[0]#data.size()[0]
        data, target = X_train, y_train#data.to(device), target.to(device)
        target_onehot = onehot_coding(target, device, output_dim)

        #print(batch_size)
        #print(data.shape)
        #print(target.shape)
        #print(target_onehot.shape)

        
        output, penalty = tree.forward(data, is_training_data=True)
        print('output', output)
        print('penalty', penalty)
        
        loss = criterion(output, target.view(-1))
        print('loss', loss)
        
        loss += penalty

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training status
        if batch_idx % log_interval == 0:
            pred = output.data.max(1)[1]
            correct = pred.eq(target.view(-1).data).sum()

            msg = (
                "Epoch: {:02d} | Batch: {:03d} | Loss: {:.5f} |"
                " Correct: {:03d}/{:03d}"
            )
            print(msg.format(epoch, batch_idx, loss, correct, batch_size))
            training_loss_list.append(loss.cpu().data.numpy())

        # Evaluating
        tree.eval()
        correct = 0.


        batch_size = X_test.shape[0]#data.size()[0]
        data, target = X_test, y_test#data.to(device), target.to(device)

        output = F.softmax(tree.forward(data), dim=1)

        pred = output.data.max(1)[1]
        correct += pred.eq(target.view(-1).data).sum()

        accuracy = 100.0 * float(correct) / target.shape[0]

        if accuracy > best_testing_acc:
            best_testing_acc = accuracy

        msg = (
            "\nEpoch: {:02d} | Testing Accuracy: {}/{} ({:.3f}%) |"
            " Historical Best: {:.3f}%\n"
        )
        print(
            msg.format(
                epoch, correct,
                target.shape[0],
                accuracy,
                best_testing_acc
            )
        )
        testing_acc_list.append(accuracy)

output tensor([[-0.0743,  0.1495],
        [-0.0744,  0.1495],
        [ 0.1727, -0.0971],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0743,  0.1495],
        [-0.0744,  0.1495],
        [-0.0743,  0.1495],
        [-0.0744,  0.1495],
        [-0.0717,  0.1469],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0743,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0736,  0.1487],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [ 0.1727, -0.0971],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0744,  0.1495],
        [-0.0743,  0.1495],
        [-0.0744,  0.1495],
        [ 0.1727, -0.0971],
        [-0.0

In [372]:
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
X, y = load_iris(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [373]:
X_train.shape

(100, 4)

In [374]:
y_train.shape

(100,)

In [383]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms



def onehot_coding(target, device, output_dim):
    """Convert the class labels into one-hot encoded vectors."""
    target_onehot = torch.FloatTensor(target.size()[0], output_dim).to(device)
    target_onehot.data.zero_()
    #print(target_onehot)
    target_onehot.scatter_(1, target.view(-1, 1), 1.0)
    return target_onehot


if __name__ == "__main__":

    # Parameters
    input_dim = X_train.shape[1]#28 * 28    # the number of input dimensions
    output_dim = 3#10        # the number of outputs (i.e., # classes on MNIST)
    depth = 5              # tree depth
    lamda = 1e-3           # coefficient of the regularization term
    lr = 1e-2              # learning rate
    weight_decaly = 5e-4   # weight decay
    batch_size = 128       # batch size
    epochs = 1000          # the number of training epochs
    log_interval = 100     # the number of batches to wait before printing logs
    use_cuda = False       # whether to use GPU

    # Model and Optimizer
    tree = SDT(input_dim, output_dim, depth, lamda, use_cuda)

    optimizer = torch.optim.Adam(tree.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decaly)

    # Load data
    data_dir = "../Dataset/mnist"

    transformer = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    
    X_train = torch.FloatTensor(X_train)
    y_train = torch.LongTensor(y_train)#torch.LongTensor(y.reshape(-1,1))
    
    X_test = torch.FloatTensor(X_test)
    y_test = torch.LongTensor(y_test)    
    #print('X', X.dtype)
    #print('y', y.dtype)    
    #print('X', X)
    #print('y', y)
    
    
    # Utils
    best_testing_acc = 0.0
    testing_acc_list = []
    training_loss_list = []
    criterion = nn.CrossEntropyLoss()
    device = torch.device("cuda" if use_cuda else "cpu")

    for epoch in range(epochs):

        # Training
        tree.train()
        
        batch_size = X_train.shape[0]#data.size()[0]
        data, target = X_train, y_train#data.to(device), target.to(device)
        target_onehot = onehot_coding(target, device, output_dim)

        #print(batch_size)
        #print(data.shape)
        #print(target.shape)
        #print(target_onehot.shape)

        
        output, penalty = tree.forward(data, is_training_data=True)

        loss = criterion(output, target.view(-1))
        loss += penalty

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training status
        if batch_idx % log_interval == 0:
            pred = output.data.max(1)[1]
            correct = pred.eq(target.view(-1).data).sum()

            msg = (
                "Epoch: {:02d} | Batch: {:03d} | Loss: {:.5f} |"
                " Correct: {:03d}/{:03d}"
            )
            print(msg.format(epoch, batch_idx, loss, correct, batch_size))
            training_loss_list.append(loss.cpu().data.numpy())

        # Evaluating
        tree.eval()
        correct = 0.


        batch_size = X_test.shape[0]#data.size()[0]
        data, target = X_test, y_test#data.to(device), target.to(device)

        output = F.softmax(tree.forward(data), dim=1)

        pred = output.data.max(1)[1]
        correct += pred.eq(target.view(-1).data).sum()

        accuracy = 100.0 * float(correct) / target.shape[0]

        if accuracy > best_testing_acc:
            best_testing_acc = accuracy

        msg = (
            "\nEpoch: {:02d} | Testing Accuracy: {}/{} ({:.3f}%) |"
            " Historical Best: {:.3f}%\n"
        )
        print(
            msg.format(
                epoch, correct,
                target.shape[0],
                accuracy,
                best_testing_acc
            )
        )
        testing_acc_list.append(accuracy)

Epoch: 00 | Batch: 000 | Loss: 1.10822 | Correct: 035/100

Epoch: 00 | Testing Accuracy: 15.0/50 (30.000%) | Historical Best: 30.000%

Epoch: 01 | Batch: 000 | Loss: 1.10411 | Correct: 035/100

Epoch: 01 | Testing Accuracy: 17.0/50 (34.000%) | Historical Best: 34.000%

Epoch: 02 | Batch: 000 | Loss: 1.10004 | Correct: 041/100

Epoch: 02 | Testing Accuracy: 26.0/50 (52.000%) | Historical Best: 52.000%

Epoch: 03 | Batch: 000 | Loss: 1.09585 | Correct: 055/100

Epoch: 03 | Testing Accuracy: 19.0/50 (38.000%) | Historical Best: 52.000%

Epoch: 04 | Batch: 000 | Loss: 1.09144 | Correct: 043/100

Epoch: 04 | Testing Accuracy: 17.0/50 (34.000%) | Historical Best: 52.000%

Epoch: 05 | Batch: 000 | Loss: 1.08678 | Correct: 037/100

Epoch: 05 | Testing Accuracy: 16.0/50 (32.000%) | Historical Best: 52.000%

Epoch: 06 | Batch: 000 | Loss: 1.08184 | Correct: 036/100

Epoch: 06 | Testing Accuracy: 17.0/50 (34.000%) | Historical Best: 52.000%

Epoch: 07 | Batch: 000 | Loss: 1.07664 | Correct: 036/1

In [111]:
tree.leaf_nodes.weight

Parameter containing:
tensor([[ 0.0230,  0.1037,  0.0170,  0.0782, -0.0251,  0.1144,  0.0525, -0.1145,
         -0.0837,  0.0280, -0.0622,  0.0481, -0.0381, -0.1141, -0.0541, -0.1151,
         -0.0957,  0.0267,  0.0018,  0.0714,  0.0183, -0.0907, -0.0742,  0.0593,
         -0.1175, -0.0281,  0.0955, -0.1078, -0.0944,  0.1281,  0.0142, -0.0354,
         -0.0673,  0.1071,  0.0290,  0.0405,  0.0146,  0.0628, -0.1075,  0.0575,
         -0.0404,  0.1007,  0.0715,  0.0778, -0.0295, -0.0013,  0.0014, -0.0843,
          0.0857,  0.0264,  0.1017, -0.0677, -0.0819, -0.0488,  0.0175,  0.0238,
          0.0960, -0.1120,  0.0750,  0.1001, -0.0276, -0.0807,  0.0375, -0.0681],
        [ 0.1084,  0.0509,  0.0367,  0.1000,  0.0191,  0.0120, -0.1097, -0.1095,
          0.0797, -0.0630, -0.0239,  0.0278,  0.0503, -0.1013,  0.1144, -0.0482,
          0.1286, -0.1089, -0.0005, -0.0057,  0.0923,  0.0121, -0.0005,  0.0693,
          0.0428,  0.0831,  0.1023, -0.0549, -0.0004,  0.0824,  0.0744,  0.1081,
     

In [112]:
tree.leaf_nodes.weight.shape

torch.Size([3, 64])

In [97]:
tree.inner_nodes[1]

Sigmoid()

In [116]:
tree.inner_nodes[0].weight.shape

torch.Size([31, 5])

In [315]:
X = X[:90]
y = y[:90]

In [333]:
X = np.vstack([X, X])
y = np.hstack([y, y])

In [338]:
y.shape

(569,)

In [339]:
X.shape

(569, 30)

In [381]:
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

normal_dt = DecisionTreeClassifier(max_depth=4)
normal_dt.fit(X_train, y_train)
preds = normal_dt.predict(X_test)

print(accuracy_score(y_test, preds))



0.98


In [380]:
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import fuzzytree
from fuzzytree import FuzzyDecisionTreeClassifier

fuzzy_dt = FuzzyDecisionTreeClassifier(max_depth=4)
fuzzy_dt.fit(X_train, y_train)
preds = fuzzy_dt.predict(X_test)

print(accuracy_score(y_test, preds))



0.98


In [327]:
from sklearn.utils.validation import check_X_y, check_is_fitted, _check_sample_weight
def _get_sample_weight(X, sample_weight=None):
    sample_weight = _check_sample_weight(sample_weight, X, np.float64)
    indices = np.flatnonzero(sample_weight)
    sample_weight = sample_weight[indices] / sample_weight[indices].mean()

    return indices, sample_weight

In [334]:
X.shape

(360, 4)

In [335]:
_get_sample_weight(X)[0].shape

(360,)

In [328]:
_get_sample_weight(X)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
        68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
        85, 86, 87, 88, 89]),
 array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1.]))

In [288]:
X.shape

(76800, 4)

In [289]:
y.shape

(76800,)

In [282]:
fuzzy_dta

FuzzyDecisionTreeClassifier(max_depth=4)

In [122]:
fuzzy_dt.n_features_

4

In [118]:
fuzzy_dt.tree_

<fuzzytree._fuzzy_tree.FuzzyTree at 0x7fea6eb90790>

In [136]:
import numpy as np

In [176]:
def _init_sorted_cols(X):
    _sorted_cols = {}

    for feature_idx in range(X.shape[1]):
        _sorted_cols[feature_idx] = np.sort(X[:, feature_idx])
    return _sorted_cols

In [167]:
fuzzy_dt.tree_.true_branch.rule.split_val = 0

In [186]:
from fuzzytree._fuzzy_decision_rule import FuzzyDecisionRule

In [207]:
fuzzy_dt.tree_.false_branch

<fuzzytree._fuzzy_tree.FuzzyTree at 0x7fea6eb4c910>

In [203]:
print(fuzzy_dt.tree_.true_branch.true_branch.true_branch.rule)

None


In [201]:
fuzzy_dt.tree_.true_branch.rule = FuzzyDecisionRule(sorted_feature= _init_sorted_cols(X)[3], 
                                                    split_val=0.5, 
                                                    fuzziness=0.8, 
                                                    feature_idx=3)

In [214]:
_init_sorted_cols(X)[3]

array([0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
       0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
       0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3, 0.3, 0.3,
       0.3, 0.3, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.5, 0.6, 1. , 1. ,
       1. , 1. , 1. , 1. , 1. , 1.1, 1.1, 1.1, 1.2, 1.2, 1.2, 1.2, 1.2,
       1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3,
       1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.5, 1.5, 1.5, 1.5, 1.5,
       1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.6, 1.6, 1.6, 1.6, 1.7, 1.7,
       1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.9,
       1.9, 1.9, 1.9, 1.9, 2. , 2. , 2. , 2. , 2. , 2. , 2.1, 2.1, 2.1,
       2.1, 2.1, 2.1, 2.2, 2.2, 2.2, 2.3, 2.3, 2.3, 2.3, 2.3, 2.3, 2.3,
       2.3, 2.4, 2.4, 2.4, 2.5, 2.5, 2.5], dtype=float32)

In [321]:
X.shape

(90, 4)

In [319]:
fuzzy_dt.tree_.true_branch.rule.membership

array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 4.16934445e-04, 2.20780176e-03,
       5.41041589e-03, 1.00247768e-02, 1.60508846e-02, 2.34887392e-02,
       3.23383407e-02, 4.25996889e-02, 5.42727840e-02, 6.73576259e-02,
       8.18542146e-02, 9.77625502e-02, 1.15082633e-01, 1.33814462e-01,
       1.53958038e-01, 1.75513361e-01, 1.98480430e-01, 2.22859247e-01,
       2.48649810e-01, 2.75852120e-01, 3.04466177e-01, 3.34491981e-01,
       3.65929532e-01, 3.98778829e-01, 4.33039873e-01, 4.68712664e-01,
      

In [320]:
fuzzy_dt.tree_.true_branch.rule.membership.shape

(100,)

In [198]:
fuzzy_dt.tree_.true_branch.rule.split_val

0.7

In [199]:
fuzzy_dt.tree_.true_branch.rule

x[3] >= 0.7?

In [218]:
def membership_ratio(y, membership):
    """Calculate the membership ratio of each class in y.
    Parameters
    ----------
    y : array-like of shape (n_samples,)
        The array of labels.
    membership : array-like of shape (n_samples,)
        The membership of each sample that corresponding label is
        coming from.
    Returns
    -------
    membership_by_class : array-like of float of shape (len(np.unique(y)),)
        The membership ratio for each class in np.unique(y).
    """

    membership_by_class = np.bincount(y, weights=membership)
    if membership_by_class.any():
        membership_by_class /= membership_by_class.sum()

    return membership_by_class


In [279]:
membership_ratio(y, fuzzy_dt.tree_.class_weights)

ValueError: The weights and list don't have the same length.

array([0.33333333, 0.33333333, 0.33333333])

In [202]:
fuzzy_dt.tree_.true_branch.rule.evaluate(np.array([[1.7, 1.7, 1.7, 0.64]]))

array([0.7036657])

In [132]:
fuzzy_dt.tree_.true_branch.true_branch.rule

x[3] >= 1.65?

In [133]:
fuzzy_dt.tree_.true_branch.true_branch.true_branch.rule

In [210]:
fuzzy_dt.tree_.true_branch.true_branch.__dict__

{'class_weights': array([0.        , 0.10485109, 0.89514891]),
 'level': 2,
 'rule': x[3] >= 1.65?,
 'true_branch': <fuzzytree._fuzzy_tree.FuzzyTree at 0x7fea6eb90460>,
 'false_branch': <fuzzytree._fuzzy_tree.FuzzyTree at 0x7fea6eb90a30>}

In [124]:
fuzzy_dt.tree_.__weakref__

In [5]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [123]:
help(fuzzytree._fuzzy_tree.FuzzyTree)

Help on class FuzzyTree in module fuzzytree._fuzzy_tree:

class FuzzyTree(builtins.object)
 |  FuzzyTree(y, membership, level=0, rule=None, true_branch=None, false_branch=None)
 |  
 |  Fuzzy decision tree representation.
 |  
 |  Parameters
 |  ----------
 |  y : array-like of shape (n_samples,)
 |      The array of labels.
 |  membership : array-like of shape (n_samples,)
 |      The membership of samples that respective labels are
 |      coming from.
 |  level : int
 |      Depth of the node.
 |  rule : FuzzyDecisionRule, default=None
 |      The rule that was used to split this node. If None,
 |      then the node is a leaf.
 |  true_branch : FuzzyTree, default=None
 |      The child node containing labels of samples which
 |      memberships of were evaluated as non-zero by the
 |      fuzzy decision rule. If None, then the node is a leaf.
 |  false_branch : FuzzyTree, default=None
 |      The child node containing labels of samples which
 |      memberships of were evaluated as 

In [1]:
#######################################################################################################################################
###################################################### CONFIG FILE ####################################################################
#######################################################################################################################################
sleep_time = 0 #minutes


config = {
    'function_family': {
        'maximum_depth': 3,
        'fully_grown': True,       
        'balance': 0.5
        'balancing_tolerance': 0.05               
    }
    'data': {
        'number_of_variables': 3, 
        'num_classes': 2,
        
        'function_generation_type': 'make_classification', #'random'
        'objective': 'classification' # 'multiclass_classification', 'regression'
        
        'x_max': 1,
        'x_min': 0,
        'x_distrib': 'uniform', #'normal', 'uniform',       
        
        'same_training_all_lambda_nets': False,
        
        'lambda_dataset_size': 5000, #number of samples per polynomial
        'number_of_generated_datasets': 10000,
    }, 
    'computation':{
        'n_jobs': 5,
        'use_gpu': False,
        'gpu_numbers': '0',
        'RANDOM_SEED': 0,   
    }
}


In [2]:
#######################################################################################################################################
########################################### IMPORT GLOBAL VARIABLES FROM CONFIG #######################################################
#######################################################################################################################################
globals().update(config['data'])
globals().update(config['computation'])

## Imports

In [3]:
from itertools import product       # forms cartesian products
from more_itertools import random_product 
from tqdm import tqdm_notebook as tqdm
import pickle

import numpy as np

import pandas as pd
from joblib import Parallel, delayed

import random 
from random import sample 

import os
import sys

from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler

from sympy import Symbol, sympify

        
import seaborn as sns
        
import random 

import warnings

from time import perf_counter

In [4]:
#######################################################################################################################################
###################################################### SET VARIABLES + DESIGN #########################################################
#######################################################################################################################################
sns.set_style("darkgrid")

random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
    
    
pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.set_option('display.max_columns', 500)

warnings.filterwarnings('ignore')

In [6]:
from utilities.utility_functions import *
#######################################################################################################################################
####################################################### CONFIG ADJUSTMENTS ############################################################
#######################################################################################################################################

    
#######################################################################################################################################
################################################## UPDATE VARIABLES ###################################################################
#######################################################################################################################################
globals().update(config['data'])
globals().update(config['computation'])

initialize_utility_functions_config_from_curent_notebook(config)

#######################################################################################################################################
###################################################### PATH + FOLDER CREATION #########################################################
#######################################################################################################################################
globals().update(generate_paths(path_type='data_creation'))
generate_directory_structure()

#######################################################################################################################################
############################################################ SLEEP TIMER ##############################################################
#######################################################################################################################################
sleep_minutes(sleep_time)

<class 'KeyError'>
<class 'KeyError'>


In [7]:
print(path_identifier_polynomial_data)

poly_1000_train_5000_var_15_d_3_negd_0_prob_0_spars_15_amin_-1_amax_1_xdist_uniform_noise_normal_0bmin0.2bmax0.4lowd0.5azero0.25arand0.1_diffX


# Function Generation

In [None]:
def generate_random_decision_tree():
    
    
    
number_of_variables,
maximum_depth,
num_classes,
fully_grown
    

In [None]:
def generate_decision_tree_data(n_samples, noise, noise_dist, seed):
    
    decision_tree = generate_random_decision_tree()
    
    return decision_tree, X_data, y_data

In [10]:
if same_training_all_lambda_nets:
    parallel = Parallel(n_jobs=n_jobs, verbose=3, backend='multiprocessing')
    result_list = parallel(delayed(generate_decision_tree_data)(polynomial_array=list_of_polynomials[i], 
                                                               n_samples=lambda_dataset_size,
                                                               noise=noise,
                                                               noise_dist=noise_distrib, 
                                                               seed=RANDOM_SEED, 
                                                               sympy_calculation=False) for i in range(polynomial_data_size))  
else:
    parallel = Parallel(n_jobs=n_jobs, verbose=3, backend='multiprocessing')
    result_list = parallel(delayed(gen_regression_symbolic)(polynomial_array=list_of_polynomials[i], 
                                                               n_samples=lambda_dataset_size,
                                                               noise=noise,
                                                               noise_dist=noise_distrib, 
                                                               seed=RANDOM_SEED+i, 
                                                               sympy_calculation=False) for i in range(polynomial_data_size))

X_data_list = [[pd.Series(result[0],  index=list_of_monomial_identifiers_string), pd.DataFrame(result[1], columns=list(variables[:n]))] for result in result_list]
y_data_list = [[pd.Series(result[0],  index=list_of_monomial_identifiers_string), pd.DataFrame(result[2], columns=['result'])] for result in result_list]



[Parallel(n_jobs=5)]: Using backend MultiprocessingBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  22 tasks      | elapsed:   15.2s
[Parallel(n_jobs=5)]: Done 118 tasks      | elapsed:   37.9s
[Parallel(n_jobs=5)]: Done 278 tasks      | elapsed:  1.2min
[Parallel(n_jobs=5)]: Done 502 tasks      | elapsed:  2.0min
[Parallel(n_jobs=5)]: Done 790 tasks      | elapsed:  3.3min
[Parallel(n_jobs=5)]: Done 1000 out of 1000 | elapsed:  3.9min finished


In [11]:
X_data_list[0][0].head(10)

300000000000000   0.000
210000000000000   0.000
201000000000000   0.000
200100000000000   0.000
200010000000000   0.000
200001000000000   0.000
200000100000000   0.000
200000010000000   0.000
200000001000000   0.000
200000000100000   0.000
dtype: float64

In [12]:
X_data_list[0][1].head(10)

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o
0,0.549,0.715,0.603,0.545,0.424,0.646,0.438,0.892,0.964,0.383,0.792,0.529,0.568,0.926,0.071
1,0.087,0.02,0.833,0.778,0.87,0.979,0.799,0.461,0.781,0.118,0.64,0.143,0.945,0.522,0.415
2,0.265,0.774,0.456,0.568,0.019,0.618,0.612,0.617,0.944,0.682,0.36,0.437,0.698,0.06,0.667
3,0.671,0.21,0.129,0.315,0.364,0.57,0.439,0.988,0.102,0.209,0.161,0.653,0.253,0.466,0.244
4,0.159,0.11,0.656,0.138,0.197,0.369,0.821,0.097,0.838,0.096,0.976,0.469,0.977,0.605,0.739
5,0.039,0.283,0.12,0.296,0.119,0.318,0.414,0.064,0.692,0.567,0.265,0.523,0.094,0.576,0.929
6,0.319,0.667,0.132,0.716,0.289,0.183,0.587,0.02,0.829,0.005,0.678,0.27,0.735,0.962,0.249
7,0.576,0.592,0.572,0.223,0.953,0.447,0.846,0.699,0.297,0.814,0.397,0.881,0.581,0.882,0.693
8,0.725,0.501,0.956,0.644,0.424,0.606,0.019,0.302,0.66,0.29,0.618,0.429,0.135,0.298,0.57
9,0.591,0.574,0.653,0.652,0.431,0.897,0.368,0.436,0.892,0.806,0.704,0.1,0.919,0.714,0.999


In [13]:
y_data_list[0][0].head(10)

300000000000000   0.000
210000000000000   0.000
201000000000000   0.000
200100000000000   0.000
200010000000000   0.000
200001000000000   0.000
200000100000000   0.000
200000010000000   0.000
200000001000000   0.000
200000000100000   0.000
dtype: float64

In [14]:
y_data_list[0][1].head(10)

Unnamed: 0,result
0,0.149
1,0.25
2,0.05
3,-0.125
4,-0.085
5,0.198
6,0.083
7,0.247
8,0.587
9,-0.159


In [15]:
path_polynomials = './data/saved_polynomial_lists/polynomials_sample_' + path_identifier_polynomial_data + '.csv'
polynomials_list_df.to_csv(path_polynomials, index=False)

path_X_data = './data/saved_polynomial_lists/X_sample_' + path_identifier_polynomial_data + '.pkl'
with open(path_X_data, 'wb') as f:
    pickle.dump(X_data_list, f)#, protocol=2)
    
path_y_data = './data/saved_polynomial_lists/y_sample_' + path_identifier_polynomial_data + '.pkl'
with open(path_y_data, 'wb') as f:
    pickle.dump(y_data_list, f)#, protocol=2)
