In [4]:
import torch
import numpy as np
from torch import nn 
# Seed both RNGs used in this notebook (NumPy picks the feature subset and the
# initial leaf table, torch draws the inputs and layer weights) so that a
# Restart & Run All reproduces the same numbers.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Synthetic batch: 100 rows with 16 features each, standard-normal entries.
random_input = torch.randn(size=(100, 16))

In [None]:
import torch
import numpy as np
from torch import nn 
# Synthetic batch: 100 rows with 16 standard-normal features each
# (same statement as the notebook's first cell).
random_input = torch.randn(100, 16)

In [112]:
# Forest-level settings, kept as a plain mapping so they can be splatted
# into a constructor with ``**params``.
params = {
    "n_tree": 5,
    "tree_depth": 3,
    "n_in_feature": 5,
    "tree_feature_rate": 0.1,
    "n_class": 2,
    "jointly_training": True,
}

In [15]:
# Hand-picked settings for the single-tree walkthrough.
depth, n_class, used_feature_rate = 3, 2, 0.4

# Quantities derived from the input batch and the settings above.
n_in_feature = random_input.size(1)
n_leaf = 2 ** depth
n_used_feature = int(used_feature_rate * n_in_feature)

In [118]:
# Keyword arguments for ``Tree(**tree_params)``; the input width is read from
# the synthetic batch, the other values are fixed for this walkthrough.
tree_params = {
    "depth": depth,
    "n_in_feature": random_input.size(1),
    "used_feature_rate": 0.4,
    "n_class": 2,
    "jointly_training": True,
}

In [7]:
# Identity matrix: row i is the one-hot vector of input feature i.
onehot = np.identity(n_in_feature)

In [11]:
# Draw n_used_feature distinct feature indices out of range(n_in_feature).
using_idx = np.random.choice(n_in_feature, size=n_used_feature, replace=False)
using_idx

array([ 1, 14,  5,  2,  0, 12])

In [14]:
from torch.nn.parameter import Parameter

# Column j of the mask is the one-hot vector of the j-th selected feature, so
# ``x @ feature_mask`` keeps exactly the selected input columns.
# Stored as a float32 parameter that is excluded from gradient updates.
feature_mask = Parameter(
    torch.from_numpy(onehot[using_idx].T).float(),
    requires_grad=False,
)

In [17]:
# Leaf score table, one row per leaf and one column per class, initialised
# uniformly in [0, 1) and registered as a trainable float32 parameter.
pi = Parameter(
    torch.from_numpy(np.random.rand(n_leaf, n_class)).float(),
    requires_grad=True,
)
pi

Parameter containing:
tensor([[0.4753, 0.0416],
        [0.7423, 0.3228],
        [0.4248, 0.3834],
        [0.5319, 0.6228],
        [0.3969, 0.5138],
        [0.9407, 0.6633],
        [0.0394, 0.9293],
        [0.2009, 0.4902]], requires_grad=True)

In [18]:
from collections import OrderedDict
# Split-probability head: one linear unit per node slot, squashed to (0, 1).
decision = nn.Sequential()
decision.add_module('linear1', nn.Linear(n_used_feature, n_leaf))
decision.add_module('sigmoid', nn.Sigmoid())
decision

Sequential(
  (linear1): Linear(in_features=6, out_features=8, bias=True)
  (sigmoid): Sigmoid()
)

In [22]:
feature_mask.shape , random_input.shape

(torch.Size([16, 6]), torch.Size([100, 16]))

In [2]:
from torch import nn 
class Tree(nn.Module):
    """Soft decision tree that routes every sample to all leaves with a probability.

    The tree reads a random subset of the input features, feeds them through a
    linear layer followed by a sigmoid to get one split probability per node
    slot, and multiplies these probabilities along every root-to-leaf path.
    Each leaf owns one row of the class-score table ``pi``.

    :param depth: number of split levels; the tree has ``2 ** depth`` leaves
    :param n_in_feature: number of columns in the input rows
    :param used_feature_rate: fraction of input features used by this tree
    :param n_class: number of columns in the leaf table ``pi``
    :param jointly_training: if true, ``pi`` is randomly initialised and
        trainable; otherwise it is a uniform table excluded from gradients
    """

    def __init__(self, depth, n_in_feature, used_feature_rate, n_class, jointly_training=True):
        # nn.Module refuses Parameter / sub-module assignment until its own
        # __init__ has run, so this call has to come first.
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2 ** depth
        self.n_class = n_class
        self.jointly_training = jointly_training

        # used features in this tree: column j of the mask is the one-hot
        # vector of the j-th randomly selected input feature
        n_used_feature = int(n_in_feature * used_feature_rate)
        onehot = np.eye(n_in_feature)
        using_idx = np.random.choice(np.arange(n_in_feature), n_used_feature, replace=False)
        feature_mask = torch.from_numpy(onehot[using_idx].T).type(torch.FloatTensor)
        self.feature_mask = nn.Parameter(feature_mask, requires_grad=False)

        # leaf label distribution
        if jointly_training:
            pi = np.random.rand(self.n_leaf, n_class)
            self.pi = nn.Parameter(torch.from_numpy(pi).type(torch.FloatTensor), requires_grad=True)
        else:
            pi = np.ones((self.n_leaf, n_class)) / n_class
            self.pi = nn.Parameter(torch.from_numpy(pi).type(torch.FloatTensor), requires_grad=False)

        # decision head: one sigmoid unit per node slot (slot 0 is never read)
        self.decision = nn.Sequential()
        self.decision.add_module('linear1', nn.Linear(n_used_feature, self.n_leaf))
        self.decision.add_module('sigmoid', nn.Sigmoid())

    def forward(self, x):
        """
        :param x: [batch_size,n_features]
        :return: route probability: [batch_size,n_leaf]
        """
        # Use a local copy of the mask on the input's device.  Re-assigning
        # ``self.feature_mask`` with the plain tensor returned by ``.cuda()``
        # is rejected by nn.Module because the name is a registered parameter.
        feature_mask = self.feature_mask
        if feature_mask.device != x.device:
            feature_mask = feature_mask.to(x.device)

        feats = torch.mm(x, feature_mask)  # ->[batch_size,n_used_feature]
        decision = self.decision(feats)  # ->[batch_size,n_leaf]

        # pair every split probability with its complement
        decision = torch.unsqueeze(decision, dim=2)
        decision = torch.cat((decision, 1 - decision), dim=2)  # -> [batch_size,n_leaf,2]

        # compute route probability
        # note: we do not use decision[:,0]
        batch_size = x.size(0)
        _mu = x.new_ones((batch_size, 1, 1))
        begin_idx, end_idx = 1, 2
        for n_layer in range(self.depth):
            # duplicate each node's probability for its two children ...
            _mu = _mu.view(batch_size, -1, 1).repeat(1, 1, 2)
            # ... and weight them with this level's slice of split probabilities
            _decision = decision[:, begin_idx:end_idx, :]  # -> [batch_size,2**n_layer,2]
            _mu = _mu * _decision  # -> [batch_size,2**n_layer,2]
            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (n_layer + 1)

        return _mu.view(batch_size, self.n_leaf)

    def get_pi(self):
        """Return the leaf table: softmax over classes when jointly trained, raw ``pi`` otherwise."""
        if self.jointly_training:
            return torch.softmax(self.pi, dim=-1)
        return self.pi

    def cal_prob(self, mu, pi):
        """
        :param mu [batch_size,n_leaf]
        :param pi [n_leaf,n_class]
        :return: label probability [batch_size,n_class]
        """
        return torch.mm(mu, pi)

    def update_pi(self, new_pi):
        """Overwrite the stored leaf table with ``new_pi`` without touching autograd history."""
        self.pi.data = new_pi

import numpy as np
# Smoke test: build one tree from literal settings and show its repr.
tree_param = {
    "depth": 3,
    "n_in_feature": 16,
    "used_feature_rate": 0.4,
    "n_class": 2,
    "jointly_training": True,
}
Tree(**tree_param)

In [26]:
feature_mask

Parameter containing:
tensor([[0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]])

In [115]:
class Tree(nn.Module):
    """Soft decision tree that routes every sample to all leaves with a probability.

    The tree reads a random subset of the input features, feeds them through a
    linear layer followed by a sigmoid to get one split probability per node
    slot, and multiplies these probabilities along every root-to-leaf path.
    Each leaf owns one row of the class-score table ``pi``.

    :param depth: number of split levels; the tree has ``2 ** depth`` leaves
    :param n_in_feature: number of columns in the input rows
    :param used_feature_rate: fraction of input features used by this tree
    :param n_class: number of columns in the leaf table ``pi``
    :param jointly_training: if true, ``pi`` is randomly initialised and
        trainable; otherwise it is a uniform table excluded from gradients
    :param kwargs: accepted and ignored, so a larger settings dict can be splatted in
    """

    def __init__(self, depth, n_in_feature, used_feature_rate, n_class, jointly_training=True, **kwargs):
        super(Tree, self).__init__()
        self.depth = depth
        self.n_leaf = 2 ** depth
        self.n_class = n_class
        self.jointly_training = jointly_training

        # used features in this tree: column j of the mask is the one-hot
        # vector of the j-th randomly selected input feature
        n_used_feature = int(n_in_feature * used_feature_rate)
        onehot = np.eye(n_in_feature)
        using_idx = np.random.choice(np.arange(n_in_feature), n_used_feature, replace=False)
        feature_mask = torch.from_numpy(onehot[using_idx].T).type(torch.FloatTensor)
        self.feature_mask = nn.Parameter(feature_mask, requires_grad=False)

        # leaf label distribution
        if jointly_training:
            pi = np.random.rand(self.n_leaf, n_class)
            self.pi = nn.Parameter(torch.from_numpy(pi).type(torch.FloatTensor), requires_grad=True)
        else:
            pi = np.ones((self.n_leaf, n_class)) / n_class
            self.pi = nn.Parameter(torch.from_numpy(pi).type(torch.FloatTensor), requires_grad=False)

        # decision head: one sigmoid unit per node slot (slot 0 is never read)
        self.decision = nn.Sequential()
        self.decision.add_module('linear1', nn.Linear(n_used_feature, self.n_leaf))
        self.decision.add_module('sigmoid', nn.Sigmoid())

    def forward(self, x):
        """
        :param x: [batch_size,n_features]
        :return: route probability: [batch_size,n_leaf]
        """
        # Use a local copy of the mask on the input's device.  Re-assigning
        # ``self.feature_mask`` with the plain tensor returned by ``.cuda()``
        # is rejected by nn.Module because the name is a registered parameter.
        feature_mask = self.feature_mask
        if feature_mask.device != x.device:
            feature_mask = feature_mask.to(x.device)

        feats = torch.einsum("ij,jk->ik", x, feature_mask)  # ->[batch_size,n_used_feature]
        decision = self.decision(feats)  # ->[batch_size,n_leaf]

        # pair every split probability with its complement
        decision = torch.unsqueeze(decision, dim=2)
        decision = torch.cat((decision, 1 - decision), dim=2)  # -> [batch_size,n_leaf,2]

        # compute route probability
        # note: we do not use decision[:,0]
        batch_size = x.size(0)
        _mu = x.new_ones((batch_size, 1, 1))
        begin_idx, end_idx = 1, 2
        for n_layer in range(self.depth):
            # duplicate each node's probability for its two children ...
            _mu = _mu.view(batch_size, -1, 1).repeat(1, 1, 2)
            # ... and weight them with this level's slice of split probabilities
            _decision = decision[:, begin_idx:end_idx, :]  # -> [batch_size,2**n_layer,2]
            _mu = _mu * _decision  # -> [batch_size,2**n_layer,2]
            begin_idx = end_idx
            end_idx = begin_idx + 2 ** (n_layer + 1)

        return _mu.view(batch_size, self.n_leaf)

    def get_pi(self):
        """Return the leaf table: softmax over classes when jointly trained, raw ``pi`` otherwise."""
        if self.jointly_training:
            return torch.softmax(self.pi, dim=-1)
        return self.pi

    def cal_prob(self, mu, pi):
        """
        :param mu [batch_size,n_leaf]
        :param pi [n_leaf,n_class]
        :return: label probability [batch_size,n_class]
        """
        return torch.einsum("ij,jk->ik", mu, pi)

    def update_pi(self, new_pi):
        """Overwrite the stored leaf table with ``new_pi`` without touching autograd history."""
        self.pi.data = new_pi

In [116]:
params

{'n_tree': 5,
 'tree_depth': 3,
 'n_in_feature': 5,
 'tree_feature_rate': 0.1,
 'n_class': 2,
 'jointly_training': True}

In [77]:
# Select the tree's feature columns, then score every node slot.
feats = torch.einsum("ij,jk->ik", random_input, feature_mask)
decision_ = decision(feats)
decision_.size()

torch.Size([100, 8])

In [80]:
# Add a trailing axis, then pair every split probability with its complement
# along that axis.
decision_ = decision_.unsqueeze(2)
decision_comp = 1 - decision_
decision__ = torch.cat([decision_, decision_comp], dim=2)

In [82]:
decision__.size()

torch.Size([100, 8, 2])

In [83]:
from torch.autograd import Variable
batch_size = random_input.size(0)
# Root routing probability: one per sample, same dtype/device as the input.
_mu = random_input.new_ones((batch_size, 1, 1))
# Preview of the per-level expansion: each node is duplicated for its two children.
_mu.view(batch_size, -1, 1).repeat(1, 1, 2).shape

torch.Size([100, 1, 2])

In [62]:
decision__.size()

torch.Size([100, 8, 2])

In [84]:

# Start from the root probability inside this cell: the loop overwrites _mu,
# so relying on the value left by an earlier cell makes a re-run start from
# the previous result instead of from ones.
_mu = random_input.new_ones((batch_size, 1, 1))

# Node slots are consumed level by level starting at index 1
# (decision__[:, 0] is never read).
begin_idx, end_idx = 1, 2
for n_layer in range(depth):
    # duplicate each node's probability for its two children ...
    _mu = _mu.view(batch_size, -1, 1).repeat(1, 1, 2)
    # ... and weight them with this level's split probabilities
    _decision = decision__[:, begin_idx:end_idx, :]  # -> [batch_size,2**n_layer,2]
    _mu = _mu * _decision  # -> [batch_size,2**n_layer,2]
    begin_idx = end_idx
    end_idx = begin_idx + 2 ** (n_layer + 1)

# The loop has no ``break``, so a ``for ... else`` clause would always run;
# a plain statement after the loop says the same thing.
print(_mu.shape)


torch.Size([100, 4, 2])


In [85]:
n_leaf

8

In [86]:
# Merge the (parent node, branch) axes into a single leaf axis.
mu = _mu.reshape(batch_size, n_leaf)
mu

tensor([[0.0605, 0.0966, 0.1787, 0.1497, 0.1074, 0.1621, 0.1098, 0.1351],
        [0.0964, 0.1322, 0.1857, 0.1255, 0.1015, 0.1377, 0.1179, 0.1030],
        [0.0651, 0.2369, 0.3058, 0.1876, 0.0146, 0.0653, 0.0774, 0.0473],
        [0.1028, 0.1340, 0.2970, 0.1987, 0.0311, 0.0921, 0.1208, 0.0235],
        [0.1664, 0.1851, 0.2532, 0.2544, 0.0073, 0.0595, 0.0495, 0.0244],
        [0.0856, 0.0672, 0.0537, 0.0486, 0.3204, 0.1545, 0.0585, 0.2115],
        [0.0615, 0.0280, 0.0986, 0.1576, 0.2292, 0.2623, 0.1078, 0.0550],
        [0.1315, 0.1039, 0.1762, 0.2412, 0.0574, 0.1838, 0.0577, 0.0483],
        [0.1121, 0.0781, 0.1231, 0.1610, 0.1093, 0.1917, 0.1227, 0.1018],
        [0.0360, 0.0522, 0.2684, 0.1950, 0.1055, 0.1742, 0.1302, 0.0385],
        [0.0532, 0.0617, 0.0780, 0.0685, 0.2224, 0.1394, 0.1287, 0.2479],
        [0.1331, 0.1584, 0.1673, 0.1108, 0.0838, 0.1203, 0.1148, 0.1115],
        [0.0803, 0.0579, 0.0542, 0.0746, 0.2691, 0.2063, 0.0588, 0.1989],
        [0.1183, 0.1172, 0.1389, 0.165

In [87]:
import torch.nn.functional as F
# Normalise every leaf's scores into a distribution over classes.
pi_prob = torch.softmax(pi, dim=-1)

In [88]:
mu.size() ,pi_prob.size()

(torch.Size([100, 8]), torch.Size([8, 2]))

In [122]:
# Two independently initialised trees built from the same settings.
tree1, tree2 = Tree(**tree_params), Tree(**tree_params)

# Route the batch through each tree and mix the leaf tables into class probabilities.
mu = tree1(random_input)
p1 = tree1.cal_prob(mu, tree1.get_pi())

mu = tree2(random_input)
p2 = tree2.cal_prob(mu, tree2.get_pi())


In [128]:
# NOTE(review): this call raises the RuntimeError recorded in the output below:
# the equation names three subscripts per operand, but p1 and p2 are 2-D.
# The intended combination is not clear from here; confirm before fixing.
torch.einsum("ijk,ikj->ij",[p1,p2])

RuntimeError: einsum() the number of subscripts in the equation (3) does not match the number of dimensions (2) for operand 0 and no ellipsis was given

In [124]:
# Stack both trees' class probabilities on a new last axis, show the stacked
# shape, then average over the two trees.
print(torch.stack((p1, p2), dim=2).size())
torch.stack((p1, p2), dim=2).sum(dim=2) / 2

torch.Size([100, 2, 2])


tensor([[0.5345, 0.4655],
        [0.5357, 0.4643],
        [0.5222, 0.4778],
        [0.5030, 0.4970],
        [0.5286, 0.4714],
        [0.5199, 0.4801],
        [0.5164, 0.4836],
        [0.5307, 0.4693],
        [0.5256, 0.4744],
        [0.5149, 0.4851],
        [0.5164, 0.4836],
        [0.5329, 0.4671],
        [0.5195, 0.4805],
        [0.5173, 0.4827],
        [0.5278, 0.4722],
        [0.5162, 0.4838],
        [0.5209, 0.4791],
        [0.5380, 0.4620],
        [0.5176, 0.4824],
        [0.5242, 0.4758],
        [0.5251, 0.4749],
        [0.5326, 0.4674],
        [0.5315, 0.4685],
        [0.5157, 0.4843],
        [0.5245, 0.4755],
        [0.5232, 0.4768],
        [0.5146, 0.4854],
        [0.5280, 0.4720],
        [0.5199, 0.4801],
        [0.5237, 0.4763],
        [0.5133, 0.4867],
        [0.5258, 0.4742],
        [0.5354, 0.4646],
        [0.5128, 0.4872],
        [0.5269, 0.4731],
        [0.5246, 0.4754],
        [0.5340, 0.4660],
        [0.5190, 0.4810],
        [0.5

In [None]:
import torch
import numpy as np
from torch import nn 
# Synthetic batch: 100 rows with 16 standard-normal features each
# (same statement as the notebook's first cell).
random_input = torch.randn(100, 16)

In [109]:
# Class probabilities of the hand-built tree: routing probabilities times leaf table.
p = torch.einsum("ij,jk->ik", mu, pi_prob)
# "ij,ik->ik" sums the first operand over j and scales p[i, k] by that row sum.
torch.einsum("ij,ik->ik", p, p)

tensor([[0.4903, 0.5097],
        [0.4975, 0.5025],
        [0.5148, 0.4852],
        [0.5019, 0.4981],
        [0.5255, 0.4745],
        [0.4894, 0.5106],
        [0.4918, 0.5082],
        [0.5166, 0.4834],
        [0.4947, 0.5053],
        [0.4866, 0.5134],
        [0.4697, 0.5303],
        [0.5037, 0.4963],
        [0.4932, 0.5068],
        [0.5177, 0.4823],
        [0.4954, 0.5046],
        [0.4930, 0.5070],
        [0.5147, 0.4853],
        [0.5192, 0.4808],
        [0.4742, 0.5258],
        [0.5038, 0.4962],
        [0.4965, 0.5035],
        [0.5081, 0.4919],
        [0.4759, 0.5241],
        [0.5158, 0.4842],
        [0.5018, 0.4982],
        [0.5028, 0.4972],
        [0.4897, 0.5103],
        [0.5132, 0.4868],
        [0.4981, 0.5019],
        [0.4609, 0.5391],
        [0.4769, 0.5231],
        [0.5001, 0.4999],
        [0.5108, 0.4892],
        [0.5163, 0.4837],
        [0.5115, 0.4885],
        [0.5213, 0.4787],
        [0.4847, 0.5153],
        [0.4702, 0.5298],
        [0.5