In [44]:
from utils import datasets
from utils.parser import * 
import os 
import numpy as np 
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import networkx as nx 
import torch
from torch import nn 
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import average_precision_score

In [97]:
def get_structure_from_adajancency(adajancency):
    """Build the reachability (ancestor) indicator matrix of a hierarchy.

    Parameters
    ----------
    adajancency : numpy.ndarray
        Square matrix of direct connections, in a form accepted by
        ``nx.DiGraph``.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``adajancency``; entry ``[i, j]`` is 1
        when node ``j`` can be reached from node ``i`` in the directed
        graph, and 0 otherwise. The diagonal is not set here.
    """
    hierarchy = nx.DiGraph(adajancency)
    structure = np.zeros(adajancency.shape)
    for node in range(len(adajancency)):
        # Edges point from a class towards its parent, so the nodes that are
        # reachable from `node` (nx.descendants) are its ancestors.
        reachable = list(nx.descendants(hierarchy, node))
        if not reachable:
            continue
        structure[node, reachable] = 1
    return structure

def mymax(prob, R):
    """Propagate scores through the hierarchy with a masked maximum.

    Parameters
    ----------
    prob : torch.Tensor
        Scores indexed as ``(batch, class)``.
    R : numpy.ndarray
        Square matrix; a non-zero ``R[i, j]`` lets class ``i`` contribute to
        the output of class ``j``.

    Returns
    -------
    torch.Tensor
        For each sample and class ``j``, the maximum over ``i`` of
        ``(R + I)[i, j] * prob[:, i]``.
    """
    # Adding the identity lets every class contribute its own score.
    mask = torch.from_numpy((R + np.eye(R.shape[1])).astype(np.float32))
    # (1, N, N) * (batch, N, 1) -> (batch, N, N); reduce over the source axis.
    weighted = mask.unsqueeze(0) * prob.unsqueeze(-1)
    return torch.max(weighted, dim=1).values

# Load the train / validation / test splits and derive the hierarchy matrix
# from the adjacency matrix stored on the training split.
dataset_name = 'seq_FUN'
train, valid, test = initialize_dataset(dataset_name, datasets)
R = get_structure_from_adajancency(train.A)

# Both preprocessors are fitted on the raw training features only; every
# split is then mean-imputed and standardised with those statistics.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean').fit(train.X)
scalar = StandardScaler().fit(train.X)
for split in (train, valid, test):
    split.X = scalar.transform(imputer.transform(split.X))

In [98]:
class HMCModel(nn.Module):
    """Feed-forward multi-label classifier with hierarchy-aware inference.

    The network is a stack of ``Linear -> ReLU -> Dropout`` hidden layers
    followed by a ``Linear -> Sigmoid`` output layer. In training mode the
    raw sigmoid outputs are returned; in evaluation mode they are passed
    through ``mymax`` together with ``R``.

    Parameters
    ----------
    inp_dim : int
        Number of input features.
    hid_dims : sequence of int
        Width of each hidden layer. May be empty, in which case the model is
        a single ``inp_dim -> out_dim`` layer.
    out_dim : int
        Number of output units.
    drop_rate : float
        Dropout probability applied after every hidden layer.
    R : object
        Hierarchy matrix handed to ``mymax`` at evaluation time.
    """

    def __init__(self, inp_dim, hid_dims, out_dim, drop_rate, R):
        super(HMCModel, self).__init__()
        self.hid_dims = hid_dims
        self.R = R
        # Consecutive layer widths; pairing neighbours yields every Linear
        # layer in order and also covers the "no hidden layer" case.
        widths = [inp_dim, *hid_dims, out_dim]
        self.W = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:])]
        )
        self.drop = nn.Dropout(drop_rate)

    def forward(self, x):
        """Return sigmoid scores (training) or ``mymax``-constrained scores (eval)."""
        # Every layer but the last is a hidden layer.
        for i in range(len(self.W) - 1):
            x = self.drop(torch.relu(self.W[i](x)))
        x = torch.sigmoid(self.W[-1](x))
        if self.training:
            return x
        return mymax(x, self.R)

In [99]:
# Convert every split to tensors: float32 features, labels in whatever dtype
# torch.tensor infers from the stored arrays.
train.X = torch.tensor(train.X, dtype=torch.float32)
valid.X = torch.tensor(valid.X, dtype=torch.float32)
test.X = torch.tensor(test.X, dtype=torch.float32)
train.Y = torch.tensor(train.Y)
valid.Y = torch.tensor(valid.Y)
test.Y = torch.tensor(test.Y)
# NOTE: train.to_eval is stored as a one-element tuple (historically the
# result of a trailing comma). The training loop reads the mask as
# `train.to_eval[0]`, so the tuple is kept and written out explicitly.
train.to_eval = (torch.tensor(train.to_eval, dtype=torch.uint8),)
test.to_eval = torch.tensor(test.to_eval, dtype=torch.uint8)

# The network is fitted on the training and validation samples together.
val_dataset = list(zip(valid.X, valid.Y))
train_dataset = list(zip(train.X, train.Y)) + val_dataset
test_dataset = list(zip(test.X, test.Y))

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=4,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=4,
                                          shuffle=False)

# One output unit per class of the hierarchy: mymax multiplies R with the
# network outputs, so the output width has to equal the side length of R.
model = HMCModel(train.X.shape[1], [2000, 2000], R.shape[0], 0.7, R)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

In [100]:
# The loss module is stateless, so it is built once instead of per batch.
criterion = nn.BCELoss()
# train.to_eval is stored as a one-element tuple; element 0 is the mask
# tensor selecting the label columns that enter the loss.
eval_mask = train.to_eval[0]

for epoch in range(13):
    model.train()

    for x, labels in train_loader:
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        output = model(x.float())

        # MCLoss: where the label is 0 use the hierarchy-constrained output;
        # where it is 1 use the constrained maximum taken over the outputs of
        # positively-labelled classes only.
        constr_output = mymax(output, R)
        train_output = labels * output.double()
        train_output = mymax(train_output, R)
        train_output = (1 - labels) * constr_output.double() + labels * train_output

        loss = criterion(train_output[:, eval_mask], labels[:, eval_mask])

        loss.backward()
        optimizer.step()

# Evaluation mode disables dropout and makes the model return its
# hierarchy-constrained output; it only needs to be set once.
model.eval()

predicted_batches, constr_batches, label_batches = [], [], []
# No gradients are needed for inference; skipping them avoids keeping an
# autograd graph alive for every test batch.
with torch.no_grad():
    for x, y in test_loader:
        constrained_output = model(x.float())
        constr_batches.append(constrained_output)
        predicted_batches.append(constrained_output > 0.5)
        label_batches.append(y)

# Concatenate once at the end instead of re-allocating on every batch.
predicted_test = torch.cat(predicted_batches, dim=0)
constr_test = torch.cat(constr_batches, dim=0)
y_test = torch.cat(label_batches, dim=0)

In [101]:
# Column indices of the classes flagged for evaluation.
eval_columns = torch.where(test.to_eval == 1)[0].numpy()

# Micro-averaged average precision over the evaluated classes only.
y_true = y_test[:, eval_columns].numpy()
y_pred = constr_test[:, eval_columns].detach().numpy()
score = average_precision_score(y_true, y_pred, average='micro')
print(score)

0.2895981035027354


# Model from paper

In [89]:
def get_constr_out(x, R):
    """Apply the hierarchy constraint in ``R`` to the network output ``x``.

    ``x`` is cast to double and indexed as ``(batch, class)``; ``R`` is a
    tensor that can be expanded to ``(batch, N, N)`` with ``N = R.shape[1]``.
    The result holds, for every sample and row ``i`` of ``R``, the maximum
    over ``j`` of ``R[i, j] * x[:, j]``.
    """
    n_classes = R.shape[1]
    batch_shape = (len(x), n_classes, n_classes)
    # Repeat the scores along a new row axis so each row of R sees them all.
    scores = x.double().unsqueeze(1).expand(*batch_shape)
    hierarchy = R.expand(*batch_shape)
    return torch.max(hierarchy * scores, dim=2).values


class ConstrainedFFNNModel(nn.Module):
    """ C-HMCNN(h) model - during training it returns the not-constrained output that is then passed to MCLoss

    Parameters
    ----------
    input_dim, hidden_dim, output_dim : int
        Width of the input, of every hidden layer and of the output layer.
    hyperparams : mapping
        Must provide ``'num_layers'`` (total number of linear layers),
        ``'dropout'`` (dropout probability) and ``'non_lin'`` (``'tanh'``
        selects Tanh, anything else selects ReLU).
    R : object
        Hierarchy matrix handed to ``get_constr_out`` in evaluation mode.
    """
    def __init__(self, input_dim, hidden_dim, output_dim, hyperparams, R):
        super(ConstrainedFFNNModel, self).__init__()

        self.nb_layers = hyperparams['num_layers']
        self.R = R

        # Choose each layer's sizes from its position, so that a one-layer
        # network maps input_dim directly to output_dim (the layer is both
        # first and last) instead of ending on hidden_dim.
        last = self.nb_layers - 1
        fc = []
        for i in range(self.nb_layers):
            n_in = input_dim if i == 0 else hidden_dim
            n_out = output_dim if i == last else hidden_dim
            fc.append(nn.Linear(n_in, n_out))
        self.fc = nn.ModuleList(fc)

        self.drop = nn.Dropout(hyperparams['dropout'])

        self.sigmoid = nn.Sigmoid()
        self.f = nn.Tanh() if hyperparams['non_lin'] == 'tanh' else nn.ReLU()

    def forward(self, x):
        """Return sigmoid scores (training) or hierarchy-constrained scores (eval)."""
        last = self.nb_layers - 1
        for i in range(self.nb_layers):
            if i == last:
                # Output layer: sigmoid, no dropout.
                x = self.sigmoid(self.fc[i](x))
            else:
                x = self.drop(self.f(self.fc[i](x)))
        if self.training:
            return x
        return get_constr_out(x, self.R)

In [90]:
# Hyper-parameters of the reference model.
batch_size, num_layers, hidden_dim = 4, 3, 2000
dropout, non_lin = 0.7, 'relu'
lr, weight_decay = 1e-4, 1e-5
num_epochs = 13  # kept separate; it is not part of `hyperparams`

# Bundle handed to the model constructor.
hyperparams = dict(
    batch_size=batch_size,
    num_layers=num_layers,
    dropout=dropout,
    non_lin=non_lin,
    hidden_dim=hidden_dim,
    lr=lr,
    weight_decay=weight_decay,
)


In [92]:
train, val, test = initialize_dataset('seq_FUN', datasets)
# Masks of the classes that take part in evaluation, as uint8 tensors.
train.to_eval = torch.tensor(train.to_eval, dtype=torch.uint8)
val.to_eval = torch.tensor(val.to_eval, dtype=torch.uint8)
test.to_eval = torch.tensor(test.to_eval, dtype=torch.uint8)

# Ancestor matrix of the hierarchy (train.A stores the direct connections),
# built with the shared helper instead of a second copy of the graph walk.
# The diagonal is set so that every class is also related to itself.
R = get_structure_from_adajancency(train.A)
np.fill_diagonal(R, 1)
#Transpose to get the descendants for each node
R = torch.tensor(R).transpose(1, 0)

# Imputer and scaler are fitted on the raw train+validation features, then
# applied (impute first, scale second) to every split.
train_val_X = np.concatenate((train.X, val.X))
scaler = StandardScaler().fit(train_val_X)
imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean').fit(train_val_X)
for split in (val, train, test):
    split.X = torch.tensor(scaler.transform(imp_mean.transform(split.X)))
    split.Y = torch.tensor(split.Y)


In [96]:
train_dataset = list(zip(train.X, train.Y))
# The original script read the dataset name from command-line `args`, which
# does not exist in this notebook; use the notebook-level `dataset_name`.
# Unless the dataset name contains 'others', the validation samples are
# added to the training set.
if 'others' not in dataset_name:
    val_dataset = list(zip(val.X, val.Y))
    train_dataset += val_dataset
test_dataset = list(zip(test.X, test.Y))

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

tensor([[0., 1., 0.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float64)