### REQUIRE

In [91]:
!pip install torchviz

### Utils

In [92]:
import os
import shutil

class Util_class():

  """
  dict_in : dictionary
  list_key : list of key
  return True if ALL key in list_key is in dict
  """
  @staticmethod
  def check_key_in_dict(dict_in, list_key):
    is_in = True
    list_not_in = []
    for key in list_key:
      if key not in dict_in:
        is_in = False
        list_not_in.append(key)
    return [is_in,list_not_in]

  @staticmethod
  def same_key_in_dict(dict_in, list_key):
    is_in = True
    key_not_dict = []
    key_not_list = []

    for key in list_key:
      if key not in dict_in:
        is_in = False
        key_not_dict.append(key)

    for key in dict_in:
      if key not in list_key:
        is_in = False
        key_not_list.append(key)
    return [is_in,key_not_dict,key_not_list]

  @staticmethod
  def folder_manage(path, uniquify=True,clean=False, force=False):
    last_folder = os.path.basename(os.path.normpath(path))
    head_path = os.path.dirname(os.path.normpath(path))

    #head of path exist
    if os.path.exists(head_path):
        #path last folder not exist
        if not os.path.exists(path):
            os.makedirs(path)
            return os.path.normpath(path)
        #path last folder  exist
        else:
            if uniquify:
                counter = 1
                while os.path.exists(path):
                    path = head_path + "/" + last_folder  + "(" + str(counter) + ")"
                    counter += 1
                os.makedirs(path)
            #empty last folder
            elif clean:
                if force:
                    shutil.rmtree(path)
                    os.makedirs(path)
                else:
                    print(f'Enter YES or Y to delete all file or directory from: {path}')
                    input_clean = input()
                    if input_clean in ["YES","Y","yes","y"]:
                        shutil.rmtree(path)
                        os.makedirs(path)
                    else:
                        raise Util_class_folder_manage_forceDelete(path)
        return os.path.normpath(path)
    else:
        print(f'Enter YES or Y to create directories: {path}')
        input_clean = input()
        if input_clean in ["YES","Y","yes","y"]:
            os.makedirs(path)
            return os.path.normpath(path)
        else:
            raise Util_class_folder_manage_dirnameNotExist(head_path)

class Util_class_folder_manage_dirnameNotExist(Exception):
    """Exception raised for errors in activation function type"""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return f"Directory name '{self.value}' not exist."

class Util_class_folder_manage_forceDelete(Exception):
    """Exception raised for errors in activation function type"""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return f"Not possible force clean the folder: '{self.value}'."

### Load data

In [93]:
import json
import networkx as nx
import numpy as np
from collections import OrderedDict

class LoadDataset():

    def __init__(self,edge_file_name,attribute_file_name,label_file_name,attribute_file_format="normalized_matrix",is_directed_graph=False):
        """
        edge_file : file with all edge (pair of nodes)
        attribute_file : file with all attribute
        label_file :
        attribute_file_format : format od attribute data:
              "normal_matrix" : each row is alredy a frequency normalizzated vector (DEFAULT) es: CORA dataset
              "naive_text" : each row is item text description
        is_directed_graph : boolean, if true is a direct graph else (DEFAULT) is a undirect graph

        """
        self.is_directed_graph = is_directed_graph
        #input shape
        self.input_shape = dict()

        #Structural preprocessing
        self.edge_file_name = edge_file_name
        self.graph = self.edge_createGraph()
        self.edge_adj_matrix = np.array(nx.to_numpy_array(self.graph, nodelist=sorted(self.graph.nodes())))

        #Attribute preprocessing
        self.attribute_file_name = attribute_file_name
        self.attribute_adj_matrix = np.array(self.attribute_createMatrix(attribute_file_format))

        #Class preprocessing
        self.label_file_name = label_file_name
        self.label_vec = self.labels_createVector()




    def get_structural_matrix(self):
        return self.edge_adj_matrix

    def get_attribute_matrix(self):
        return self.attribute_adj_matrix

    def get_vector_matrix(self):
        return self.label_vec

    def get_graph(self):
        return self.graph

    def export_graph(self, pathfile, filename, extention="graphml"):
        path = pathfile+'/'+filename+'.'+extention

        if extention == "graphml":
            nx.write_graphml( self.graph, path)
        elif extention == "gml":
            nx.write_gml( self.graph, path)
        else:
            raise LoadDataset_Exception_Graph_FormatExport_notRecognized(extention)
        return True

    def get_input_shape(self, key):
        return self.input_shape[key]

    def edge_createGraph(self):
        if self.is_directed_graph:
            g = nx.DiGraph()
        else:
            g = nx.Graph()
        try:
            with open(self.edge_file_name, 'r') as edge_file:
                for line in edge_file:
                    edge = line.split()
                    if len(edge) == 3:
                        edge_weight = float(edge[2])
                    else:
                        edge_weight = 1.0
                    if len(edge) == 1:
                        g.add_node(int(edge[0]))
                    else:
                        g.add_edge(int(edge[0]), int(edge[1]), weight = edge_weight)
        except Exception as e:
            raise e
        self.input_shape['net'] = g.number_of_nodes()
        print("Structure dimension:\t",self.input_shape['net'])
        return g

    def attribute_createMatrix(self, attribute_file_format):
        if attribute_file_format == "normalized_matrix":
            try:
                att_matrix = []
                with open(self.attribute_file_name, 'r') as att_file:
                    for line in att_file:
                      att_line = line.replace("\n", "").split(" ")[1:]
                      att_matrix.append([float(n) for n in att_line])
                self.input_shape['att'] = len(att_matrix[0])
                print("Attribute dimension:\t",self.input_shape['att'])
                return att_matrix
            except Exception as e:
                raise e
        elif attribute_file_format == "naive_text":
            print("naive_text to do")
            try:
                att_matrix = []
                with open(self.attribute_file_name, 'r') as att_file:
                    for line in att_file:
                        print(line)
                        break
                    corpus = json.load(att_file)
                    print(corpus)

                return 0
            except Exception as e:
                raise e
        else:
            raise(LoadDataset_Exception_Attribute_Format_notRecognized(attribute_file_format))


    def labels_createVector(self):
        try:
            with open(self.label_file_name, 'r') as label_file:
                node_label_dict = {}
                for line in label_file:
                    split_line = line.replace("\n", "").split(" ")
                    node_id = int(split_line[0])
                    node_label = int(split_line[1])
                    node_label_dict[node_id] = node_label
                # sort the keys (node_ids) of the dictionary
                node_label_dict = OrderedDict(sorted(node_label_dict.items(), key=lambda t: t[0]))
                labels = np.array(list(node_label_dict.values()))
                return labels
        except Exception as e:
            raise e

    def get_labels(self):
        return self.label_vec


class LoadDataset_Exception_Attribute_Format_notRecognized(Exception):
      """Exception raised for errors in list of layers type"""

      def __init__(self, value):
          self.value = value

      def __str__(self):
          return f'{self.value} : type of attribute file format not recognized.'

class LoadDataset_Exception_Graph_FormatExport_notRecognized(Exception):
      """Exception raised for errors in list of layers type"""

      def __init__(self, value):
          self.value = value

      def __str__(self):
          return f'{self.value} : graph format export not recognized.'

### Batch Generator

In [94]:
class DataBatchGenerator():

    def __init__(self, net, att, labels, batch_size, shuffle, net_hadmard_coeff, att_hadmard_coeff):
        self.net = net
        self.att = att
        self.labels = labels
        self.number_of_samples = len(att)
        self.batch_size = batch_size
        self.number_of_batches = self.number_of_samples // batch_size
        self.shuffle = shuffle
        self.net_hadmard_coeff = net_hadmard_coeff
        self.att_hadmard_coeff = att_hadmard_coeff

    def generate(self):
        sample_index = np.arange(self.net.shape[0])

        counter = 0
        if self.shuffle:
            np.random.shuffle(sample_index)

        while (counter*self.batch_size < self.number_of_samples):
            start_samples_index = self.batch_size * counter
            end_samples_index = self.batch_size * (counter + 1)

            #list of samples's index
            samples_index = sample_index[start_samples_index : end_samples_index]

            #submatrix of W and A, cut for sample index
            net_batch = self.net[samples_index, :]
            att_batch = self.att[samples_index, :]
            net_batch_adj = self.net[samples_index, :][:, samples_index]
            node_label = self.labels[samples_index]
            node_index = samples_index

            # B_net and B_att param of hadmard operation
            B_net = np.ones(net_batch.shape)
            B_net[net_batch != 0] = self.net_hadmard_coeff

            B_att = np.ones(att_batch.shape)
            B_att[att_batch != 0] = self.att_hadmard_coeff

            # trasform np array to tensor
            net_batch_tensor = torch.from_numpy(net_batch).float()
            att_batch_tensor = torch.from_numpy(att_batch).float()
            net_batch_adj_tensor = torch.from_numpy(net_batch_adj).float()
            B_net_tensor = torch.from_numpy(B_net).float()
            B_att_tensor = torch.from_numpy(B_att).float()

            inputs = [net_batch_tensor, att_batch_tensor, net_batch_adj_tensor]
            B_params = [B_net_tensor, B_att_tensor]
            batch_info = [node_index, node_label]

            # feed the fit() function with new data
            yield inputs, B_params, batch_info
            counter += 1

### Competitive Kate Layer

In [95]:
import torch
from torch import nn, optim
import warnings

class KCompetitiveLayer(nn.Module):
    """
      dim_imput :
      act : String, activation function "
    """

    def __init__(self, ktop, alpha_factor):

        super(KCompetitiveLayer, self).__init__()
        self.ktop = ktop
        self.alpha_factor = alpha_factor

    def forward(self,x):

        dim_input = x.size()[1]
        k = min(self.ktop, x.size(1))
        if k < self.ktop:
            warnings.warn(f"ktop > input dim; using k={k} instead")

        # Posivite neurons computation
        POS_ktop = int(self.ktop/2)
        POS_values = (x + torch.abs(x))/2
        POS_topk_values, POS_topk_indices = torch.topk(POS_values, k = POS_ktop)
        device = x.device
        batch_size = x.size(0)
        POS_topk_range = torch.arange(batch_size, device=device).unsqueeze(1).repeat(1, POS_ktop)       
        POS_full_indices = torch.reshape(torch.stack([POS_topk_range, POS_topk_indices], axis = 2), [-1, 2])
        POS_sparse_values = torch.reshape(POS_topk_values, [-1])
        POS_reset = torch.sparse_coo_tensor(indices = POS_full_indices.t(),values = POS_sparse_values, size = x.size()).to_dense()
        POS_tmp = self.alpha_factor * torch.sum(POS_values - POS_reset, 1, keepdims=True)
        POS_reset = torch.sparse_coo_tensor(indices = POS_full_indices.t(),values = torch.reshape(POS_topk_values+POS_tmp, [-1]), size = x.size()).to_dense()

        # Negative neurons computation
        NEG_ktop = self.ktop - int(self.ktop/2)
        NEG_values = (x - torch.abs(x))/2
        NEG_topk_values, NEG_topk_indices = torch.topk(-NEG_values,largest =True, k = NEG_ktop)
        NEG_topk_range = torch.tile(torch.unsqueeze(torch.arange(0, NEG_topk_indices.size()[0]), 1), [1, NEG_ktop])
        NEG_full_indices = torch.reshape(torch.stack([NEG_topk_range, NEG_topk_indices], axis = 2), [-1, 2])
        NEG_sparse_values = torch.reshape(NEG_topk_values, [-1])
        NEG_reset = torch.sparse_coo_tensor(indices = NEG_full_indices.t(),values = NEG_sparse_values, size = x.size()).to_dense()
        NEG_tmp = self.alpha_factor * torch.sum(-NEG_values - NEG_reset, 1, keepdims=True)
        NEG_reset = torch.sparse_coo_tensor(indices = NEG_full_indices.t(),values = torch.reshape(NEG_topk_values+NEG_tmp, [-1]), size = x.size()).to_dense()

        # ensamble parts
        total_reset = POS_reset - NEG_reset
        return total_reset

### AutoEncoder

In [96]:
import torch
from torch import nn, optim

class AutoEncoder(nn.Module):
    """
    param dim: original dimension
    param layers_list : sequential sort list of dict : Each item  have a value for "type", as:
                                          DENSE -  hidden layers, with: "features" is dimention of features in output, "act_funtion" is the relative activation funcion,'bias' boolean
                                          DROP  -  dropout, with: "prob" is the percentaul of neuro drop
                                          KCOMP -  kcompetitivelayer,, with "ktop":int #of active neurons at end of computation, "alpha_factor":float coefficent
    param latent_dim : dimension of latent space
    last_isSigm : boolean, True if last activation function of decoder is a sigmoid
    return : autoencoder model
    """

    def __init__(self, dim, layers_list, latent_dim,last_isSigm= True):

        super().__init__()
        self.activation = {}
        self.encoder_list=[]
        self.decoder_list=[]

        last_dim = dim
        for i,layer in enumerate(layers_list):
            if layer['type'] == "DROP":
                prob = layer['prob']
                if isinstance(prob, float) and 0 <= prob <= 1:
                    self.encoder_list.append(torch.nn.Dropout(p=prob))
                    self.decoder_list.insert(0,torch.nn.Dropout(p=prob))
                else:
                    raise AutoEncoder_Exception_DropoutProb(prob)

            elif layer['type'] == "DENSE":
                self.encoder_list.append(torch.nn.Linear(in_features=last_dim, out_features=layer['features'], bias=layer['bias']))
                if layer['act_funtion'] == "RELU":
                    self.encoder_list.append(torch.nn.ReLU())
                    decoder_layer_funact = torch.nn.ReLU()
                elif layer['act_funtion'] == "SIGM":
                    self.encoder_list.append(torch.nn.Sigmoid())
                    decoder_layer_funact = torch.nn.Sigmoid()
                else:
                    raise AutoEncoder_Exception_ActivationFunction(layer['act_funtion'])

                if i == 0 and last_isSigm:
                  decoder_layer_funact = torch.nn.Sigmoid()
                self.decoder_list.insert(0,decoder_layer_funact)
                self.decoder_list.insert(0,torch.nn.Linear(in_features=layer['features'], out_features=last_dim, bias=layer['bias']))
                last_dim = layer['features']
            elif layer['type'] == "KCOMP":
                competitiveLayers = KCompetitiveLayer(layer['ktop'], layer['alpha_factor'])
                self.encoder_list.append(competitiveLayers)
            else:
                raise AutoEncoder_Exception_Type(layer['type'])



        if last_dim != latent_dim:
            raise AutoEncoder_Exception_LatentSpace(last_dim,latent_dim)
        self.encoder = nn.Sequential(*self.encoder_list)
        self.decoder = nn.Sequential(*self.decoder_list)


    def forward(self,x):
        x_latent = self.encoder(x)
        x_hat = self.decoder(x_latent)
        return {"x_input":x,"x_latent":x_latent,"x_output":x_hat}


class AutoEncoder_Exception_Type(Exception):
      """Exception raised for errors in list of layers type"""

      def __init__(self, value):
          self.value = value

      def __str__(self):
          return f'{self.value} : type layer not recognized: it should be a hidden layer linear (DENSE) or dropout layer (DROP).'


class AutoEncoder_Exception_DropoutProb(Exception):
      """Exception raised for errors in list of layers type"""

      def __init__(self, value):
          self.value = value

      def __str__(self):
          if isinstance(self.value, float):
              return f'Dropout should have probability param in range 0 to 1, but receive {self.value}.'
          else:
              return f'Dropout should be a float but receive a {type(self.value)}.'



class AutoEncoder_Exception_ActivationFunction(Exception):
      """Exception raised for errors in activation function type"""

      def __init__(self, value):
          self.value = value

      def __str__(self):
          return f'{self.value} : activation function not recognized: it should be a relu function (RELU), a sigmoid funcion (SIGM).'



class AutoEncoder_Exception_LatentSpace(Exception):
      """Exception raised for errors in list of layers type: last layer in list haven't the latent space dimention"""

      def __init__(self, last_dim,latent_dim):
          self.last_dim = last_dim
          self.latent_dim = latent_dim

      def __str__(self):
          return f'Last layer have {self.last_dim} output dimention but latent space should be {self.latent_dim}.'

### Loss Function

In [97]:
import torch
from torch import nn, optim

all_a = None
all_b = None
class LossFunction(nn.Module):
    """
    loss_functions : list of dictionary:
            loss_name : loss function name,
            coef : coefficent to totla loss function
    matrix_values : dictionary
            net : structure matrix
                y_true : grountruth matrix
                y_late : embedding matrix
                y_pred : predict matrix
             att : semantical matrix
                y_true : grountruth  matrix
                y_late : embedding matrix
                y_pred : predict matrix
    """
    def __init__(self, loss_functions,matrix_values):
      self.loss_functions = loss_functions
      self.matrix_values = matrix_values

    def loss_computate(self,verbose=False):

        loss_total = torch.zeros(1)
        for loss_function in self.loss_functions:
            loss_name = loss_function['loss_name']

            coef = loss_function['coef']
            if isinstance(coef, float) or isinstance(coef, int):
                if loss_name == "structur_proximity_1order":
                    _val = self.structur_proximity_1order( self.matrix_values["net"]["y_late"], self.matrix_values["net"]["y_adj"],self.matrix_values["net"])
                    loss_total.add_(_val.mul(coef))
                elif loss_name == "semantic_proximity_1order":
                    _val = self.semantic_proximity_1order( self.matrix_values["att"]["y_late"], self.matrix_values["net"]["y_adj"])
                    loss_total.add_(_val.mul(coef))
                elif loss_name == "structur_proximity_2order":
                    _val = self.structur_proximity_2order( self.matrix_values["net"]["y_pred"], self.matrix_values["net"]["y_true"], self.matrix_values["net"]["B_param"])
                    loss_total.add_(_val.mul(coef))
                elif loss_name == "semantic_proximity_2order":
                    _val = self.semantic_proximity_2order( self.matrix_values["att"]["y_pred"], self.matrix_values["att"]["y_true"], self.matrix_values["att"]["B_param"])
                    loss_total.add_(_val.mul(coef))
                elif loss_name == "consisency_proximity":
                    _val = self.consisency_proximity( self.matrix_values["net"]["y_late"], self.matrix_values["att"]["y_late"])
                    loss_total.add_(_val.mul(coef))
                elif loss_name == "consisency_compl_proximity":
                    _val = self.consisency_compl_proximity( self.matrix_values["net"]["y_late"], self.matrix_values["att"]["y_late"])
                    loss_total.add_(_val.mul(coef))
                elif loss_name == "square_diff_embedding_proximity":
                    _val = self.square_diff_embedding_proximity( self.matrix_values["net"]["y_late"], self.matrix_values["att"]["y_late"], self.matrix_values["net"]["y_adj"])
                    loss_total.add_(_val.mul(coef))
                else:
                    raise LossFunction_Exception_FuntionNotExist(loss_name)
            else:
                raise LossFunction_Exception_Coeff(loss_name, coef)
            if verbose:
                print("\t",loss_name,"\t->\t",_val,"\tTOT:\t",loss_total)
        if verbose:
            print("----\tTOTAL:\t",loss_total,"\t\t----\n")
        return loss_total


    def structur_proximity_1order(self, hs_emb, w_matrix,oth = None):
        """
        hs_emb : embedding matrix
        w_matrix : structural adjacency matrix
        return a tensor with value 0
        """
        sigmoid_argument = torch.matmul(hs_emb,torch.transpose(hs_emb,0,1))
        labels_1 = w_matrix + torch.eye(w_matrix.size()[0])
        cross_E1 = self.__sigmoid_cross_entropy_with_logits(labels= labels_1, logits= sigmoid_argument,inp=sigmoid_argument,oth=oth)
        labels_2 = torch.ones_like(torch.diag(sigmoid_argument))
        logits_2 = torch.diag(sigmoid_argument)
        cross_E2 = self.__sigmoid_cross_entropy_with_logits(labels= labels_2, logits= logits_2,oth=oth)
        cross_All = cross_E1 - cross_E2
        return torch.mean(cross_All)

    def semantic_proximity_1order(self, hs_emb, w_matrix):
        """
        hs_emb : embedding matrix
        w_matrix : structural adjacency matrix
        return a tensor with value 0
        """
        sigmoid_argument = torch.matmul(hs_emb,torch.transpose(hs_emb,0,1))
        labels_1 = w_matrix + torch.eye(w_matrix.size()[0])
        cross_E1 = self.__sigmoid_cross_entropy_with_logits(labels= labels_1, logits= sigmoid_argument)
        labels_2 = torch.ones_like(torch.diag(sigmoid_argument))
        logits_2 = torch.diag(sigmoid_argument)
        cross_E2 = self.__sigmoid_cross_entropy_with_logits(labels= labels_2, logits= logits_2)
        cross_All = cross_E1 - cross_E2
        return torch.mean(cross_All)

    def structur_proximity_2order(self, ys_true, ys_pred, b_param):
        """
        ys_true : vector of items where each item is a groundtruth matrix
        ys_pred : vector of items where each item is a prediction matrix
        return the sum of 2nd proximity of 2 matrix
        """
        loss_secondary = 0

        for i, y_true in enumerate(ys_true):
            y_pred = ys_pred[i]

            loss_secondary_item = torch.norm(torch.square(torch.sub(y_pred,y_true,alpha=1) * b_param), p=2)
            loss_secondary += loss_secondary_item
        return loss_secondary

    def semantic_proximity_2order(self, ys_true, ys_pred, b_param):
        """
        ys_true : vector of items where each item is a groundtruth matrix
        ys_pred : vector of items where each item is a prediction matrix
        return the sum of 2nd proximity of 2 matrix
        """
        loss_secondary = 0

        for i, y_true in enumerate(ys_true):
            y_pred = ys_pred[i]
            loss_secondary_item = torch.norm(torch.square(torch.sub(y_pred,y_true,alpha=1) * b_param), p=2)
            loss_secondary += loss_secondary_item
        return loss_secondary

    def consisency_proximity(self, hs_net, hs_att):
        """
        hs_net : matrix embedding structure
        hs_att : matrix embedding attribute
        return the consisency proximity value
        """
        loss_secondary = 0

        for i, h_net in enumerate(hs_net):
            h_att = hs_att[i]
            loss_secondary_item = torch.norm(torch.square(torch.sub(h_att,h_net,alpha=1)), p=2)
            loss_secondary += loss_secondary_item
        return loss_secondary


    def consisency_compl_proximity(self, hs_net, hs_att):
        """
        hs_net : matrix embedding structure
        hs_att : matrix embedding attribute
        return the consisency proximity value
        """

        logits = torch.sum(torch.multiply(hs_net, hs_att), dim=1)
        labels = torch.ones_like(logits)
        cross_All = self.__sigmoid_cross_entropy_with_logits(labels= labels, logits= logits)
        return torch.mean(cross_All)

    def __softmax_cross_entropy_with_logits(self, labels, logits):
        _cross_entropy = -torch.sum(F.log_softmax(logits, dim=1) * labels, dim=1)
        return _cross_entropy

    def __sigmoid_cross_entropy_with_logits(self, labels, logits,inp=None, oth=None):
        eps = 1e-12
        _cross_entropy_a = (labels * -torch.log(torch.sigmoid(logits) + eps))
        _cross_entropy_b = (1 - labels) * - torch.log(1 - torch.sigmoid(logits) + eps)
        _cross_entropy = _cross_entropy_a + _cross_entropy_b
        return _cross_entropy

    def square_diff_embedding_proximity(self, hs_net, hs_att, w_matrix):
        """
        ys_true : vector of items where each item is a groundtruth matrix
        ys_pred : vector of items where each item is a prediction matrix
        return the sum of 2nd proximity of 2 matrix
        """

        struct_proximity = self.structur_proximity_1order(hs_net, w_matrix)
        attrib_proximity = self.semantic_proximity_1order(hs_att, w_matrix)
        loss_square = torch.square(attrib_proximity + torch.neg(struct_proximity))
        return loss_square



class LossFunction_Exception_Coeff(Exception):
      """Exception raised for error if coeff is not int or float"""

      def __init__(self, loss_name, value):
          self.value = value
          self.loss_name = loss_name

      def __str__(self):
          return f'Loss "{self.loss_name}" coefficent should be a float or int but receive a {type(self.value)}.'

class LossFunction_Exception_FuntionNotExist(Exception):
      """Exception raised for error if coeff is not int or float"""

      def __init__(self, loss_name):
          self.loss_name = loss_name

      def __str__(self):
          return f'Loss "{self.loss_name}" not exist.'

### Ottimizzatore

In [98]:
import torch
from torch import nn, optim
#import AutoEncoder
#import Loss_function

class OptimizationFunction():

    def __init__(self, opt_config):
        """
          opt_config : list of dictionary:
              opt_name : optimizator function name,
              lr_rate :learning rate
              weight_decay : [OPT - if adam_L2] decay weight param
        """
        self.name_opt = opt_config["opt_name"]
        self.lr_rate = opt_config["lr_rate"]
        if self.name_opt not in ["adam", "adam_L2"]:
            raise OptimizationFunction_Exception_OptimizatorNotExist(self.name_opt)

        if self.name_opt == "adam_L2":
            if "weight_decay" not in opt_config:
                raise OptimizationFunction_OptimizatorParamsMissing(self.name_opt,"weight_decay")
            else:
                self.weight_decay = opt_config["weight_decay"]


    def get_optimizator(self, net_model):
        self.net_params = net_model.parameters()
        if self.name_opt == "adam":
            return torch.optim.Adam(params=self.net_params, lr=self.lr_rate)
        elif self.name_opt == "adam_L2":
            return torch.optim.Adam(params=self.net_params, lr=self.lr_rate, weight_decay=self.weight_decay)
        else:
            raise OptimizationFunction_Exception_OptimizatorNotExist(self.name_opt)

"""
class OptimizationFunction():
  optimizer = optim.SGD([torch.rand((2,2), requires_grad=True)], lr=0.1)
  optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
"""

class OptimizationFunction_Exception_OptimizatorParamsMissing(Exception):
      """Exception raised for error if a param is missing"""

      def __init__(self, name_opt, name_param_missing):
          self.name_opt = name_opt
          self.name_param_missing = name_param_missing

      def __str__(self):
          return f'Optimizator "{self.name_opt}" needs "{self.name_param_missing}" param.'

class OptimizationFunction_Exception_OptimizatorNotExist(Exception):
      """Exception raised for error if optimizator not exist"""

      def __init__(self, opt_name):
          self.opt_name = opt_name

      def __str__(self):
          return f'Optimizator "{self.opt_name}" not exist.'

 ###  Regularization

In [99]:
import torch
from torch import nn, optim
#import AutoEncoder
#import Loss_function

class RegularizationFunction():

    def __init__(self, reg_config):
        """
          reg_config : list of dictionary:
              reg_name : regularization name,
              coeff : regularization coefficent

        """
        self.regularizations = reg_config


    def get_regularization(self, net_model):

        net_params = net_model.parameters()
        loss_reg = 0
        for _reg in self.regularizations:
            reg_lambda = _reg["coeff"]
            reg_name = _reg["reg_name"]
            if reg_name == "L1":
                reg_norm1 = sum(param.abs().sum() for param in net_params)
                loss_reg += reg_lambda * reg_norm1
            elif reg_name == "L2":
                reg_norm2 = sum(param.pow(2.0).sum() for param in net_params)
                loss_reg += reg_lambda * reg_norm2
            else:
                raise RegularizationFunction_Exception_RegularizationNotExist(reg_name)
        return loss_reg



class RegularizationFunction_Exception_RegularizationNotExist(Exception):
      """Exception raised for error if optimizator not exist"""

      def __init__(self, opt_name):
          self.opt_name = opt_name

      def __str__(self):
          return f'Regularization "{self.opt_name}" not exist.'



### Save Model

In [100]:
import os

class CheckpointModel():
    def __init__(self, save_config):
      """
        save_config : dict
            type : ["best_model_loss", "every_tot", "first_train", "last_train"]
            times : [OPT - if type is "every_tot"] int, number of epoch when save
            overwrite : boolean
            path_file : path where save
            name_file : name of file
            path_not_exist_mode: if path not exist: "create","except", default:except
            path_exist_mode: "use","clean","new" default:"use"
      """
      self.type_checkpointer = ["best_model_loss", "every_tot", "first_train", "last_train"]
      self.checher = dict()
      if save_config == None:
          self.checher['enable'] = False
      else:
          self.checher['enable'] = True
          is_types_safe = True
          not_types_safe = list()

          for type_config in save_config['type']:
            if type_config not in self.type_checkpointer:
              is_types_safe = False
              not_types_safe.append(type_config)

          if is_types_safe:
              self.checher["type"] = save_config['type']

              dirpath_save = save_config["path_file"]
              self.checher["name_file"] = save_config["name_file"]

              if "path_not_exist" in save_config:
                  path_not_exist_mode = save_config["path_not_exist"]
              else:
                  path_not_exist_mode = "except"

              if "path_exist" in save_config:
                  path_exist_mode = save_config["path_exist"]
              else:
                  path_exist_mode = "use"


              if os.path.isdir(dirpath_save): #path esiste
                  if path_exist_mode == "use":
                      self.checher["path_file"] = dirpath_save
                  elif path_exist_mode == "clean":
                      self.checher["path_file"] = Util_class.folder_manage(dirpath_save, clean=True)
                  elif path_exist_mode == "new":
                      self.checher["path_file"] = Util_class.folder_manage(dirpath_save, uniquify=True)
                  else:
                      raise CheckpointModel_Exception_ParamPathNotRecoignezed("Exist", path_exist_mode)

              else: #path non esiste
                  if path_not_exist_mode == "create":
                      self.checher["path_file"] = Util_class.folder_manage(dirpath_save, uniquify=True)
                  elif path_not_exist_mode == "except":
                      raise CheckpointModel_Exception_SavePathNotExist(dirpath_save)
                  else:
                      raise CheckpointModel_Exception_ParamPathNotRecoignezed("NotExist", path_exist_mode)

              print("Your model's checkpoint is save in : {fpath}".format(fpath = self.checher["path_file"]))

              if "overwrite" in save_config:
                  self.checher["overwrite"] = save_config["overwrite"]
              else:
                  self.checher["overwrite"] = False

              for type_config in save_config['type']:
                  if type_config == "every_tot":
                      self.checher["times"] = save_config['times']
                      self.checher["next_epoch"] = save_config['times']
                  elif type_config == "best_model_loss":
                      self.checher["last_loss"] = None
          else:
              raise CheckpointModel_Exception_TypeChecker(not_types_safe)

    def checkToSave(self,graphe_model,epoch,epochs,loss, phase=None):
        if not isinstance(graphe_model, GraphEModel):
            raise CheckpointModel_Exception_GraphEModelType(graphe_model)
        else:
          to_save = False

          for type_config in self.checher['type']:
              if type_config == "every_tot":
                  if self.checher['next_epoch'] == epoch:
                      to_save = True
                      self.checher['next_epoch'] += self.checher["times"]
              elif type_config == "best_model_lost":
                  if self.checher['last_loss'] > loss:
                      to_save = True
                      self.checher['last_loss'] = loss
              elif type_config == "first_train":
                  if epoch==1:
                    to_save = True
              elif type_config == "last_train":
                  if epoch == epochs:
                    to_save = True

          if phase is not None:
              _phase = "_phase" + phase
          else:
              _phase = ""
          if to_save:
              if self.checher["overwrite"]:
                  path_chechpoint_file = "{fpath}/{fname}{fphase}.carbo".format(fpath = self.checher["path_file"], fname = self.checher["name_file"], fphase=_phase)
              else:
                  for type_config in self.checher['type']:
                      if type_config in ["every_tot", "first_train", "last_train"]:
                          path_chechpoint_file = "{fpath}/{fname}{fphase}_epoch_{fepoch}.carbo".format(fpath = self.checher["path_file"], fname = self.checher["name_file"], fepoch = epoch, fphase=_phase)
                      elif type_config in ["best_model_lost"]:
                          path_chechpoint_file = "{fphase}_epoch_{fepoch}_loss_{floss:.8f}.carbo".format(fpath = self.checher["path_file"],fname = self.checher["name_file"], fepoch = epoch, floss = loss, fphase=_phase)
              graphe_model.save_model(epoch = epoch, path_file = path_chechpoint_file)
              print("Epoch : ",epoch,"/",epochs,"\tLoss : ", loss, "\tmodel checkpoint saved as: {fpath}".format(fpath=path_chechpoint_file))


class CheckpointModel_Exception_TypeChecker(Exception):
      """Exception raised for errors in activation function type"""

      def __init__(self, value):
          self.value = value

      def __str__(self):
          return f'{self.value} : type of checkpointer not recognized.'

class CheckpointModel_Exception_GraphEModelType(Exception):
      """Exception raised for errors in activation function type"""

      def __init__(self, value):
          self.value = type(value)

      def __str__(self):
          return f'Model should be a GraphEModel object but checker receiver a {self.value} type object.'

class CheckpointModel_Exception_SavePathNotExist(Exception):
      """Exception raised for errors in activation function type"""

      def __init__(self, path):
          self.path = path

      def __str__(self):
          return f"Your model's checkpoint could be save because '{self.path}' not exist."

class CheckpointModel_Exception_ParamPathNotRecoignezed(Exception):
      """Exception raised for errors of path to save embedding is none"""

      def __init__(self,mode,value):
          self.value = value
          self.mode = mode

      def __str__(self):
          return f"{self.mode} modality param {self.value} is recognized."

### Modello

In [101]:
import torch
from torch import nn, optim
from torchviz import make_dot
import _pickle as cPickle
# import Util_class
# import AutoEncoder
# import Loss_function

class GraphEModel(nn.Module):

    def __init__(self, model_config,):
        """
          model_config : dictionary:
              att_dim : dimension of input and output of attribute/semantical space
              att_layers_list : param layers_list : sequential sort list of semantical network architecture
              att_latent_dim : dimension of embedding/latent semantical space

              net_dim : dimension of input and output of structural/network space
              net_layers_list : param layers_list : sequential sort list of structural network architecture
              net_latent_dim : dimension of embedding/latent structural space

              loss_functions : dictionary key: all,net,att
                  - all : A+N training modality, same loss for both
                  - net : A/N>N/A training modality, set loss for net model
                  - att : A/N>N/A training modality, set loss for att model
                  Each item is a list of vector: [Loss_function function name, param]

              regularization_net : list of dictionary of regularization for structure
                      reg_name : regularization function name,
                      coeff : coeff regularization influence

              regularization_att : list of dictionary of regularization for semantical
                      reg_name : regularization function name,
                      coeff : coeff regularization influence


              model_name : string, name of model

              optimizator_net : dictionary - optimizator config for structure
                      opt_name : optimizator function name,
                      lr_rate :learning rate
                      weight_decay : [OPT - if adam_L2] decay weight param

              optimizator_att : dictionary - optimizator config for semantical
                      opt_name : optimizator function name,
                      lr_rate :learning rate
                      weight_decay : [OPT - if adam_L2] decay weight param

              checkpoint_config : configuration for checkpoint

              training_config : string, order to make a training
                  "A>N" : first attribute and then structure
                  "N>A" : first structure and then attribute
                  "A+N" : attribute and structure simultaneously
                  "N+A" : attribute and structure simultaneously
        """
        super(GraphEModel, self).__init__()
        self.epochs_status = dict()

        self.att_dim = model_config["att_dim"]
        self.att_layers_list = model_config["att_layers_list"]
        self.att_latent_dim = model_config["att_latent_dim"]
        self.epochs_status['att'] = 0

        self.net_dim = model_config["net_dim"]
        self.net_layers_list = model_config["net_layers_list"]
        self.net_latent_dim = model_config["net_latent_dim"]
        self.epochs_status['net'] = 0

        self.loss_functions = model_config["loss_functions"]

        self.model_name = model_config["model_name"]

        # Model Autoencoders Initialization
        self.autoEncoder = dict()
        self.autoEncoder['att'] = AutoEncoder(dim=self.att_dim, layers_list=self.att_layers_list, latent_dim=self.att_latent_dim)
        self.autoEncoder['net'] = AutoEncoder(dim=self.net_dim, layers_list=self.net_layers_list, latent_dim=self.net_latent_dim)

        #Optimization Initialization
        self.optimizatior = dict()
        opt_net_obj = OptimizationFunction(model_config['optimizator_net'])
        opt_att_obj = OptimizationFunction(model_config['optimizator_att'])
        self.optimizatior['net'] = opt_net_obj.get_optimizator(self.autoEncoder['net'])
        self.optimizatior['att'] = opt_att_obj.get_optimizator(self.autoEncoder['att'])

        #Regularization Initialization
        self.regularization = dict()
        regularization_net_obj = RegularizationFunction(model_config['regularization_net'])
        regularization_att_obj = RegularizationFunction(model_config['regularization_att'])
        self.regularization['net'] = regularization_net_obj
        self.regularization['att'] = regularization_att_obj

        #self.optimizatior['net'] = torch.optim.Adam(params=self.autoEncoder['net'].parameters(), lr=1e-3,weight_decay=1e-4)
        #self.optimizatior['att'] = torch.optim.Adam(params=self.autoEncoder['att'].parameters(), lr=1e-3,weight_decay=1e-4)

        self.scheduler = dict()
        #self.scheduler['net'] = optim.lr_scheduler.StepLR(opt_net, step_size=15, gamma=0.5)
        #self.scheduler['att'] = optim.lr_scheduler.StepLR(opt_att, step_size=15, gamma=0.5)
        self.scheduler['net'] = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizatior['net'], mode='min',factor=0.1, patience=5)
        self.scheduler['att'] = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizatior['att'], mode='min',factor=0.1, patience=5)

        self.checkpointer = CheckpointModel(model_config["checkpoint_config"])
        self.training_config = model_config['training_config']

        self.space_embedded = { 'net': dict(), 'att': dict(), 'node_label':dict()}


    def get_Model_semantical(self):
        return self.autoEncoder['att']

    def get_Model_structural(self):
        return self.autoEncoder['net']

    def get_Models(self):
        return {"att":self.get_Model_semantical(), "net":self.get_Model_structural()}

    def save_model(self, epoch, path_file):
        torch.save({
            'NET_model_state_dict': self.autoEncoder['net'].state_dict(),
            'ATT_model_state_dict': self.autoEncoder['att'].state_dict(),

            'NET_optimizer_state_dict': self.optimizatior['net'].state_dict(),
            'ATT_optimizer_state_dict': self.optimizatior['att'].state_dict(),

            'epochs_status': self.epochs_status,
            'space_embedded': self.space_embedded,
            'checkpointer': self.checkpointer,

          }, path_file)

    def load_model(self, path_file):
        checkpoint = torch.load(path_file)
        self.autoEncoder['net'].load_state_dict(checkpoint['NET_model_state_dict'])
        self.autoEncoder['att'].load_state_dict(checkpoint['ATT_model_state_dict'])

        self.optimizatior['net'].load_state_dict(checkpoint['NET_optimizer_state_dict'])
        self.optimizatior['att'].load_state_dict(checkpoint['ATT_optimizer_state_dict'])

        self.epochs_status = checkpoint['epochs_status']
        self.space_embedded = checkpoint['space_embedded']
        self.checkpointer = checkpoint['checkpointer']

    def save_embedding(self):
        raise NotImplementedError('GraphE save_embedding not implemented')

    def model_info(self):
        print("STRUCTURAL Model's state_dict :")
        for param_tensor in self.autoEncoder['net'].state_dict():
            print(param_tensor, "\t", self.autoEncoder['net'].state_dict()[param_tensor].size())

        print("SEMANTICAL Model's state_dict :")
        for param_tensor in self.autoEncoder['att'].state_dict():
            print(param_tensor, "\t", self.autoEncoder['att'].state_dict()[param_tensor].size())

        # Print optimizer's state_dict
        print("STRUCTURAL Optimizer's state_dict:")
        for var_name in self.optimizatior['net'].state_dict():
            print(var_name, "\t", self.optimizatior['net'].state_dict()[var_name])
        print("SEMANTICAL Optimizer's state_dict:")
        for var_name in self.optimizatior['att'].state_dict():
            print(var_name, "\t", self.optimizatior['att'].state_dict()[var_name])

    def models_training(self, datagenerator, epochs, path_embedding="/content/", loss_verbose=False):


        if (self.training_config == "A+N") or (self.training_config == "N+A"):

            if isinstance(epochs, dict):
              res = self.models_training_simultaneously(datagenerator, epochs, path_embedding= path_embedding, loss_verbose= loss_verbose)

            else:
              raise GraphEModel_Exception__TrainingEpochType(epochs,self.training_config,int)

        elif self.training_config == "A>N":
            phases = ["att","net"]
            if isinstance(epochs, dict):
              epochs_check =Util_class.check_key_in_dict(epochs,phases)
              if epochs_check[0]:
                  res = self.models_training_2phased(phases,datagenerator, epochs, path_embedding= path_embedding, loss_verbose= loss_verbose)
              else:
                raise GraphEModel_Exception__TrainingEpochItems(epochs,phases,epochs_check[1])
            else:
              raise GraphEModel_Exception__TrainingEpochType(epochs,self.training_config,dict)

        elif self.training_config == "N>A":
            phases = ["net","att"]
            if isinstance(epochs, dict) :
              epochs_check =Util_class.check_key_in_dict(epochs,phases)
              if epochs_check[0]:
                  res = self.models_training_2phased(phases,datagenerator, epochs, path_embedding= path_embedding, loss_verbose= loss_verbose)
              else:
                raise GraphEModel_Exception__TrainingEpochItems(epochs,phases,epochs_check[1])
            else:
              raise GraphEModel_Exception__TrainingEpochType(epochs,self.training_config,dict)

        else:
            raise GraphEModel_Exception__TrainingModality()
        return res

    def models_training_simultaneously(self, datagenerator, epochs, path_embedding="/content/", loss_verbose=False):
        """
        data : DataBatchGenerator, data
        epochs : int, times re-training process do
        """
        outputs = dict()
        losses = []

        if not isinstance(datagenerator, DataBatchGenerator):
            raise GraphEModel_Exception__notDataBatchGeneratorClass(datagenerator)



        epochs_time = epochs["all"]

        tot_epochs = epochs_time + self.epochs_status['att']

        for epoch in range(1, epochs_time+1):


            loss_epoch =  []
            if epoch %2 == 0:
                print("=")
            else:
                print("==")


            node_4batch = list()

            for [input, B_param, batch_info] in datagenerator.generate():
                [net_batch, att_batch, net_batch_adj_tensor] = input
                [B_net, B_att] = B_param
                [node_index, node_labels] = batch_info

                # Output of Autoencoder
                net_comp = self.autoEncoder['net'].forward(net_batch)
                att_comp = self.autoEncoder['att'].forward(att_batch)

                # Calculating the loss function
                loss_values_matrix = {
                    "net": {
                        "y_true" : net_comp["x_input"],
                        "y_late" : net_comp["x_latent"],
                        "y_pred" : net_comp["x_output"],
                        "B_param" : B_net,
                        "y_adj" : net_batch_adj_tensor,
                    },
                    "att": {
                        "y_true" : att_comp["x_input"],
                        "y_late" : att_comp["x_latent"],
                        "y_pred" : att_comp["x_output"],
                        "B_param": B_att,
                        "y_adj" : None,
                    }
                }

                loss_obj = LossFunction(self.loss_functions['all'], loss_values_matrix)
                loss = loss_obj.loss_computate(loss_verbose)
                if torch.isnan(loss):
                    print(loss_values_matrix)
                    raise NotImplementedError('loss is nan')

                regularization_influence_net = self.regularization['net'].get_regularization(self.autoEncoder['net'])
                regularization_influence_att = self.regularization['att'].get_regularization(self.autoEncoder['att'])
                regularization_loss = regularization_influence_net + regularization_influence_att

                loss += regularization_loss
                # Resetta il gradiente
                self.optimizatior['net'].zero_grad()
                self.optimizatior['att'].zero_grad()

                loss.backward()

                # The gradients are set to zero,
                # the the gradient is computed and stored.
                # .step() performs parameter update
                self.optimizatior['net'].step()
                self.optimizatior['att'].step()


                # Storing the losses in a list for plotting
                #losses.append(loss)
                loss_epoch.append(loss.item())
                loss_mean_epoch = sum(loss_epoch) / float(len(loss_epoch))

                if epoch == tot_epochs-1:
                    output_dict_net = {
                        "latent" : net_comp["x_latent"],
                        "output" : net_comp["x_output"],
                    }
                    output_dict_att = {
                        "input" : att_comp["x_input"],
                        "latent" : att_comp["x_latent"],
                        "output" : att_comp["x_output"],
                    }
                    node_info = {
                        "node_index": node_index,
                        "node_label" : node_labels,
                    }

                    output_dict = {
                        "net" : output_dict_net,
                        "att" : output_dict_att,
                        "node_info":node_info
                    }
                    node_4batch.append(output_dict)


            self.epochs_status['att'] += 1
            self.epochs_status['net'] += 1
            epoch_globaly = self.epochs_status['net']

            self.scheduler['net'].step(loss_mean_epoch)
            self.scheduler['att'].step(loss_mean_epoch)
            losses.append(loss_mean_epoch)
            print("Epoch : ",epoch_globaly,"/",tot_epochs,"\tLoss : ",loss_mean_epoch,"\tlr net: ",self.optimizatior['net'].param_groups[0]['lr'],"\tlr att: ",self.optimizatior['att'].param_groups[0]['lr'])

            #pointchecker save a model according by checkpointer config
            self.checkpointer.checkToSave(self, epoch_globaly,tot_epochs, loss_mean_epoch)


            outputs[epoch] = node_4batch

        self.set_embedding(encoder_out=outputs, last_epoch= epochs_time - 1, save=True, path=path_embedding, phases=['att', 'net'])
        return {"output":outputs, "losses":losses, "saved_embedding":True}

    def models_training_2phased(self, phases_list,datagenerator, epochs, path_embedding="/content/", loss_verbose=False):
        check_phase = Util_class.same_key_in_dict(phases_list, ['net','att'])

        if not check_phase[0]:
            raise GraphEModel_Exception__TrainingPhasesNotSame(check_phase)

        outputs = dict()
        losses = dict()
        prev_phase_embedding = {
            'net' : {'index' : None,  'latent' : None},
            'att' : {'index' : None,  'latent' : None},
        }

        if not isinstance(datagenerator, DataBatchGenerator):
            raise GraphEModel_Exception__notDataBatchGeneratorClass(datagenerator)

        for phase in phases_list:
            epochs_time = epochs[phase]
            tot_epochs = epochs_time + self.epochs_status[phase]
            losses[phase] = list()

            if epochs[phase] < 1:
                print(f"No epoch to train for phase: {phase}.")

            for epoch in range(epochs_time):

                loss_epoch =  []

                if epoch %2 == 0:
                    print("=")
                else:
                    print("==")

                node_4batch = list()

                for [input, B_param, batch_info] in datagenerator.generate():
                    [net_batch, att_batch, net_batch_adj_tensor] = input
                    [B_net, B_att] = B_param
                    [node_index, node_labels] = batch_info

                    # Output of Autoencoder
                    autoencoder_component = dict()
                    if phase == "net":
                        autoencoder_component['net'] = self.autoEncoder['net'].forward(net_batch)
                        loss_values_matrix = {
                            "net": {
                                "y_true" : autoencoder_component['net']["x_input"],
                                "y_late" : autoencoder_component['net']["x_latent"],
                                "y_pred" : autoencoder_component['net']["x_output"],
                                "B_param" : B_net,
                                "y_adj" : net_batch_adj_tensor,
                            },
                            "att": {
                                "y_true" : None,#autoencoder_component['att']["x_input"],
                                "y_late" : None,#autoencoder_component['att']["x_latent"],
                                "y_pred" : None,#autoencoder_component['att']["x_output"],
                                "B_param": None,#B_att,
                                "y_adj" : None,
                            }
                        }
                    elif phase == "att":
                        autoencoder_component['att'] = self.autoEncoder['att'].forward(att_batch)
                        loss_values_matrix = {
                            "net": {
                                "y_true" : None,#autoencoder_component['net']["x_input"],
                                "y_late" : self.get_embedding(nodes_list=node_index, phase='net', type_output='tensor'),#autoencoder_component['net']["x_latent"],
                                "y_pred" : None,#autoencoder_component['net']["x_output"],
                                "B_param" : B_net,
                                "y_adj" : net_batch_adj_tensor,
                            },
                            "att": {
                                "y_true" : autoencoder_component['att']["x_input"],
                                "y_late" : autoencoder_component['att']["x_latent"],
                                "y_pred" : autoencoder_component['att']["x_output"],
                                "B_param": B_att,
                                "y_adj" : None,
                            }
                        }
                    # Calculating the loss function


                    '''
                    if phase == 'net':
                        loss_values_matrix[phase]["B_param"] = B_net
                        loss_values_matrix[phase]["y_adj"] = net_batch_adj_tensor
                        loss_values_matrix['att'] = {}
                        loss_values_matrix['att']['x_latent'] = None

                    elif phase == 'att':
                        loss_values_matrix[phase]["B_param"] = B_att
                        loss_values_matrix[phase]["y_adj"] = None
                        loss_values_matrix['net'] = {}
                        loss_values_matrix['net']['x_latent'] = None
                    '''


                    loss_obj = LossFunction(self.loss_functions[phase], loss_values_matrix)

                    loss = loss_obj.loss_computate(loss_verbose)



                    regularization_influence = self.regularization[phase].get_regularization(self.autoEncoder[phase])
                    regularization_loss = regularization_influence

                    loss += regularization_loss

                    # Reset gradient
                    self.optimizatior[phase].zero_grad()


                    #if phase == 'att':
                    #    make_dot(loss).render("loss", format="png")


                    loss.backward(retain_graph=True)
                    # The gradients are set to zero,
                    # the the gradient is computed and stored.
                    # .step() performs parameter update
                    self.optimizatior[phase].step()

                    # Storing the losses in a list for plotting
                    #losses.append(loss)
                    loss_epoch.append(loss.item())

                    # yhat
                    #make_dot(yhat, params=dict(list(model.named_parameters()))).render("rnn_torchviz", format="png")
                    #make_dot(autoencoder_component['att']['x_latent']).render("att_x_latent", format="png")
                    #make_dot(autoencoder_component['att']['x_output']).render("att_x_output", format="png")
                    #make_dot(loss).render("att_loss", format="png")

                    if epoch == epochs_time-1:
                        output_dict = {
                            "input" : autoencoder_component[phase]["x_input"],
                            "latent" : autoencoder_component[phase]["x_latent"],
                            "output" : autoencoder_component[phase]["x_output"],
                        }
                        node_info = {
                            "node_index": node_index,
                            "node_label" : node_labels,
                        }

                        output_dict = {
                            phase : output_dict,
                            "node_info":node_info
                        }
                        node_4batch.append(output_dict)

                loss_mean_epoch = sum(loss_epoch) / float(len(loss_epoch))

                self.epochs_status[phase] += 1
                epoch_globaly = self.epochs_status[phase]

                self.scheduler[phase].step(loss_mean_epoch)
                losses[phase].append(loss_mean_epoch)
                print("Phase : ",phase,"\tEpoch : ",epoch_globaly,"/",tot_epochs,"\tLoss : ",loss_mean_epoch,"\tlr net: ",self.optimizatior['net'].param_groups[0]['lr'],"\tlr att: ",self.optimizatior['att'].param_groups[0]['lr'])

                #raise NotImplementedError('GraphE models_training_phased not implemented')
                self.checkpointer.checkToSave(self, epoch_globaly,tot_epochs, loss_mean_epoch,phase=phase)
            outputs[phase] = node_4batch
            self.set_embedding(encoder_out=outputs, last_epoch= phase, save=True, path=path_embedding, phases=[phase])
        return {"output":outputs, "losses":losses, "saved_embedding":True}




    #"""  prev_phase_embedding[phase]['index'] =
    #prev_phase_embedding[phase]['latent'] =
    #"""

    def get_embedding(self, nodes_list=None, phase='net', type_output='tensor'):

        if phase not in ['net','att','node_label']:
            raise GraphEModel_Exception__EmbeddingKeyNotRecoignezed(phase)
        if nodes_list is None or len(nodes_list) == 0:
            nodes_list = []
            for k in self.space_embedded['node_label']:
                nodes_list.append(k)
            if len(nodes_list)>0:
                return self.get_embedding(nodes_list,phase,type_output)
            else:
                raise GraphEModel_Exception__EmbeddingNodeIdNotFound(-1)
        else:
            embedding_request = None
            for node_id in nodes_list:
                if node_id not in self.space_embedded['node_label']:
                    raise GraphEModel_Exception__EmbeddingNodeIdNotFound(node_id)
                else:
                    if phase == 'node_label':
                        if embedding_request is None:
                            embedding_request = [self.space_embedded[phase][node_id]]
                        else:
                            embedding_request.append(self.space_embedded[phase][node_id])

                    else:
                        if embedding_request is None:
                            embedding_request = self.space_embedded[phase][node_id]
                        else:
                            embedding_request = torch.vstack([embedding_request,self.space_embedded[phase][node_id]])

            if type_output == 'tensor':
                return embedding_request
            elif (type_output == 'np' or type_output == 'numpy') and phase == 'node_label':
                return np.array(embedding_request)
            elif (type_output == 'np' or type_output == 'numpy'):
                return np.array(list(embedding_request.detach().numpy()))
            else:
                raise GraphEModel_Exception__EmbeddingNodeIdNotFound(node_id)

            return embedding_request

    def set_embedding(self, encoder_out, last_epoch, save=False, path=None, phases=["net","att"]):
        """
        batches : epoch batches
        epoch : int, epoch to analized embedding

        RETURN set locally embedding space selected and if save=True it is saved in a file
        """

        for phase in phases:
            print("Set embedding for:\t",phase)
            for batch in range(len(encoder_out[last_epoch])):
                  for i in range(len(encoder_out[last_epoch][batch]['node_info']['node_index'])):
                      node_key = encoder_out[last_epoch][batch]['node_info']['node_index'][i]
                      self.space_embedded[phase][node_key] = encoder_out[last_epoch][batch][phase]['latent'][i].data.clone()

                      self.space_embedded['node_label'][node_key] = encoder_out[last_epoch][batch]['node_info']['node_label'][i]
            if save:
                if path is None:
                    raise GraphEModel_Exception__notPathEmbeddingParam()
                else:
                    path_embedding_file = "{fpath}embedding_{fmodelname}_{fphase}.ecarbo".format(fpath = path, fmodelname = self.model_name, fphase=phase)
                    with open(path_embedding_file, "wb") as fileEmbedding:
                          cPickle.dump(self.space_embedded, fileEmbedding)
                    print(f"Saved embedding for:\t {phase}\t\t on path:\t{path_embedding_file}")

class GraphEModel_Exception__notDataBatchGeneratorClass(Exception):
      """Exception raised for errors of data input type"""

      def __init__(self, value):
          self.value = value

      def __str__(self):
          return f"{type(self.value)} : type of attribute file format not recognized. It should be a 'DataBatchGenerator' istance."

class GraphEModel_Exception__notPathEmbeddingParam(Exception):
      """Exception raised for errors of path to save embedding is none"""

      def __init__(self):
          self.value = None

      def __str__(self):
          return f"Path where save embedding is None."

class GraphEModel_Exception__EmbeddingKeyNotRecoignezed(Exception):
      """Exception raised for errors of path to save embedding is none"""

      def __init__(self,value):
          self.value = value

      def __str__(self):
          return f"{self.value} is not embedding recognized key. Phase accept are: 'net','att' and 'node_label'."
class GraphEModel_Exception__EmbeddingNodeIdNotFound(Exception):
      """Exception raised for errors of path to save embedding is none"""

      def __init__(self,value):
          self.value = value

      def __str__(self):
          return f"Node id '{self.value}' not found."

class GraphEModel_Exception__TrainingModality(Exception):
      """Exception raised for error no training modality recognized"""

      def __init__(self,value):
          self.value = None

      def __str__(self):
          return f"{self.value} is not a modality for training recognized. It should be: 'A+N' or 'N<A' or 'A>N'."

class GraphEModel_Exception__TrainingEpochType(Exception):
      """Exception raised for error no training modality recognized"""

      def __init__(self,value,modality,typeObjRequest):
          self.value = value
          self.modality = modality
          self.typeObjRequest = typeObjRequest

      def __str__(self):
          return f"In modality of training like {self.modality}, epoch value shoud be a {str(self.typeObjRequest)} object but receive an {str(type(self.value))} object."

class GraphEModel_Exception__TrainingEpochItems(Exception):
      """Exception raised for error no training modality recognized"""

      def __init__(self,value, keyRequest, keyMissing):
          self.value = value
          self.keyRequest = keyRequest
          self.keyMissing = (" ").join(keyMissing)

      def __str__(self):
          return f"Epochs array should have {len(self.keyRequest)} items but receive {len(self.value)} items. Key Missin is: {self.keyMissing}"

class GraphEModel_Exception__TrainingPhasesNotSame(Exception):
      """Exception raised for error no training modality recognized"""

      def __init__(self,value,list_check_phases):
          self.value = value
          self.is_same = list_check_phases[0]
          self.key_not_dict = list_check_phases[1]
          self.key_not_list = list_check_phases[2]

      def __str__(self):
          message = "Phase should be same of declaration but:\n"
          if len(self.key_not_dict) > 0:
              _msg = (" ").join(self.key_not_dict)
              message += f"There are input phases key not recognized:\n\t {_msg} \n"

          if len(self.key_not_list) > 0:
              _msg = (" ").join(self.key_not_list)
              message += f"There are  missing phases:\n\t {_msg} \n"

          return message

*testo in corsivo*### Node Classification


In [102]:
import functools as ft
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, rand_score
from sklearn import model_selection as sk_ms
from sklearn.svm import LinearSVC
from sklearn import preprocessing

class NodeClassification():

    def __init__(self, embedding_data, labels, normalize=False):
        if normalize:
            self.data = pd.DataFrame(preprocessing.normalize(embedding_data, norm='l2', axis=1))
        else:
            self.data = pd.DataFrame(embedding_data)
        self.oper_math = ['sum', 'avg']
        self.labels = labels
        self.performal_measure = {
            "accuracy_score" : ft.partial(accuracy_score),
            "precision_macro" : ft.partial(precision_score, average='macro'),
            "precision_micro" : ft.partial(precision_score, average='micro'),
            "recall_macro" : ft.partial(recall_score, average='macro'),
            "recall_micro" : ft.partial(recall_score, average='micro'),
            "f1_macro" : ft.partial(f1_score, average='macro', labels=np.unique(self.labels)),
            "f1_micro" : ft.partial(f1_score, average='micro', labels=np.unique(self.labels)),

            "precision_weighted" : ft.partial(precision_score, average='weighted'),
            "recall_weighted" : ft.partial(recall_score, average='weighted'),
            "f1_weighted" : ft.partial(f1_score, average='weighted', labels=np.unique(self.labels)),
        }

    def split_dataset(self, split_threshold, num_split, random_set):
        """
        split_threshold : float, threshold of test split
        num_split : int, number of split
        random_set : boolean, TRUE if each split is randomly different by others
        return : array, where each item is a dictionary of data's split, with keys X_train, X_test, Y_train, Y_test
        """
        data_splitted = list()
        _splitted_data = None
        for i in range(num_split):
            if random_set or (_splitted_data == None):
                X_train, X_test, Y_train, Y_test = sk_ms.train_test_split(self.data, self.labels, test_size=split_threshold)
                _splitted_data = {
                    "X_train" : X_train,
                    "Y_train" : Y_train,
                    "X_test" : X_test,
                    "Y_test" : Y_test,
                }
            data_splitted.append(_splitted_data)
        return data_splitted



    def classification(self, classifier_name, split_threshold, repetitions, group_by, random_set):
        """
        classifier_name : string, name of classifier. Implemented "svm",
        split_threshold : float, threshold of test split
        repetitions : int, number of repetitions of classification
        random_set : boolean, TRUE if each split is randomly different by others
        group_by : array, math operation group by measure
        return : array, each item is a data's split
        """
        data_splitted = self.split_dataset(split_threshold, repetitions, random_set)

        if classifier_name == "svm":
            classifier = LinearSVC()
        else:
            raise NodeClassification_notClassifierFound(classifier_name)
        measures_performance = dict()

        for n_repetition in range(repetitions):
            data_split = data_splitted.pop(0)
            classifier.fit(data_split["X_train"], data_split["Y_train"])
            predictions = self.prediction(classifier, data_split["X_test"])
            performance_computation = self.performance_computation(data_split["Y_test"], predictions)

            key_iter = "iter_"+str(n_repetition)
            measures_performance[key_iter] = performance_computation

        measure_total = dict()
        for oper in group_by:
            if oper in self.oper_math:
                measure_total[oper] = dict()
                for measure_name in self.performal_measure:
                      measure_total[oper][measure_name] = list()
            else:
                raise NodeClassification_notAggregationRecognizer(oper)

        for iteration_measure in measures_performance:
            for measure_name in self.performal_measure:
                value = measures_performance[iteration_measure][measure_name]
                for oper in group_by:
                    measure_total[oper][measure_name].append(value)
        for oper in group_by:
            for measure_name in self.performal_measure:
                if oper == "avg":
                    measure_total[oper][measure_name] = sum(measure_total[oper][measure_name])/len(measure_total[oper][measure_name])
                elif oper == "sum":
                    measure_total[oper][measure_name] = sum(measure_total[oper][measure_name])
                else:
                    measure_total[oper][measure_name] = 0
        return measure_total


    def prediction(self, model, data):
        return model.predict(data)

    def performance_computation(self, Y_test, Y_pred):
        performance_measure_computed = dict()

        for measure_name in self.performal_measure:
            measure_function = self.performal_measure[measure_name]
            measure_value = measure_function(Y_test, Y_pred)
            performance_measure_computed[measure_name] = measure_value
        return performance_measure_computed


class NodeClassification_notClassifierFound(Exception):
      """Exception raised for not classifier type found"""

      def __init__(self, name):
          self.name = name

      def __str__(self):
          return f"{type(self.name)} : type of classifier not found."

class NodeClassification_notAggregationRecognizer(Exception):
      """Exception raised for not classifier type found"""

      def __init__(self, name):
          self.name = name

      def __str__(self):
          return f" Classification not support '{type(self.name)}' group_by. It's accept: 'sum' or 'avg'."

### Node Clustering




In [103]:
import functools as ft
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, rand_score, davies_bouldin_score, calinski_harabasz_score
from nltk.cluster.util import cosine_distance, euclidean_distance
from nltk.cluster.kmeans import KMeansClusterer

class NodeClustering():

    def __init__(self, data, labels):
        self.data = data
        self.oper_math = ['sum', 'avg']
        self.labels = labels

        self.performal_measure = {
            "rand_score" : ft.partial(rand_score),
            #"davies_bouldin_score" : ft.partial(davies_bouldin_score),
            #"calinski_harabasz_score" : ft.partial(calinski_harabasz_score),
        }

    def classic_clusterizzation(self, repetitions, group_by):
        """
        repetitions : int, number of repetitions of classification
        group_by : array, math operation group by measure
        return : array, each item is a data's split
        """

        classic_centroid_number = len(np.unique(self.labels))

        measures_performance = dict()

        for n_repetition in range(repetitions):
            kmeans = KMeansClusterer(classic_centroid_number, distance=cosine_distance, normalise=True, avoid_empty_clusters=True)
            Y_pred = kmeans.cluster(self.data, assign_clusters=True)
            performance_computation = self.performance_computation(self.labels, Y_pred)
            key_iter = "iter_"+str(n_repetition)
            measures_performance[key_iter] = performance_computation
        measure_total = dict()
        for oper in group_by:
            if oper in self.oper_math:
                measure_total[oper] = dict()
                for measure_name in self.performal_measure:
                    if measure_name not in self.performal_measure:
                        raise NodeClustering_notPerformanceRecognizer(measure_name)
                    else:
                        measure_total[oper][measure_name] = list()
            else:
                raise NodeClustering_notAggregationRecognizer(oper)

        for iteration_measure in measures_performance:
            for measure_name in self.performal_measure:
                value = measures_performance[iteration_measure][measure_name]
                for oper in group_by:
                    measure_total[oper][measure_name].append(value)
        for oper in group_by:
            for measure_name in self.performal_measure:
                if oper == "avg":
                    measure_total[oper][measure_name] = sum(measure_total[oper][measure_name])/len(measure_total[oper][measure_name])
                elif oper == "sum":
                    measure_total[oper][measure_name] = sum(measure_total[oper][measure_name])
                else:
                    measure_total[oper][measure_name] = 0
        return measure_total

    def performance_computation(self, Y_test, Y_pred):
        performance_measure_computed = dict()

        for measure_name in self.performal_measure:
            measure_function = self.performal_measure[measure_name]
            measure_value = measure_function(Y_test, Y_pred)
            performance_measure_computed[measure_name] = measure_value

        return performance_measure_computed

class NodeClustering_notAggregationRecognizer(Exception):
      """Exception raised for not classifier type found"""

      def __init__(self, name):
          self.name = name

      def __str__(self):
          return f" Clustering not support '{type(self.name)}' group_by. It's accept: 'sum' or 'avg'."

class NodeClustering_notPerformanceRecognizer(Exception):
      """Exception raised for not classifier type found"""

      def __init__(self, name):
          self.name = name

      def __str__(self):
          return f" Clustering not support '{type(self.name)}' performance measure."

###  Embedding Visualization

In [104]:
import time
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

class VisualEmbedding():

    def __init__(self, data, labels, reduction=["pca","t-sne"]):
        self.data = data
        self.reduction_methods = reduction
        self.labels = labels

    def embedding_visualization(self,path):
        feat_cols = [ 'att_'+str(i) for i in range(len(self.data[0])) ]
        df_data = pd.DataFrame(self.data,columns=feat_cols)
        df_data['y'] = self.labels
        df_data['label'] = df_data['y'].apply(lambda i: str(i))

        df_visual = df_data.copy()

        #pca-2d
        pca = PCA(n_components=3)
        pca_result = pca.fit_transform(df_data[feat_cols].values)
        df_visual['pca-one'] = pca_result[:,0]
        df_visual['pca-two'] = pca_result[:,1]
        df_visual['pca-three'] = pca_result[:,2]
        print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))

        #t-sne-2pca
        time_start = time.time()
        tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
        tsne_results = tsne.fit_transform(df_data)
        df_visual['tsne-2d-one'] = tsne_results[:,0]
        df_visual['tsne-2d-two'] = tsne_results[:,1]
        print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))


        #t-sne-50pca
        pca_50 = PCA(n_components=50)
        pca_result_50 = pca_50.fit_transform(df_data)
        tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=300)
        tsne_pca_results = tsne.fit_transform(pca_result_50)
        df_visual['tsne-pca50-one'] = tsne_pca_results[:,0]
        df_visual['tsne-pca50-two'] = tsne_pca_results[:,1]
        print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

        #plot
        plt.figure(figsize=(16,7))
        ax1 = plt.subplot(2, 2, 1)
        sns.scatterplot(
            x="pca-one", y="pca-two",
            hue="y",
            palette=sns.color_palette("hls", 10),
            data=df_visual,
            legend="full",
            alpha=1
        )

        ax2 = plt.subplot(2, 2, 2)
        ax = plt.figure(figsize=(16,10)).gca(projection='3d')
        ax.scatter(
            xs=df_visual["pca-one"],
            ys=df_visual["pca-two"],
            zs=df_visual["pca-three"],
            c=df_visual["y"],
            cmap='tab10'
        )
        ax.set_xlabel('pca-one')
        ax.set_ylabel('pca-two')
        ax.set_zlabel('pca-three')

        ax3 = plt.subplot(2, 2, 3)
        plt.figure(figsize=(16,10))
        sns.scatterplot(
            x="tsne-2d-one", y="tsne-2d-two",
            hue="y",
            palette=sns.color_palette("hls", 10),
            data=df_visual,
            legend="full",
            alpha=1
        )

        ax4 = plt.subplot(2, 2, 4)
        plt.figure(figsize=(16,10))
        sns.scatterplot(
            x="tsne-pca50-one", y="tsne-pca50-two",
            hue="y",
            palette=sns.color_palette("hls", 10),
            data=df_visual,
            legend="full",
            alpha=1
        )
        plt.show()
        return df_data



### Testing performance embedding


In [105]:
import pandas as pd
import matplotlib.pyplot as plt


class PerformanceEmbedding():

    def __init__(self, model, embedding_name='att', node_label='node_label'):
        if not isinstance(model, GraphEModel):
            raise PerformanceEmbedding_notModelClass(model)
        self.embedding = model.get_embedding(phase=embedding_name, type_output="numpy")
        self.labels = list(model.get_embedding(phase=node_label, type_output="numpy"))
        self.group_by = ['avg','sum']
        self.cluster_measure = ['rand_score']
        self.classifier_measure = ['accuracy_score','precision_macro','precision_micro','precision_weighted',
                                   'recall_macro','recall_micro','recall_weighted',
                                   'f1_macro','f1_micro','f1_weighted']

    def visualization(self):
        visualemb = VisualEmbedding(self.embedding,self.labels)
        return visualemb.embedding_visualization(None)

    def classification(self, repetitions = 10, classifier_name = "svm", performance_group_by='avg',labeled_data_threshold=None, measures_selected = None, random_set = True):
        if measures_selected is None:
            measures_selected = self.classifier_measure
        else:
            for meas in measures_selected:
                if meas not in self.classifier_measure:
                    raise PerformanceEmbedding_notMeasureExperiment(meas,'NodeClassification')
        if labeled_data_threshold is None:
            labeled_data_threshold = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]


        n_classify = NodeClassification(self.embedding,self.labels, normalize=True)
        measures = dict()
        for split_threshold in labeled_data_threshold:
            measure = n_classify.classification(classifier_name, split_threshold, repetitions, self.group_by, random_set)
            _key = "split_"+str(split_threshold)
            measures[_key] = measure

        return self.performance_measure(measures,measures_selected,performance_group_by)

    def clusterization(self, repetitions = 10, performance_group_by='avg', measures_selected = None):
        if measures_selected is None:
            measures_selected = self.cluster_measure
        else:
            for meas in measures_selected:
                if meas not in self.cluster_measure:
                    raise PerformanceEmbedding_notMeasureExperiment(meas,'NodeClustering')

        n_clusterfy = NodeClustering(self.embedding,self.labels)
        measures = dict()
        measure = n_clusterfy.classic_clusterizzation(repetitions, self.group_by)
        measures["all"] = measure
        return self.performance_measure(measures,measures_selected,performance_group_by)


    def performance_measure(self, measures, measures_selected, groub_by='avg',):
        pd_measure = pd.DataFrame()
        pd_measure['name_measure'] = measures_selected

        for split_name in measures:
            val_col = list()
            for meas_name in measures_selected:
                if meas_name not in measures[split_name][groub_by]:
                    raise PerformanceEmbedding_notMeasure(meas_name)
                else:
                    val_col.append(measures[split_name][groub_by][meas_name])
            pd_measure[split_name] = val_col
        pd_measure.set_index('name_measure')
        return pd_measure


    def loss_plot(self):
        data_plot_losses = [val.item() for val in DAGE_values['losses']]
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
        plt.plot(data_plot_losses,"b.")



class PerformanceEmbedding_notModelClass(Exception):
      """Exception raised for not classifier type found"""

      def __init__(self, obj):
          self.obj = obj

      def __str__(self):
          return f"Model should be a 'GraphEModel' class object but receive a ''{type(self.obj)} object."

class PerformanceEmbedding_notMeasure(Exception):
      """Exception raised for not classifier type found"""

      def __init__(self, name_measure):
          self.name_measure = name_measure

      def __str__(self):
          return f"Percormance '{self.name_measure}' not recognized."

class PerformanceEmbedding_notMeasureExperiment(Exception):
      """Exception raised for not classifier type found"""

      def __init__(self, measure_name, experiment_name):
          self.measure_name = measure_name
          self.experiment_name = experiment_name

      def __str__(self):
          return f"Experiment '{self.experiment_name}' not implement performance called '{self.measure_name}'."

 ## DEMO on the CORA Dataset

In [106]:
torch.set_printoptions(edgeitems=64)

In [107]:
datase_name = 'cora'
path_models_checkpoint = '/content/models_checkpoint'
Util_class.folder_manage(path_models_checkpoint)
path = '/content/dataset/'
Util_class.folder_manage(path)
!git clone https://github.com/MIND-Lab/CAGE.git
!cp -r /content/CAGE/data/* /content/dataset

fatal: destination path 'CAGE' already exists and is not an empty directory.
'cp' is not recognized as an internal or external command,
operable program or batch file.


### CAGE

#### SETTING

In [108]:
edge_file_name = f"CAGE/data/{datase_name}/in_edges.txt"
attribute_file_name = f"CAGE/data/{datase_name}/in_features.txt"
label_file_name = f"CAGE/data/{datase_name}/in_group.txt"

path_model_checkpoint = f"/content/models_checkpoint/CAGE_{datase_name}"
Util_class.folder_manage(path_model_checkpoint)

is_directed_graph = False
attribute_file_format = "normalized_matrix"
dataLoad = LoadDataset(edge_file_name,attribute_file_name,label_file_name,attribute_file_format=attribute_file_format,is_directed_graph=is_directed_graph)

dataLoad.export_graph(f"/content/models_checkpoint/CAGE_{datase_name}",f"CAGE_{datase_name}_graph")
net_adj = dataLoad.get_structural_matrix()
att_adj = dataLoad.get_attribute_matrix()
label_vec = dataLoad.get_labels()
print(datase_name)

Structure dimension:	 2708
Attribute dimension:	 1433
cora


In [109]:
batch_size = 64
shuffle = True
net_hadmard_coeff = 5.0
att_hadmard_coeff = 5.0

batchGenerator = DataBatchGenerator(net_adj, att_adj, label_vec, batch_size, shuffle, net_hadmard_coeff, att_hadmard_coeff)

#### TRAINING

In [110]:
CAGE_net_layers_list = [
    {'type':'DENSE','features':128,'act_funtion':'RELU','bias':True}
]

CAGE_att_layers_list = [
    {'type':'KCOMP','ktop':200,'alpha_factor': 3.0},
    {'type':'DENSE','features':128,'act_funtion':'RELU','bias':True}
]

CAGE_loss_settings_list = {
    'all' : [
    ],
    'net':[
           {'loss_name':"structur_proximity_1order",'coef':1},
           {'loss_name':"structur_proximity_2order",'coef':1},
    ],
    'att':[
           {'loss_name':"semantic_proximity_2order",'coef':1},
           {'loss_name':"square_diff_embedding_proximity",'coef':1},
    ],
}

CAGE_optimizator_net_settings_list = {
        "opt_name" : "adam",
        "lr_rate" : 1e-4
}

CAGE_optimizator_att_settings_list = {
        "opt_name" : "adam",
        "lr_rate" : 1e-3
}

CAGE_regularization_net_settings_list = [
    {'reg_name': 'L2', 'coeff': 0.001}
]

CAGE_regularization_att_settings_list = [
    {'reg_name': 'L2', 'coeff': 0.001}
]

CAGE_checkpoint_config ={
    "type" : ["best_model_loss","first_train","last_train"],
    "times": 20,
    "overwrite" : False,
    "path_file" : f"/content/models_checkpoint/CAGE_{datase_name}",
    "name_file" : f"CAGE_{datase_name}_checkpoint",
    "path_not_exist": "create",
    "path_exist": "use",
}


CAGE_config = {
    "net_dim" : dataLoad.get_input_shape('net'),
    "net_layers_list" : CAGE_net_layers_list,
    "net_latent_dim" : 128,


    "att_dim" : dataLoad.get_input_shape('att'),
    "att_layers_list" : CAGE_att_layers_list,
    "att_latent_dim" : 128,

   "loss_functions" : CAGE_loss_settings_list,

   "optimizator_net" : CAGE_optimizator_net_settings_list,
   "optimizator_att" : CAGE_optimizator_att_settings_list,

   "regularization_net" : CAGE_regularization_net_settings_list,
   "regularization_att" : CAGE_regularization_att_settings_list,

   "checkpoint_config" : CAGE_checkpoint_config,
   "model_name" : "CAGE_wiki_opt1",
   "training_config" : "N>A",
}

In [111]:
CAGE_model = GraphEModel(CAGE_config)
CAGE_epochs_config = {
    'att' : 10,
    'net' : 10,
}

DAGE_values = CAGE_model.models_training(datagenerator=batchGenerator, loss_verbose=False,epochs=CAGE_epochs_config, path_embedding=f"/content/models_checkpoint/CAGE_{datase_name}/")

Your model's checkpoint is save in : /content/models_checkpoint/CAGE_cora
=
Phase :  net 	Epoch :  1 / 10 	Loss :  8983.334708280341 	lr net:  0.0001 	lr att:  0.001
Epoch :  1 / 10 	Loss :  8983.334708280341 	model checkpoint saved as: /content/models_checkpoint/CAGE_cora/CAGE_cora_checkpoint_phasenet_epoch_1.carbo
==
Phase :  net 	Epoch :  2 / 10 	Loss :  8908.606309047966 	lr net:  0.0001 	lr att:  0.001
=
Phase :  net 	Epoch :  3 / 10 	Loss :  8750.491199582122 	lr net:  0.0001 	lr att:  0.001
==
Phase :  net 	Epoch :  4 / 10 	Loss :  8447.238644622093 	lr net:  0.0001 	lr att:  0.001
=
Phase :  net 	Epoch :  5 / 10 	Loss :  8001.613715593205 	lr net:  0.0001 	lr att:  0.001
==
Phase :  net 	Epoch :  6 / 10 	Loss :  7443.724010378815 	lr net:  0.0001 	lr att:  0.001
=
Phase :  net 	Epoch :  7 / 10 	Loss :  6833.792168195858 	lr net:  0.0001 	lr att:  0.001
==
Phase :  net 	Epoch :  8 / 10 	Loss :  6229.178480991098 	lr net:  0.0001 	lr att:  0.001
=
Phase :  net 	Epoch :  9 / 10 	L

#### TESTING

In [112]:
CAGE_performances = PerformanceEmbedding(CAGE_model,embedding_name='att')

CAGE_class = CAGE_performances.classification(repetitions=10)
CAGE_class

Unnamed: 0,name_measure,split_0.1,split_0.2,split_0.3,split_0.4,split_0.5,split_0.6,split_0.7,split_0.8,split_0.9
0,accuracy_score,0.721402,0.724354,0.724846,0.723339,0.714623,0.708308,0.69884,0.681726,0.63388
1,precision_macro,0.724324,0.719639,0.718935,0.719428,0.703903,0.704677,0.698466,0.687678,0.651061
2,precision_micro,0.721402,0.724354,0.724846,0.723339,0.714623,0.708308,0.69884,0.681726,0.63388
3,precision_weighted,0.724554,0.725328,0.72528,0.723918,0.713598,0.708798,0.701407,0.684808,0.641854
4,recall_macro,0.687765,0.68954,0.691238,0.683233,0.677938,0.667241,0.656423,0.630675,0.56727
5,recall_micro,0.721402,0.724354,0.724846,0.723339,0.714623,0.708308,0.69884,0.681726,0.63388
6,recall_weighted,0.721402,0.724354,0.724846,0.723339,0.714623,0.708308,0.69884,0.681726,0.63388
7,f1_macro,0.699853,0.700612,0.701253,0.696633,0.687861,0.681736,0.671458,0.648996,0.588295
8,f1_micro,0.721402,0.724354,0.724846,0.723339,0.714623,0.708308,0.69884,0.681726,0.63388
9,f1_weighted,0.717588,0.721745,0.721747,0.720076,0.711854,0.704964,0.6957,0.676483,0.622907


In [113]:
CAGE_clust = CAGE_performances.clusterization(repetitions=10)
CAGE_clust

Unnamed: 0,name_measure,all
0,rand_score,0.79252


In [114]:
# assume your DataFrame is called pd_measure
CAGE_class.to_csv("cage_performance.csv", index=False)