# **utils.py**

In [None]:
import os
import time
import logging

import re
import yaml
import json

from ast import literal_eval
from nltk import sent_tokenize
from string import punctuation
from collections import Counter

import torch
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt

In [None]:
# Set the JOBLIB_TEMP_FOLDER environment variable to /tmp for this process
# (presumably so joblib places its temporary files there — confirm against the
# joblib version in use).
os.environ['JOBLIB_TEMP_FOLDER'] = '/tmp'

In [None]:
def torch2np(tensor: torch.Tensor) -> np.ndarray:
    """Convert a PyTorch tensor into a NumPy array.

    Args:
        tensor: tensor on any device; it may be part of an autograd graph.

    Returns:
        A NumPy array holding the tensor's values. For CPU tensors the array
        shares memory with the tensor (as `Tensor.numpy()` always does).
    """
    # detach() drops the autograd graph (numpy() refuses tensors that require
    # grad); cpu() is a no-op for tensors that already live on the CPU, so no
    # global CUDA-availability check is needed.
    return tensor.detach().cpu().numpy()


def count_parameter(trainable_variables):
    """Return the total number of scalar entries across the given variables.

    Args:
        trainable_variables: iterable of objects exposing
            ``get_shape().as_list()``.

    Returns:
        The NumPy sum of the per-variable element counts.
    """
    sizes = []
    for variable in trainable_variables:
        dims = variable.get_shape().as_list()
        sizes.append(np.prod(dims))
    return np.sum(sizes)


def min_max_normal(tensor):
    """Min-max normalise `tensor` along its last axis.

    Each slice along the last axis is shifted by its minimum and divided by
    its range (plus 1e-6 to avoid division by zero).

    Args:
        tensor: float tensor of any rank >= 1.

    Returns:
        Tensor with the same shape as `tensor`.
    """
    # keepdims=True leaves a trailing axis of size 1, so the statistics
    # broadcast against `tensor`; no explicit (rank-3 only) tiling is required.
    max_value = tf.reduce_max(tensor, axis=-1, keepdims=True)
    min_value = tf.reduce_min(tensor, axis=-1, keepdims=True)
    return (tensor - min_value) / (max_value - min_value + 1e-6)


def z_score_normal(tensor):
    """Standardise `tensor` (zero mean, unit variance) along its last axis.

    Args:
        tensor: float tensor of any rank >= 1.

    Returns:
        Tensor with the same shape as `tensor`, computed as
        ``(tensor - mean) / (std + 1e-6)``.
    """
    # TF2's tf.nn.moments takes `keepdims`; the TF1 spelling `keep_dims`
    # raises a TypeError. The kept axis lets mean/std broadcast, so no tiling
    # is needed and the input is no longer restricted to rank 3.
    mean, variance = tf.nn.moments(tensor, axes=[-1], keepdims=True)
    std = tf.sqrt(variance)
    return (tensor - mean) / (std + 1e-6)


def plot_history(history, figsize=(6, 9), return_figure: bool=True, **kwargs):
    """
    Draw one subplot per recorded metric, stacked in a single column.

    Every subplot shows the metric against the epoch index. When a list of
    histories is given, all of their training curves share one colour and all
    of their validation curves share another. Validation series are optional
    and are recognised by the ``val_`` prefix of their key.

    Args:
        history: a training history as returned by :meth:`tf.keras.Model.fit`,
            or a list of such histories. The set of metrics is taken from the
            first history.
        figsize (tuple of numbers): ``(height, width)`` of the subplot of a
            single metric; the whole figure is ``width`` wide and
            ``len(metrics) * height`` tall.
        return_figure (bool): if True the figure is returned, otherwise None.
        kwargs: extra keyword arguments forwarded to
            :meth:`matplotlib.pyplot.subplots`.

    Returns:
        :class:`matplotlib.figure.Figure`: the figure when
        ``return_figure=True``, None otherwise.

    Reference:
        https://github.com/stellargraph/stellargraph/blob/develop/stellargraph/utils/history.py
    """

    # Fix the two colours explicitly (taken from matplotlib's default cycle) so
    # that several train series, or several validation series, look alike.
    palette = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    train_color, validation_color = palette[0], palette[1]

    histories = history if isinstance(history, list) else [history]

    def strip_prefix(name, prefix):
        if name.startswith(prefix):
            return name[len(prefix):]
        return name

    metric_names = sorted(
        {strip_prefix(key, "val_") for key in histories[0].history.keys()}
    )

    height, width = figsize

    # squeeze=False keeps the axes array 2-D even for a single metric, so the
    # indexing below needs no special case.
    fig, all_axes = plt.subplots(
        len(metric_names),
        1,
        squeeze=False,
        sharex="col",
        figsize=(width, len(metric_names) * height),
        **kwargs
    )

    has_validation = False
    for ax, metric in zip(all_axes[:, 0], metric_names):
        validation_key = "val_" + metric
        for run in histories:
            ax.plot(run.history[metric], c=train_color)
            if validation_key in run.history:
                ax.plot(run.history[validation_key], c=validation_color)
                has_validation = True
        ax.set_ylabel(metric, fontsize="x-large")

    # The legend goes on the top subplot only ...
    labels = ["train", "validation"] if has_validation else ["train"]
    all_axes[0, 0].legend(labels, loc="best", fontsize="x-large")

    # ... and the x-axis label on the bottom subplot only.
    all_axes[-1, 0].set_xlabel("epoch", fontsize="x-large")

    fig.tight_layout()

    return fig if return_figure else None


def log_summary(model, line_length=None, positions=None):
    """
    Build a textual summary of a Keras model and return it as a string.

    The summary is a table with one row per layer (name/type, output shape,
    parameter count and, for non-sequential models, the inbound connections),
    followed by the total / trainable / non-trainable parameter counts.
    NOTE(review): the structure appears adapted from Keras' own
    ``print_summary`` utility — confirm before relying on parity with it.

    Args:
        model: Keras model instance.
        line_length: Total length of printed lines
            (e.g. set this to adapt the display to different
            terminal window sizes). Defaults to 65 for sequential-like
            models and 98 otherwise.
        positions: Relative or absolute positions of log elements in each line.
            If not provided, defaults to `[.45, .85, 1.]` for sequential-like
            models and `[.33, .55, .67, 1.]` otherwise. Values are treated as
            fractions of `line_length` when the last one is <= 1.

    Return: text of model summary
    """

    def log_row(fields, positions):
        """Format `fields` as one table row; column i ends at `positions[i]`."""
        line = ''
        for i in range(len(fields)):
            if i > 0:
                # Force a separating space in front of every column but the first.
                line = line[:-1] + ' '
            line += str(fields[i])
            # Truncate, then pad with spaces, so the column ends at positions[i].
            line = line[:positions[i]]
            line += ' ' * (positions[i]-len(line))
        return line+'\n'

    def log_layer_summary(layer):
        """
        Log a summary for a single layer.
        Args:
            layer: target layer.
        Returns:
            The formatted row (name/type, output shape, parameter count).
        """
        try:
            output_shape = layer.output_shape
        except AttributeError:
            output_shape = 'multiple'
        except RuntimeError:  # output_shape unknown in Eager mode.
            output_shape = '?'
        name = layer.name
        cls_name = layer.__class__.__name__
        if not layer.built and not getattr(layer, '_is_graph_network', False):
            # If a subclassed model has a layer that is not called in Model.call, the
            # layer will not be built and we cannot call layer.count_params().
            params = '0 (unused)'
        else:
            params = layer.count_params()
        fields = [name + ' (' + cls_name + ')', output_shape, params]
        return log_row(fields, positions)

    def log_layer_summary_with_connections(layer):
        """
        Log a summary for a single layer (including topological connections).
        Args:
            layer: target layer.
        Returns:
            One formatted row for the layer plus one extra row per additional
            inbound connection.
        """
        summary = ''
        try:
            output_shape = layer.output_shape
        except AttributeError:
            output_shape = 'multiple'
        connections = []
        # `relevant_nodes` is a closure variable that is only assigned in the
        # non-sequential branch below, which is also the only branch that
        # calls this helper.
        for node in layer._inbound_nodes:
            if relevant_nodes and node not in relevant_nodes:
                # node is not part of the current network
                continue

            for inbound_layer, node_index, tensor_index, _ in node.iterate_inbound():
                connections.append(f'{inbound_layer.name}[{node_index}][{tensor_index}]')

        name = layer.name
        cls_name = layer.__class__.__name__
        if not connections:
            first_connection = ''
        else:
            first_connection = connections[0]
        fields = [
            name + ' (' + cls_name + ')', output_shape, layer.count_params(), first_connection
        ]
        summary += log_row(fields, positions)
        if len(connections) > 1:
            # Remaining connections get rows of their own with empty leading columns.
            for i in range(1, len(connections)):
                fields = ['', '', '', connections[i]]
                summary += log_row(fields, positions)
        return summary

    # Decide which table layout to use: the 3-column "sequential-like" one or
    # the 4-column one that also lists inbound connections.
    if model.__class__.__name__ == 'Sequential':
        sequential_like = True
    elif not model._is_graph_network:
        # We treat subclassed models as a simple sequence of layers, for logging purposes.
        sequential_like = True
    else:
        sequential_like = True
        nodes_by_depth = model._nodes_by_depth.values()
        nodes = []
        for v in nodes_by_depth:
            if (len(v) > 1) or \
                (len(v) == 1 and len(tf.nest.flatten(v[0].keras_inputs)) > 1):
                # if the model has multiple nodes or if the nodes have multiple inbound_layers,
                # the model is no longer sequential
                sequential_like = False
                break
            nodes += v
        if sequential_like:
            # search for shared layers
            for layer in model.layers:
                flag = False
                for node in layer._inbound_nodes:
                    if node in nodes:
                        if flag:
                            # second node of the same layer found -> layer is shared
                            sequential_like = False
                            break
                        else:
                            flag = True
                if not sequential_like:
                    break

    if sequential_like:
        line_length = line_length or 65
        positions = positions or [.45, .85, 1.]
        if positions[-1] <= 1:
            # relative positions -> absolute column indices
            positions = [int(line_length * p) for p in positions]
        # header names for the different log elements
        to_display = ['Layer (type)', 'Output Shape', 'Param #']
    else:
        line_length = line_length or 98
        positions = positions or [.33, .55, .67, 1.]
        if positions[-1] <= 1:
            # relative positions -> absolute column indices
            positions = [int(line_length * p) for p in positions]
        # header names for the different log elements
        to_display = ['Layer (type)', 'Output Shape', 'Param #', 'Connected to']
        relevant_nodes = []
        for v in model._nodes_by_depth.values():
            relevant_nodes += v

    # Table header.
    summary = f'Model: "{model.name}"\n'
    summary += '_' * line_length + '\n'
    summary += log_row(to_display, positions)
    summary += '=' * line_length + '\n'
    
    # One entry per layer; rows are separated by '_' rules and the table is
    # closed with a '=' rule after the last layer.
    layers = model.layers
    for i in range(len(layers)):
        if sequential_like:
            summary += log_layer_summary(layers[i])
        else:
            summary += log_layer_summary_with_connections(layers[i])
        if i == len(layers) - 1:
            summary += '=' * line_length + '\n'
        else:
            summary += '_' * line_length + '\n'

    # Parameter totals; `count_params` is the module-level helper defined
    # further down in this file.
    if hasattr(model, '_collected_trainable_weights'):
        trainable_count = count_params(model._collected_trainable_weights)
    else:
        trainable_count = count_params(model.trainable_weights)

    non_trainable_count = count_params(model.non_trainable_weights)

    summary += 'Total params: {:,}\n'.format(trainable_count + non_trainable_count)
    summary += 'Trainable params: {:,}\n'.format(trainable_count)
    summary += 'Non-trainable params: {:,}\n'.format(non_trainable_count)
    summary += '_' * line_length + '\n'
    return summary


def count_params(weights):
    """
    Sum the number of scalar entries over a collection of weights.

    Each distinct weight object is counted once, even if it appears several
    times in `weights`; dimensions reported as None contribute a size of 0.

    Args:
        weights: An iterable of objects exposing ``shape.as_list()``
    Returns:
        The total number of scalars, as a Python int
    """
    # Deduplicate by object identity so shared weights are not double counted.
    distinct = {}
    for weight in weights:
        distinct[id(weight)] = weight

    total = 0
    for weight in distinct.values():
        dims = [0 if dim is None else dim for dim in weight.shape.as_list()]
        total += np.prod(dims)
    return int(total)

# **layers.py**

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    Input, InputLayer, Layer, Embedding, 
    Conv1D, Conv2D, Dropout, Dense, 
    Dot, Concatenate, Average, Add,
    Lambda, Reshape, 
    Softmax, Maximum, Minimum,
)
from tensorflow.keras.activations import softmax, sigmoid, relu

In [None]:
def _bernoulli(shape, mean):
    """Sample a float32 tensor of 0/1 values of the given `shape`.

    An entry is 1 where `mean` exceeds a uniform draw from [0, 1) and 0
    otherwise (sign() yields -1/0/+1 and relu() clips the non-positive part).
    """
    noise = tf.random.uniform(shape, minval=0, maxval=1, dtype=tf.float32)
    return tf.nn.relu(tf.sign(mean - noise))


def to_float(tensor):
    """Cast `tensor` to ``tf.float32``."""
    as_float32 = tf.cast(tensor, tf.float32)
    return as_float32


def gelu(x):
    """Gaussian Error Linear Unit (tanh approximation).

    A smoother alternative to ReLU.
    Paper: https://arxiv.org/abs/1606.08415
    Reference implementation:
    https://github.com/google-research/bert/blob/bee6030e31e42a9394ac567da170a89a98d2062f/modeling.py#L264

    Args:
        x: float Tensor to perform activation.
    Returns:
        `x` with the GELU activation applied.
    """
    cubic_term = x + 0.044715 * tf.math.pow(x, 3)
    tanh_arg = np.sqrt(2 / np.pi) * cubic_term
    cdf = 0.5 * (1.0 + tf.math.tanh(tanh_arg))
    return x * cdf


################################## 
#           Keras Layer          #
##################################

class DropBlock2D(Layer):
    """DropBlock regularisation for rank-4 inputs.

    During training, square blocks of `block_size` x `block_size` positions
    (over the two middle axes) are zeroed out, independently per batch element
    and per channel. With `scale=True` the surviving activations are rescaled
    by ``mask_size / sum(mask)``.

    Adopted from https://github.com/DHZS/tf-dropblock

    Args:
        keep_prob: probability of keeping a unit; 1.0 disables dropping.
        block_size: side length of the dropped blocks.
        scale: whether to rescale the output after masking.
        **kwargs: forwarded to `Layer`.
    """

    def __init__(self, keep_prob: float=1.0, block_size: int=1, scale: bool=True, **kwargs):
        super(DropBlock2D, self).__init__(**kwargs)
        self.keep_prob = keep_prob
        self.block_size = block_size
        # Kept as a tensor because it is used as the predicate of tf.cond.
        self.scale = tf.constant(scale, dtype=tf.bool)

    def compute_output_shape(self, input_shape):
        return input_shape

    def build(self, input_shape):
        assert len(input_shape) == 4
        _, self.h, self.w, self.channel = input_shape.as_list()
        # The mask is sampled on a grid shrunk by (block_size - 1) in both
        # spatial axes; this padding restores the original h x w extent.
        p1 = (self.block_size-1) // 2
        p0 = (self.block_size-1) - p1
        self.padding = [[0, 0], [p0, p1], [p0, p1], [0, 0]]
        self.set_keep_prob()
        super(DropBlock2D, self).build(input_shape)

    def _create_mask(self, input_shape):
        """Return a 0/1 keep-mask; `input_shape[0]` supplies the batch size."""
        sampling_mask_shape = tf.stack([input_shape[0],
                                        self.h-self.block_size+1,
                                        self.w-self.block_size+1,
                                        self.channel])
        # Sample block seeds, pad back to full size, then grow every seed into
        # a block_size x block_size square via max-pooling.
        mask = _bernoulli(sampling_mask_shape, self.gamma)
        mask = tf.pad(mask, self.padding)
        mask = tf.nn.max_pool(mask, [1, self.block_size, self.block_size, 1], [1, 1, 1, 1], padding='SAME')
        # Invert: 1 = keep, 0 = drop.
        mask = 1 - mask
        return mask

    def call(self, inputs, training=None, **kwargs):
        """Apply DropBlock when training and `keep_prob` != 1; otherwise pass through.

        Args:
            inputs: rank-4 tensor matching the shape seen in `build`.
            training: Python bool or tensor; when None the Keras learning
                phase is used.
        """
        def drop():
            mask = self._create_mask(tf.shape(inputs))
            mask_size = to_float(tf.size(mask))
            inputs_masked = inputs * mask
            output = tf.cond(self.scale,
                             true_fn=lambda: inputs_masked*mask_size/tf.reduce_sum(mask),
                             false_fn=lambda: inputs_masked)
            return output

        if training is None:
            training = K.learning_phase()
        # tf.cast accepts Python bools/ints as well as tensors, whereas
        # bool(tensor) fails on a symbolic learning-phase tensor.
        is_training = tf.cast(training, tf.bool)
        output = tf.cond(
            tf.logical_or(tf.logical_not(is_training),
                          tf.equal(self.keep_prob, 1.0)),
            true_fn=lambda: inputs,
            false_fn=drop
        )
        return output

    def set_keep_prob(self, keep_prob=None):
        """Update `keep_prob` (if given) and recompute the seed rate `gamma`.

        This method only supports Eager Execution.
        """
        if keep_prob is not None:
            self.keep_prob = keep_prob
        w, h = to_float(self.w), to_float(self.h)
        dropout_rate = 1. - self.keep_prob
        self.gamma = dropout_rate*(w*h) / (self.block_size**2) / ((w-self.block_size+1)*(h-self.block_size+1))


class L2Norm(Layer):
    """Layer that L2-normalises its input along `axis` (default: last axis)."""

    def __init__(self, axis=-1, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.axis = axis

    def call(self, inputs, mask=None):
        normalized = tf.math.l2_normalize(inputs, axis=self.axis)
        return normalized


class SoftMask2D(Layer):
    """Masked softmax over the last axis of a score tensor.

    `call` expects ``[x, mask]``. The scores are exponentiated, masked along
    the last axis, normalised by their masked sum (plus the Keras epsilon) and
    finally masked along the second axis as well. With `scale=True` the
    per-row maximum is subtracted from the scores before exponentiation.
    The tiling patterns imply a rank-3 `x` and a rank-2 `mask`.
    """

    def __init__(self, scale: bool=False, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.scale = scale

    def call(self, inputs):
        scores, mask = inputs
        if self.scale:
            # Shift every row by its maximum before taking the exponential.
            last_dim = tf.shape(scores)[-1]
            row_max = tf.math.reduce_max(scores, axis=-1, keepdims=True, name='max_x')
            row_max = tf.tile(row_max, [1, 1, last_dim], name='max_x_tiled')
            scores = tf.math.subtract(scores, row_max, name='x_scaled')

        seq_len = tf.shape(mask)[1]

        # Mask broadcast over rows: zeroes the columns of masked-out positions.
        column_mask = tf.tile(tf.expand_dims(mask, axis=1), [1, seq_len, 1], name='mask_d1')
        weighted = tf.math.multiply(tf.exp(scores), column_mask, name='y')
        normalizer = tf.math.reduce_sum(weighted, axis=-1, keepdims=True, name='sum_y')
        attention = tf.math.divide(weighted, normalizer+K.epsilon(), name='att')

        # Mask broadcast over columns: zeroes the rows of masked-out positions.
        row_mask = tf.tile(tf.expand_dims(mask, axis=2), [1, 1, seq_len], name='mask_d2')
        return tf.math.multiply(attention, row_mask, name='att_masked')


class ExpandDim(Layer):
    """Layer wrapper around ``tf.expand_dims`` (inserts a size-1 axis at `axis`)."""

    def __init__(self, axis=-1, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = False
        self.axis = axis

    def call(self, inputs):
        expanded = tf.expand_dims(inputs, axis=self.axis)
        return expanded


class Squeeze(Layer):
    """Layer wrapper around ``tf.squeeze`` (removes the size-1 axis at `axis`)."""

    def __init__(self, axis=-1, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = False
        self.axis = axis

    def call(self, inputs):
        squeezed = tf.squeeze(inputs, axis=self.axis)
        return squeezed


class ReduceDim(Layer):
    """Layer that reduces its input along `axis` with a named reduction.

    Args:
        method: one of 'sum', 'mean', 'max', 'min', 'std', 'variance'
            (case-insensitive).
        axis: axis or axes to reduce; None reduces over all axes.
        **kwargs: forwarded to `Layer`.

    Raises:
        ValueError: from `call`, when `method` is not one of the supported names.
    """

    def __init__(self, method: str='mean', axis=None, **kwargs):
        super(ReduceDim, self).__init__(**kwargs)
        self.axis = axis
        self.method = method.lower()
        self.supports_masking = False

    def call(self, inputs):
        # Dispatch table: reduction name -> TensorFlow reduction op.
        reducers = {
            'sum': tf.math.reduce_sum,
            'mean': tf.math.reduce_mean,
            'max': tf.math.reduce_max,
            'min': tf.math.reduce_min,
            'std': tf.math.reduce_std,
            'variance': tf.math.reduce_variance,
        }
        try:
            reduce_fn = reducers[self.method]
        except KeyError:
            raise ValueError(f'method={self.method} has not been implemented yet!') from None
        return reduce_fn(inputs, axis=self.axis)


class MatMul(Layer):
    """Layer wrapper around ``tf.linalg.matmul``.

    `call` expects a pair ``[a, b]`` and multiplies them using the
    adjoint / transpose / sparsity flags given at construction time.
    """

    def __init__(self, adjoint_a=False, adjoint_b=False, 
                    transpose_a=False, transpose_b=False,     
                    a_is_sparse=False, b_is_sparse=False, **kwargs):
        super().__init__(**kwargs)
        self.adjoint_a = adjoint_a
        self.adjoint_b = adjoint_b
        self.transpose_a = transpose_a
        self.transpose_b = transpose_b
        self.a_is_sparse = a_is_sparse
        self.b_is_sparse = b_is_sparse
        self.supports_masking = False

    def call(self, inputs):
        left, right = inputs[0], inputs[1]
        return tf.linalg.matmul(
            a=left,
            b=right,
            adjoint_a=self.adjoint_a,
            adjoint_b=self.adjoint_b,
            transpose_a=self.transpose_a,
            transpose_b=self.transpose_b,
            a_is_sparse=self.a_is_sparse,
            b_is_sparse=self.b_is_sparse,
        )


class RACL_Block(Model):
    """One RACL block: joint aspect extraction (AE), opinion extraction (OE)
    and sentiment classification (SC) with cross-task attention.

    The block owns three Conv1D feature extractors and three Dense
    classifiers; the normaliser, the three DropBlock layers and the tiling
    block are supplied by the caller (presumably so they can be shared across
    blocks — confirm against the model-building code).

    Args:
        opt: options object; this class reads `n_filters`, `embedding_dim`
            and `is_training` from it.
        Normalizer: callable applied to each convolution output before the
            similarity products.
        DropBlocks: triple of layers (aspect, opinion, context) applied to the
            features returned for the next block.
        TileBlock: callable applied to `word_mask` and to the opinion
            confidence before they enter matrix products.
        conv_args: keyword arguments shared by the three Conv1D layers.
        dense_args: keyword arguments shared by the three Dense layers.
        block_id: index used in the block's and its layers' names.
    """
    
    def __init__(self, opt, Normalizer, DropBlocks, TileBlock,
                conv_args: dict, dense_args: dict, block_id: int, **kwargs):
        super(RACL_Block, self).__init__(**kwargs)
        self._name = f'RACL_Block_{block_id}'
        self.opt = opt
        self.block_id = block_id

        self.Tile = TileBlock
        self.Normalizer = Normalizer
        self.DropBlock_aspect, self.DropBlock_opinion, self.DropBlock_context = DropBlocks

        # Private feature extractors, one per task. The context extractor
        # outputs `embedding_dim` channels, the other two `n_filters`.
        self.Aspect_Extractor = Conv1D(filters=self.opt.n_filters, name=f'Aspect_Conv-{block_id}', **conv_args)
        self.Opinion_Extractor = Conv1D(filters=self.opt.n_filters, name=f'Opinion_Conv-{block_id}', **conv_args)
        self.Context_Extractor = Conv1D(filters=self.opt.embedding_dim, name=f'Context_Conv-{block_id}', **conv_args)

        # Per-task classifiers.
        self.Aspect_Classifier = Dense(name=f'Aspect_Classifier-{block_id}', **dense_args)
        self.Opinion_Classifier = Dense(name=f'Opinion_Classifier-{block_id}', **dense_args)
        self.Sentiment_Classifier = Dense(name=f'Sentiment_Classifier-{block_id}', **dense_args)

    def call(self, inputs):
        """Run the block.

        Args:
            inputs: sequence of six tensors
                ``(aspect_input, opinion_input, context_input, context_query,
                word_mask, position_att)``.

        Returns:
            A two-element list:
            ``(aspect_pred, opinion_pred, sentiment_pred)`` and
            ``(aspect_interact, opinion_interact, context_interact, context_conv)``,
            the latter being the features handed on by this block.
        """
        aspect_input, opinion_input, context_input, context_query, word_mask, position_att = inputs
        i = self.block_id
        
        # Extract Private Features for each task
        aspect_conv = self.Aspect_Extractor(aspect_input)
        opinion_conv = self.Opinion_Extractor(opinion_input)
        context_conv = self.Context_Extractor(context_input)

        # Normalize
        aspect_conv_norm = self.Normalizer(aspect_conv)
        opinion_conv_norm = self.Normalizer(opinion_conv)
        context_conv_norm = self.Normalizer(context_conv)

        # Relation R1
        # Aspect -> opinion: similarity of normalised features, masked softmax,
        # then an attention-weighted sum of the (un-normalised) opinion features.
        aspect_see_opinion = MatMul(adjoint_b=True, name=f'aspect_see_opinion-{i}')([aspect_conv_norm, opinion_conv_norm])
        aspect_attend_opinion = SoftMask2D(name=f'aspect_attend_opinion-{i}')([aspect_see_opinion, word_mask])
        aspect_weigh_opinion = MatMul(name=f'aspect_weigh_opinion-{i}')([aspect_attend_opinion, opinion_conv])
        aspect_interact = Concatenate(axis=-1, name=f'aspect_interact-{i}')([aspect_conv, aspect_weigh_opinion])

        # Opinion -> aspect: the symmetric counterpart.
        opinion_see_aspect = MatMul(adjoint_b=True, name=f'opinion_see_aspect-{i}')([opinion_conv_norm, aspect_conv_norm])
        opinion_attend_aspect = SoftMask2D(name=f'opinion_attend_aspect-{i}')([opinion_see_aspect, word_mask])
        opinion_weigh_aspect = MatMul(name=f'opinion_weigh_aspect-{i}')([opinion_attend_aspect, aspect_conv])
        opinion_interact = Concatenate(axis=-1, name=f'opinion_interact-{i}')([opinion_conv, opinion_weigh_aspect])

        # AE & OE Prediction
        aspect_pred = self.Aspect_Classifier(aspect_interact)
        opinion_pred = self.Opinion_Classifier(opinion_interact)

        # OE Confidence - a slight difference from the original paper.
        # For propagating R3, we calculate the confidence of each candidate opinion word.
        # Only when a word satisfies the condition Prob[B,I] > Prob[O] in OE, it can be propagated to SC.
        # 1 - 2*P[class 0] is positive exactly when P[class 0] < 0.5; the
        # maximum with 0 then discards the remaining words.
        opinion_condition = Lambda(lambda x: 1-2.*tf.nn.softmax(x, axis=-1)[:,:,0], name=f'opinion_condition-{i}')(opinion_pred)
        opinion_confidence = Lambda(lambda x: tf.math.maximum(0., x), name=f'opinion_confidence-{i}')(opinion_condition)
        mask = self.Tile(word_mask)
        opinion_propagated = self.Tile(opinion_confidence)
        opinion_propagated = MatMul(name=f'opinion_propagated_masked-{i}')([opinion_propagated, mask])
        opinion_propagated = MatMul(name=f'opinion_propagated-{i}')([opinion_propagated, position_att])

        # SC Aspect-Context Attention
        word_see_context = MatMul(adjoint_b=True, name=f'word_see_context-{i}')([(context_query), context_conv_norm])
        word_see_context = MatMul(name=f'word_see_context_masked-{i}')([word_see_context, position_att])
        word_attend_context = SoftMask2D(scale=True, name=f'word_attend_context-{i}')([word_see_context, word_mask])

        # Relation R2 & R3
        # The AE->OE attention (R2) and the propagated opinion confidence (R3)
        # are added onto the context attention before it weighs the context.
        word_attend_context += aspect_attend_opinion + opinion_propagated
        word_weigh_context = MatMul(name=f'word_weigh_context-{i}')([word_attend_context, context_conv])
        context_interact = context_query + word_weigh_context

        # SC Prediction
        sentiment_pred = self.Sentiment_Classifier(context_interact)

        # We use DropBlock to enhance the learning of the private features for AE, OE & SC.
        # For more details, refer to
        #   http://papers.nips.cc/paper/8271-dropblock-a-regularization-method-for-convolutional-networks
        # The features get a trailing axis before DropBlock and lose it again
        # afterwards, because DropBlock2D requires rank-4 inputs.
        aspect_interact = ExpandDim(axis=-1)(aspect_interact)
        aspect_interact = self.DropBlock_aspect(aspect_interact, self.opt.is_training)
        aspect_interact = Squeeze(axis=-1)(aspect_interact)

        opinion_interact = ExpandDim(axis=-1)(opinion_interact)
        opinion_interact = self.DropBlock_opinion(opinion_interact, self.opt.is_training)
        opinion_interact = Squeeze(axis=-1)(opinion_interact)
        
        context_conv = ExpandDim(axis=-1)(context_conv)
        context_conv = self.DropBlock_context(context_conv, self.opt.is_training)
        context_conv = Squeeze(axis=-1)(context_conv)

        return [(aspect_pred, opinion_pred, sentiment_pred), 
                (aspect_interact, opinion_interact, context_interact, context_conv)]


def dropoutize_embeddings(opt, layer_name: str='embeddings_dropout', model_name: str='dropoutize_embeddings'):
    """Build a one-layer Sequential dropout model and expose its input/output tensors.

    Args:
        opt: options object; reads `keep_prob_1`, `max_sentence_len`,
            `embedding_dim` and `random_seed`.
        layer_name: name of the Dropout layer.
        model_name: name of the wrapping Sequential model.

    Returns:
        Tuple ``(input_tensor, output_tensor)`` of the model.
    """
    wrapper = Sequential(name=model_name)
    dropout_layer = Dropout(
        rate=1-opt.keep_prob_1,
        input_shape=(opt.max_sentence_len, opt.embedding_dim),
        seed=opt.random_seed,
        name=layer_name,
    )
    wrapper.add(dropout_layer)
    return wrapper.inputs[0], wrapper.outputs[0]


def create_embeddings(inputs, opt, embedding_dim: int, layer_prefix: str='', pretrained_embeddings=None):
    """Apply a frozen Embedding layer followed by Dropout to `inputs`.

    Args:
        inputs: tensor of token ids fed to the Embedding layer.
        opt: options object; reads `vocab_size` and `keep_prob_1`.
        embedding_dim: size of each embedding vector.
        layer_prefix: prefix for the names of the two layers.
        pretrained_embeddings: optional initial weight matrix. It is used only
            when its shape equals ``(opt.vocab_size + 1, embedding_dim)``;
            otherwise it is ignored.

    Returns:
        The embedded inputs after dropout.
    """
    n_rows = opt.vocab_size+1  # MUST: +1
    embedding_args = dict(
        input_dim=n_rows,
        output_dim=embedding_dim,
        name=layer_prefix+'_embeddings',
        trainable=False,  # finetune only happens after warm-up
    )

    if pretrained_embeddings is not None:
        if pretrained_embeddings.shape == (n_rows, embedding_dim):
            embedding_args['weights'] = [pretrained_embeddings]

    embedded = Embedding(**embedding_args)(inputs)
    dropout = Dropout(1-opt.keep_prob_1, name=layer_prefix+'_embeddings_dropout')
    return dropout(embedded)

# **losses.py**

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K

In [None]:
def crossentropy(preds, labels, weights=None, name='loss'):
    """Mean softmax cross-entropy, optionally weighted per class.

    Args:
        preds: logits.
        labels: label distribution with the same shape as `preds`.
        weights: optional per-class weights. Each sample is weighted by the
            sum of ``weights * labels`` over axis 1.
        name: name scope for the created ops.

    Returns:
        Scalar loss tensor.
    """
    with tf.name_scope(name):
        per_sample = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels, name='unweighted_loss')

        if weights is not None:
            class_weights = tf.constant(weights, name='class_weights')
            sample_weights = tf.reduce_sum(class_weights*labels, axis=1, name='sample_weights')
            return tf.reduce_mean(sample_weights*per_sample, name='weighted_loss')

        return tf.reduce_mean(per_sample, name='final_loss')


def RACL_losses(y_true, y_pred, masks, opt):
    """Compute the combined RACL loss and its components.

    Args:
        y_true: triple ``(ae_label, oe_label, sc_label)``.
        y_pred: triple ``(ae_pred, oe_pred, sc_pred)`` of logits.
        masks: pair ``(word_mask, sentiment_mask)``.
        opt: options object; reads `n_classes`, `term_weights`,
            `polarity_weights`, `aspect_weight`, `opinion_weight`,
            `sentiment_weight` and `regularization_weight`.

    Returns:
        Tuple ``(loss, ae_loss, oe_loss, sc_loss, reg_cost)`` where `loss` is
        the weighted sum of the other four terms.
    """
    ae_label, oe_label, sc_label = y_true
    ae_pred, oe_pred, sc_pred = y_pred
    word_mask, sentiment_mask = masks

    # Format predictions
    ae_pred = tf.cast(ae_pred, tf.float32, name='ae_pred')
    oe_pred = tf.cast(oe_pred, tf.float32, name='oe_pred')
    sc_pred = tf.cast(sc_pred, tf.float32, name='sc_pred')
    word_mask = tf.cast(word_mask, tf.float32, name='word_mask')
    sentiment_mask = tf.cast(sentiment_mask, tf.float32, name='sentiment_mask')

    # Convert values to probabilities
    # (taken from the un-masked logits; only used by the regularization cost below)
    ae_prob = tf.nn.softmax(ae_pred, axis=-1, name='ae_prob')
    oe_prob = tf.nn.softmax(oe_pred, axis=-1, name='oe_prob')
    # sc_prob = tf.nn.softmax(sc_pred, axis=-1, name='sc_prob')

    # Define shapes
    batch_size = ae_pred.shape[0]  # NOTE(review): unused in this function
    output_shape = [-1, opt.n_classes]
    mask_shape = [1, 1, opt.n_classes]

    # Mask AE, OE, SC Predictions
    # Masks are repeated n_classes times along a new last axis, multiplied
    # into the logits, and the result is flattened to [-1, n_classes].
    word_mask = tf.tile(tf.expand_dims(word_mask, axis=-1), mask_shape)
    ae_pred = tf.reshape(word_mask*ae_pred, output_shape, name='ae_pred_masked')
    oe_pred = tf.reshape(word_mask*oe_pred, output_shape, name='oe_pred_masked')

    sentiment_mask = tf.tile(tf.expand_dims(sentiment_mask, axis=-1), mask_shape)
    sc_pred = tf.reshape(sentiment_mask*sc_pred, output_shape, name='sc_pred_masked')

    # Relation R4 (only in Training)
    # In training / validation, sentiment masks are set to 1.0 only for aspect terms.
    # In testing, sentiment masks are set to 1.0 for all words (except padded ones).

    # Format Labels
    ae_label = tf.cast(ae_label, tf.float32, name='ae_label')
    oe_label = tf.cast(oe_label, tf.float32, name='oe_label')
    sc_label = tf.cast(sc_label, tf.float32, name='sc_label')
    ae_label = tf.reshape(ae_label, output_shape, name='ae_label_flat')
    oe_label = tf.reshape(oe_label, output_shape, name='oe_label_flat')
    sc_label = tf.reshape(sc_label, output_shape, name='sc_label_flat')

    # AE & OE Regularization cost - only get Beginning [1] and Inside [2] values
    # Penalises words whose summed non-outside probability over both tasks exceeds 1.
    # NOTE(review): `total_cost` is summed over every position because the
    # probabilities are not masked, and `word_mask` has already been tiled
    # n_classes times at this point, so the denominator is not the plain
    # mask sum — confirm both are intended.
    ae_cost = tf.reduce_sum(ae_prob[:,:,1:], axis=-1, name='ae_cost')
    oe_cost = tf.reduce_sum(oe_prob[:,:,1:], axis=-1, name='oe_cost')
    total_cost = ae_cost + oe_cost - 1.
    total_cost = tf.maximum(0., total_cost, name='total_cost')
    reg_cost = tf.reduce_sum(total_cost) / tf.reduce_sum(word_mask)
    reg_cost = tf.identity(reg_cost, name='regularization_cost') 

    # Weighted SoftMax Categorical Cross-Entropy for AE, OE, SC
    ae_loss = crossentropy(ae_pred, ae_label, opt.term_weights, name='aspect') 
    oe_loss = crossentropy(oe_pred, oe_label, opt.term_weights, name='opinion') 
    sc_loss = crossentropy(sc_pred, sc_label, opt.polarity_weights, name='sentiment') 

    # Overall objective: weighted sum of the three task losses and the regularizer.
    loss = opt.aspect_weight * ae_loss + \
           opt.opinion_weight * oe_loss + \
           opt.sentiment_weight * sc_loss + \
           opt.regularization_weight * reg_cost
    loss = tf.identity(loss, name='overall_loss')
    return loss, ae_loss, oe_loss, sc_loss, reg_cost

# **metrics.py**

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.metrics import Metric
from tensorflow.keras.callbacks import Callback

In [None]:
# Keras' fuzz factor; added to the denominators of the precision / recall / F1
# computations in score_absa to avoid division by zero.
epsilon = K.epsilon()

# Tag ids used for aspect / opinion term labels.
term_code = dict(begin=1, inside=2, outside=0)

# Sentiment class id -> polarity name.
idx2polarity = dict(enumerate(
    ['background', 'positive', 'negative', 'neutral', 'conflict']
))

# Polarity name -> sentiment class id (inverse of idx2polarity).
polarity2idx = {name: idx for idx, name in idx2polarity.items()}


def mask_absa(y_aspect, y_sentiment, mask) -> (list, list):
    """Convert per-word score vectors into label ids, cut at the first masked word.

    For every sequence, words are read until the first position whose mask is
    0. The aspect label is the argmax of the word's aspect vector. The
    sentiment label is 0 when the sentiment vector is all zeros (background or
    conflict-sentiment word, not counted in evaluation) and ``argmax + 1``
    otherwise.

    Args:
        y_aspect: per-sequence, per-word aspect vectors.
        y_sentiment: per-sequence, per-word sentiment vectors.
        mask: per-sequence, per-word mask values.

    Returns:
        ``(aspect_labels, sentiment_labels)``, two lists of per-sequence lists.
    """
    aspect_sequences, sentiment_sequences = [], []
    for aspect_row, sentiment_row, mask_row in zip(y_aspect, y_sentiment, mask):
        aspect_ids, sentiment_ids = [], []
        for aspect_vec, sentiment_vec, keep in zip(aspect_row, sentiment_row, mask_row):
            if keep == 0:
                # first masked position ends the sequence
                break
            aspect_ids.append(np.argmax(aspect_vec))
            if np.any(sentiment_vec):
                sentiment_ids.append(np.argmax(sentiment_vec)+1)
            else:
                sentiment_ids.append(0)

        aspect_sequences.append(aspect_ids)
        sentiment_sequences.append(sentiment_ids)
    return aspect_sequences, sentiment_sequences


def score_absa_single_sample(t_true, t_pred, s_true=None, s_pred=None, counters: dict=None, term_only: bool=False):
    """Count term-extraction (and optionally sentiment) statistics for one sample.

    A predicted term is correct when it starts at the same word as a gold
    term and its run of 'inside' tags ends at the same word.

    Args:
        t_true: gold term tags of the sample (ids from `term_code`).
        t_pred: predicted term tags, aligned with `t_true`.
        s_true: gold sentiment ids per word (needed unless `term_only`).
        s_pred: predicted sentiment ids per word (needed unless `term_only`).
        counters: nested dict with 'gold', 'pred', 'correct' and 'overall'
            polarity counters; updated in place (needed unless `term_only`).
        term_only: if True, sentiments are ignored and only the three term
            counts are returned.

    Returns:
        ``(n_relevants, n_corrects, n_predicteds)`` when `term_only`, otherwise
        ``([n_relevants, n_corrects, n_predicteds], counters)``.
    """
    # None defaults instead of shared mutable defaults: the counters dict is
    # mutated and handed back to the caller, so a shared default would leak
    # state between unrelated calls.
    s_true = [] if s_true is None else s_true
    s_pred = [] if s_pred is None else s_pred
    counters = {} if counters is None else counters

    begin, inside = term_code['begin'], term_code['inside']
    background = polarity2idx['background']

    n_relevants, n_corrects = 0, 0
    n_words = len(t_true)
    for j in range(n_words):
        if t_true[j] != begin:
            continue

        # A gold term starts here.
        n_relevants += 1
        if not term_only and s_true[j] != background:
            counters['overall'][idx2polarity[s_true[j]]] += 1

        if t_pred[j] != begin:
            continue

        # Both spans start at j: walk the following 'inside' tags and require
        # gold and prediction to leave the span at the same position.
        matching = True
        for k in range(j+1, n_words):
            true_inside = t_true[k] == inside
            pred_inside = t_pred[k] == inside
            if true_inside != pred_inside:
                matching = False
                break
            if not true_inside:
                break
        if not matching:
            continue

        n_corrects += 1
        if term_only:
            continue

        if s_true[j] != background:
            counters['gold'][idx2polarity[s_true[j]]] += 1
            counters['pred'][idx2polarity[s_pred[j]]] += 1
            if s_true[j] == s_pred[j]:
                counters['correct'][idx2polarity[s_pred[j]]] += 1
        else:
            # Gold sentiment is background (all-zero label): counted under
            # 'conflict' on the prediction side.
            counters['pred']['conflict'] += 1

    n_predicteds = sum(1 for t_p in t_pred if t_p == begin)

    if term_only:
        return n_relevants, n_corrects, n_predicteds
    return [n_relevants, n_corrects, n_predicteds], counters


def score_absa(terms_true, terms_pred,
               sentiments_true: list=None, sentiments_pred: list=None,
               method: str='micro', term_only: bool=False):
    """
    Score term extraction and, unless `term_only`, sentiment classification and overall ABSA.

    Args:
        terms_true: per-sample sequences of gold term codes.
        terms_pred: per-sample sequences of predicted term codes.
        sentiments_true: per-sample sequences of gold polarity indices (unused if `term_only`).
        sentiments_pred: per-sample sequences of predicted polarity indices (unused if `term_only`).
        method: 'micro' or 'macro', forwarded to `score_sentiment_and_overall`.
        term_only: if True, only the term F1 is computed.

    Returns:
        `term_F1` when `term_only` is True, otherwise
        `(term_F1, sentiment_Acc, sentiment_F1, absa_F1)`.

    Raises:
        ValueError: if `term_only` is False and `method` is neither 'micro' nor 'macro'.
    """
    # None sentinels replace the former mutable list defaults
    sentiments_true = [] if sentiments_true is None else sentiments_true
    sentiments_pred = [] if sentiments_pred is None else sentiments_pred

    if not term_only:
        # Fail fast instead of discovering a bad `method` after all samples were counted
        if method not in ('micro', 'macro'):
            raise ValueError('method must be either micro or macro')

        # Sentiment Distribution for Aspect / Opinion Terms:
        #          pred_count: predicted results that are correctly extracted
        #          gold_count: gold results that are correctly extracted
        #       correct_count: results that get both span & prediction correctly
        #       overall_count: ground-truth
        counters = {
            'gold': {'positive': 0, 'negative': 0, 'neutral': 0, },
            'pred': {'positive': 0, 'negative': 0, 'neutral': 0, 'conflict': 0},
            'correct': {'positive': 0, 'negative': 0, 'neutral': 0, },
            'overall': {'positive': 0, 'negative': 0, 'neutral': 0, }
        }

    # Accumulate per-sample statistics
    n_corrects, n_predicteds, n_relevants = 0, 0, 0
    for i, t_true in enumerate(terms_true):
        t_pred = terms_pred[i]

        if term_only:
            sample_relevants, sample_corrects, sample_predicteds = score_absa_single_sample(t_true, t_pred, term_only=term_only)
        else:
            s_true, s_pred = sentiments_true[i], sentiments_pred[i]
            [sample_relevants, sample_corrects, sample_predicteds], \
                counters = score_absa_single_sample(t_true, t_pred, s_true, s_pred, counters, term_only)

        n_corrects += sample_corrects
        n_relevants += sample_relevants
        n_predicteds += sample_predicteds

    # Calculate evaluation metrics for Term (of Aspect or Opinion);
    # epsilon keeps the divisions defined when a count is zero
    term_P = n_corrects / (n_predicteds+epsilon) # precision
    term_R = n_corrects / (n_relevants+epsilon) # recall
    term_F1 = 2*term_P*term_R / (term_P+term_R+epsilon)

    if term_only:
        return term_F1

    sentiment_Acc, sentiment_F1, absa_F1 = score_sentiment_and_overall(n_predicteds, counters, method)
    return term_F1, sentiment_Acc, sentiment_F1, absa_F1


def score_sentiment_and_overall(n_predicteds: int, counters: dict, method: str='micro'):
    """
    Derive sentiment accuracy, sentiment F1 and overall ABSA F1 from the counters.

    Args:
        n_predicteds: total number of predicted terms.
        counters: nested dict with 'correct', 'pred', 'gold' and 'overall' counts per polarity
            ('pred' additionally carries a 'conflict' count).
        method: 'micro' averages the per-polarity precisions and recalls and then combines
            the two averages into one F1; 'macro' averages the per-polarity F1 scores.

    Returns:
        `(sentiment_Acc, sentiment_F1, absa_F1)`.

    Raises:
        ValueError: if `method` is neither 'micro' nor 'macro'.
    """
    polarities = ('positive', 'negative', 'neutral')
    correct, pred, gold = counters['correct'], counters['pred'], counters['gold']

    # Precision and Recall per each sentiment polarity
    precision = {p: correct[p] / (pred[p]+epsilon) for p in polarities}
    recall = {p: correct[p] / (gold[p]+epsilon) for p in polarities}

    # Totals over the three polarities
    n_corrects_sentiment = sum(correct[p] for p in polarities)
    n_corrects_aspect = sum(gold[p] for p in polarities)
    n_overall = sum(counters['overall'][p] for p in polarities)

    # Sentiment accuracy is measured over correctly extracted terms only
    sentiment_Acc = n_corrects_sentiment / (n_corrects_aspect+epsilon)

    if method == 'micro':
        sentiment_P = sum(precision[p] for p in polarities) / 3.0
        sentiment_R = sum(recall[p] for p in polarities) / 3.0
        sentiment_F1 = 2*sentiment_P*sentiment_R / (sentiment_P+sentiment_R+epsilon)
    elif method == 'macro':
        per_class_F1 = [
            2*precision[p]*recall[p] / (precision[p]+recall[p]+epsilon)
            for p in polarities
        ]
        sentiment_F1 = sum(per_class_F1) / 3.0
    else:
        raise ValueError('method must be either micro or macro')

    # Overall ABSA: a term counts only if span and polarity are both right;
    # predictions tallied as 'conflict' are removed from the precision denominator
    absa_P = n_corrects_sentiment / (n_predicteds-pred['conflict']+epsilon)
    absa_R = n_corrects_sentiment / (n_overall+epsilon)
    absa_F1 = 2*absa_P*absa_R / (absa_P+absa_R+epsilon)

    return sentiment_Acc, sentiment_F1, absa_F1


def evaluate_absa(aspects_true, aspects_pred,
                  opinions_true, opinions_pred,
                  sentiments_true, sentiments_pred,
                  mask, include_opinion: bool=True, threshold: float=0.5):
    """
    Mask the raw label arrays and compute the ABSA scores.

    Returns the result of `score_absa` for aspects and sentiments, i.e.
    (aspect_f1, sentiment_acc, sentiment_f1, absa_f1). When `include_opinion` is True
    the opinion term F1 is prepended and a 5-element list is returned instead.
    `threshold` is accepted but not used by this function.
    """
    gold_aspects, gold_sentiments = mask_absa(aspects_true, sentiments_true, mask)
    pred_aspects, pred_sentiments = mask_absa(aspects_pred, sentiments_pred, mask)
    absa_scores = score_absa(gold_aspects, pred_aspects, gold_sentiments, pred_sentiments)

    if not include_opinion:
        return absa_scores

    # Only the term labels of the opinion pass are kept; the sentiment output is discarded
    gold_opinions, _ = mask_absa(opinions_true, gold_sentiments, mask)
    pred_opinions, _ = mask_absa(opinions_pred, pred_sentiments, mask)
    opinion_f1 = score_absa(gold_opinions, pred_opinions, term_only=True)
    return [opinion_f1] + list(absa_scores)


# Small constant added to denominators of the scoring functions to avoid division by zero.
# NOTE(review): `K` is presumably the Keras backend alias; confirm it is in scope when this cell runs.
epsilon = K.epsilon()

# Integer codes used for term (aspect / opinion) tagging
term_code = dict(begin=1, inside=2, outside=0)

# Sentiment polarity index -> name
idx2polarity = dict(enumerate((
    'background',
    'positive',
    'negative',
    'neutral',
    'conflict',
)))

# Sentiment polarity name -> index (inverse of idx2polarity)
polarity2idx = {name: idx for idx, name in idx2polarity.items()}


def mask_absa(y_aspect, y_sentiment, mask) -> (list, list):
    """
    Convert per-word score vectors into label indices, dropping masked positions.

    For every sequence, words are read until the first position whose mask value is 0.
    Term labels are the argmax of the term vector. Sentiment labels are argmax+1, except
    that an all-zero sentiment vector (background or conflict-sentiment word) maps to 0
    so that it is not counted for evaluation.

    Returns:
        Two lists (term labels, sentiment labels) holding one list of labels per sequence.
    """
    Ys_aspect, Ys_sentiment = [], []
    for term_rows, sentiment_rows, mask_row in zip(y_aspect, y_sentiment, mask):
        term_ids, sentiment_ids = [], []
        for term_vec, sentiment_vec, keep in zip(term_rows, sentiment_rows, mask_row):
            if keep == 0:
                # everything from the first masked word on is ignored
                break
            term_ids.append(np.argmax(term_vec))
            sentiment_ids.append(np.argmax(sentiment_vec)+1 if np.any(sentiment_vec) else 0)

        Ys_aspect.append(term_ids)
        Ys_sentiment.append(sentiment_ids)
    return Ys_aspect, Ys_sentiment


def score_absa_single_sample(t_true, t_pred, s_true=None, s_pred=None, counters: dict=None, term_only: bool=False):
    """
    Count term-level (and optionally sentiment-level) statistics for one sentence.

    A gold term starts at every position where `t_true` holds `term_code['begin']`.
    It is counted as correctly extracted when `t_pred` also holds 'begin' there and
    both sequences agree on the following run of `term_code['inside']` labels.

    Args:
        t_true: sequence of gold term codes (values of `term_code`).
        t_pred: sequence of predicted term codes, indexed in parallel with `t_true`.
        s_true: sequence of gold polarity indices (keys of `idx2polarity`);
            only read when `term_only` is False.
        s_pred: sequence of predicted polarity indices; only read when `term_only` is False.
        counters: nested dict with keys 'gold', 'pred', 'correct', 'overall', updated in place.
            When omitted (and `term_only` is False) a fresh zeroed structure is created.
        term_only: if True, skip every sentiment-related counter.

    Returns:
        `(n_relevants, n_corrects, n_predicteds)` when `term_only` is True, otherwise
        `([n_relevants, n_corrects, n_predicteds], counters)`.
    """
    # None sentinels replace the former mutable defaults ([] and {}), which are shared across calls
    s_true = [] if s_true is None else s_true
    s_pred = [] if s_pred is None else s_pred
    if counters is None and not term_only:
        counters = {
            'gold': {'positive': 0, 'negative': 0, 'neutral': 0, },
            'pred': {'positive': 0, 'negative': 0, 'neutral': 0, 'conflict': 0},
            'correct': {'positive': 0, 'negative': 0, 'neutral': 0, },
            'overall': {'positive': 0, 'negative': 0, 'neutral': 0, }
        }

    begin, inside = term_code['begin'], term_code['inside']
    background = polarity2idx['background']

    n_relevants, n_corrects = 0, 0
    n_words = len(t_true)
    for j in range(n_words):
        if t_true[j] != begin:
            continue

        n_relevants += 1
        if not term_only and s_true[j] != background:
            counters['overall'][idx2polarity[s_true[j]]] += 1

        if t_pred[j] != begin:
            continue

        # Walk the remainder of the span: it matches only if gold and prediction
        # leave the 'inside' run at the same position.
        matching = True
        for k in range(j+1, n_words):
            true_inside = t_true[k] == inside
            pred_inside = t_pred[k] == inside
            if true_inside != pred_inside:
                matching = False
                break
            if not true_inside:
                break

        if not matching:
            continue

        n_corrects += 1
        if term_only:
            continue

        if s_true[j] != background:
            counters['gold'][idx2polarity[s_true[j]]] += 1
            counters['pred'][idx2polarity[s_pred[j]]] += 1
            if s_true[j] == s_pred[j]:
                counters['correct'][idx2polarity[s_pred[j]]] += 1
        else:
            # correctly extracted term whose gold sentiment is masked out (index 0)
            counters['pred']['conflict'] += 1

    # Every predicted 'begin' label counts as one predicted term
    n_predicteds = sum(1 for t_p in t_pred if t_p == begin)

    if term_only:
        return n_relevants, n_corrects, n_predicteds
    return [n_relevants, n_corrects, n_predicteds], counters


def score_absa(terms_true, terms_pred,
               sentiments_true: list=None, sentiments_pred: list=None,
               method: str='micro', term_only: bool=False):
    """
    Score term extraction and, unless `term_only`, sentiment classification and overall ABSA.

    Args:
        terms_true: per-sample sequences of gold term codes.
        terms_pred: per-sample sequences of predicted term codes.
        sentiments_true: per-sample sequences of gold polarity indices (unused if `term_only`).
        sentiments_pred: per-sample sequences of predicted polarity indices (unused if `term_only`).
        method: 'micro' or 'macro', forwarded to `score_sentiment_and_overall`.
        term_only: if True, only the term F1 is computed.

    Returns:
        `term_F1` when `term_only` is True, otherwise
        `(term_F1, sentiment_Acc, sentiment_F1, absa_F1)`.

    Raises:
        ValueError: if `term_only` is False and `method` is neither 'micro' nor 'macro'.
    """
    # None sentinels replace the former mutable list defaults
    sentiments_true = [] if sentiments_true is None else sentiments_true
    sentiments_pred = [] if sentiments_pred is None else sentiments_pred

    if not term_only:
        # Fail fast instead of discovering a bad `method` after all samples were counted
        if method not in ('micro', 'macro'):
            raise ValueError('method must be either micro or macro')

        # Sentiment Distribution for Aspect / Opinion Terms:
        #          pred_count: predicted results that are correctly extracted
        #          gold_count: gold results that are correctly extracted
        #       correct_count: results that get both span & prediction correctly
        #       overall_count: ground-truth
        counters = {
            'gold': {'positive': 0, 'negative': 0, 'neutral': 0, },
            'pred': {'positive': 0, 'negative': 0, 'neutral': 0, 'conflict': 0},
            'correct': {'positive': 0, 'negative': 0, 'neutral': 0, },
            'overall': {'positive': 0, 'negative': 0, 'neutral': 0, }
        }

    # Accumulate per-sample statistics
    n_corrects, n_predicteds, n_relevants = 0, 0, 0
    for i, t_true in enumerate(terms_true):
        t_pred = terms_pred[i]

        if term_only:
            sample_relevants, sample_corrects, sample_predicteds = score_absa_single_sample(t_true, t_pred, term_only=term_only)
        else:
            s_true, s_pred = sentiments_true[i], sentiments_pred[i]
            [sample_relevants, sample_corrects, sample_predicteds], \
                counters = score_absa_single_sample(t_true, t_pred, s_true, s_pred, counters, term_only)

        n_corrects += sample_corrects
        n_relevants += sample_relevants
        n_predicteds += sample_predicteds

    # Calculate evaluation metrics for Term (of Aspect or Opinion);
    # epsilon keeps the divisions defined when a count is zero
    term_P = n_corrects / (n_predicteds+epsilon) # precision
    term_R = n_corrects / (n_relevants+epsilon) # recall
    term_F1 = 2*term_P*term_R / (term_P+term_R+epsilon)

    if term_only:
        return term_F1

    sentiment_Acc, sentiment_F1, absa_F1 = score_sentiment_and_overall(n_predicteds, counters, method)
    return term_F1, sentiment_Acc, sentiment_F1, absa_F1


def score_sentiment_and_overall(n_predicteds: int, counters: dict, method: str='micro'):
    """
    Derive sentiment accuracy, sentiment F1 and overall ABSA F1 from the counters.

    Args:
        n_predicteds: total number of predicted terms.
        counters: nested dict with 'correct', 'pred', 'gold' and 'overall' counts per polarity
            ('pred' additionally carries a 'conflict' count).
        method: 'micro' averages the per-polarity precisions and recalls and then combines
            the two averages into one F1; 'macro' averages the per-polarity F1 scores.

    Returns:
        `(sentiment_Acc, sentiment_F1, absa_F1)`.

    Raises:
        ValueError: if `method` is neither 'micro' nor 'macro'.
    """
    polarities = ('positive', 'negative', 'neutral')
    correct, pred, gold = counters['correct'], counters['pred'], counters['gold']

    # Precision and Recall per each sentiment polarity
    precision = {p: correct[p] / (pred[p]+epsilon) for p in polarities}
    recall = {p: correct[p] / (gold[p]+epsilon) for p in polarities}

    # Totals over the three polarities
    n_corrects_sentiment = sum(correct[p] for p in polarities)
    n_corrects_aspect = sum(gold[p] for p in polarities)
    n_overall = sum(counters['overall'][p] for p in polarities)

    # Sentiment accuracy is measured over correctly extracted terms only
    sentiment_Acc = n_corrects_sentiment / (n_corrects_aspect+epsilon)

    if method == 'micro':
        sentiment_P = sum(precision[p] for p in polarities) / 3.0
        sentiment_R = sum(recall[p] for p in polarities) / 3.0
        sentiment_F1 = 2*sentiment_P*sentiment_R / (sentiment_P+sentiment_R+epsilon)
    elif method == 'macro':
        per_class_F1 = [
            2*precision[p]*recall[p] / (precision[p]+recall[p]+epsilon)
            for p in polarities
        ]
        sentiment_F1 = sum(per_class_F1) / 3.0
    else:
        raise ValueError('method must be either micro or macro')

    # Overall ABSA: a term counts only if span and polarity are both right;
    # predictions tallied as 'conflict' are removed from the precision denominator
    absa_P = n_corrects_sentiment / (n_predicteds-pred['conflict']+epsilon)
    absa_R = n_corrects_sentiment / (n_overall+epsilon)
    absa_F1 = 2*absa_P*absa_R / (absa_P+absa_R+epsilon)

    return sentiment_Acc, sentiment_F1, absa_F1


def evaluate_absa(aspects_true, aspects_pred,
                  opinions_true, opinions_pred,
                  sentiments_true, sentiments_pred,
                  mask, include_opinion: bool=True, threshold: float=0.5):
    """
    Mask the raw label arrays and compute the ABSA scores.

    Returns the result of `score_absa` for aspects and sentiments, i.e.
    (aspect_f1, sentiment_acc, sentiment_f1, absa_f1). When `include_opinion` is True
    the opinion term F1 is prepended and a 5-element list is returned instead.
    `threshold` is accepted but not used by this function.
    """
    gold_aspects, gold_sentiments = mask_absa(aspects_true, sentiments_true, mask)
    pred_aspects, pred_sentiments = mask_absa(aspects_pred, sentiments_pred, mask)
    absa_scores = score_absa(gold_aspects, pred_aspects, gold_sentiments, pred_sentiments)

    if not include_opinion:
        return absa_scores

    # Only the term labels of the opinion pass are kept; the sentiment output is discarded
    gold_opinions, _ = mask_absa(opinions_true, gold_sentiments, mask)
    pred_opinions, _ = mask_absa(opinions_pred, pred_sentiments, mask)
    opinion_f1 = score_absa(gold_opinions, pred_opinions, term_only=True)
    return [opinion_f1] + list(absa_scores)

# **optimizers.py**

In [None]:
pip install gradient-centralization-tf

In [None]:
import gctf
import tensorflow as tf
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import Callback
import tensorflow.keras.backend as K

In [None]:
def get_optimizer(optimizer_option: str, use_grad_centralized: bool=True, learning_rate: float=0.001, **kwargs):
    """
    Build an optimizer by (case-insensitive) name.

    Args:
        optimizer_option: one of 'adam', 'nadam', 'adagrad', 'adadelta', 'rmsprop';
            any other value falls back to SGD.
        use_grad_centralized: take the optimizer from `gctf.optimizers` when True,
            otherwise from `tensorflow.keras.optimizers`.
        learning_rate: forwarded to the optimizer.
        **kwargs: further keyword arguments forwarded to the optimizer.

    Returns:
        The constructed optimizer instance.
    """
    # option name -> (factory name in gctf.optimizers, class name in tensorflow.keras.optimizers)
    known_optimizers = {
        'adam': ('adam', 'Adam'),
        'nadam': ('nadam', 'Nadam'),
        'adagrad': ('adagrad', 'Adagrad'),
        'adadelta': ('adadelta', 'Adadelta'),
        'rmsprop': ('rmsprop', 'RMSprop'),
    }
    gc_name, keras_name = known_optimizers.get(optimizer_option.lower(), ('sgd', 'SGD'))

    if use_grad_centralized:
        from gctf import optimizers as gc_optimizers
        Optimizer = getattr(gc_optimizers, gc_name)
    else:
        from tensorflow.keras import optimizers as keras_optimizers
        Optimizer = getattr(keras_optimizers, keras_name)

    return Optimizer(learning_rate=learning_rate, **kwargs)

# **lr_schedulers.py**

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import Callback

In [None]:
def noam_scheme(global_step, init_lr, warmup_steps=16):
    """
    Noam learning-rate schedule: linear warm-up followed by inverse-square-root decay.

        global_step: (scalar) current training step (0-based; 1 is added internally)
        init_lr: (scalar) learning rate reached at the end of the warm-up
        warmup_steps: (scalar) number of steps over which the rate rises to init_lr
    """
    current_step = tf.cast(global_step+1, dtype=tf.float32, name="global_step")
    warmup_term = current_step*(warmup_steps**-1.5)   # grows linearly with the step
    decay_term = current_step**-0.5                   # shrinks with the square root of the step
    peak_scale = warmup_steps**0.5                    # makes the two terms meet at init_lr
    return init_lr * peak_scale * tf.minimum(warmup_term, decay_term)


class CyclicLR(Callback):
    """
    This callback implements a cyclical learning rate policy (CLR).
    The method cycles the learning rate between two boundaries with some constant frequency, 
        as detailed in this paper (https://arxiv.org/abs/1506.01186).
    The amplitude of the cycle can be scaled on a per-iteration or per-cycle basis.
    
    This class has three built-in policies, selected through `mode`.
    "original":
        A basic triangular cycle w/ no amplitude scaling.
        (Any `mode` value other than "halving" / "exponential" selects this policy.)
    "halving":
        A basic triangular cycle that scales initial amplitude by half each cycle.
    "exponential":
        A cycle that scales initial amplitude by gamma**(cycle iterations) at each 
        cycle iteration.

    For more detail, please read the paper.
    
    # Example
        ```python
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., mode='original')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```
    
    Class also supports custom scaling functions:
        ```python
            clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., scale_fn=clr_fn,
                                scale_mode='cycle')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```    
    # Arguments
        base_lr: initial learning rate which is the
            lower boundary in the cycle.
        max_lr: upper boundary in the cycle. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any cycle is the sum of base_lr
            and some scaling of the amplitude; therefore 
            max_lr may not actually be reached depending on
            scaling function.
        step_size: number of training iterations per
            half cycle. Authors suggest setting step_size
            2-8 x training iterations in epoch.
        mode: one of {original, halving, exponential}.
            Default 'original'.
            Values correspond to policies detailed above.
            If scale_fn is not None, this argument is ignored.
        gamma: constant in 'exponential' scaling function:
            gamma**(cycle iterations)
        scale_fn: Custom scaling policy defined by a single
            argument lambda function, where 
            0 <= scale_fn(x) <= 1 for all x >= 0.
            When given, the mode parameter is ignored.
        scale_mode: {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on 
            cycle number or cycle iterations (training
            iterations since start of cycle). Default is 'cycle'.
            Only used together with a custom scale_fn; the built-in
            policies choose their own scale_mode.
    """
    def __init__(self, base_lr=0.001, max_lr=0.1, step_size=2000., mode='original',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            # Built-in policies: each one fixes both the scaling function and its argument
            if self.mode == 'halving':
                self.scale_fn = lambda x: 1/(2.**(x-1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exponential':
                self.scale_fn = lambda x: gamma**(x)
                self.scale_mode = 'iterations'
            else:
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.   # iterations since the last _reset()
        self.trn_iterations = 0.   # iterations since construction (never reset)
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None, new_step_size=None):
        """
        Resets cycle iterations.
            Optional boundary/step size adjustment.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.
        
    def clr(self):
        """ Return the learning rate for the current value of `clr_iterations` """
        cycle = np.floor(1+self.clr_iterations/(2*self.step_size))
        # x runs 1 -> 0 -> 1 within a cycle, so (1-x) traces the triangular wave
        x = np.abs(self.clr_iterations/self.step_size - 2*cycle + 1)
        scale_arg = cycle if self.scale_mode == 'cycle' else self.clr_iterations
        return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(scale_arg)
        
    def on_train_begin(self, logs=None):
        """ Set the optimizer's learning rate to the start of the schedule """
        logs = logs or {}
        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())        
            
    def on_batch_end(self, epoch, logs=None):        
        """
        Record the learning rate and logs of the finished batch, then advance the schedule.
        The first positional argument is named `epoch`, but this hook runs after each batch.
        """
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        
        new_lr = self.clr()
        K.set_value(self.model.optimizer.lr, new_lr)

# **data_generators.py**

In [None]:
import os
from glob import glob

import numpy as np
import sklearn as skl
import tensorflow as tf

from tensorflow.keras import backend as K
from tensorflow.keras.utils import Sequence

In [None]:
class DataGenerator(Sequence):
    """
    Keras `Sequence` that serves batches read from `sample_*.npz` files.

    Each file is expected to provide the arrays 'sent_emb', 'sent_mask', 's_mask',
    'pos_att', 'a_y', 'o_y' and 's_y'. A batch is returned as
    `([embeddings, sent_mask, s_mask, pos_att], [a_y, o_y, s_y])`.

    Args:
        data_root: a directory path, or a list/tuple of directory paths, holding the samples.
        opt: options object; `batch_size`, `max_sentence_len`, `embedding_dim` and
            `label_smoothing` are read from it.
        validate: if True, all samples are served as one un-shuffled batch.

    Raises:
        TypeError: if `data_root` is neither a string nor a list/tuple.
    """

    def __init__(self, data_root, opt, validate: bool=False):
        super().__init__()

        self.opt = opt
        self.data_root = data_root

        # list of files containing both word-embeddings and multi-labels
        if isinstance(self.data_root, str):
            data_dirs = [self.data_root]
        elif isinstance(self.data_root, (list, tuple)):
            data_dirs = self.data_root
        else:
            # previously this left `self.files` undefined and failed later with AttributeError
            raise TypeError(
                f'data_root must be a str or a list/tuple of str, got {type(self.data_root).__name__}'
            )
        self.files = []
        for data_dir in data_dirs:
            self.files += glob(os.path.join(data_dir, 'sample_*.npz'))

        self.indices = np.arange(len(self.files))
        if not validate:
            self.batch_size = opt.batch_size
            self.shuffle = True
        else:
            # at least 1, so that __len__ does not divide by zero when no file was found
            self.batch_size = max(1, len(self.indices))
            self.shuffle = False

        self.on_epoch_end()

    def __len__(self):
        """ Denotes the number of batches per epoch """
        n_full_batches, n_leftover = divmod(len(self.files), self.batch_size)
        return n_full_batches + (1 if n_leftover else 0)

    def __getitem__(self, index):
        """ Generate single batch of data """

        start_index = self.batch_size * index
        end_index = self.batch_size * (index+1)
        indices = self.indices[start_index:end_index]

        # Generate data
        emb_batch = []
        em_batch, sm_batch, pa_batch = [], [], []
        ay_batch, oy_batch, sy_batch = [], [], []

        for idx in indices:
            sample_file = self.files[idx]

            # The context manager closes the .npz archive once its arrays are read
            with np.load(sample_file) as sample_data:
                # Load embeddings, zero-padded to the maximum sentence length
                emb_pad = np.zeros((self.opt.max_sentence_len, self.opt.embedding_dim))
                emb = sample_data['sent_emb']
                emb_pad[:emb.shape[0],:] = emb
                emb_batch += [emb_pad]

                # Load masks
                em_batch += [sample_data['sent_mask']]
                sm_batch += [sample_data['s_mask']]
                pa_batch += [sample_data['pos_att']]

                # Load labels
                ay = sample_data['a_y'].astype(float)
                oy = sample_data['o_y'].astype(float)
                sy = sample_data['s_y'].astype(float)

            ay_batch += [self.smooth_labels(ay) if self.opt.label_smoothing else ay]
            oy_batch += [self.smooth_labels(oy) if self.opt.label_smoothing else oy]
            sy_batch += [self.smooth_labels(sy) if self.opt.label_smoothing else sy]
        
        data_batch = [emb_batch, em_batch, sm_batch, pa_batch, ay_batch, oy_batch, sy_batch]
        data_batch = [np.array(d) for d in data_batch]
        return data_batch[:4], data_batch[4:]

    def smooth_labels(self, labels, factor=0.1):
        """
        Return label-smoothed labels: `labels*(1-factor) + factor/n_classes`,
        with n_classes taken from the last axis. The input array is not modified.
        """
        return labels * (1 - factor) + (factor / labels.shape[-1])

    def on_epoch_end(self):
        """ Update indices after each epoch """
        if self.shuffle:
            self.indices = skl.utils.shuffle(self.indices)

# **RACL.py**

In [None]:
import os
import sys
import ntpath
import time
import pickle
import logging
from tqdm import tqdm as print_progress

import math
import numpy as np
import tensorflow as tf
if not tf.executing_eagerly():
    tf.enable_eager_execution()

from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    Input, InputLayer, Embedding, LSTM, Bidirectional,
    Conv1D, Conv2D, Dropout, Dense, 
    Dot, Concatenate, Average, Add, Multiply,
    Lambda, Reshape, 
    Softmax, Maximum, Minimum,
)
from tensorflow.keras.activations import softmax, sigmoid
from tensorflow.keras.initializers import Identity, GlorotNormal, GlorotUniform
from tensorflow.keras.optimizers import Adam, Nadam, Adagrad, Adadelta, RMSprop, SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, TensorBoard, LearningRateScheduler, EarlyStopping, ModelCheckpoint, Callback
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.utils import plot_model

In [None]:
class ABSA_Evaluation(Callback):
    """
    Callback that evaluates the model on a fixed validation set after every epoch.

    After each epoch it runs a forward pass, computes the losses with `RACL_losses` and
    the scores with `evaluate_absa`, appends them to `self.records` and logs a summary.

    Args:
        validation_data: pair `(Xs, Ys_true)` of model inputs and gold labels.
        logger: logger whose `info` method receives the per-epoch summary.
        opt: options object forwarded to `RACL_losses`.
        include_opinion: whether the opinion term F1 is evaluated as well.
        threshold: stored on the instance; not used by the evaluation itself.
        name: stored as `self._name`.
    """

    def __init__(self, validation_data, logger, opt, include_opinion: bool=True, threshold: float=0.2, name='ABSA_scores', **kwargs):
        super().__init__(**kwargs) # handle base args (e.g. dtype)
        self._name = name
        self.Xs, self.Ys_true = validation_data
        self.opt = opt
        self.logger = logger
        self.include_opinion = include_opinion
        self.threshold = threshold
        # one list per metric, with one entry appended per epoch
        self.records = {
            'opinion_f1': [], 'OE_loss': [], 'Reg_cost': [],
            'aspect_f1': [], 'AE_loss': [], 
            'sentiment_acc': [], 'sentiment_f1': [], 'SC_loss': [],
            'ABSA_f1': [], 'total_loss': [],
        }

    def on_train_begin(self, logs=None):
        ...
        
    def on_epoch_end(self, epoch, logs=None):
        """ Evaluate on the validation data, record the metrics and log a summary """
        start = time.time()

        # Forward pass
        *Ys_pred, word_mask, sentiment_mask = self.model(self.Xs, training=False)

        # Compute losses
        losses = RACL_losses(self.Ys_true, Ys_pred, [word_mask, sentiment_mask], self.opt)

        # Evaluate
        scores = evaluate_absa(self.Ys_true[0], Ys_pred[0],
                               self.Ys_true[1], Ys_pred[1],
                               self.Ys_true[2], Ys_pred[2],
                               word_mask, self.include_opinion)
        if not self.include_opinion:
            # Without opinions only 4 scores come back; pad the opinion slot with NaN so
            # the indices below stay aligned (previously this case raised IndexError).
            scores = [float('nan')] + list(scores)
        end = time.time()

        metrics = {
            'opinion_f1': scores[0], 'OE_loss': losses[1],
            'aspect_f1': scores[1], 'AE_loss': losses[2], 'Reg_cost': losses[4],
            'sentiment_acc': scores[2], 'sentiment_f1': scores[3], 'SC_loss': losses[3],
            'ABSA_f1': scores[4], 'total_loss': losses[0],
        }

        self.max_score_index, self.min_loss_index = self.update_metrics(metrics)
        display_text = f'Epoch {epoch+1:03d} - Evaluation in {int(end-start)} seconds\n' + \
            f'\tOE_loss={losses[1]:.3f}, AE_loss={losses[2]:.3f}, SC_loss={losses[3]:.3f}, Reg_cost={losses[4]:.3f}, total_loss={losses[0]:.3f}' + \
            f'\n\topinion_f1={scores[0]:.7f}, aspect_f1={scores[1]:.7f}, sentiment_acc={scores[2]:.7f}, sentiment_f1={scores[3]:.7f}, ABSA_f1={scores[4]:.7f}' + \
            f'\n--> Best score at Epoch {self.max_score_index}' + \
            f'\n--> Best loss at Epoch {self.min_loss_index}'
        self.logger.info(display_text)
        return metrics

    def update_metrics(self, metrics):
        """
        Append the metrics of one epoch to `self.records`.
        Returns the 1-based epochs with the highest ABSA_f1 and the lowest total_loss so far.
        """
        for k, v in metrics.items():
            self.records[k].append(v)
        return np.argmax(self.records['ABSA_f1'])+1, \
                np.argmin(self.records['total_loss'])+1

In [None]:
class RACLModel(Model):
    """
    Keras `Model` with custom train / test steps.
    The losses are computed by `RACL_losses` from the model outputs, where the last
    two outputs are the word mask and the sentiment mask.
    """

    def set_opt(self, opt):
        """ Attach the options object that is passed on to `RACL_losses` """
        self.opt = opt

    def _racl_forward_losses(self, data, training):
        """ Run a forward pass on one `(inputs, labels)` batch and return the losses """
        features, targets = data
        *predictions, word_mask, sentiment_mask = self(features, training=training)
        return RACL_losses(targets, predictions, [word_mask, sentiment_mask], self.opt)

    @staticmethod
    def _racl_loss_logs(losses):
        """ Name the individual losses; index 0 is the loss that gets optimized """
        return {
            'OE_loss': losses[1], 'AE_loss': losses[2], 'SC_loss': losses[3], 
            'Reg_cost': losses[4], 'loss': losses[0], 
        }

    def train_step(self, data):
        # Forward pass recorded on the tape so that the loss can be differentiated
        with tf.GradientTape() as tape:
            losses = self._racl_forward_losses(data, training=True)

        # Backward propagation - compute gradients of the total loss & update weights
        weights = self.trainable_variables
        grads = tape.gradient(losses[0], weights)
        self.optimizer.apply_gradients(zip(grads, weights))

        logs = self._racl_loss_logs(losses)
        logs['lr'] = self.optimizer.learning_rate
        return logs

    def test_step(self, data):
        # Same forward pass as in training, but in inference mode and without updates
        losses = self._racl_forward_losses(data, training=False)
        return self._racl_loss_logs(losses)

In [None]:
class RACL(object):

    def __init__(self, opt):
        """Build the RACL Keras model and, when training, its logger and output folders.

        Args:
            opt: options namespace. This method reads ``is_training``,
                ``random_type``, ``random_seed``, ``logs_path``, ``task``,
                ``ckpt_path``, ``output_path``; the ``build_*`` methods and
                ``visualize_architecture`` read further attributes.

        Note:
            ``self.logger``, ``self.ckpt_dir``, ``self.board_dir`` and
            ``self.viz_dir`` are only created when ``opt.is_training`` is true.
        """
        self.opt = opt
        self.mode = 'train' if opt.is_training else 'predict'

        # Glorot-uniform on request, Glorot-normal for any other value.
        initializer_cls = GlorotUniform if opt.random_type == 'uniform' else GlorotNormal
        self.initializer = initializer_cls(seed=opt.random_seed)

        if opt.is_training:
            # Build logger: one timestamped text file per run, echoed to the console
            log_dir = os.path.join(opt.logs_path, opt.task)
            os.makedirs(log_dir, exist_ok=True)
            timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
            filename = os.path.join(log_dir, f'{timestamp}.txt')
            self.logger = logging.getLogger(filename)
            self.logger.setLevel(logging.DEBUG)
            # self.logger.propagate = False
            for handler in (logging.StreamHandler(), logging.FileHandler(filename, 'a')):
                self.logger.addHandler(handler)

            # Log hyper-parameters
            info = ''.join('>>> {0}: {1}\n'.format(arg, getattr(opt, arg)) for arg in vars(opt))
            self.logger.info('{:-^80}\n{}\n'.format('Parameters', info))

            # Build checkpoint & tensorboard
            self.ckpt_dir = opt.ckpt_path
            self.board_dir = os.path.join(opt.output_path, "tensorboard")
            self.viz_dir = os.path.join(opt.output_path, "visualization")
            for dir_ in (self.ckpt_dir, self.board_dir, self.viz_dir):
                os.makedirs(dir_, exist_ok=True)

        # Build model
        inputs, embeddings, position_att, word_mask, sentiment_mask = self.build_input_block()
        predictions = list(self.build_RACL_block(embeddings, position_att, word_mask))

        if not opt.is_training:
            # Prediction model: no sentiment mask input, outputs are probabilities.
            predictions_as_prob = self.build_output_block(predictions)
            self.model = RACLModel(inputs=[inputs, word_mask, position_att],
                                   outputs=predictions_as_prob, name='RACL')
            # self.load_weights(opt.ckpt_path)
            self.model.summary()
            return

        # Training model: the masks are also returned as outputs so that the
        # custom train/test steps can hand them to the loss.
        model_inputs = [inputs, word_mask, sentiment_mask, position_att]
        model_outputs = predictions + [word_mask, sentiment_mask]
        self.model = RACLModel(inputs=model_inputs, outputs=model_outputs, name='RACL')

        self.logger.info(log_summary(self.model))
        self.visualize_architecture()

    def visualize_architecture(self):
        """Draw the model graph to ``<opt.model>_<mode>.png`` (path relative to the working directory)."""
        image_path = f'{self.opt.model}_{self.mode}.png'
        plot_model(
            self.model,
            to_file=image_path,
            dpi=128,
            show_shapes=True,
            show_layer_names=True,
        )

    def build_input_block(self):
        """Create the model inputs.

        Returns:
            ``(inputs, embeddings, position_att, word_mask, sentiment_mask)``:
            the pair produced by ``dropoutize_embeddings(self.opt)`` followed by
            three Keras ``Input`` placeholders sized by ``opt.max_sentence_len``.
        """
        max_len = self.opt.max_sentence_len

        inputs, embeddings = dropoutize_embeddings(self.opt)
        # Overwrites the private Keras name so the tensor shows up as 'embeddings_concat'.
        inputs._name = 'embeddings_concat'

        # Inputs for Masking
        position_att = Input(shape=(max_len, max_len), name='position_att')
        word_mask = Input(shape=(max_len,), name='word_mask')
        sentiment_mask = Input(shape=(max_len,), name='sentiment_mask')

        return inputs, embeddings, position_att, word_mask, sentiment_mask
        
    def build_RACL_block(self, embeddings, position_att, word_mask):
        """Assemble the stacked RACL interaction blocks and return the three prediction tensors.

        Args:
            embeddings: tensor fed to the input dropout (``__init__`` passes the second
                value returned by ``build_input_block``).
            position_att: Keras input handed unchanged to every ``RACL_Block``.
            word_mask: Keras input handed unchanged to every ``RACL_Block``.

        Returns:
            ``(aspect_pred, opinion_pred, sentiment_pred)``: for each task, the output of
            ``ReduceDim('mean', axis=-1)`` applied to the concatenation of the predictions
            produced by the ``opt.n_interactions`` blocks (presumably their average -
            ``ReduceDim`` is defined elsewhere).
        """
        # Preprocessing
        inputs = Dropout(rate=1-self.opt.keep_prob_1, name='inputs_dropout')(embeddings)

        # Shared Features
        # A kernel_size-1 convolution with `opt.embedding_dim` filters, shared by the three tasks.
        conv_args = {'kernel_size': 1, 'strides': 1, 'padding': 'same', 'activation': 'relu', }
        Feature_Extractor = Conv1D(filters=self.opt.embedding_dim, name='shared_features', **conv_args)
        shared_features = Feature_Extractor(inputs)
        shared_features = Dropout(rate=1-self.opt.keep_prob_1, name='shared_features_dropout')(shared_features)

        # Define repeatable layers in RACL interactions
        # These layer objects are created once and the same instances are passed to every block.
        DropBlock_aspect = DropBlock2D(keep_prob=self.opt.keep_prob_2, block_size=3, name='DropBlock2D_aspect')
        DropBlock_opinion = DropBlock2D(keep_prob=self.opt.keep_prob_2, block_size=3, name='DropBlock2D_opinion')
        DropBlock_context = DropBlock2D(keep_prob=self.opt.keep_prob_2, block_size=3, name='DropBlock2D_context')

        L2Normalize = L2Norm()
        # Inserts a new axis 1 and repeats the input `max_sentence_len` times along it.
        Tile = Lambda(lambda x: tf.tile(tf.expand_dims(x, axis=1), [1, self.opt.max_sentence_len, 1]), name='Tiler-in-RACL')

        # We found that the SC task is more difficult than the AE and OE tasks.
        # Hence, we augment it with a memory-like mechanism by updating the aspect query with the retrieved contexts.
        # For more details about the memory network, refer to 
        #       https://www.aclweb.org/anthology/D16-1021/ .
        # Each list keeps the full history; only the last element feeds the next block.
        aspect_inputs, opinion_inputs, context_inputs = [shared_features], [shared_features], [shared_features]
        aspect_preds, opinion_preds, sentiment_preds = [], [], []
        context_queries = [shared_features]

        # conv_args is changed only now, after the shared extractor was created with
        # kernel_size=1, so `opt.kernel_size` applies to the RACL blocks only.
        conv_args['kernel_size'] = self.opt.kernel_size
        dense_args = {'units': self.opt.n_classes, 'kernel_initializer': self.initializer}

        for interact_i in range(self.opt.n_interactions):            
            racl_block = RACL_Block(self.opt, L2Normalize, [DropBlock_aspect, DropBlock_opinion, DropBlock_context], Tile,
                                    conv_args, dense_args, block_id=interact_i)
            output_preds, output_interacts = racl_block([aspect_inputs[-1], opinion_inputs[-1], context_inputs[-1], context_queries[-1], word_mask, position_att])
            aspect_pred, opinion_pred, sentiment_pred = output_preds
            aspect_interact, opinion_interact, context_interact, context_conv = output_interacts

            # Stacking
            # Each prediction goes through ExpandDim(axis=-1) so the per-block results can be
            # concatenated along that last axis below.
            aspect_preds.append(ExpandDim(axis=-1, name=f'aspect_pred-{interact_i}')(aspect_pred))
            opinion_preds.append(ExpandDim(axis=-1, name=f'opinion_pred-{interact_i}')(opinion_pred))
            sentiment_preds.append(ExpandDim(axis=-1, name=f'sentiment_pred-{interact_i}')(sentiment_pred))

            # Note the crossing: the next context *input* is `context_conv`, while the next
            # context *query* is `context_interact`.
            aspect_inputs.append(aspect_interact)
            opinion_inputs.append(opinion_interact)
            context_inputs.append(context_conv)
            context_queries.append(context_interact) # update query

        # Multi-layer Short-cut
        # Combine the predictions of all blocks instead of using only the last one.
        aspect_preds = Concatenate(axis=-1, name='aspect_preds')(aspect_preds)
        opinion_preds = Concatenate(axis=-1, name='opinion_preds')(opinion_preds)
        sentiment_preds = Concatenate(axis=-1, name='sentiment_preds')(sentiment_preds)
        aspect_pred = ReduceDim('mean', axis=-1, name='AE_pred')(aspect_preds)
        opinion_pred = ReduceDim('mean', axis=-1, name='OE_pred')(opinion_preds)
        sentiment_pred = ReduceDim('mean', axis=-1, name='SC_pred')(sentiment_preds)
        return aspect_pred, opinion_pred, sentiment_pred

    def build_output_block(self, preds):
        """Apply a softmax over the last axis to each of the three prediction tensors.

        Args:
            preds: exactly three tensors, in the order aspect, opinion, sentiment.

        Returns:
            ``(aspect_prob, opinion_prob, sentiment_prob)``.
        """
        aspect_pred, opinion_pred, sentiment_pred = preds
        named_preds = (
            ('aspect_prob', aspect_pred),
            ('opinion_prob', opinion_pred),
            ('sentiment_prob', sentiment_pred),
        )

        # Scale probability
        aspect_prob, opinion_prob, sentiment_prob = [
            Softmax(axis=-1, name=layer_name)(pred) for layer_name, pred in named_preds
        ]
        return aspect_prob, opinion_prob, sentiment_prob
    
    def train(self):
        """Run the looped, phase-wise training schedule, then test the best checkpoints.

        Every loop runs the phases ``all -> opinion -> aspect -> sentiment -> all``,
        each for ``opt.n_epochs`` epochs. After a phase the loss weights stored on
        ``self.opt`` are updated:

        * before a single-task phase, that task's weight is set to 1.69 and the
          other two to 1.00;
        * after the last phase of a loop, the weights are the inverse of the
          validation F1 scores, rescaled so the smallest is 1 and clipped to
          ``[1, 16.9]``;
        * otherwise the previous weights are kept.

        At the end of every loop the checkpoints of the best-loss and best-score
        epochs (as tracked by the evaluation callback) are copied to
        ``RACL-best-<loss|score>-loop=<n>.h5`` and all other per-epoch checkpoints
        are removed. Note that the model is left with the best-score weights
        loaded when the next loop starts.

        Finally the training histories are plotted/pickled (best effort) and
        every ``RACL-best-*.h5`` checkpoint is evaluated on the test set.
        """
        # Load generators
        train_set = DataGenerator(self.opt.train_path, self.opt, validate=False)
        val_set = DataGenerator(self.opt.val_path, self.opt, validate=True)
        test_set = DataGenerator(self.opt.test_path, self.opt, validate=True)
        n_trains, n_vals = len(train_set), len(val_set)

        ################################
        #     Training Procedure       #
        ################################
        Evaluator = ABSA_Evaluation(val_set[0], self.logger, opt=self.opt, include_opinion=self.opt.include_opinion)
        train_arguments = {
            'x': train_set,
            'steps_per_epoch': n_trains,
            'validation_data': val_set,
            'validation_steps': n_vals,
            'verbose': 1,
            'callbacks': [
                # ReduceLROnPlateau(monitor='val_loss', factor=0.69, patience=5, min_lr=1e-7, verbose=1),
                # True division on purpose: `lr // 169` floors any realistic
                # learning rate to exactly 0.0.
                CyclicLR(mode='exponential', base_lr=self.opt.lr / 169, max_lr=self.opt.lr, step_size=n_trains*2),
                # TensorBoard(self.board_dir),
                # Written to the same folder the best-checkpoint code below reads from.
                ModelCheckpoint(os.path.join(self.ckpt_dir, 'RACL-epoch={epoch:03d}.h5'), save_weights_only=True, monitor='loss', verbose=1),
                Evaluator,
                # EarlyStopping(monitor="val_loss", patience=11, restore_best_weights=True, verbose=1)
            ]
        }
        self.model.set_opt(self.opt)
        self.model.compile(optimizer=get_optimizer(self.opt.optimizer, learning_rate=self.opt.lr))

        phases = ['all', 'opinion', 'aspect', 'sentiment', 'all']
        epochs_per_loop = self.opt.n_epochs * len(phases)
        # [aspect, opinion, sentiment] loss weights used before a single-task phase.
        emphasis = {
            'aspect': [1.69, 1.00, 1.00],
            'opinion': [1.00, 1.69, 1.00],
            'sentiment': [1.00, 1.00, 1.69],
        }
        # Start from the configured weights so `weights` is always bound, even if
        # the phase list is changed so that its first transition has no emphasis.
        weights = [self.opt.aspect_weight, self.opt.opinion_weight, self.opt.sentiment_weight]
        histories = []

        for loop_i in range(self.opt.n_loops):
            self.logger.info(f"\n\tLoop {loop_i+1:03d} / {self.opt.n_loops:03d}")
            for p_i, phase in enumerate(phases):
                self.logger.info(f"\n\t\tPhase {p_i+1}: Training {phase.upper()} layers ...")
                # Epoch numbers keep increasing across phases and loops.
                first_epoch = loop_i*epochs_per_loop + p_i*self.opt.n_epochs
                history = self.train_per_phase(initial_epoch=first_epoch,
                                               epochs=first_epoch + self.opt.n_epochs,
                                               train_arguments=train_arguments,
                                               phase=phase)
                histories.append(history)

                # Update weights for losses
                self.logger.info(f"\n\t\tPhase {p_i+1}: Updating loss weights ...")
                if p_i >= len(phases)-1:
                    # End of the loop: the weaker a task scores on validation,
                    # the larger its loss weight for the next loop.
                    f1_o, f1_a, _, f1_s, _ = self.evaluate(test_set=val_set)
                    scores = np.array([f1_a, f1_o, f1_s], dtype=float)
                    weights = 1 / (scores+K.epsilon())
                    weights /= np.min(weights)
                    weights = np.clip(weights, 1., 16.9)
                else:
                    # Emphasise the task trained next; keep the current weights
                    # when the next phase is not a single-task phase.
                    weights = emphasis.get(phases[p_i+1], weights)

                self.opt.aspect_weight = weights[0]
                self.opt.opinion_weight = weights[1]
                self.opt.sentiment_weight = weights[2]
                self.logger.info(f"\n\t\t\t aspect_weight = {weights[0]} \n\t\t\t opinion_weight = {weights[1]} \n\t\t\t sentiment_weight = {weights[2]}")

            # Save best weights per phase
            ckpt_ids_to_keep = [Evaluator.min_loss_index, Evaluator.max_score_index]
            for ckpt_id, ckpt_type in zip(ckpt_ids_to_keep, ['loss', 'score']):
                model_ckpt = os.path.join(self.ckpt_dir, f'RACL-epoch={ckpt_id:03d}.h5')
                self.model.load_weights(model_ckpt)
                self.model.save_weights(os.path.join(self.ckpt_dir, f'RACL-best-{ckpt_type}-loop={loop_i+1}.h5'))

            # Clean epoch weights
            for ckpt_file in glob(os.path.join(self.ckpt_dir, 'RACL-epoch=*.h5')):
                # Parse the epoch number instead of slicing fixed columns, so ids
                # with more than three digits are read correctly and files that
                # do not follow the pattern are left alone.
                matched = re.fullmatch(r'RACL-epoch=(\d+)\.h5', ntpath.basename(ckpt_file))
                if matched is None or int(matched.group(1)) in ckpt_ids_to_keep:
                    continue
                if os.path.isfile(ckpt_file):
                    os.remove(ckpt_file)

        # Visualization
        try:
            history_fig = plot_history(histories)
            history_fig.savefig(os.path.join(self.viz_dir, 'training_history.png'))

            for t_i, train_history in enumerate(histories):
                with open(f'train_history_{t_i}.hst', 'wb') as f_writer:
                    pickle.dump(train_history.history, f_writer)
        except Exception:
            # Best effort: a plotting/pickling problem must not abort the run,
            # but it is recorded (with traceback) instead of being dropped.
            self.logger.exception('\n\tCould not plot or save the training history')

        # Testing Process
        self.logger.info('\n\tTesting')
        for ckpt_file in sorted(glob(os.path.join(self.ckpt_dir, 'RACL-best-*.h5'))):
            scores = self.evaluate(model_ckpt=ckpt_file, test_set=test_set)
            self.logger.info(f'\n\tPrediction by {ntpath.basename(ckpt_file)}')
            self.logger.info(f'\t\topinion_f1={scores[0]:.7f}\n\t\taspect_f1={scores[1]:.7f}\n\t\tsentiment_acc={scores[2]:.7f}\n\t\tsentiment_f1={scores[3]:.7f}\n\t\tABSA_f1={scores[4]:.7f}')

    def train_aspect(self, initial_epoch: int, epochs: int, train_arguments: dict):
        for layer in self.model.layers:
            if 'aspect' in layer.name.lower():
                layer.trainable = True
                self.logger.info(f"\t\t\t{layer.name}")
            else:
                layer.trainable = False
        history = self.model.fit(initial_epoch=initial_epoch, epochs=epochs, **train_arguments)
        return history

    def train_opinion(self, initial_epoch: int, epochs: int, train_arguments: dict):
        for layer in self.model.layers:
            if 'opinion' in layer.name.lower():
                layer.trainable = True 
                self.logger.info(f"\t\t\t{layer.name}")
            else:
                layer.trainable = False
        history = self.model.fit(initial_epoch=initial_epoch, epochs=epochs, **train_arguments)
        return history

    def train_sentiment(self, initial_epoch: int, epochs: int, train_arguments: dict):
        for layer in self.model.layers:
            if any(ss in layer.name.lower() for ss in ['sentiment', 'context']):
                layer.trainable = True
                self.logger.info(f"\t\t\t{layer.name}")
            else:
                layer.trainable = False
        history = self.model.fit(initial_epoch=initial_epoch, epochs=epochs, **train_arguments)
        return history

    def train_all(self, initial_epoch: int, epochs: int, train_arguments: dict):
        for layer in self.model.layers:
            layer.trainable = True
        history = self.model.fit(initial_epoch=initial_epoch, epochs=epochs, **train_arguments)
        return history

    def train_per_phase(self, initial_epoch: int, epochs: int, train_arguments: dict, phase: str='all'):

        phase = phase.lower()
        if phase == 'embedding':
            history = self.train_embedding(initial_epoch, epochs, train_arguments)
        elif phase == 'aspect':
            history = self.train_aspect(initial_epoch, epochs, train_arguments)
        elif phase == 'opinion':
            history = self.train_opinion(initial_epoch, epochs, train_arguments)
        elif phase == 'sentiment':
            history = self.train_sentiment(initial_epoch, epochs, train_arguments)
        else:
            history = self.train_all(initial_epoch, epochs, train_arguments)
        return history

    def evaluate(self, model_ckpt='', test_set: DataGenerator=None):
        """Score the model on the first batch of a data generator.

        Args:
            model_ckpt: optional path of a weights file to load before scoring.
                An empty value keeps the current weights. If the path is given
                but is not a file, a warning is logged and the current weights
                are used.
            test_set: a ``DataGenerator``. Any other value (including the
                default ``None``) makes this method build one from
                ``opt.test_path`` with ``validate=True``.

        Returns:
            The result of ``evaluate_absa``; callers in this class read it as
            ``(opinion_f1, aspect_f1, sentiment_acc, sentiment_f1, ABSA_f1)``.
        """
        # Load generator
        if not isinstance(test_set, DataGenerator):
            test_set = DataGenerator(self.opt.test_path, self.opt, validate=True)

        # Load weights
        if model_ckpt:
            if os.path.isfile(model_ckpt):
                self.model.load_weights(model_ckpt)
            else:
                # Do not silently score a different set of weights than the
                # caller asked for; `self.logger` only exists in training mode.
                log = getattr(self, 'logger', None) or logging.getLogger(__name__)
                log.warning(f'Checkpoint {model_ckpt} not found; evaluating with the current weights')

        # Evaluate
        # Only item 0 of the generator is scored.
        # NOTE(review): the unpacking below expects the training-mode model, whose
        # outputs end with (word_mask, sentiment_mask) - confirm for predict mode.
        Xs, Ys_true = test_set[0]
        *Ys_pred, word_mask, _ = self.model.predict(Xs)
        scores = evaluate_absa(Ys_true[0], Ys_pred[0],
                               Ys_true[1], Ys_pred[1],
                               Ys_true[2], Ys_pred[2],
                               word_mask, include_opinion=self.opt.include_opinion)
        return scores

    def predict(self, sentence, word_mask, position_att):
        ae_pred, oe_pred, sc_pred = self.model.predict([sentence, word_mask, position_att])
        return ae_pred, oe_pred, sc_pred

    def load_weights(self, weights_path):
        if not os.path.isfile(weights_path):
            raise FileNotFoundError(f"weights_path:\n\t{weights_path}\ndoesn't exist!")
        try:
            self.model.load_weights(weights_path)
        except Exception as e:
            print(e)

# **train.py**

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
# os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import json
import time
import argparse
from pathlib import Path

import random
import numpy as np
import tensorflow as tf
tf.autograph.set_verbosity(3) 

In [None]:
def load_config(config_path: str):
    """
    Load a YAML configuration file.

    Parameters
    ----------
    config_path : str
        Path to the YAML file.

    Returns
    -------
    config
        The object ``yaml.full_load`` builds from the file's contents.
    """
    # NOTE(review): `logger` is not created in this function; it has to exist at
    # module level when this is called - confirm.
    logger.info(f" Loading config from {config_path}...")

    # NOTE(review): `yaml.full_load` can build more than plain data types; consider
    # `yaml.safe_load` if the file may come from an untrusted source.
    with open(config_path, 'r') as stream:
        return yaml.full_load(stream)

In [None]:
def _str2bool(value):
    """Convert a command-line value such as ``true`` / ``False`` / ``0`` to a bool.

    ``argparse`` calls the ``type`` callable with the raw string, and
    ``bool('False')`` is ``True``; this converter parses the text instead.

    Raises:
        argparse.ArgumentTypeError: the value is not a recognised boolean word.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


def load_basic_arguments(parser):
    """Register the training options on ``parser``, parse them and seed the RNGs.

    Args:
        parser: an ``argparse.ArgumentParser``; the options are added to it in place.

    Returns:
        The parsed namespace (unknown command-line arguments are ignored),
        extended with ``n_classes``, ``is_training``, ``term_weights`` and
        ``polarity_weights``.

    Side effects:
        Seeds ``random``, ``numpy`` and ``tensorflow`` with ``opt.random_seed``.
    """
    # Define arguments
    parser.add_argument('--model', default='racl', type=str, help='model name')
    parser.add_argument('--task', default='hotel', type=str, help='task name')
    parser.add_argument('--n_loops', default=2, type=int, help='number of loops to repeat `n_epochs` for training procedure')
    parser.add_argument('--n_epochs', default=10, type=int, help='number of epochs for training per phase')
    parser.add_argument('--batch_size', default=64, type=int, help='number of samples per batch')
    parser.add_argument('--lr', default=0.00069, type=float, help='learning rate')
    parser.add_argument('--max_sentence_len', default=256, type=int, help='maximum number of words in sentence')
    parser.add_argument('--embedding_dim', default=768, type=int, help='embedding dimension')
    parser.add_argument('--n_interactions', default=6, type=int, help='number of RACL blocks to interact')
    parser.add_argument('--keep_prob_1', default=.97, type=float, help='keep prob for inputs')
    parser.add_argument('--keep_prob_2', default=.97, type=float, help='keep prob for tasks')
    parser.add_argument('--n_filters', default=96, type=int, help='number of filters in convolution')
    parser.add_argument('--kernel_size', default=13, type=int, help='kernel size in convolution')
    # Help text corrected: the default below is 'adam', not SGD.
    parser.add_argument('--optimizer', default='adam', type=str, help='optimizer for model | default: adam')
    parser.add_argument('--random_type', default='normal', type=str, help='random type: uniform or normal (default)')
    parser.add_argument('--random_seed', default=4_10_20, type=int, help='random seed')
    parser.add_argument('--aspect_weight', default=1., type=float, help='weight of aspect loss')
    parser.add_argument('--opinion_weight', default=1., type=float, help='weight of opinion loss')
    parser.add_argument('--sentiment_weight', default=1.69, type=float, help='weight of sentiment loss')
    parser.add_argument('--regularization_weight', default=1e-4, type=float, help='weight of regularization loss')
    # Boolean flags use `_str2bool`: with `type=bool`, "--flag False" parsed as True.
    parser.add_argument('--label_smoothing', default=True, type=_str2bool, help='label smoothing for regularization')
    parser.add_argument('--load_pretrained', default=False, type=_str2bool, help='whether to load an existing checkpoint')
    parser.add_argument('--include_opinion', default=True, type=_str2bool, help='whether to use opinion for model')
    opt = parser.parse_known_args()[0]

    # Fixed settings that are not exposed on the command line.
    opt.n_classes = 3
    opt.is_training = True

    opt.term_weights = [.2, .5, .3] # Outside-Beginning-Inside
    opt.polarity_weights = [.3, .3, .4] # Positive-Negative-Neutral

    random.seed(opt.random_seed)
    np.random.seed(opt.random_seed)
    tf.random.set_seed(opt.random_seed)
    return opt

In [None]:
def train(parser, args):
    """Configure the run, build the RACL model, train it and log the elapsed time.

    Args:
        parser: ``argparse.ArgumentParser`` handed to ``load_basic_arguments``.
        args: accepted for interface compatibility; it is not read in this function.
    """
    opt = load_basic_arguments(parser)

    # Assign pre-defined paths (same assignment order as before, since the
    # options are later listed in insertion order).
    predefined_paths = {
        'logs_path': "./logs",
        'ckpt_path': "./checkpoint",
        'output_path': "./output",
        'evaluate_path': "./evaluate",
        'prediction_path': "./predictions",
        'train_path': "../input/absa-hotel-distiluse/dataset/train",
        'test_path': "../input/absa-hotel-distiluse/dataset/test",
        'val_path': "../input/absa-hotel-distiluse/dataset/val",
    }
    for option_name, location in predefined_paths.items():
        setattr(opt, option_name, location)

    # Only the output folders are created; the dataset folders must already exist.
    for path in (opt.logs_path, opt.output_path, opt.ckpt_path, opt.evaluate_path, opt.prediction_path):
        os.makedirs(path, exist_ok=True)

    # Train
    started_at = time.time()
    model = RACL(opt)
    # model.visualize_architecture()
    # NOTE(review): the checkpoint is loaded unconditionally; the `--load_pretrained`
    # option is not consulted here - confirm this is intended.
    model.load_weights("../input/raclhotel-distiluse-256l/RACL-epoch200.h5")
    model.train()
    time_running = time.time() - started_at

    # Break the elapsed seconds down into hours / minutes / seconds.
    run_hours = int(time_running//3600)
    seconds_past_hours = time_running - run_hours*3600
    run_minutes = int(seconds_past_hours//60)
    run_seconds = int(time_running - run_hours*3600 - run_minutes*60)
    model.logger.info(f'\n\n\nTraining in {run_hours}h {run_minutes}m {run_seconds}s')

In [None]:
# Script entry point: training starts as soon as this cell / module is executed.
parser = argparse.ArgumentParser(description='Model Training')
parser.add_argument('-c', '--config-path', default='../input/absa-hotel-distiluse/model_config.yml', type=str, help='Config path')
# parse_known_args() returns (namespace, leftovers); arguments this parser does not
# declare are ignored instead of being rejected.
args = parser.parse_known_args()[0]

# The same parser is passed on so that `train` can register the training options
# through `load_basic_arguments`.
# NOTE(review): `args` (and therefore --config-path) is handed to `train` but never
# read there - confirm whether the YAML config is meant to be loaded.
train(parser, args)

In [None]:
import shutil

# os.chdir(r'/kaggle/working')
# dir_path = '/kaggle/working/'
# shutil.make_archive(dir_path+"data", 'zip', dir_path)
# shutil.rmtree('/kaggle/working/checkpoint')