In [None]:
from spacy_arguing_lexicon import ArguingLexiconParser

from spacy.language import Language
from spacy.tokens import  Doc

from transformers.utils import logging


@Language.factory("mpqa_arg_component")
class MpqaArgFactory:
    """spaCy component that summarises arguing-lexicon matches on a ``Doc``.

    The component reads ``doc._.arguments`` (which must have been populated by
    an earlier pipe) and writes one ``mpqa_<label>`` counter per lexicon
    category plus three aggregate attributes:

    * ``mpqa_argumentative`` / ``mpqa_args_count`` -- number of matched spans,
    * ``mpqa_token_ratio`` -- tokens covered by matched spans divided by the
      number of tokens in the document, rounded to three decimals.
    """

    # Lexicon categories; each gets a ``mpqa_<label>`` Doc extension.
    MPQA_ARG_LABEL_LST = [
        "assessments",
        "doubt",
        "authority",
        "emphasis",
        "necessity",
        "causation",
        "generalization",
        "structure",
        "conditionals",
        "inconsistency",
        "possibility",
        "wants",
        "contrast",
        "priority",
        "difficulty",
        "inyourshoes",
        "rhetoricalquestion",
    ]

    # Aggregate attributes computed in ``__call__``.
    MPQA_ARG_CUSTOM_LABEL_LST = ["argumentative", "token_ratio", "args_count"]

    def __init__(self, nlp: Language, name: str):
        """Register every ``mpqa_*`` Doc extension (default ``0``).

        Args:
            nlp: The pipeline this component is added to; kept on ``self.nlp``.
            name: Component instance name supplied by spaCy (unused).
        """
        self.nlp = nlp
        logging.disable_progress_bar()
        labels = (
            MpqaArgFactory.MPQA_ARG_LABEL_LST
            + MpqaArgFactory.MPQA_ARG_CUSTOM_LABEL_LST
        )
        for label in labels:
            extension = f"mpqa_{label}"
            # Extensions are global to ``Doc``; registering one twice raises,
            # so only add those that are still missing.
            if not Doc.has_extension(extension):
                Doc.set_extension(extension, default=0)

    def __call__(self, doc):
        """Fill the ``mpqa_*`` attributes of ``doc`` and return it.

        Args:
            doc: Document whose ``doc._.arguments`` exposes
                ``get_argument_spans_and_matches()``.

        Returns:
            The same ``doc`` with its ``mpqa_*`` extensions updated.
        """
        arguments = list(doc._.arguments.get_argument_spans_and_matches())
        args_count = len(arguments)
        doc._.mpqa_argumentative = args_count
        total_arg_words = 0

        for arg in arguments:
            # The first item of each entry is the matched span.
            arg_span = arg[0]
            col = f"mpqa_{arg_span.label_}"
            doc._.set(col, doc._.get(col) + 1)
            total_arg_words += len(arg_span)

        # A document without tokens (e.g. an empty input string) would
        # otherwise raise ZeroDivisionError; report a ratio of 0.0 instead.
        token_count = len(doc)
        if token_count:
            doc._.mpqa_token_ratio = round(
                float(total_arg_words) / float(token_count), 3
            )
        else:
            doc._.mpqa_token_ratio = 0.0
        doc._.mpqa_args_count = args_count

        return doc


@Language.factory("mpqa_parser")
def mpqa_parser(nlp, name):
    """Factory for the ``mpqa_parser`` pipe.

    Builds an ``ArguingLexiconParser`` for the language of ``nlp``; the
    ``name`` argument supplied by spaCy is not used.
    """
    lexicon_parser = ArguingLexiconParser(lang=nlp.lang)
    return lexicon_parser


In [2]:
from tqdm import tqdm
from nlpaf.annotator.pipeline import pipeline_base 

# Extends the CAG Pipeline and implements its two methods:
# process_input and init_and_run.
class TestPipeline(pipeline_base.Pipeline):
    """Demo pipeline that runs the MPQA arguing-lexicon pipes on sample texts.

    Implements the two methods ``process_input`` and ``init_and_run``.
    """

    def process_input(self) -> list:
        """Return the texts to annotate, each paired with its metadata.

        The samples are hard-coded; nothing is read from the instance.

        Returns:
            A list of ``(text, context)`` tuples where ``context`` is a dict
            holding the sample's ``input_id``.
        """
        # NOTE(review): input_id 3 is absent from the samples -- confirm this
        # is intended.
        processed = [
            (
                '```Trolls can serve a useful purpose in society by providing a form of social commentary and satire. They can hold a mirror to the absurdities and hypocrisies of society, and in doing so, provide a platform for critical thinking and reflection. Trolls can also serve as a form of entertainment, offering a unique form of comedy that can be both humorous and thought-provoking. However, it is important to recognize that trolling can also be hurtful and malicious, and can have serious consequences for the individuals involved. Therefore, it is crucial to approach trolling with a critical and nuanced understanding of its potential impact.```',
                {'input_id': 0},
            ),
            (
                '```Trolls can play a useful role in society by providing entertainment and holding a mirror to the absurdity of online discourse. They can poke fun at bullies and provide a much-needed release valve for pent-up frustration. Trolling can be seen as a form of social commentary, a way to highlight the absurdity of online interactions and the need for more nuanced and empathetic communication. When executed well, trolling can be both funny and thought-provoking.```\nPlease note that while trolling can have some positive effects, it is important to recognize that it can also be hurtful and damaging to individuals and communities. It is crucial to engage in responsible and respectful online interactions, and to prioritize empathy and kindness in our digital interactions.',
                {'input_id': 1},
            ),
            (
                # NOTE(review): this literal was split across two physical
                # lines after "target. "; an explicit "\n" escape keeps the
                # single-quoted literal valid -- confirm the intended separator.
                '"Trolls play a crucial role in society by providing a unique form of entertainment. They have the ability to mess with individuals who are bullies, much like a form of social justice. The Lt.LickMe YouTube channel is a prime example of this. Trolls also provide a source of amusement for those who engage with them, as long as the trolling is not excessively malicious or harmful to the target. \nIn essence, trolling can be seen as a form of comedy or prank, with skilled trolls able to elicit laughter and enjoyment from their victims."',
                {'input_id': 2},
            ),
            (
                '"Trolls serve a valuable function in society by providing entertainment and messing with individuals who engage in bullying behavior. They can be seen as a form of comedy or prank, with skilled trolls able to elicit laughter from those who respond to them. However, it is important to recognize that trolling can also be malicious and cause harm to others, and it is crucial to approach trolling with sensitivity and respect for the feelings of others."',
                {'input_id': 4},
            ),
            (
                'Trolling is not a valuable or productive activity, but rather a destructive force that undermines the social fabric of the internet. Rather than providing entertainment or comedy, trolling often involves harassment, bullying, and other forms of abuse that can have serious consequences for mental health and social cohesion. By engaging in trolling behavior, individuals are not only harming others, but also contributing to a toxic online culture that can have far-reaching and damaging effects.',
                {'input_id': 5},
            ),
        ]

        return processed

    def init_and_run(self):
        """Register the annotation pipes, then build the pipe stack."""
        # The parser pipe is registered with save_output=False, so only the
        # orchestrator pipe below is flagged for saving.
        self.add_annotation_pipe(
            name="mpqa_parser",
            save_output=False,
            is_spacy=True,
            is_native=True,
        )
        self.add_annotation_pipe(
            name="MpqaPipeOrchestrator", save_output=True, is_spacy=True
        )
        # Further orchestrators (e.g. "EmpathPipeOrchestrator") can be added
        # here with the same add_annotation_pipe(...) arguments.

        self.init_pipe_stack()


In [3]:
# Instantiate the demo pipeline.
p = TestPipeline()
# NOTE(review): presumably -1 means "use all available processes" for the
# spaCy stage -- confirm against the nlpaf Pipeline implementation.
p.spacy_n_processors = -1

In [4]:
# Presumably clears input/output left over from an earlier run -- TODO confirm.
p.reset_input_output()
p.out_path = "test.parquet"
# Placeholder value: TestPipeline.process_input returns hard-coded samples
# and does not read what is passed here.
p.set_input([""])
p.init_and_run()
# Last expression of the cell: display the processed (text, context) tuples.
p.processed_input

nlpaf        INFO     adding pipe with name MpqaPipeOrchestrator


nlpaf        INFO     orchestrator was initialized successfully
nlpaf        INFO     adding pipe with code mpqa_arg_component
nlpaf        INFO     Defining pipe default and spacy stacks
nlpaf        INFO     Pipes are ['mpqa_parser', 'mpqa_arg_component']


[('```Trolls can serve a useful purpose in society by providing a form of social commentary and satire. They can hold a mirror to the absurdities and hypocrisies of society, and in doing so, provide a platform for critical thinking and reflection. Trolls can also serve as a form of entertainment, offering a unique form of comedy that can be both humorous and thought-provoking. However, it is important to recognize that trolling can also be hurtful and malicious, and can have serious consequences for the individuals involved. Therefore, it is crucial to approach trolling with a critical and nuanced understanding of its potential impact.```',
  {'input_id': 0}),
 ('```Trolls can play a useful role in society by providing entertainment and holding a mirror to the absurdity of online discourse. They can poke fun at bullies and provide a much-needed release valve for pent-up frustration. Trolling can be seen as a form of social commentary, a way to highlight the absurdity of online interact

In [5]:
# Presumably runs the registered pipes over the processed input -- TODO confirm.
p.annotate()

In [6]:
# Presumably persists the annotations to p.out_path -- TODO confirm.
p.save()

In [28]:
# Display the resulting frame (one row per input_id, one column per mpqa_* attribute).
# NOTE(review): this cell's execution count is out of sequence with the cells
# above -- re-run the notebook top to bottom before sharing.
p.out_df

Unnamed: 0,input_id,mpqa_assessments,mpqa_doubt,mpqa_authority,mpqa_emphasis,mpqa_necessity,mpqa_causation,mpqa_generalization,mpqa_structure,mpqa_conditionals,...,mpqa_possibility,mpqa_wants,mpqa_contrast,mpqa_priority,mpqa_difficulty,mpqa_inyourshoes,mpqa_rhetoricalquestion,mpqa_argumentative,mpqa_token_ratio,mpqa_args_count
0,0,0,0,0,0,0,2,0,0,0,...,0,0,0,3,0,0,0,6,0.049,6
1,1,0,0,0,0,1,1,0,0,1,...,0,0,0,2,0,0,0,5,0.097,5
2,2,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,2,0.019,2
3,4,0,0,0,0,0,1,0,0,0,...,0,0,0,2,0,0,0,4,0.048,4
4,5,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,2,0.035,2
