In [None]:
%pip install amrlib
%pip install sacrebleu
%pip install unidecode
%pip install word2number
%unzip /usr/share/nltk_data/corpora/wordnet.zip -d /usr/share/nltk_data/corpora/

In [6]:
import amrlib
import penman
import json
import pandas as pd
import sacrebleu
import os
import time
import re
import copy
import nltk
from nltk.corpus import wordnet
import string
from tqdm import tqdm

### test AMR-text conversion models

In [12]:
stog = amrlib.load_stog_model('/kaggle/input/amr-lda-01/models/parse_xfm_bart_large')
graphs = stog.parse_sents(['He wanted the girl to believe him.', 'If i were you, i would give up'])
for graph in graphs:
    print(graph)

# ::snt He wanted the girl to believe him.
(w / want-01
      :ARG0 (h / he)
      :ARG1 (b / believe-01
            :ARG0 (g / girl)
            :ARG1 h))
# ::snt If i were you, i would give up
(h / have-condition-91
      :ARG1 (g / give-up-07
            :ARG0 (ii / i))
      :ARG2 (y / you
            :domain ii))


In [13]:
gtos = amrlib.load_gtos_model('/kaggle/input/amr-lda-01/models/generate_t5wtense')
sents, _ = gtos.generate(graphs, use_tense=True)
for sent in sents:
    print(sent)

He wanted the girl to believe him.
If I were you, I'd give up.


### AMR-LDA

In [14]:
def bleu(targets, predictions, smooth=1.0):
    """Computes BLEU score.
    
    Args:
    targets: list of strings or list of list of strings if multiple references are present.
    predictions: list of strings
    
    Returns:
    bleu_score across all targets and predictions
    """
    bleu_score = sacrebleu.sentence_bleu(predictions[0], targets,
                                       smooth_method="exp",
                                       smooth_value=smooth,
                                       lowercase=False,
                                       tokenize="intl")
    return {"bleu": bleu_score.score}

In [15]:
def swappable_conditions_contraposition(g):
    for inst in g.instances():
        if (inst.target == 'have-condition-91'  ## this if is working for contraposition law
         and len(g.edges(source=inst.source, role=':ARG1')) == 1
         and len(g.edges(source=inst.source, role=':ARG2')) == 1):
            yield inst.source

def swappable_conditions_contraposition_negative_sample_root(g):
    for inst in g.instances():
        if (len(g.edges(source=inst.source, role=':condition')) == 1):
            yield inst.source

def swappable_conditions_commutative(g):
    for inst in g.instances():
        if (inst.target == 'and'   ## this elif is working for commutative law
         and len(g.edges(source=inst.source, role=':op1')) == 1
         and len(g.edges(source=inst.source, role=':op2')) == 1):
            yield inst.source

def swappable_conditions_implication(g):
    for inst in g.instances():
        if (inst.target == 'or'   ## this elif is working for implication law
         and len(g.edges(source=inst.source, role=':op1')) == 1
         and len(g.edges(source=inst.source, role=':op2')) == 1):
            yield inst.source

def quantifier_target_extractor(g):
    for inst in g.instances():
        if (len(g.edges(source=inst.source, role=':quant')) == 1):
            yield inst.source

    for inst in g.instances():
        if (len(g.edges(source=inst.source, role=':mod')) == 1):
            yield  inst.source

In [None]:
def swappable_conditions_contraposition_2(g):
    graph = penman.encode(g)
    if ":ARG" in graph and ":condition" in graph:
        start = graph.index("(")
        end = graph.index(":condition")
        # split_new_condition = ":condition "+ graph[start:end] +":polarity -)"
        split_new_condition = ":condition " + graph[start:end]
        # split_old_condition = graph[end+len(":condition "):len(graph)-2] + "\n" +":polarity -" + "\n"
        split_old_condition = graph[end + len(":condition "):len(graph) - 2]
        pattern = r':degree \([^)]+\)'      

        if ":polarity -" not in split_new_condition and ":polarity -" not in split_old_condition:
            split_new_condition = split_new_condition + ":polarity -)"
            split_old_condition = split_old_condition + "\n" +":polarity -" + "\n"
        elif ":polarity -" in split_new_condition and ":polarity -" in split_old_condition:
            split_new_condition = split_new_condition.replace(":polarity -","") + ")"
            split_old_condition = split_old_condition.replace(":polarity -","") + "\n"
            split_new_condition = re.sub(pattern, '', split_new_condition)
            split_old_condition = re.sub(pattern, '', split_old_condition)          
        elif ":polarity -" not in split_new_condition and ":polarity -" in split_old_condition:
            split_new_condition = split_new_condition + ":polarity -)"
            split_old_condition = split_old_condition.replace(":polarity -","") + "\n"
            split_old_condition = re.sub(pattern, '', split_old_condition)          
        elif ":polarity -" in split_new_condition and ":polarity -" not in split_old_condition:
            split_new_condition = split_new_condition.replace(":polarity -","") + ")"
            split_new_condition = re.sub(pattern, '', split_new_condition)
            split_old_condition = split_old_condition + "\n" +":polarity -" + "\n"          

        new_contructed_graph = split_old_condition + split_new_condition + ")"
        # decoded_g = penman.decode(new_contructed_graph)
        # return decoded_g
        return new_contructed_graph

In [17]:
def swappable_conditions_contraposition_negative_sample(g):
    swap_condition = list(swappable_conditions_contraposition_negative_sample_root(g))
    if len(swap_condition) > 0:
        z0 = swap_condition[0]
        z1 = g.edges(source=z0, role=":condition")[0].target
        if (z0, ':polarity', '-') not in g.triples and (z1, ':polarity', '-') not in g.triples:
            g.triples.append((z1, ":polarity", '-'))
        elif (z0, ':polarity', '-') in g.triples and (z1, ':polarity', '-') in g.triples:
            g.triples.remove((z1, ":polarity", '-'))
        elif (z0, ':polarity', '-') not in g.triples and (z1, ':polarity', '-') in g.triples:
            g.triples.remove((z1, ":polarity", '-'))
        elif (z0, ':polarity', '-') in g.triples and (z1, ':polarity', '-') not in g.triples:
            g.triples.append((z1, ":polarity", '-'))

        new_graph = penman.encode(g)
        return new_graph

In [18]:
def swappable_conditions_implication_2(g):
    g_temp = copy.deepcopy(g)
    graph = penman.encode(g_temp)
    if ":ARG" in graph and ":condition" in graph:
        swap_condition = list(swappable_conditions_contraposition_negative_sample_root(g_temp))
        if len(swap_condition) > 0:
            z0 = swap_condition[0]
            z1 = g_temp.edges(source=z0, role=":condition")[0].target
            if (z1, ':polarity', '-') not in g_temp.triples:
                g_temp.triples.append((z1, ":polarity", '-'))
            elif (z1, ':polarity', '-') in g_temp.triples:
                g_temp.triples.remove((z1, ":polarity", '-'))
        graph = penman.encode(g_temp)

        start = graph.index("(")
        end = graph.index(":condition")
        # split_new_condition = ":condition "+ graph[start:end] +":polarity -)"
        split_new_condition = ":op2 " + graph[start:end] + ")"
        # split_old_condition = graph[end+len(":condition "):len(graph)-2] + "\n" +":polarity -" + "\n"
        split_old_condition = ":op1 " + graph[end + len(":condition "):len(graph) - 2] + ")"

        new_contructed_graph = "(root / or \n" + split_old_condition + "\n"+split_new_condition + ")"
        # updated_grapg.triples.append(('root', ':op1', z1))
        # decoded_g = penman.decode(new_contructed_graph)
        # return decoded_g
        return new_contructed_graph

In [19]:
def swappable_conditions_implication_2_negative_samples_generation(g):
    graph = penman.encode(g)
    if ":ARG" in graph and ":condition" in graph:
        start = graph.index("(")
        end = graph.index(":condition")
        # split_new_condition = ":condition "+ graph[start:end] +":polarity -)"
        split_new_condition = ":op2 " + graph[start:end] + ")"
        # split_old_condition = graph[end+len(":condition "):len(graph)-2] + "\n" +":polarity -" + "\n"
        split_old_condition = ":op1 " + graph[end + len(":condition "):len(graph) - 2] + ")"

        new_contructed_graph = "(root / or \n" + split_old_condition + "\n"+split_new_condition + ")"
        # updated_grapg.triples.append(('root', ':op1', z1))
        # decoded_g = penman.decode(new_contructed_graph)
        # return decoded_g
        return new_contructed_graph

In [20]:
def contraposition(graphs, sentence_list, logic_word_list):
    return_list = []
    label_list = []
    sentence_and_tag_list = []
    if graphs is None:
        return
    for index, graph in enumerate(graphs):
        if graph is None:
            continue
        g = penman.decode(graph)
        negative_sample_g = copy.deepcopy(g)
        if len(list(swappable_conditions_contraposition(g))) != 0:
            swap_condition = list(swappable_conditions_contraposition(g))
            z0 = swap_condition[0]
            z1 = g.edges(source=z0, role=':ARG1')[0].target
            z5 = g.edges(source=z0, role=':ARG2')[0].target
            g.triples.remove((z0, ':ARG1', z1))  # remove the triples
            g.triples.remove((z0, ':ARG2', z5))
            g.triples.append((z0, ':ARG1', z5))  # add the replacements
            g.triples.append((z0, ':ARG2', z1))
            if (z1, ':polarity', '-') not in g.triples and (z5, ':polarity', '-') not in g.triples:
                g.triples.append((z1, ':polarity', '-'))  # add polarity -
                g.triples.append((z5, ':polarity', '-'))
                negative_sample_g.triples.append((z5, ':polarity', '-'))
            elif (z1, ':polarity', '-') in g.triples and (z5, ':polarity', '-') in g.triples:
                g.triples.remove((z1, ':polarity', '-'))  # add polarity -
                g.triples.remove((z5, ':polarity', '-'))
                negative_sample_g.triples.remove((z5, ':polarity', '-'))
            elif (z1, ':polarity', '-') not in g.triples and (z5, ':polarity', '-') in g.triples:
                g.triples.append((z1, ':polarity', '-'))  # add polarity -
                g.triples.remove((z5, ':polarity', '-'))
                negative_sample_g.triples.remove((z5, ':polarity', '-'))
            elif (z1, ':polarity', '-') in g.triples and (z5, ':polarity', '-') not in g.triples:
                g.triples.remove((z1, ':polarity', '-'))  # add polarity -
                g.triples.append((z5, ':polarity', '-'))
                negative_sample_g.triples.append((z5, ':polarity', '-'))
            new_graph = penman.encode(g)
            return_list.append(new_graph)
            label_list.append(1)
            sentence_and_tag_list.append([sentence_list[index], logic_word_list[index]])

            ## append negative samples
            negative_sample_graph = penman.encode(negative_sample_g)
            return_list.append(negative_sample_graph)
            label_list.append(0)
            sentence_and_tag_list.append([sentence_list[index],logic_word_list[index]])
            # return_list.append(g)
        else:
            return_result = swappable_conditions_contraposition_2(g)
            negative_return_result = swappable_conditions_contraposition_negative_sample(g)
            if return_result is not None:
                return_list.append(return_result)
                label_list.append(1)
                sentence_and_tag_list.append([sentence_list[index], logic_word_list[index]])
            if negative_return_result is not None:
                return_list.append(negative_return_result)
                label_list.append(0)
                sentence_and_tag_list.append([sentence_list[index], logic_word_list[index]])
    return return_list, label_list, sentence_and_tag_list

In [21]:
def commutative(graphs, sentence_list, logic_word_list):
    return_list = []
    label_list = []
    sentence_and_tag_list = []
    if graphs is None:
        return
    for index, graph in enumerate(graphs):
        if graph is None:
            continue
        g = penman.decode(graph)
        negative_sample_g = copy.deepcopy(g)
        if len(list(swappable_conditions_commutative(g))) != 0:
            swap_condition = list(swappable_conditions_commutative(g))
            z0 = swap_condition[0]
            z1 = g.edges(source=z0, role=':op1')[0].target
            z5 = g.edges(source=z0, role=':op2')[0].target
            g.triples.remove((z0, ':op1', z1))  # remove the triples
            g.triples.remove((z0, ':op2', z5))
            g.triples.append((z0, ':op1', z5))  # add the replacements
            g.triples.append((z0, ':op2', z1))
            new_graph = penman.encode(g)
            return_list.append(new_graph)
            label_list.append(1)
            sentence_and_tag_list.append([sentence_list[index], logic_word_list[index]])

            if (z1, ':polarity', '-') not in g.triples and (z5, ':polarity', '-') not in g.triples:
                negative_sample_g.triples.append((z5, ':polarity', '-'))
            elif (z1, ':polarity', '-') in g.triples and (z5, ':polarity', '-') in g.triples:
                negative_sample_g.triples.remove((z5, ':polarity', '-'))
            elif (z1, ':polarity', '-') not in g.triples and (z5, ':polarity', '-') in g.triples:
                negative_sample_g.triples.remove((z5, ':polarity', '-'))
            elif (z1, ':polarity', '-') in g.triples and (z5, ':polarity', '-') not in g.triples:
                negative_sample_g.triples.append((z5, ':polarity', '-'))

            negative_sample_graph = penman.encode(negative_sample_g)
            return_list.append(negative_sample_graph)
            label_list.append(0)
            sentence_and_tag_list.append([sentence_list[index], logic_word_list[index]])

    return return_list, label_list, sentence_and_tag_list

In [22]:
def implication(graphs, sentence_list, logic_word_list):
    return_list = []
    label_list = []
    sentence_and_tag_list = []
    if graphs is None:
        return
    for graph_index, graph in enumerate(graphs):
        if graph is None:
            continue
        g = penman.decode(graph)
        negative_sample_g = copy.deepcopy(g)
        if len(list(swappable_conditions_implication(g))) != 0:
            swap_condition = list(swappable_conditions_implication(g))
            z0 = swap_condition[0]
            for index, item in enumerate(g.triples):
                if item[0] == z0 and item[1] == ":instance":
                    g.triples[index] = item[:2] + ('have-condition-91',)
                    break
            for index, item in enumerate(g.triples):
                if item[0] == z0 and item[1] == ":op1":
                    g.triples[index] = item[:1] + (':ARG1',) + item[2:3]
                    break
            for index, item in enumerate(g.triples):
                if item[0] == z0 and item[1] == ":op2":
                    g.triples[index] = item[:1] + (':ARG2',) + item[2:3]
                    break

            z1 = g.edges(source=z0, role=':ARG1')[0].target
            z5 = g.edges(source=z0, role=':ARG2')[0].target
            g.triples.remove((z0, ':ARG1', z1))
            g.triples.remove((z0, ':ARG2', z5))
            g.triples.append((z0, ':ARG1', z5))
            g.triples.append((z0, ':ARG2', z1))
            if (z1, ':polarity', '-') not in g.triples:
                g.triples.append((z1, ':polarity', '-'))
            elif (z1, ':polarity', '-') in g.triples:
                g.triples.remove((z1, ':polarity', '-'))

            new_graph = penman.encode(g)
            return_list.append(new_graph)
            label_list.append(1)
            sentence_and_tag_list.append([sentence_list[graph_index], logic_word_list[graph_index]])

            swap_condition_negative_sample = list(swappable_conditions_implication(negative_sample_g))
            z0_neg = swap_condition_negative_sample[0]
            z1_neg = negative_sample_g.edges(source=z0_neg, role=':op1')[0].target
            z5_neg = negative_sample_g.edges(source=z0_neg, role=':op2')[0].target
            if (z1_neg, ':polarity', '-') in negative_sample_g.triples:
                negative_sample_g.triples.remove((z1_neg, ':polarity', '-'))
            elif (z5_neg, ':polarity', '-') in negative_sample_g.triples:
                negative_sample_g.triples.remove((z5_neg, ':polarity', '-'))
            else:
                negative_sample_g.triples.append((z5_neg, ':polarity', '-'))
            new_negative_graph = penman.encode(negative_sample_g)
            return_list.append(new_negative_graph)
            label_list.append(0)
            sentence_and_tag_list.append([sentence_list[graph_index], logic_word_list[graph_index]])

        elif len(list(swappable_conditions_contraposition(g))) != 0:
            swap_condition = list(swappable_conditions_contraposition(g))
            z0 = swap_condition[0]
            z1 = g.edges(source=z0, role=':ARG1')[0].target
            z5 = g.edges(source=z0, role=':ARG2')[0].target
            g.triples.remove((z0, ':ARG1', z1))  # remove the triples
            g.triples.remove((z0, ':ARG2', z5))
            g.triples.append((z0, ':ARG1', z5))  # add the replacements
            g.triples.append((z0, ':ARG2', z1))
            if (z5, ':polarity', '-') not in g.triples:
                g.triples.append((z5, ':polarity', '-'))
            elif (z5, ':polarity', '-') in g.triples:
                g.triples.remove((z5, ':polarity', '-'))

            for index, item in enumerate(g.triples):
                if item[0] == z0 and item[1] == ":instance":
                    g.triples[index] = item[:2] + ('or',)
                    break
            for index, item in enumerate(g.triples):
                if item[0] == z0 and item[1] == ":ARG1":
                    g.triples[index] = item[:1] + (':op1',) + item[2:3]
                    break
            for index, item in enumerate(g.triples):
                if item[0] == z0 and item[1] == ":ARG2":
                    g.triples[index] = item[:1] + (':op2',) + item[2:3]
                    break
            new_graph = penman.encode(g)
            return_list.append(new_graph)
            label_list.append(1)
            sentence_and_tag_list.append([sentence_list[graph_index], logic_word_list[graph_index]])

            swap_condition_negative_sample = list(swappable_conditions_contraposition(negative_sample_g))
            z0_neg = swap_condition_negative_sample[0]
            z1_neg = negative_sample_g.edges(source=z0_neg, role=':ARG1')[0].target
            z5_neg = negative_sample_g.edges(source=z0_neg, role=':ARG2')[0].target
            if (z1_neg, ':polarity', '-') in negative_sample_g.triples:
                negative_sample_g.triples.remove((z1_neg, ':polarity', '-'))
            elif (z5_neg, ':polarity', '-') in negative_sample_g.triples:
                negative_sample_g.triples.remove((z5_neg, ':polarity', '-'))
            else:
                negative_sample_g.triples.append((z5_neg, ':polarity', '-'))
            new_negative_graph = penman.encode(negative_sample_g)
            return_list.append(new_negative_graph)
            label_list.append(0)
            sentence_and_tag_list.append([sentence_list[graph_index], logic_word_list[graph_index]])

        else:
            return_result = swappable_conditions_implication_2(g)
            negative_result = swappable_conditions_implication_2_negative_samples_generation(g)
            if return_result is not None:
                return_list.append(return_result)
                label_list.append(1)
                sentence_and_tag_list.append([sentence_list[graph_index], logic_word_list[graph_index]])
            if negative_result is not None:
                return_list.append(negative_result)
                label_list.append(0)
                sentence_and_tag_list.append([sentence_list[graph_index], logic_word_list[graph_index]])
    return return_list, label_list, sentence_and_tag_list

In [25]:
def double_negation(graphs, sentence_list, logic_word_list):
    return_list = []
    negative_list = []
    label_list = []
    sentence_and_tag_list = []
    return_sents = []
    returned_sentence_and_tag_list = []
    if graphs is None:
        return
    for index, graph in enumerate(graphs):
        if graph is None:
            continue
        g = penman.decode(graph)
        updated_g = copy.deepcopy(g)
        negative_g = copy.deepcopy(g)
        if ":polarity -" not in graph:  ## We only consider the case that the sentence does not have negation.
            z0 = updated_g.instances()[0].source
            updated_g.triples.append((z0, ':polarity', '-'))
            temp_graph = penman.encode(updated_g)
            start = temp_graph.index("\n")
            return_list.append(temp_graph[start+1:len(temp_graph)])
            sentence_and_tag_list.append([sentence_list[index], logic_word_list[index]])
            negative_list.append(penman.encode(negative_g))

    if len(return_list) > 0:
        gtos = amrlib.load_gtos_model("/kaggle/input/amr-lda-01/models/generate_t5wtense")
        sents, _ = gtos.generate(return_list)
        punctuation_string = string.punctuation

        for idx, sent in enumerate(sents):
            temp_sent = copy.deepcopy(sent)
            for i in punctuation_string:
                temp_sent = temp_sent.replace(i, '')
            splited_sent = temp_sent.split()
            for stem in splited_sent:
                if len(wordnet.synsets(stem)) > 0:
                    syn = wordnet.synsets(stem)[0]
                    good = wordnet.synset(syn.name())
                    antonym = good.lemmas()[0].antonyms()
                    if len(antonym) > 0:
                        if wordnet.synsets(antonym[0].name())[0].pos() == 'a':
                            sent = sent.replace(stem,antonym[0].name())
                            returned_sentence_and_tag_list.append(sentence_and_tag_list[idx])
                            return_sents.append(sent)
                            label_list.append(1)
                            break

        neg_sents, _ = gtos.generate(negative_list)

        for idx, sent in enumerate(neg_sents):
            temp_sent = copy.deepcopy(sent)
            for i in punctuation_string:
                temp_sent = temp_sent.replace(i, '')
            splited_sent = temp_sent.split()
            for stem in splited_sent:
                if len(wordnet.synsets(stem)) > 0:
                    syn = wordnet.synsets(stem)[0]
                    good = wordnet.synset(syn.name())
                    antonym = good.lemmas()[0].antonyms()
                    if len(antonym) > 0:
                        if wordnet.synsets(antonym[0].name())[0].pos() == 'a':
                            sent = sent.replace(stem, antonym[0].name())
                            returned_sentence_and_tag_list.append(sentence_and_tag_list[idx])
                            return_sents.append(sent)
                            label_list.append(0)
                            break

    return return_sents, label_list, returned_sentence_and_tag_list

In [24]:
sentence_list = []
logic_word_list = []
dataframe_list = []
dataframe_list_single_sentences = []
flag = "Synthetic"

if flag == "Synthetic":
    dataframe_synthetic = pd.read_csv("/kaggle/input/amr-lda-01/output_result/synthetic_sentences.csv")
    dataframe_list.append(dataframe_synthetic)
    dataframe_synthetic_2 = pd.read_csv("/kaggle/input/amr-lda-01/output_result/synthetic_single_no_logic_words_sentences.csv")
    dataframe_list_single_sentences.append(dataframe_synthetic_2)

In [26]:
keywords_list = [flag]
data = []
df = pd.DataFrame(data, columns=['Origin', 'Original_Sentence', 'Generated_Sentence', 'BLEU_Score', 'Label', 'Tag',
                                     'logic_words'])

In [35]:
for idx, item in enumerate(dataframe_list_single_sentences):
    for index, row in item.iterrows():
        sentence_list.append(row['Sentences'])
        logic_word_list.append(row['Logic-words'])

    graphs = stog.parse_sents(sentence_list)
    double_negation_list, double_negation_label_list, double_negation_sentence_and_tag_list = [], [], []
    double_negation_list, double_negation_label_list, double_negation_sentence_and_tag_list = double_negation(graphs, sentence_list, logic_word_list)

    ## To convert graphs to sentences
    if len(double_negation_list) > 0:
        # sents, _ = gtos.generate(double_negation_list)
        for sent_id in tqdm(range(len(double_negation_list))):
            bleu_score = bleu([double_negation_sentence_and_tag_list[sent_id][0]], [double_negation_list[sent_id]])
            df.loc[len(df)] = {'Origin': keywords_list[idx], 'Original_Sentence': double_negation_sentence_and_tag_list[sent_id][0],
                               'Generated_Sentence': double_negation_list[sent_id], 'BLEU_Score': bleu_score['bleu'],
                               'Label': double_negation_label_list[sent_id], 'Tag': "Double negation law",
                               'logic_words': double_negation_sentence_and_tag_list[sent_id][1]}

100%|██████████| 728/728 [00:00<00:00, 731.68it/s]


In [36]:
for idx, item in enumerate(dataframe_list):
    for index, row in item.iterrows():
        sentence_list.append(row['Sentences'])
        logic_word_list.append(row['Logic-words'])

    graphs = stog.parse_sents(sentence_list)

    ## logical equivalence
    ## contraposition law
    # graphs = ["# ::snt If you can use a computer, you have keyboarding skills.\n(z0 / have-condition-91\n      :ARG1 (z1 / have-03\n            :ARG0 (z2 / you)\n            :ARG1 (z3 / skill\n                  :topic (z4 / keyboard-01)))\n      :ARG2 (z5 / possible-01\n            :ARG1 (z6 / use-01\n                  :ARG0 z2\n                  :ARG1 (z7 / computer))))"]

    contraposition_list, contraposition_label_list, contraposition_sentence_and_tag_list = [], [], []
    commutative_list, commutative_label_list, commutative_sentence_and_tag_list = [], [], []
    implication_list, implication_label_list, implication_sentence_and_tag_list = [], [], []
    double_negation_list, double_negation_label_list, double_negation_sentence_and_tag_list = [], [], []

    contraposition_list, contraposition_label_list, contraposition_sentence_and_tag_list = contraposition(graphs, sentence_list, logic_word_list)
    commutative_list, commutative_label_list, commutative_sentence_and_tag_list = commutative(graphs, sentence_list, logic_word_list)
    implication_list, implication_label_list, implication_sentence_and_tag_list = implication(graphs, sentence_list, logic_word_list)


    ## To convert graphs to sentences
    # gtos = amrlib.load_gtos_model("./pretrained_models/model_generate_t5wtense-v0_1_0")
    if len(contraposition_list) > 0:
        sents, _ = gtos.generate(contraposition_list)
        for sent_id in tqdm(range(len(sents))):
            bleu_score = bleu([contraposition_sentence_and_tag_list[sent_id][0]], [sents[sent_id]])
            df.loc[len(df)] = {'Origin':keywords_list[idx],'Original_Sentence': contraposition_sentence_and_tag_list[sent_id][0], 'Generated_Sentence': sents[sent_id], 'BLEU_Score': bleu_score['bleu'], 'Label': contraposition_label_list[sent_id], 'Tag':"Contraposition law", 'logic_words':contraposition_sentence_and_tag_list[sent_id][1]}
    if len(commutative_list) > 0:
        sents, _ = gtos.generate(commutative_list)
        for sent_id in tqdm(range(len(sents))):
            bleu_score = bleu([commutative_sentence_and_tag_list[sent_id][0]], [sents[sent_id]])
            df.loc[len(df)] = {'Origin':keywords_list[idx],'Original_Sentence': commutative_sentence_and_tag_list[sent_id][0], 'Generated_Sentence': sents[sent_id], 'BLEU_Score': bleu_score['bleu'], 'Label': commutative_label_list[sent_id], 'Tag':"Commutative law", 'logic_words':commutative_sentence_and_tag_list[sent_id][1]}
    if len(implication_list) > 0:
        sents, _ = gtos.generate(implication_list)
        for sent_id in tqdm(range(len(sents))):
            bleu_score = bleu([implication_sentence_and_tag_list[sent_id][0]], [sents[sent_id]])
            df.loc[len(df)] = {'Origin':keywords_list[idx],'Original_Sentence': implication_sentence_and_tag_list[sent_id][0], 'Generated_Sentence': sents[sent_id], 'BLEU_Score': bleu_score['bleu'], 'Label': implication_label_list[sent_id], 'Tag':"Implication law", 'logic_words':implication_sentence_and_tag_list[sent_id][1]}

    df.to_csv(keywords_list[idx]+"_xfm_t5wtense_logical_equivalence_list.csv", index = None, encoding = 'utf8')

100%|██████████| 3466/3466 [00:05<00:00, 592.31it/s]
100%|██████████| 3680/3680 [00:05<00:00, 614.41it/s]
100%|██████████| 6922/6922 [00:13<00:00, 507.75it/s]


### train test split

In [37]:
import pandas as pd

dataframe = pd.read_csv("/kaggle/working/Synthetic_xfm_t5wtense_logical_equivalence_list.csv")

df_shuffled = dataframe.sample(frac=1).reset_index(drop=True)

total_lines = df_shuffled.shape[0]

trainset_index = int(total_lines * 0.8)
training_set = df_shuffled.iloc[:trainset_index]
validation_set = df_shuffled.iloc[trainset_index:total_lines]

training_set.columns=['Origin','sentence1','sentence2','BLEU_Score','label','Tag','logic_words']
validation_set.columns=['Origin','sentence1','sentence2','BLEU_Score','label','Tag','logic_words']

training_set = training_set.drop(['Origin','BLEU_Score','Tag','logic_words'], axis=1)
validation_set = validation_set.drop(['Origin','BLEU_Score','Tag','logic_words'], axis=1)

training_set.to_csv("Synthetic_xfm_t5wtense_train.csv",index = None,encoding = 'utf8')
validation_set.to_csv("Synthetic_xfm_t5wtense_validation.csv",index = None,encoding = 'utf8')

### stage-1 finetuning (contrastive learning)

In [None]:
!pip install evaluate

In [6]:
import argparse
import json
import logging
import math
import os
import random
from pathlib import Path

import datasets
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from huggingface_hub import Repository
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,  
    DataCollatorWithPadding,
    PretrainedConfig,
    SchedulerType,
    default_data_collator,
    get_scheduler,
)
from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
from transformers.utils.versions import require_version

In [7]:
class BERTConfig:
    # dataset
    data_dir = "/kaggle/input/amr-lda-01/output_result"  # Subject for data path: Biology, Law
    train_data_file_name = "Synthetic_xfm_t5wtense_train.csv"
    validate_data_file_name = "Synthetic_xfm_t5wtense_validation.csv"
    # test_data_file_name = "synthetic_logical_equivalence_sentence_pair_testset.csv"

    # pretrained model
    pretrained_model_name = "Transformers/bert-base-cased/"  # pretrained model: BERT, BioBERT, RoBERTa, SBERT

    # save model
    saved_fig_dir = "result/figure/"
    saved_model_dir = "result/saved_models/"  # save model after fine-tune

    # load model
    load_model_sub_dir_name = "epoch_3"  # for load model from a specific sub dir, e.g: epoch_5
    predict_result_dir = "result/predict/"

    # train + predict parameters
    GPU_ID = "1"
    num_labels = 2  # The number of output labels -- 1 for MSE Loss Regression; other for classification.
    batch_size = 16  # for DataLoader (when fine-tuning BERT on a specific task, 16 or 32 is recommended)
    epochs = 4  # Number of training epochs (we recommend between 2 and 4)
    lr = 5e-5  # Optimizer parameters: learning_rate - default is 5e-5, our notebook had 2e-5
    eps = 1e-8  # Optimizer parameters: adam_epsilon  - default is 1e-8.
    seed = 2022  # Set the seed value all over the place to make this reproducible.

    pct_close = 0.1  # predict correct threshold

In [8]:
def parse(self, kwargs):
    '''
    user can update the default hyperparamter
    '''
    for k, v in kwargs.items():
        if not hasattr(self, k):
            raise Exception('config has No key: {}'.format(k))
        setattr(self, k, v)

    print('*************************************************')
    print('user config:')
    for k, v in self.__class__.__dict__.items():
        if not k.startswith('__'):
            print("{} => {}".format(k, getattr(self, k)))

    print('*************************************************')

BERTConfig.parse = parse
opt = BERTConfig()

In [9]:
os.environ["CUDA_VISIBLE_DEVICES"] = opt.GPU_ID

In [12]:
check_min_version("4.22.0.dev0")
logger = get_logger(__name__)

task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}