In [None]:
!pip install allennlp==2.10.1
!pip install allennlp-models==2.10.1

In [None]:
!python -m spacy download en_core_web_sm

In [9]:
import spacy
# Load spaCy's small English pipeline (the model fetched by the download cell above).
# NOTE(review): `nlp` is not referenced again in the visible part of this notebook —
# confirm it is still needed before keeping this load.
nlp = spacy.load("en_core_web_sm")

In [1]:
from allennlp_models.pretrained import load_predictor
# Load the pretrained constituency-parser predictor; later cells call
# `predictor.predict(sentence=...)` and read the "trees" entry of its result.
predictor = load_predictor("structured-prediction-constituency-parser")



In [None]:
# Demo input. Trailing punctuation is stripped before parsing — presumably so the
# right-most branch of the parse ends in a phrase rather than a punctuation token.
test_sentence = "The old woman was sitting under a tree and sipping coffee.".rstrip('?:!.,;')
print(test_sentence)

# Run the constituency parser and show its raw output.
parser_output = predictor.predict(sentence=test_sentence)
print(parser_output)

In [None]:
# The bracketed parse is stored under the "trees" key of the parser output.
tree_string = parser_output["trees"]
print(tree_string)

In [None]:
from nltk import tokenize
from nltk.tree import Tree

# Build an nltk Tree from the parser's bracketed string.
tree = Tree.fromstring(tree_string)
print(tree)
# pretty_print() writes the ASCII-art tree to stdout itself and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
tree.pretty_print()

In [None]:
# Split the sentence at its right-most noun phrase (NP) or verb phrase (VP).
def get_flattened(t):
    """Return the words under ``t`` as one space-separated string.

    Each direct child of ``t`` contributes its leaves, in order, and all
    contributions are joined with single spaces.

    Args:
        t: A tree node whose children expose ``.leaves()`` (for example an
            ``nltk.tree.Tree``), or ``None``.

    Returns:
        The flattened phrase as a ``str``, or ``None`` when ``t`` is ``None``.
    """
    # Guard clause: a missing phrase is passed straight through.
    if t is None:
        return None
    return " ".join(" ".join(child.leaves()) for child in list(t))

def get_right_most_VP_or_NP(parse_tree,last_NP = None,last_VP = None):
    """Walk down the right-most branch of a parse tree, tracking NP and VP nodes.

    Starting at ``parse_tree``, the last child is followed repeatedly until a
    node with exactly one leaf is reached. Along the way the most recently
    seen node labelled ``"NP"`` and the most recently seen node labelled
    ``"VP"`` are remembered.

    Args:
        parse_tree: Tree node supporting ``.leaves()``, ``[-1]`` indexing and
            ``.label()`` on its last child (for example an ``nltk.tree.Tree``).
        last_NP: Initial value returned if no NP is encountered.
        last_VP: Initial value returned if no VP is encountered.

    Returns:
        A ``(last_NP, last_VP)`` tuple; either element keeps its initial
        value (``None`` by default) when no such phrase lies on the path.
    """
    node = parse_tree
    noun_phrase, verb_phrase = last_NP, last_VP
    # Iterative form of the descent: stop once the current node spans one word.
    while len(node.leaves()) != 1:
        node = node[-1]
        label = node.label()
        if label == "NP":
            noun_phrase = node
        elif label == "VP":
            verb_phrase = node
    return noun_phrase, verb_phrase


# Find the right-most NP and VP nodes of the parsed sentence, then flatten each
# one to plain text (a missing phrase stays None).
last_nounphrase, last_verbphrase = get_right_most_VP_or_NP(tree)
last_nounphrase_flattened, last_verbphrase_flattened = (
    get_flattened(last_nounphrase),
    get_flattened(last_verbphrase),
)

In [None]:
# Remove the ending phrase from the sentence to obtain the prefix that precedes it.
import re
def get_termination_portion(main_string, sub_string):
    """Return the part of ``main_string`` that precedes a trailing ``sub_string``.

    The comparison ignores spaces: ``sub_string`` has its spaces removed and is
    matched against the concatenation of trailing whitespace-separated words
    of ``main_string``. This lets a phrase whose tokens were split differently
    (e.g. ``"do n't"``) still line up with the original text (``"don't"``).

    Args:
        main_string: The full sentence.
        sub_string: The ending phrase to cut off.

    Returns:
        The leading words of ``main_string`` joined by single spaces, or
        ``None`` if no run of trailing words matches ``sub_string``.
    """
    target = sub_string.replace(" ", "")
    words = main_string.split()

    # Grow the suffix one word at a time from the end of the sentence. Words are
    # never empty, so at most one cut position can reproduce the target.
    suffix = ""
    for cut in range(len(words) - 1, -1, -1):
        suffix = words[cut] + suffix
        if suffix == target:
            return " ".join(words[:cut])

    return None

# Use the longer of the two right-most phrases as the span to cut off.
# Either flattened phrase is None when no NP (or no VP) lies on the right-most
# branch; len(None) would raise TypeError inside max(), so drop missing ones first.
candidate_phrases = [
    phrase
    for phrase in (last_nounphrase_flattened, last_verbphrase_flattened)
    if phrase is not None
]
if not candidate_phrases:
    raise ValueError("No noun phrase or verb phrase found at the end of the sentence.")
longest_phrase_to_use = max(candidate_phrases, key=len)
print ("Ending phrase: ", longest_phrase_to_use)

# Turn "-LRB- " / " -RRB-" back into literal parentheses — presumably the
# parser's bracket tokens — so the phrase can be matched against the raw sentence.
longest_phrase_to_use = re.sub(r"-LRB- ", "(", longest_phrase_to_use)
longest_phrase_to_use = re.sub(r" -RRB-", ")", longest_phrase_to_use)


# split_sentence is None when the phrase cannot be aligned with the sentence end.
split_sentence = get_termination_portion(test_sentence, longest_phrase_to_use)
print ("Original sentence : ",test_sentence)
print ("Original sentence after splitting at ending phrase: ",split_sentence)

Next, we complete the truncated sentence with GPT-2 so that a plausible but false statement is generated.

In [None]:
# NOTE(review): transformers is installed twice in this cell — 4.25.1 first, then
# 2.9.0 with --no-dependencies on the last line. Presumably the later install
# replaces the earlier one; confirm which version the notebook is meant to run on
# and whether tokenizers==0.9.4 is compatible with it.
!pip install transformers==4.25.1
!pip install tokenizers==0.9.4
!pip install sentencepiece==0.1.97
!pip install --no-dependencies transformers==2.9.0

In [None]:
!pip install sacremoses==0.0.53

In [None]:
import tensorflow as tf
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer

# Load the pretrained "gpt2" tokenizer and the TensorFlow LM-head model.
GPT2tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# The tokenizer's end-of-sequence token id is passed as the model's pad token id.
GPT2model = TFGPT2LMHeadModel.from_pretrained(
    "gpt2",
    pad_token_id=GPT2tokenizer.eos_token_id,
)


In [13]:
# Prompt that GPT-2 will continue.
partial_sentence = "The old woman was sitting under a tree and"
input_ids = GPT2tokenizer.encode(partial_sentence,return_tensors='tf')
print (input_ids)
# generate()'s max_length is measured in tokens and includes the prompt, so the
# budget is derived from the encoded prompt length rather than from the number
# of whitespace-separated words (which undercounts when words split into
# several sub-word tokens).
maximum_length = int(input_ids.shape[1]) + 40

tf.Tensor([[ 464 1468 2415  373 5586  739  257 5509  290]], shape=(1, 9), dtype=int32)


In [14]:
# Sample continuations with both top-k and top-p (nucleus) filtering enabled:
# top_k=30 and top_p=0.80 are the values passed to generate() below.
sampling_options = dict(
    do_sample=True,
    max_length=maximum_length,
    top_p=0.80,
    top_k=30,
    repetition_penalty=10.0,
    num_return_sequences=10,
)
sample_outputs = GPT2model.generate(input_ids, **sampling_options)

In [None]:
import nltk
from nltk import tokenize

# sent_tokenize needs the "punkt" sentence tokenizer data.
nltk.download('punkt')

# Decode every sampled sequence and keep only its first sentence.
generated_sentences = []
for index, token_ids in enumerate(sample_outputs):
    decoded_text = GPT2tokenizer.decode(token_ids, skip_special_tokens=True)
    first_sentence = tokenize.sent_tokenize(decoded_text)[0]
    generated_sentences.append(first_sentence)
    print(index, ": ", first_sentence)

In [34]:
def falsesentence(text, num_return_sequences=4):
    """Print GPT-2 completions of ``text`` with its ending phrase replaced.

    The sentence is parsed, its right-most noun phrase or verb phrase
    (whichever flattens to the longer string) is cut off, and the remaining
    prefix is given to GPT-2 as a prompt. The first sentence of each sampled
    continuation is printed, one per line, prefixed by its index.

    Relies on names defined in other cells of this notebook: ``predictor``,
    ``GPT2tokenizer``, ``GPT2model``, ``get_right_most_VP_or_NP``,
    ``get_flattened``, ``get_termination_portion``, and the imports ``re``,
    ``Tree`` and ``tokenize``.

    Args:
        text: The sentence to alter. Trailing ``?:!.,;`` characters are removed.
        num_return_sequences: How many continuations to sample and print.

    Returns:
        None. Results are printed.

    Raises:
        ValueError: If no NP/VP is found on the right-most branch of the parse,
            or if removing the ending phrase leaves no usable prompt.
    """
    text = text.rstrip('?:!.,;')
    parser_output = predictor.predict(sentence=text)
    tree = Tree.fromstring(parser_output["trees"])

    last_nounphrase, last_verbphrase = get_right_most_VP_or_NP(tree)
    # Either phrase may be missing (None); max(..., key=len) would raise a
    # TypeError on None, so only real phrases are compared.
    candidate_phrases = [
        phrase
        for phrase in (get_flattened(last_nounphrase), get_flattened(last_verbphrase))
        if phrase is not None
    ]
    if not candidate_phrases:
        raise ValueError("No noun phrase or verb phrase found at the end of: %r" % text)
    longest_phrase_to_use = max(candidate_phrases, key=len)

    # Turn "-LRB- " / " -RRB-" back into literal parentheses — presumably the
    # parser's bracket tokens — so the phrase lines up with the raw text.
    longest_phrase_to_use = re.sub(r"-LRB- ", "(", longest_phrase_to_use)
    longest_phrase_to_use = re.sub(r" -RRB-", ")", longest_phrase_to_use)

    partial_sentence = get_termination_portion(text, longest_phrase_to_use)
    # None means the phrase could not be aligned with the sentence end; an empty
    # string means the phrase was the whole sentence. Neither is a usable prompt.
    if not partial_sentence:
        raise ValueError("Could not derive a non-empty prompt from: %r" % text)

    input_ids = GPT2tokenizer.encode(partial_sentence, return_tensors='tf')
    # max_length counts tokens (prompt included), so size it from the encoded
    # prompt rather than from the whitespace word count.
    maximum_length = int(input_ids.shape[1]) + 40
    sample_outputs = GPT2model.generate(
        input_ids,
        do_sample=True,
        max_length=maximum_length,
        top_p=0.80,
        top_k=30,
        repetition_penalty=10.0,
        num_return_sequences=num_return_sequences,
    )
    for i, sample_output in enumerate(sample_outputs):
        decoded_sentence = GPT2tokenizer.decode(sample_output, skip_special_tokens=True)
        # Keep only the first sentence of the decoded text.
        final_sentence = tokenize.sent_tokenize(decoded_sentence)[0]
        print (i,": ",final_sentence)


In [29]:
text="The old woman was sitting under a tree and sipping coffee."

In [30]:
falsesentence(text)

0 :  The old woman was sitting under a tree and had been playing with her dog, when the man came running out of one window.
1 :  The old woman was sitting under a tree and she began to sob.
2 :  The old woman was sitting under a tree and staring at the sky.
3 :  The old woman was sitting under a tree and the man with him.


In [47]:
text2="In 2011 ,Indian cricket team won the ODI cricket world cup for second time ."

In [48]:
falsesentence(text2)

0 :  In 2011,Indian cricket team's lead in the league came at 16-17 with only one wicket from its opening over.
1 :  In 2011,Indian cricket team's captain Virat Kohli had admitted to a "shameful" incident involving the national teams when he was asked about his alleged involvement in an anti-Hindu rally on March 17.
2 :  In 2011,Indian cricket team won a match with an unbeaten record of 9-4 in its first two matches.
3 :  In 2011,Indian cricket team won the world championship in Pakistan, a match which will be watched on TV every day by over 2 million people.
