# purpose: 
Experimentation with multiple question answering modules on sample reviews which have been tagged


In [57]:
import random
import torch
# USE-
from  transformers  import  AutoTokenizer, AutoModelWithLMHead, pipeline


In [59]:


model_name = "MaRiOrOsSi/t5-base-finetuned-question-answering"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelWithLMHead.from_pretrained(model_name)
question = "does the text speak about reviews?"
context = "42 is the answer to life, the universe and everything"
# context = "I bought this product based on the high rating, but it turned out to be completely different from what I expected. The rating must be misleading."
# context = " I am totally convinced the good reviews are fake. This by far being one of the worst movies I have ever seen. What a horrible story line I was so bored. The cast stands outside for part of this movie, while people disappear, they argue. Then they move into the lodge and the rest of the cast disappears. That's it that's the movie, no explanation, no climax , just a poorly written script . That is how it ends just with nothing that what they give you. I don't even understand the whole point of this story because there was none. I don't think anyone could ever give this a good review.” 
# context = "I am totally convinced the good reviews are fake. This by far being one of the worst movies I have ever seen. What a horrible story line I was so bored"
input = f"question: {question} context: {context}"
encoded_input = tokenizer([input],
                             return_tensors='pt',
                             max_length=512,
                             truncation=True)
output = model.generate(input_ids = encoded_input.input_ids,
                            attention_mask = encoded_input.attention_mask)
output = tokenizer.decode(output[0], skip_special_tokens=True)
print(output)


yes


In [56]:
# question = "Is the text talking about misleading rating?"
question = 'what is spoken about?'
context = "42 is the answer to life, the universe and everything"
# context = "I bought this product based on the high rating, but it turned out to be completely different from what I expected. The rating must be misleading."
# context = 'ratings are bad'
input = f"question: {question} context: {context}"
encoded_input = tokenizer([input],
                             return_tensors='pt',
                             max_length=512,
                             truncation=True)
output = model.generate(input_ids = encoded_input.input_ids,
                            attention_mask = encoded_input.attention_mask)
output = tokenizer.decode(output[0], skip_special_tokens=True)
print(output)

life, the universe, and everything


In [27]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# text = "In recent years, there has been a growing concern about the accuracy of rating systems. Many believe that these ratings are misleading and do not accurately reflect the quality of products or services. This has led to calls for increased regulation of these systems to ensure that consumers are not misled."
text = "I bought this product based on the high rating, but it turned out to be completely different from what I expected. The rating must be misleading."

question = "Is the text talking about ratings?"

encoded_dict = tokenizer.encode_plus(question, text, return_tensors='pt', max_length=512)
input_ids = encoded_dict['input_ids']
attention_mask = encoded_dict['attention_mask']

with torch.no_grad():
    output = model(input_ids, attention_mask)
    answer = tokenizer.decode(output[0].argmax(dim=-1).tolist()[0])

if "Yes" in answer:
    print("The text is talking about misleading ratings.")
else:
    print("The text is not talking about misleading ratings.")


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


The text is not talking about misleading ratings.


In [1]:
# In the above cell, misclassification can be witnessed!

In [31]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# load the T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-large')
model = T5ForConditionalGeneration.from_pretrained('t5-large')

# define the input question and context

question = "Is the text talking about ratings?"
text = "I bought this product based on the high rating, but it turned out to be completely different from what I expected. The rating must be misleading."

# context = "France is a country located in Western Europe. Its capital is Paris."

# tokenize the input question and context
input_string = f"answer: {question} context: {context}"
input_ids = tokenizer.encode(input_string, return_tensors='pt')

# generate an answer using the model
outputs = model.generate(input_ids=input_ids)
answer = tokenizer.decode(outputs[0])

# print the generated answer
print(answer)


<pad> not_duplicate</s>


In [36]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# load the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

# define the topic and sentence to check
topic = ["reviews", 'fake reviews', 'misleading reviews', 'misleading ratings']
# context = "Machine learning is a subfield of artificial intelligence."
context =' Finally, I noticed that some of the five star reviewers are personal friends or relatives of the authors, which makes me wonder about the credibility there.'
# context = "I bought this product based on the high rating, but it turned out to be completely different from what I expected. The rating must be misleading."
# context = 'I consider myself an open minded reader, a person of varying tastes and a patience to learn or understand different approaches to the art of literature.But this book? Oh my days, could you say anymore words and manage to get nowhere with a narrative.90 pages in and I barely understood what was going on, no narrative flow and too much filler. Was this man drunk or high when he wrote this? Were the reviewers also somewhat inebriated? Surely bothNot for me, probably not for you either. As another Amazon review said - Painful.'
# tokenize the sentence and topic using the tokenizer
inputs = tokenizer.encode_plus(context, topic, add_special_tokens=True, return_tensors='pt')

# use the model to generate a prediction for the relevance of the sentence to the topic
outputs = model(**inputs)
prediction = torch.argmax(outputs.logits)

# print the prediction (0 for not relevant, 1 for relevant)
if prediction == 1:
    print("The sentence is relevant to the topic.")
else:
    print("The sentence is not relevant to the topic.")


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

The sentence is not relevant to the topic.


In [39]:
import openai
import re

# Set up the OpenAI API credentials
openai.api_key = "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"

# Define the question and context
# question = "What is the capital of France?"
# context = "France is a country located in Western Europe. Its capital is Paris."

# context = "just read it realms of layers etc i like it only one of a kind"
# context = 'an amazing array of photographs and knowledge the hubble has given us this is an excellent story of some really great achievements '
context = "product is not misleading"
question = "yes/no is there anything spoken about misleading ratings in the text?"

# Use the Davinci engine to generate the answer
response = openai.Completion.create(
    engine="davinci",
    prompt=f"Question: {question}\nContext: {context}\nAnswer:",
    max_tokens=100,
    n=1,
    stop=None,
    temperature=0.7,
)

# Extract the answer from the response
answer = response.choices[0].text.strip()

# Remove any unwanted characters from the answer
answer = re.sub(r'[\n\t]+', '', answer)

# Print the answer
print(answer)




The code above uses the OpenAI API to generate an answer to a question based on a given context. It sets up the OpenAI API credentials and defines the question and context variables.

To generate the answer, it makes a request to the OpenAI API using the openai.Completion.create() method. The prompt includes the question and context, and the response is generated using the Davinci engine. The max_tokens, n, stop, and temperature parameters are used to control the response generation.

The generated answer is extracted from the response and stored in the answer variable. The code then removes any unwanted characters from the answer using regular expressions. Finally, the answer is printed.

In [51]:
# Define the input prompt
input_prompt = "Context: I am totally convinced the good reviews are fake. This by far being one of the worst movies I have ever seen. What a horrible story line I was so bored. The cast stands outside for part of this movie, while people disappear, they argue. Then they move into the lodge and the rest of the cast disappears. That's it that's the movie, no explanation, no climax , just a poorly written script .. Question: Is there a mention of fake reviews?"

# Define the parameters for the API call
parameters = {
  'model': 'davinci',
  'prompt': input_prompt,
  'max_tokens': 100,
  'temperature': 0.6,
  'n': 1,
  'stop': None
}

# Make the API call
response = openai.Completion.create(**parameters)

# Extract the answer from the API response
answer = response.choices[0].text.strip()

In [52]:
answer

'Posted: Jul 25, 2014 - 9:11 PM By: Mr. Marshall (Member)\n\nI think you\'re confusing that movie with another one.\n\n\n\nI\'m not sure. I saw it on Netflix and I thought it was called "The Strangers".\n\n\n\nIt is not the one I am thinking of, because I saw that one awhile back and I didn\'t like it either.\n\n\n\nThe one I saw recently was about a husband and wife'

In [2]:
print("End of Notebook!")

End of Notebook!
