In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from transformers import AutoTokenizer
from transformers import TFAutoModelForQuestionAnswering
from datasets import Dataset

## 1. Getting predictions on raw data.

In [2]:
def final_func_1(X):
    """
        Extract, for every text, the word/phrase that carries the given sentiment.

        The task is posed as extractive question answering: the sentiment is the
        question, the text is the context and the answer is a span of the text.

        Argument-> X as python dictionary with 2 keys as text(context) and sentiment(question) and their values as
            parallel lists of strings.
        Return-> Extracted word/phrase (answer) from each text(context) based on its sentiment(question), in input
            order. An empty input returns an empty list without loading the tokenizer or the model.
    """

    # Nothing to predict: return early instead of pushing an empty dataset through the tokenizer and model.
    if len(X["text"]) == 0:
        return []

    # Pre-Processing raw data for final predictions.

    # converting simple python dictionary to huggingface dataset format.
    dataset = Dataset.from_dict(X)

    # Every tokenized (sentiment, text) pair is padded up to this many tokens.
    MAX_LENGTH = 105

    # loading saved roberta-base tokenizer to tokenize the text into input IDs that model can make sense of.
    tokenizer = AutoTokenizer.from_pretrained(
        "saved_models/roberta-base/roberta_base_tokenizer", local_files_only=True
    )

    def process_data(examples):
        """Tokenize a batch of (sentiment, text) pairs and keep offsets only for the text tokens."""
        # NOTE(review): no truncation is requested here, so a pair longer than MAX_LENGTH
        # presumably keeps its full length — confirm this cannot happen for the expected inputs.
        inputs = tokenizer(
            examples["sentiment"],
            examples["text"],
            max_length=MAX_LENGTH,
            padding="max_length",
            return_offsets_mapping=True,
        )
        # Assigning None values to all offset mapping of tokens which are not the context tokens,
        # so that post-processing can recognise (and reject) spans outside the text.
        for i in range(len(inputs["input_ids"])):
            sequence_ids = inputs.sequence_ids(i)
            inputs["offset_mapping"][i] = [
                o if sequence_ids[k] == 1 else None
                for k, o in enumerate(inputs["offset_mapping"][i])
            ]
        return inputs

    processed_raw_data = dataset.map(process_data, batched=True)

    # converting dataset format into tf dataset.
    tf_raw_dataset = processed_raw_data.to_tf_dataset(
        columns=["input_ids", "attention_mask"],
        shuffle=False,  # keep the input order so logits line up with the rows below
        batch_size=1,
    )

    # loading saved roberta-base model
    model = TFAutoModelForQuestionAnswering.from_pretrained(
        "saved_models/roberta-base/roberta_base", local_files_only=True
    )

    # final predictions.
    outputs = model.predict(tf_raw_dataset)
    start_logits = outputs.start_logits
    end_logits = outputs.end_logits

    # Post Processing.
    # Using start_logits and end_logits to generate the final answer from the given context.
    # Only the n_best highest-scoring start and end positions are considered.
    n_best = 20

    def predict_answers(inputs):
        """Turn a batch of start/end logits into one answer string per example."""
        predicted_answer = []
        for i in range(len(inputs["offset_mapping"])):
            context = inputs["text"][i]
            offset = inputs["offset_mapping"][i]
            # Token positions sorted from highest to lowest logit, limited to the n_best best.
            start_indexes = np.argsort(inputs["start_logits"][i])[-1: -n_best - 1: -1].tolist()
            end_indexes = np.argsort(inputs["end_logits"][i])[-1: -n_best - 1: -1].tolist()

            # None means "no valid span found yet"; an empty string is a legitimate answer.
            answer = None
            for start_index in start_indexes:
                # skip answer that are not in the context.
                if offset[start_index] is None:
                    continue
                for end_index in end_indexes:
                    # skip ends outside the context and spans with negative length.
                    if offset[end_index] is None or end_index < start_index:
                        continue
                    answer = context[offset[start_index][0]: offset[end_index][1]]
                    break
                if answer is not None:
                    break

            # When no candidate pair is valid, fall back to the whole text of THIS example
            # (never to a value left over from a previous example).
            predicted_answer.append(context if answer is None else answer)
        return {"predicted_answer": predicted_answer}

    processed_raw_data.set_format("pandas")

    # Attach the logits and the raw text to the tokenized rows so post-processing sees them together.
    processed_raw_df = processed_raw_data[:]
    processed_raw_df["start_logits"] = start_logits.tolist()
    processed_raw_df["end_logits"] = end_logits.tolist()
    processed_raw_df["text"] = X["text"]

    final_data = Dataset.from_pandas(processed_raw_df)
    final_data = final_data.map(predict_answers, batched=True)

    return final_data["predicted_answer"]
    

In [3]:
# Two toy examples: one positive and one negative text.
x = {
    "text": ["Well this is a good joke", "This movie is very bad."],
    "sentiment": ["positive", "negative"],
}
# The bare call is the last expression, so the predicted phrases are displayed as the cell output.
final_func_1(X=x)

  0%|          | 0/1 [00:00<?, ?ba/s]

All model checkpoint layers were used when initializing TFRobertaForQuestionAnswering.

All the layers of TFRobertaForQuestionAnswering were initialized from the model checkpoint at saved_models/roberta-base/roberta_base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForQuestionAnswering for predictions without further training.


  0%|          | 0/1 [00:00<?, ?ba/s]

['good joke', 'bad.']

## 2. Evaluating performance on raw data.

In [4]:
def final_func_2(X,Y):

    """
        Predict the sentiment-bearing phrase of every text and score the predictions against the ground truth.

        Argument-> X as python dictionary with 2 keys as text(context) and sentiment(question) and their values as
            parallel lists of strings.
        Argument-> Y as list of answers to the questions given in the context, one per text.

        Return-> jaccard score between predictions and ground truth, averaged over all examples.
        Raises-> ValueError when Y is empty or its length differs from the number of texts.
    """

    # Validate before any tokenizer/model is loaded: an average over zero answers is undefined,
    # and a length mismatch would score the wrong pairs (or only part of the data).
    if len(Y) != len(X["text"]):
        raise ValueError(
            "Y must contain exactly one answer per text: got %d answers for %d texts."
            % (len(Y), len(X["text"]))
        )
    if len(Y) == 0:
        raise ValueError("Cannot compute a jaccard score on empty input.")

    # Pre-Processing raw data for final predictions.

    # converting simple python dictionary to huggingface dataset format.
    dataset = Dataset.from_dict(X)

    # Every tokenized (sentiment, text) pair is padded up to this many tokens.
    MAX_LENGTH = 105

    # loading saved roberta-base tokenizer to tokenize the text into input IDs that model can make sense of.
    tokenizer = AutoTokenizer.from_pretrained(
        "saved_models/roberta-base/roberta_base_tokenizer", local_files_only=True
    )

    def process_data(examples):
        """Tokenize a batch of (sentiment, text) pairs and keep offsets only for the text tokens."""
        # NOTE(review): no truncation is requested here, so a pair longer than MAX_LENGTH
        # presumably keeps its full length — confirm this cannot happen for the expected inputs.
        inputs = tokenizer(
            examples["sentiment"],
            examples["text"],
            max_length=MAX_LENGTH,
            padding="max_length",
            return_offsets_mapping=True,
        )
        # Assigning None values to all offset mapping of tokens which are not the context tokens,
        # so that post-processing can recognise (and reject) spans outside the text.
        for i in range(len(inputs["input_ids"])):
            sequence_ids = inputs.sequence_ids(i)
            inputs["offset_mapping"][i] = [
                o if sequence_ids[k] == 1 else None
                for k, o in enumerate(inputs["offset_mapping"][i])
            ]
        return inputs

    processed_raw_data = dataset.map(process_data, batched=True)

    # converting dataset format into tf dataset.
    tf_raw_dataset = processed_raw_data.to_tf_dataset(
        columns=["input_ids", "attention_mask"],
        shuffle=False,  # keep the input order so logits line up with the rows below
        batch_size=1,
    )

    # loading saved roberta-base model
    model = TFAutoModelForQuestionAnswering.from_pretrained(
        "saved_models/roberta-base/roberta_base", local_files_only=True
    )

    # final predictions.
    outputs = model.predict(tf_raw_dataset)
    start_logits = outputs.start_logits
    end_logits = outputs.end_logits

    # Post Processing.
    # Using start_logits and end_logits to generate the final answer from the given context.
    # Only the n_best highest-scoring start and end positions are considered.
    n_best = 20

    def predict_answers(inputs):
        """Turn a batch of start/end logits into one answer string per example."""
        predicted_answer = []
        for i in range(len(inputs["offset_mapping"])):
            context = inputs["text"][i]
            offset = inputs["offset_mapping"][i]
            # Token positions sorted from highest to lowest logit, limited to the n_best best.
            start_indexes = np.argsort(inputs["start_logits"][i])[-1: -n_best - 1: -1].tolist()
            end_indexes = np.argsort(inputs["end_logits"][i])[-1: -n_best - 1: -1].tolist()

            # None means "no valid span found yet"; an empty string is a legitimate answer.
            answer = None
            for start_index in start_indexes:
                # skip answer that are not in the context.
                if offset[start_index] is None:
                    continue
                for end_index in end_indexes:
                    # skip ends outside the context and spans with negative length.
                    if offset[end_index] is None or end_index < start_index:
                        continue
                    answer = context[offset[start_index][0]: offset[end_index][1]]
                    break
                if answer is not None:
                    break

            # When no candidate pair is valid, fall back to the whole text.
            predicted_answer.append(context if answer is None else answer)
        return {"predicted_answer": predicted_answer}

    processed_raw_data.set_format("pandas")

    # Attach the logits and the raw text to the tokenized rows so post-processing sees them together.
    processed_raw_df = processed_raw_data[:]
    processed_raw_df["start_logits"] = start_logits.tolist()
    processed_raw_df["end_logits"] = end_logits.tolist()
    processed_raw_df["text"] = X["text"]

    final_data = Dataset.from_pandas(processed_raw_df)
    final_data = final_data.map(predict_answers, batched=True)

    # defining jaccard score
    def jaccard(str1, str2):
        """Word-level Jaccard similarity of two strings, case-insensitive, split on whitespace."""
        a = set(str1.lower().split())
        b = set(str2.lower().split())
        # Both strings have no words: the ratio below would be 0/0, so a fixed 0.5 is returned.
        if not a and not b:
            return 0.5
        c = a.intersection(b)
        return float(len(c)) / (len(a) + len(b) - len(c))

    predicted_val_answers = final_data["predicted_answer"]

    # calculating the jaccard score: mean over all (ground truth, prediction) pairs.
    score = 0
    for i in range(len(Y)):
        score += jaccard(Y[i], predicted_val_answers[i])

    return score / len(Y)
    

In [5]:
# Evaluation examples: two texts with their sentiment labels ...
x = {
    "text": [
        'Glad I went out, glad I didn`t leave early, and glad to be afterpartying it up @ Beth`s  I`m back!',
        ' I know! I`m so slow its horrible. DON`T TELL ON ME!',
    ],
    "sentiment": [
        "positive",
        "negative",
    ],
}
# ... and the ground-truth phrase for each text, in the same order.
y = [
    "glad",
    "horrible.",
]

In [6]:
# Score the predictions for the evaluation examples against their ground-truth phrases.
print(
    "Jaccard score between predictions and ground truth: ",
    final_func_2(X=x, Y=y),
)

  0%|          | 0/1 [00:00<?, ?ba/s]

All model checkpoint layers were used when initializing TFRobertaForQuestionAnswering.

All the layers of TFRobertaForQuestionAnswering were initialized from the model checkpoint at saved_models/roberta-base/roberta_base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForQuestionAnswering for predictions without further training.


  0%|          | 0/1 [00:00<?, ?ba/s]

Jaccard score between predictions and ground truth:  1.0
