In [1]:
import os
import logging
import pandas as pd
import datasets
from dotenv import load_dotenv
from ml_wrappers.model import OpenaiWrapperModel
from transformers import pipeline
import numpy as np

In [2]:
def replace_error_chars(message: str) -> str:
    """Return ``message`` with every backtick character removed.

    Args:
        message: Text to clean.

    Returns:
        A copy of ``message`` that contains no backtick characters.
    """
    return message.replace('`', '')

In [3]:
# Fetch the training split of the "squad" dataset; the bare name on the last
# line displays the resulting object's summary as the cell output.
dataset = datasets.load_dataset(path="squad", split="train")
dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 87599
})

In [4]:
# Prompt layout shared by every row. It is deliberately NOT called `template`:
# a later cell defines a class named `template`, and reusing the name here
# would rebind it to a str whenever this cell is re-run, breaking
# `template(openai_model)` further down.
PROMPT_TEMPLATE = 'Answer the question given the context.\n\ncontext:\n{context}\n\nquestion:\n{question}'

# Parallel lists: index i of each list describes the same dataset row.
questions = []
context = []
answers = []
prompts = []
for row in dataset:
    context.append(row['context'])
    questions.append(row['question'])
    # Keep only the first entry of the row's answer texts, with backticks stripped.
    answers.append(replace_error_chars(row['answers']['text'][0]))
    prompts.append(
        PROMPT_TEMPLATE.format(context=row['context'], question=row['question']))

In [5]:
# Gather the per-row lists into one frame; the answers column is left out on purpose.
data = pd.DataFrame(
    {
        'context': context,
        'questions': questions,
        # 'answers': answers,
        'prompt': prompts,
    }
)
# Only the first three rows are used for the rest of the notebook.
test_data = data.iloc[:3]
data.head()

Unnamed: 0,context,questions,prompt
0,"Architecturally, the school has a Catholic cha...",To whom did the Virgin Mary allegedly appear i...,Answer the question given the context.\n\ncont...
1,"Architecturally, the school has a Catholic cha...",What is in front of the Notre Dame Main Building?,Answer the question given the context.\n\ncont...
2,"Architecturally, the school has a Catholic cha...",The Basilica of the Sacred heart at Notre Dame...,Answer the question given the context.\n\ncont...
3,"Architecturally, the school has a Catholic cha...",What is the Grotto at Notre Dame?,Answer the question given the context.\n\ncont...
4,"Architecturally, the school has a Catholic cha...",What sits on top of the Main Building at Notre...,Answer the question given the context.\n\ncont...


In [6]:
class template(object):
    """Stub pipeline wrapper that answers every input with a fixed string.

    The wrapped model is stored but not called by ``predict``.
    """

    def __init__(self, model):
        """Remember ``model``; it is not used by the current ``predict``."""
        self.model = model

    def predict(self, dataset):
        """Return one placeholder answer per entry of ``dataset``.

        Args:
            dataset: Any object supporting ``len()``; its contents are ignored.

        Returns:
            A numpy array of length ``len(dataset)`` whose entries are all the
            same placeholder string.
        """
        # TODO: the real implementation is disabled. The earlier draft built a
        # "context/question" prompt per row and delegated to
        # self.model.predict(dataset); restore that once it is ready.
        placeholder = 'This is a dummy answer'
        return np.array([placeholder] * len(dataset))

# Model and Evaluator

In [7]:
# Pull variables from a local .env file (if one exists) into the process
# environment before they are read below. `load_dotenv` was imported but never
# called, so .env values were silently ignored and os.getenv returned None
# unless the variables had been exported in the shell. By default this does
# not override variables that are already set.
load_dotenv()

# Connection settings are passed positionally, in the order shown, from
# environment variables; only the engine name is fixed here.
openai_model = OpenaiWrapperModel(
    os.getenv('OPENAI_API_TYPE'),
    os.getenv('OPENAI_API_ENDPOINT'),
    os.getenv('OPENAI_API_VERSION'),
    os.getenv('OPENAI_API_KEY'),
    engine='gpt-4')

# Wrap the model in the stub pipeline class defined in the previous cell.
pipeline_model = template(openai_model)

In [8]:
# Second wrapper instance, built from the same environment variables as the
# model above. The values are passed positionally in the order the names are
# listed here.
eval_model = OpenaiWrapperModel(
    *(os.getenv(env_name) for env_name in (
        'OPENAI_API_TYPE',
        'OPENAI_API_ENDPOINT',
        'OPENAI_API_VERSION',
        'OPENAI_API_KEY',
    )),
    engine='gpt-4')

# RAI Insights

In [9]:
from responsibleai_text import RAITextInsights, ModelTask
from raiwidgets import ResponsibleAIDashboard

Dataset download attempt 1 of 4


In [10]:
# Preview the three-row sample that is passed to RAITextInsights below.
test_data.head()

Unnamed: 0,context,questions,prompt
0,"Architecturally, the school has a Catholic cha...",To whom did the Virgin Mary allegedly appear i...,Answer the question given the context.\n\ncont...
1,"Architecturally, the school has a Catholic cha...",What is in front of the Notre Dame Main Building?,Answer the question given the context.\n\ncont...
2,"Architecturally, the school has a Catholic cha...",The Basilica of the Sacred heart at Notre Dame...,Answer the question given the context.\n\ncont...


In [11]:
# Build the insights object from the stub pipeline and the three-row sample,
# requesting the generative-text task.
# NOTE(review): the third positional argument is None, presumably meaning
# "no target column". Confirm against the RAITextInsights signature.
rai_insights = RAITextInsights(
    pipeline_model, test_data, None,
    task_type=ModelTask.GENERATIVE_TEXT)

feature extraction: 3it [00:01,  2.21it/s]

model task
ModelTask.CLASSIFICATION
self dataset
   positive_words  negative_words  negation_words  negated_entities  \
0              50               0               0                 0   
1              50               0               0                 0   
2              52               0               0                 0   

   named_persons  sentence_length  \
0              3              827   
1              2              805   
2              3              832   

                                              prompt  
0  Answer the question given the context.\n\ncont...  
1  Answer the question given the context.\n\ncont...  
2  Answer the question given the context.\n\ncont...  





In [12]:
# TODO: Remove this once the insights object is updated to handle this
# Temporary workaround: attach the question and context columns of the sample,
# plus the evaluation model, directly onto the insights object as ad-hoc
# attributes.
# NOTE(review): presumably read by the generative-text code paths. Confirm
# which component consumes these attributes.
rai_insights.temp_questions = test_data['questions']
rai_insights.temp_context = test_data['context']
rai_insights.temp_eval_model = eval_model

In [13]:
# Register the error-analysis component on the insights object.
# NOTE(review): compute() is left disabled below. Confirm the dashboard does
# not need it to be run first.
rai_insights.error_analysis.add()
# rai_insights.compute()

In [14]:
# Launch the dashboard for the insights object; the recorded cell output shows
# it being served on a localhost URL.
ResponsibleAIDashboard(rai_insights)

ResponsibleAI started at http://localhost:8705


<raiwidgets.responsibleai_dashboard.ResponsibleAIDashboard at 0x15bc95868c0>