In [1]:
import os
import logging
import pandas as pd
import datasets
from ml_wrappers.model import OpenaiWrapperModel
from transformers import pipeline
import numpy as np

In [2]:
# Load the "train" split of the "squad" dataset; the bare name on the last
# line makes the notebook display the dataset summary.
dataset = datasets.load_dataset(path="squad", split="train")
dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 87599
})

In [3]:
# Per-row columns pulled out of `dataset`, plus the fully templated prompt
# built from each row's context and question.
questions = []
context = []
answers = []
prompts = []
# Deliberately NOT called `template`: a class with that name is defined and
# instantiated further down in this notebook, and re-running this cell would
# otherwise rebind that name to a plain string and break the instantiation.
PROMPT_TEMPLATE = 'Answer the question given the context.\n\ncontext:\n{context}\n\nquestion:\n{question}'
for row in dataset:
    context.append(row['context'])
    questions.append(row['question'])
    # Only the first entry of the row's answer texts is kept.
    answers.append(row['answers']['text'][0])
    templated_prompt = PROMPT_TEMPLATE.format(context=row['context'], question=row['question'])
    prompts.append(templated_prompt)

In [4]:
# One-column frame holding the templated prompts. The context / questions /
# answers lists are not added as columns.
data = pd.DataFrame(dict(prompt=prompts))
# First three rows, kept as a small sample under its own name.
test_data = data[:3]
data.head()

Unnamed: 0,prompt
0,Answer the question given the context.\n\ncont...
1,Answer the question given the context.\n\ncont...
2,Answer the question given the context.\n\ncont...
3,Answer the question given the context.\n\ncont...
4,Answer the question given the context.\n\ncont...


In [5]:
class template(object):
    """Stub model wrapper that answers every input with a fixed string.

    The wrapped model is stored on the instance but is not called by
    ``predict``.
    """

    def __init__(self, model):
        """Keep a reference to ``model`` as ``self.model``."""
        self.model = model

    def predict(self, dataset):
        """Return one placeholder answer per entry of ``dataset``.

        Args:
            dataset: Any sized object; only ``len(dataset)`` is used.

        Returns:
            A numpy array containing ``len(dataset)`` copies of the
            placeholder answer.
        """
        placeholder = 'This is a dummy answer'
        # NOTE: an earlier, disabled variant prepended an instruction to each
        # context/question pair and delegated to self.model.predict(dataset).
        return np.array([placeholder] * len(dataset))

# Model and Evaluator

In [6]:
# Positional arguments are read from the environment in this exact order;
# os.getenv yields None for any variable that is not set.
openai_model = OpenaiWrapperModel(
    *(os.getenv(env_name) for env_name in (
        'OPENAI_API_TYPE',
        'OPENAI_API_ENDPOINT',
        'OPENAI_API_VERSION',
        'OPENAI_API_KEY',
    )),
    engine='gpt-4')

# Wrap the OpenAI model in the notebook's `template` class.
pipeline_model = template(openai_model)

In [7]:
# Second wrapper instance built from the same environment variables and
# engine as the model above; os.getenv yields None for any unset variable.
eval_model = OpenaiWrapperModel(
    *(os.getenv(env_name) for env_name in (
        'OPENAI_API_TYPE',
        'OPENAI_API_ENDPOINT',
        'OPENAI_API_VERSION',
        'OPENAI_API_KEY',
    )),
    engine='gpt-4')

# RAI Insights

In [8]:
from responsibleai_text import RAITextInsights, ModelTask
from raiwidgets import ResponsibleAIDashboard

Dataset download attempt 1 of 4


In [9]:
# Preview the small sample frame right before it is passed to RAITextInsights.
test_data.head()

Unnamed: 0,prompt
0,Answer the question given the context.\n\ncont...
1,Answer the question given the context.\n\ncont...
2,Answer the question given the context.\n\ncont...


In [10]:
# Build the text insights object from the wrapped model and the sample frame,
# using the 'prompt' column as the text input for a generative-text task.
# NOTE(review): the third positional argument is None -- presumably "no target
# column"; confirm against the RAITextInsights signature.
rai_insights = RAITextInsights(
    pipeline_model, test_data, None,
    task_type=ModelTask.GENERATIVE_TEXT,
    text_column='prompt')

feature extraction: 0it [00:00, ?it/s]

feature extraction: 3it [00:00,  3.04it/s]
Failed to parse metric `This is a dummy answer`: invalid literal for int() with base 10: 'This is a dummy answer'
Failed to parse metric `This is a dummy answer`: invalid literal for int() with base 10: 'This is a dummy answer'
Failed to parse metric `This is a dummy answer`: invalid literal for int() with base 10: 'This is a dummy answer'


computing coherence score
coherence score
[0, 0, 0]
ext_dataset
['positive_words', 'negative_words', 'negation_words', 'negated_entities', 'named_persons', 'sentence_length', 'target_score']
   positive_words  negative_words  negation_words  negated_entities  \
0              50               0               0                 0   
1              50               0               0                 0   
2              52               0               0                 0   

   named_persons  sentence_length  target_score  
0              3              827             5  
1              2              805             5  
2              3              832             5  


In [11]:
# Register the error-analysis component first, then run compute() on the
# insights object.
rai_insights.error_analysis.add()
rai_insights.compute()

Error Analysis
Current Status: Generating error analysis reports.
Current Status: Finished generating error analysis reports.
Time taken: 0.0 min 0.3656380000002173 sec


In [12]:
# Create the dashboard for the computed insights; as the cell's last
# expression, the returned object is also displayed.
ResponsibleAIDashboard(rai_insights)

ResponsibleAI started at http://localhost:8704


<raiwidgets.responsibleai_dashboard.ResponsibleAIDashboard at 0x2858abb3e20>