# Imports

In [40]:
import os
import jsonlines
from tqdm.notebook import tqdm

import lamini
import pandas as pd

from contract_poc.utils import read_pdf, build_prompts_from_dataframe
from contract_poc.qa_pipeline import QuestionAnswerPipeline, load_qa_prompts, save_answers
from contract_poc.answer_pipeline import AnswerPipeline

# Data Wrangling

In [19]:
eval_data = []
with jsonlines.Reader(open(os.path.join("eval_sets","gold-test-set.jsonlines"), "rb")) as reader:
    for line in reader:
        eval_data.append(line)

In [21]:
eval_data[:4]

[{'question': 'What countries does Uber operate in?',
  'answer': 'Our technology is available in approximately 72 countries around the world, principally in the United States (“U.S.”) and Canada, Latin America, Europe, the Middle East, Africa, and Asia (excluding China and Southeast Asia).'},
 {'question': 'What countries does Lyft operate in?',
  'answer': 'United states and select cities in Canada'},
 {'question': 'Do Uber and Lyft operate in Mexico?',
  'answer': 'Uber operates in Mexico including ride sharing and delivery services. Lyft only operates in the United States and select cities in Canada.'},
 {'question': "Think step by step. First, consider the largest spanish speaking north american country that lyft operates in. Second describe any recent regulation that impacts Lyft's business in that market.",
  'answer': 'Lyft only operates in canada and the united states.'}]

In [27]:
eval_df = pd.DataFrame(eval_data)
eval_df.head()

Unnamed: 0,question,answer
0,What countries does Uber operate in?,Our technology is available in approximately 7...
1,What countries does Lyft operate in?,United states and select cities in Canada
2,Do Uber and Lyft operate in Mexico?,Uber operates in Mexico including ride sharing...
3,"Think step by step. First, consider the larges...",Lyft only operates in canada and the united st...
4,"Think step by step. First, consider the larges...","Uber operates in mexico. Since April 2019, Mex..."


# Helper Functions

In [23]:
from lamini.generation.generation_node import GenerationNode
from lamini.generation.base_prompt_object import PromptObject
from lamini.generation.generation_pipeline import GenerationPipeline

class GenPipeline(GenerationPipeline):
    def __init__(self, model_name):
        super(GenPipeline, self).__init__()

        self.generation_node = GenerationNode(model_name=model_name)

    def forward(self, x):
        x = self.generation_node(x, output_type={"answer":"str"})
        return x

In [32]:
def simple_prompt_generator(
    df: pd.DataFrame, 
    input_col: str = "question", 
    output_col: str = "answer",
):
    for idx, row in df.iterrows():
        yield PromptObject(
            prompt = row[input_col],
            data = {
                "question": row[input_col],
                "expected_output": row[output_col],
            }
        )

In [34]:
eval_df

Unnamed: 0,question,answer
0,What countries does Uber operate in?,Our technology is available in approximately 7...
1,What countries does Lyft operate in?,United states and select cities in Canada
2,Do Uber and Lyft operate in Mexico?,Uber operates in Mexico including ride sharing...
3,"Think step by step. First, consider the larges...",Lyft only operates in canada and the united st...
4,"Think step by step. First, consider the larges...","Uber operates in mexico. Since April 2019, Mex..."
5,"In Q4 2021, how many advertisers did Uber have?","During the fourth quarter of 2021, active adve..."
6,What role does word of mouth play in Lyft go t...,Lyft benefits from positive word of mouth in t...
7,What is the biggest macro trend that has impac...,The covid 19 pandemic resulted in substantiall...
8,How has Uber and Lyft's reputation impacted th...,Uber has previously received significant media...
9,Think step by step. First consider the adverti...,"Uber has over 170,000 advertising merchants. L..."


In [54]:
def extract_json_save_csv(path: str, eval_df: pd.DataFrame):
    items = []
    with jsonlines.open(path, "r") as reader:
        for item in reader:
            print(item)
            answer = {
                "Question": item["question"],
                "Model Response": item["answer"],
                "Gold Response": eval_df[eval_df["question"].str.contains(item["question"])]["answer"].iloc[0]
            }
            items.append(answer)
    return items

In [56]:
text = "Think step by step. First consider the advertising marketplace on Uber. How big is it? Second, consider the word of mouth marketing campaign carried out by Lyft. What challenges has it faced? Finally, compare and contrast the different approaches, considering the relative strengths and weaknesses of each approach."

In [58]:
eval_df

Unnamed: 0,question,answer
0,What countries does Uber operate in?,Our technology is available in approximately 7...
1,What countries does Lyft operate in?,United states and select cities in Canada
2,Do Uber and Lyft operate in Mexico?,Uber operates in Mexico including ride sharing...
3,"Think step by step. First, consider the larges...",Lyft only operates in canada and the united st...
4,"Think step by step. First, consider the larges...","Uber operates in mexico. Since April 2019, Mex..."
5,"In Q4 2021, how many advertisers did Uber have?","During the fourth quarter of 2021, active adve..."
6,What role does word of mouth play in Lyft go t...,Lyft benefits from positive word of mouth in t...
7,What is the biggest macro trend that has impac...,The covid 19 pandemic resulted in substantiall...
8,How has Uber and Lyft's reputation impacted th...,Uber has previously received significant media...
9,Think step by step. First consider the adverti...,"Uber has over 170,000 advertising merchants. L..."


In [57]:
eval_df[eval_df["question"].str.contains(text)]

Unnamed: 0,question,answer


In [55]:
responses = extract_json_save_csv("responses/gold_test_responses_v1.jsonl", eval_df)

{'prompt': 'What countries does Uber operate in?', 'question': 'What countries does Uber operate in?', 'answer': "Our technology is available in approximately 72 countries around the world, principally in the United States ('U.S.') and Canada, and in Europe, the Middle East, Africa, and Asia (excluding China and Southeast Asia)."}
{'prompt': 'What countries does Lyft operate in?', 'question': 'What countries does Lyft operate in?', 'answer': 'Our technology is available in approximately 72 countries around the world, principally in the United States and Canada. We offer rides in a variety of markets under different brand names, including Lyft in the United States and Canada and Lyft Rentals in California and Florida. Our platform connects drivers with riders in many cities to provide convenient transportation around a city or long-distance trips across countries. Our network spans the United States, Canada, Australia, Africa, Asia (excluding China), Europe, the Middle East, Latin Ameri

IndexError: single positional indexer is out-of-bounds

In [46]:
responses[0]

{'Question': 'What countries does Uber operate in?',
 'Model Response': "Our technology is available in approximately 72 countries around the world, principally in the United States ('U.S.') and Canada, and in Europe, the Middle East, Africa, and Asia (excluding China and Southeast Asia).",
 'Gold Response': ''}

In [51]:
eval_df[eval_df["question"].str.contains(responses[0]["Question"])]["answer"].iloc[0]

'Our technology is available in approximately 72 countries around the world, principally in the United States (“U.S.”) and Canada, Latin America, Europe, the Middle East, Africa, and Asia (excluding China and Southeast Asia).'

# Tuning Experiments

In [33]:
generation_pipeline = GenPipeline(model_name = "d9341ae5cfc60d5ed9e173954387404b6419ee201aa2baacb79fe4a6cef532a1")
answers = generation_pipeline.call(simple_prompt_generator(eval_df))
await save_answers(answers, path="gold_test_responses_v1.jsonl", print_outputs=False)

Saving answers: 0 answers [00:00, ? answers/s]