In [None]:
%reload_ext autoreload
%autoreload 2
from modules.evaluation import generate_examples, llm_assisted_evaluation
from modules.document_transformers import add_metadata_to_documents
from modules.loaders import get_reddit_loader, get_wiki_loader, get_custom_flipper_loader
from modules.retrievers import get_tavily_retriever, vector_store_retriever_from_loaders
from modules.evaluation import generate_examples, llm_assisted_evaluation
from modules.vector_stores import chroma_vector_store, FAISS_vector_store, doc_array_in_memory_store
from modules.utils import save_docs_to_jsonl, load_docs_from_jsonl
from langchain_openai import ChatOpenAI

In [None]:

import os

tavily_api_key = os.environ.get('TAVILY_API_KEY', '')
api_key = os.environ.get('OPENAI_API_KEY', '')

if not api_key:
    print('You need to set the OPENAI_API_KEY environment variable to use this script.')
if not tavily_api_key:
    print('You need to set the TAVILY_API_KEY environment variable to use this script.')

In [None]:
llm_model = "gpt-4-0125-preview"

# Loading the data

In [None]:
flipper_loader = get_custom_flipper_loader()
reddit_loader = get_reddit_loader()

In [None]:
#import nest_asyncio
#nest_asyncio.apply()

flipper_loader.requests_per_second = 2
flipper_docs = flipper_loader.load()

In [None]:

flipper_docs[1]

# Add metadata

In [None]:
enhanced_flipper_docs = add_metadata_to_documents(flipper_docs)
enhanced_flipper_docs[:5]

In [None]:
len(enhanced_flipper_docs)

In [None]:
save_docs_to_jsonl(enhanced_flipper_docs, 'data/flipper_docs_with_metadata.jsonl')

In [None]:
enhanced_flipper_docs2 = load_docs_from_jsonl('data/flipper_docs_with_metadata.jsonl')
len(enhanced_flipper_docs2)

In [None]:
import json
print(
    *[json.dumps(d.metadata) for d in enhanced_flipper_docs[:5]],
    sep="\n\n---------------\n\n",
)

In [None]:
#chroma_db = chroma_vector_store(flipper_docs, api_key)
faiss_db = FAISS_vector_store(enhanced_flipper_docs, api_key)

In [None]:
faiss_db.similarity_search("All possible shots in Medival Madness", 5)

# Create retriever tools for the Agent

In [None]:
flipper_retriever = faiss_db.as_retriever()

In [None]:
from langchain.tools.retriever import create_retriever_tool
from langchain_community.tools.tavily_search import TavilySearchResults


retriever_tool = create_retriever_tool(
    flipper_retriever,
    "pinball_search",
    "Search for information about Pinball! Use metadata if it helps to answer the question. Do not combine multiple sources of information to produce answers.",
)

tavity_tool = TavilySearchResults(name="tavily_search")


# Create the Agents

In [None]:
from langchain_openai import ChatOpenAI
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import create_openai_functions_agent
from langchain.agents import AgentExecutor

In [None]:
# Baseline agent
from langchain_community.tools.tavily_search import TavilySearchResults

tools = [tavity_tool]

prompt = hub.pull("hwchase17/openai-functions-agent")
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
agent_baseline = create_openai_functions_agent(llm, tools, prompt)
agent_executor_baseline = AgentExecutor(agent=agent_baseline, tools=tools, verbose=True)

In [None]:
# Custom agent
# Get the prompt to use - you can modify this!
tools = [retriever_tool]
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a pinball agent, always giving as short and concise answers as possible to pinball-related questions. You do not know anything else but pinball. You can use the `pinball_search` tool to find information about pinball.",
        ),
        ("user", "{query}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)
llm = ChatOpenAI(model=llm_model, temperature=0)
agent_custom = create_openai_functions_agent(llm, tools, prompt)


In [None]:
# Combined agent
tools = [retriever_tool, tavity_tool]
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a pinball agent, always giving as short and concise answers as possible to pinball-related questions. You have two ways of finding information. `pinball_search` is great for finding rules and general information about the game. The `tavity_search` tool can be used to find strategy and gameplay related information, or more information if pinball search doesn't return anything useful.",
        ),
        ("user", "{query}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)
llm = ChatOpenAI(model=llm_model, temperature=0)
agent_combined = create_openai_functions_agent(llm, tools, prompt)

In [None]:
from langchain.agents.format_scratchpad.openai_tools import (
    format_to_openai_tool_messages,
)
tools = [retriever_tool]
llm_with_tools = llm.bind_tools(tools)


from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser

#agent = create_openai_functions_agent(llm, tools, prompt)
agent_chained = (
    {
        "query": lambda x: x["query"],
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [None]:
agent_executor_baseline = AgentExecutor(agent=agent_baseline, tools=[tavity_tool])

In [None]:
agent_executor_custom = AgentExecutor(agent=agent_custom, tools=[retriever_tool])

In [None]:
agent_executor_combined = AgentExecutor(agent=agent_combined, tools=[retriever_tool,tavity_tool])

In [None]:
agent_executor_chained = AgentExecutor(agent=agent_chained,  tools=[retriever_tool])

# Evaluate the different Agents

In [None]:
import langchain
langchain.debug = False
agent_executor_baseline.invoke({"input": "How do you get a multi-ball in Addams Family?"})

In [None]:
agent_executor_custom.invoke(
    {
        "query": "How do you get a multi-ball in Addams Family?",
    }
)

In [None]:
agent_executor_combined.invoke({"query": "What is the best strategy in Addams Family?"})

In [None]:
agent_executor_chained.invoke({"input": "How do you get a multi ball in Addams Family?"})

In [315]:

example_genertor_llm = ChatOpenAI(model=llm_model, temperature=0)
new_samples = generate_examples(example_genertor_llm, enhanced_flipper_docs)



New examples: [{'query': 'In the game of 4 Square by D. Gottlieb & Co., released in 1971, how does a player increase the score value of the top rollovers, and what is the new score value after this increase?', 'answer': 'After the first sequence is completed in the game of 4 Square, the top rollovers score 200 points, increased from their original score value.'}, {'query': 'In the Addams Family pinball game, what is the initial jackpot value during Multi-Ball(TM) play, and how can it be increased to its maximum value?', 'answer': 'The initial jackpot value during Multi-Ball(TM) play is 10M. It can be increased to its maximum value of 25M by hitting the Train Wreck target and by making center ramp shots, each of which adds 1M to the jackpot.'}, {'query': 'In the game "The Addams Family Gold Edition" pinball, what is the starting value of the Jackpot during Multiball, and how can it be increased?', 'answer': 'The starting value of the Jackpot during Multiball is either 5 million or 15 mi

In [None]:
"""with open('data/pinball_examples.json', 'w') as file:
    json.dump(new_samples, file)"""

In [291]:
automatic_samples = json.load(open('data/pinball_examples.json'))

In [299]:
hard_coded_samples = json.load(open('data/manual_pinball_examples.json'))

In [303]:
total_samples = automatic_samples + hard_coded_samples
predictions = automatic_samples + hard_coded_samples

In [305]:

for idx,sample in enumerate(predictions):
    query = sample['query']
    result = agent_executor_chained.invoke({"query":query})['output']
    predictions[idx]['result'] = result




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `pinball_search` with `{'query': '4 Square 1971 pop bumpers scoring'}`


[0m[36;1m[1;3mPinball Archive Rule Sheet: 4 Square

  4 Square

Written by Tim Meighan


"4 Square" was released by D. Gottlieb & Co. in 1971.
More information is available from the
Internet Pinball Database.

GAME ELEMENTS

The 4 Square playfield includes two sets of targets and three sets
of rollovers, with each set consisting of four items.  Each item
in a given set has a corresponding indicator light marked with a
unique number value: 1, 2, 3, or  4.  The upper central area of the
playfield also contains three pop bumpers arranged in a triangle
configuration.  There are also two kicking rubber islands located
along each side just above the flippers.  The lower central portion
of the playfield is wide open and contains four rows of sequence
indicator lights, with each row consisting of 4 lights numbered
from 1 to 4.

SCORING

Four pop bu

In [None]:
from langchain.evaluation.qa import QAEvalChain

llm = ChatOpenAI(temperature=0, model=llm_model)

eval_chain = QAEvalChain.from_llm(llm)

In [308]:
from langchain.evaluation import load_evaluator

evaluator = load_evaluator("pairwise_embedding_distance")

  warn_deprecated(


In [311]:
total_scores = []
for idx,sample in enumerate(predictions):
    answer = sample['answer']
    result = sample['result']
    score = evaluator.evaluate_string_pairs(prediction=answer, prediction_b=result)
    total_scores.append(score)

In [312]:
total_scores

[{'score': 0.11709912728835314},
 {'score': 0.09955259675258987},
 {'score': 0.09665532881810146},
 {'score': 0.039461424654903876},
 {'score': 0.05555065302281237},
 {'score': 0.23773264237181102},
 {'score': 0.22168380286012423}]

In [322]:
from modules.evaluation import evaluate_agent
scores_chained, average_score_chained = evaluate_agent(agent_executor_chained, enhanced_flipper_docs,use_previous_samples=True)


Using previous examples
Done generating examples, invoking agent...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `pinball_search` with `{'query': '4 Square pop bumpers scoring'}`


[0m[36;1m[1;3mPinball Archive Rule Sheet: 4 Square

  4 Square

Written by Tim Meighan


"4 Square" was released by D. Gottlieb & Co. in 1971.
More information is available from the
Internet Pinball Database.

GAME ELEMENTS

The 4 Square playfield includes two sets of targets and three sets
of rollovers, with each set consisting of four items.  Each item
in a given set has a corresponding indicator light marked with a
unique number value: 1, 2, 3, or  4.  The upper central area of the
playfield also contains three pop bumpers arranged in a triangle
configuration.  There are also two kicking rubber islands located
along each side just above the flippers.  The lower central portion
of the playfield is wide open and contains four rows of sequence
indicator lights, with each row con

In [323]:
scores_chained, average_score_chained = evaluate_agent(agent_executor_combined, enhanced_flipper_docs,use_previous_samples=True)

Using previous examples
Done generating examples, invoking agent...


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `pinball_search` with `{'query': '4 Square 1971 pop bumpers scoring'}`


[0m[36;1m[1;3mPinball Archive Rule Sheet: 4 Square

  4 Square

Written by Tim Meighan


"4 Square" was released by D. Gottlieb & Co. in 1971.
More information is available from the
Internet Pinball Database.

GAME ELEMENTS

The 4 Square playfield includes two sets of targets and three sets
of rollovers, with each set consisting of four items.  Each item
in a given set has a corresponding indicator light marked with a
unique number value: 1, 2, 3, or  4.  The upper central area of the
playfield also contains three pop bumpers arranged in a triangle
configuration.  There are also two kicking rubber islands located
along each side just above the flippers.  The lower central portion
of the playfield is wide open and contains four rows of sequence
indicator lights, with each ro

In [324]:
scores_chained, average_score_chained

([0.12924004117265586,
  0.0987160006111395,
  0.09738548167764227,
  0.017257464866357264,
  0.04684341642141887,
  0.12924004117265586,
  0.09945849920106564,
  0.09738548167764227,
  0.021082022626003005,
  0.05318920091579726],
 0.07897976503423779)