In [1]:
import openai
# import streamlit as st
import re
from typing import Union

import os
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.callbacks import get_openai_callback
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.vectorstores import Chroma
from langchain.schema import HumanMessage, SystemMessage
from langchain.agents import AgentType, Tool, initialize_agent

from langchain.agents import (
    AgentExecutor,
    AgentOutputParser,
    LLMSingleActionAgent,
    Tool,
)
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import StringPromptTemplate
from langchain.schema import AgentAction, AgentFinish
from langchain.utilities import SerpAPIWrapper

import random

from system_prompt\
  import create_system_prompt, GUIDE_FOR_USERS
from constants import RETRIEVER_SEARCH_DEPTH, VECTORSTORE_DIRECTORY_NAME, TOP_K_SEARCH_RESULTS, MODEL_NAME
from my_secrets import OPENAI_API_KEY


print("Make sure you run setup.py with updated source data, and SPRs")

# Expose the key both to the `openai` module and, through the environment,
# to the LangChain wrappers constructed below.
openai.api_key = OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Fail fast with an actionable message when the persisted vector store is
# missing. Without this guard `vectorstore` is never assigned and the
# `as_retriever` call below dies with an unrelated-looking NameError.
if not os.path.exists(VECTORSTORE_DIRECTORY_NAME):
    raise FileNotFoundError(
        f"Vector store directory {VECTORSTORE_DIRECTORY_NAME!r} not found; "
        "run setup.py to build it before running this notebook."
    )

print("loading up vec database..\n")
vectorstore = Chroma(
    persist_directory=VECTORSTORE_DIRECTORY_NAME,
    embedding_function=OpenAIEmbeddings(),
)

# The default search type is kept on purpose. With search_type='mmr' the query
#   Help me grade this students outcome: "DEsired outcome: average trip time,
#   Actual outcome decreased trip time"
# did not surface the rubric: the first hit was about desired outcomes and none
# of the remaining hits contained the rubric. If MMR is revisited, pair it with
# a minimum relevance threshold and re-test against that query.
# NOTE(review): confirm that the vector store actually honours the "top_k" key;
# only "k" is a documented search kwarg as far as this cell shows.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": RETRIEVER_SEARCH_DEPTH, "top_k": TOP_K_SEARCH_RESULTS},
)

# temperature=0 requests the least random completions from the chat model.
llm = ChatOpenAI(model=MODEL_NAME, temperature=0)

# Question-answering chain over the retriever; it is later wrapped as the
# agent's single tool. ConversationalRetrievalChain.from_llm(llm=...,
# retriever=retriever, verbose=True) was tried as an alternative and is not used.
retrieval_chain_tool = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)


Make sure you run setup.py with updated source data, and SPRs
loading up vec database..



In [2]:
# Ad-hoc question used to exercise the retrieval chain on its own, before any
# agent is involved. Note that the name `prompt` is rebound further down to the
# CustomPromptTemplate instance, so this string is only valid within this cell.
prompt = """
what are use cases
"""
# Wrap the course system prompt in a SystemMessage so it can be passed as a
# one-element chat history.
system_prompt = SystemMessage(content=create_system_prompt())

# NOTE(review): the chain was built with RetrievalQA.from_chain_type, whose
# question is read from the "query" key; confirm whether the extra
# "chat_history" entry (and therefore the system prompt) has any effect here.
result = retrieval_chain_tool({"query": prompt, "chat_history": [system_prompt]})



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [3]:
# Display only the value stored under the 'result' key of the chain output.
result['result']

'Use cases are specific circumstances or sets of circumstances in which the actor (usually the customer) performs a particular job to be done. They are specific to each job to be done and each actor. Use cases should be important, distinct, clearly named, and accurate. They should not include the actor, the job to be done, or the outcome. Examples of use cases could be "Shopping online" or "Deodorizing refrigerators." They are used to help define product features, pricing, designs, packaging, and marketing messages.'

In [4]:
# The single tool exposed to the agent: it forwards a question to
# `retrieval_chain_tool.run` and returns whatever that call returns.
tools = [
    Tool(
        name="Product management course retrieval",
        func=retrieval_chain_tool.run,
        # Adjacent string literals are concatenated at compile time, so the
        # description stays one clean sentence run. A backslash continuation
        # *inside* a string literal would instead keep the next line's leading
        # indentation as part of the text shown to the LLM.
        description=(
            "useful for when you need to answer questions about product management, "
            "Job to be done, actors, outcomes, grading and rubrics, problem space, "
            "personas, and use cases. Input should be a fully formed question, "
            "not referencing any obscure pronouns from the conversation before."
        ),
    )
]

In [5]:
# Base prompt for the custom agent: the course system prompt followed by the
# Thought / Action / Action Input / Observation instructions.
#
# Placeholders left for later substitution:
#   {tools}, {tool_names}, {agent_scratchpad} - filled in by
#       CustomPromptTemplate.format
#   {input}                                   - the question being asked
#
# NOTE(review): this whole string is later passed through str.format (see
# CustomPromptTemplate.format), so any literal "{" or "}" returned by
# create_system_prompt() would have to be doubled - confirm the system prompt
# never contains braces.
template = create_system_prompt() + """

Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Question: {input}
{agent_scratchpad}"""

In [6]:
from langchain.prompts import StringPromptTemplate
from typing import Callable

def get_tools(query):
    """Return the tools to advertise to the agent for ``query``.

    ``query`` is accepted so the function matches the ``tools_getter``
    call made by the prompt template, but it is not consulted: the
    module-level ``tools`` list is returned unchanged for every input.
    """
    available_tools = tools
    return available_tools

# Prompt template used by the custom agent
class CustomPromptTemplate(StringPromptTemplate):
    """Prompt template that derives the scratchpad and tool listing at format time.

    Callers supply ``input`` and ``intermediate_steps``; ``agent_scratchpad``,
    ``tools`` and ``tool_names`` are computed inside :meth:`format`.
    """

    # Template text, rendered with str.format
    template: str
    # Called with the current ``input`` value; returns the tools to list
    tools_getter: Callable

    def format(self, **kwargs) -> str:
        """Render the template for one agent step.

        ``intermediate_steps`` is removed from ``kwargs`` and must be an
        iterable of ``(action, observation)`` pairs. Each pair contributes
        ``action.log`` followed by the observation and a fresh ``Thought: ``
        cue to the scratchpad.

        Returns:
            The formatted prompt string.
        """
        steps = kwargs.pop("intermediate_steps")
        # Replay every previous step so the model continues from the last thought
        kwargs["agent_scratchpad"] = "".join(
            action.log + f"\nObservation: {observation}\nThought: "
            for action, observation in steps
        )
        # Ask the getter which tools apply to this input
        available = self.tools_getter(kwargs["input"])
        # One "name: description" line per tool
        kwargs["tools"] = "\n".join(
            f"{tool.name}: {tool.description}" for tool in available
        )
        # Comma-separated names for the "Action:" instruction
        kwargs["tool_names"] = ", ".join(tool.name for tool in available)
        return self.template.format(**kwargs)

# Instantiate the agent prompt. This rebinds `prompt`, which an earlier cell
# used for the plain query string passed to the retrieval chain.
prompt = CustomPromptTemplate(
    template=template,
    tools_getter=get_tools,
    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
    # This includes the `intermediate_steps` variable because that is needed
    input_variables=["input", "intermediate_steps"],
)

In [7]:
# Stock zero-shot ReAct agent built from the same tools and LLM.
# NOTE(review): `agent` is rebound to an LLMSingleActionAgent in a later cell
# before anything visible here uses this object - confirm whether this cell is
# still needed or should get its own name.
agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

In [8]:
# Chain pairing the chat model with the custom agent prompt; it is handed to
# LLMSingleActionAgent below. verbose=True is why the formatted prompt shows up
# in the run output.
llm_chain = LLMChain(
    llm=llm, 
    prompt=prompt,
    verbose=True
)

In [9]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into either a tool invocation or a final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Interpret one completion produced under the agent prompt.

        Args:
            llm_output: Text returned by the LLM for the current step.

        Returns:
            ``AgentFinish`` when the text contains ``Final Answer:``; the
            output is whatever follows the last occurrence, stripped.
            Otherwise an ``AgentAction`` built from the ``Action:`` /
            ``Action Input:`` pair found in the text.

        Raises:
            ValueError: If there is no final answer and no action pair.
        """
        if "Final Answer:" not in llm_output:
            # Not finished yet: the text must name a tool and its input
            pattern = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
            found = re.search(pattern, llm_output, re.DOTALL)
            if found is None:
                raise ValueError(f"Could not parse LLM output: `{llm_output}`")
            tool_name, raw_input = found.group(1), found.group(2)
            return AgentAction(
                tool=tool_name.strip(),
                # Drop surrounding spaces first, then any surrounding quotes
                tool_input=raw_input.strip(" ").strip('"'),
                log=llm_output,
            )

        # Return values are a dictionary with a single `output` key
        final_text = llm_output.split("Final Answer:")[-1].strip()
        return AgentFinish(return_values={"output": final_text}, log=llm_output)

In [10]:
# Parser that converts each LLM completion into an action or a final answer.
output_parser = CustomOutputParser()

# Names the agent is allowed to emit after "Action:".
tool_names = [tool.name for tool in tools]
# Custom agent; this rebinds `agent`, replacing the initialize_agent result
# from the earlier cell.
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=output_parser,
    # Stop sequence matching the "Observation:" label the prompt template
    # itself writes into the scratchpad after each step.
    stop=["\nObservation:"],
    allowed_tools=tool_names,
)

In [11]:
# Executor that pairs the custom agent with the tool list; verbose=True is set
# so the run below prints its trace.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent, tools=tools, verbose=True
)

In [12]:
# Same question as the direct retrieval-chain test above, this time routed
# through the agent.
agent_executor.run(
    "what are use cases"
)



[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    # Mission
    You are a helpful assistant for Product Management Essentials, a course taught by Jim Berardone at Carnegie Mellon University. The course is targeted towards software engineers.

    Teaching assistants will need help grading students work.
    When asked to grade, use relevant rubrics to give a score at the start of your answer followed by a short explanation.
    Students will need help with coming up with ideas, or questions about the concepts.
    --
    
    # Context
    The following is distilled list of all the topics in the course packed as succinct statements, assertions, associations, concepts, analogies, and metaphors:
    - Product Management Essentials: Software engineers learn product management fundamentals.
- Instructor: Jim Berardone, Carnegie Mellon University.
- Course Objective: Simplify and clarify product management c

'In product management, use cases refer to specific scenarios in which the customer (also known as the actor) performs a particular task or "Job To Be Done". They serve as target scenarios for your product idea\'s application. Each use case can lead to different product features, products, pricing, designs, packaging, marketing messages, and more. Use cases should be clear, specific, significant, and accurately named. They should not include the actor, the job to be done, or the outcome. Examples of use cases could be "Shopping online", "Deodorizing refrigerators", or "Remote meetings on iOS devices".'