# Utilizing tools in LangChain

In [None]:
from langchain.agents import tool, Tool, create_react_agent, create_structured_chat_agent, AgentExecutor
from langchain_openai import OpenAI
from langchain import hub
from langchain.tools import StructuredTool

from langchain import LLMChain, PromptTemplate
from langchain.callbacks.base import BaseCallbackHandler

from langchain_openai import ChatOpenAI
from langchain.evaluation import load_evaluator

import time
import os

# Read the API key from the environment; the value is None when the variable is unset.
openai_api_key = os.environ.get('OPENAI_API_KEY')

## Creating custom tools

In [None]:
# Define the calculate_ltv tool function
@tool
def calculate_ltv(company_name: str) -> str:
    """Generate the LTV for a company."""
    # NOTE: the docstring above is deliberately left as one short line because
    # the @tool decorator uses it as the tool's description.
    #
    # The inputs are fixed illustrative values; only the company name varies.
    # Historical LTV is computed as average revenue divided by the churn rate.
    avg_churn = 0.25
    avg_revenue = 1000
    historical_LTV = avg_revenue / avg_churn

    # Churn is a rate, not a currency amount, so it is rendered as a
    # percentage instead of being prefixed with "$".
    report_lines = [
        f"LTV Report for {company_name}",
        f"Avg. churn: {avg_churn:.0%}",
        f"Avg. revenue: ${avg_revenue}",
        f"historical_LTV: ${historical_LTV}",
    ]
    # Every line, including the last, is newline-terminated.
    return "\n".join(report_lines) + "\n"

# Define the tools list
tools = [Tool(name="LTVReport",
              func=calculate_ltv,
              description="Use this for calculating historical LTV.")]

# Initialize the appropriate agent type
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0, openai_api_key=openai_api_key)

# Pull the ReAct prompt template from the LangChain hub
prompt = hub.pull("hwchase17/react")

agent = create_react_agent(
    llm,
    tools,
    prompt=prompt,
)

# The executor runs the agent loop with the given tools, for at most five iterations
agent_executor = AgentExecutor(
    agent=agent, tools=tools, handle_parsing_errors=True, verbose=True, max_iterations=5
)

# Named `user_input` rather than `input` so the builtin input() is not
# shadowed for the rest of the session.
user_input = (
    "Run a financial report that calculates historical LTV for Hooli"
)
agent_executor.invoke({"input": user_input})

## Scaling custom tools

In [None]:
def calculate_wellness_score(
    sleep_hours: float,
    exercise_minutes: float,
    healthy_meals: float,
    stress_level: float,
) -> float:
    """Calculate a Wellness Score based on sleep, exercise, nutrition, and stress management."""
    # Four categories contribute 0-25 points each, so the total lies in 0-100.
    # Full marks are reached at 8 hours of sleep, 30 minutes of exercise and
    # 3 healthy meals; stress is inverted, with level 0 earning the full 25
    # points and level 10 or above earning none.
    max_score_per_category = 25

    def _category_score(value, target):
        # Scale linearly toward the target, then clamp to [0, max] so that
        # negative or oversized inputs cannot push a category out of range.
        scaled = value / target * max_score_per_category
        return max(0.0, min(scaled, max_score_per_category))

    sleep_score = _category_score(sleep_hours, 8)
    exercise_score = _category_score(exercise_minutes, 30)
    nutrition_score = _category_score(healthy_meals, 3)
    stress_score = max_score_per_category - _category_score(stress_level, 10)

    return sleep_score + exercise_score + nutrition_score + stress_score

# Create a structured tool from calculate_wellness_score()
tools = [StructuredTool.from_function(calculate_wellness_score)]

# Initialize the appropriate agent type and tool set
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0, openai_api_key=openai_api_key)

# The structured chat agent parses JSON-formatted actions, so it is paired
# with the structured-chat prompt rather than the plain-text ReAct prompt.
prompt = hub.pull("hwchase17/structured-chat-agent")

agent = create_structured_chat_agent(
    llm,
    tools,
    prompt=prompt,
)

# Call the wrapped function directly through the tool's `func` attribute
# (the agent is not involved in this call).
wellness_tool = tools[0]
result = wellness_tool.func(sleep_hours=8, exercise_minutes=14, healthy_meals=10, stress_level=20)
print(result)

## Formatting tools as OpenAI functions

In [None]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.utils.function_calling import convert_to_openai_function

# Create an LTVDescription class to manually add a function description
class LTVDescription(BaseModel):
    # The field name matches calculate_ltv's parameter so that arguments
    # validated against this schema can be passed to the function by keyword.
    # The description text appears in the generated function schema.
    company_name: str = Field(description='Calculate an extremely simple historical LTV')

# Format the calculate_ltv tool function so it can be used by OpenAI models
@tool(args_schema=LTVDescription)
def calculate_ltv(company_name: str) -> str:
    """Generate the LTV for a company to pontificate with."""
    # NOTE: the docstring above is deliberately left as one short line because
    # the @tool decorator uses it as the tool's description.
    #
    # The inputs are fixed illustrative values; only the company name varies.
    # Historical LTV is computed as average revenue divided by the churn rate.
    avg_churn = 0.25
    avg_revenue = 1000
    historical_LTV = avg_revenue / avg_churn

    # Churn is a rate, not a currency amount, so it is rendered as a
    # percentage instead of being prefixed with "$".
    report_lines = [
        f"Pontification Report for {company_name}",
        f"Avg. churn: {avg_churn:.0%}",
        f"Avg. revenue: ${avg_revenue}",
        f"historical_LTV: ${historical_LTV}",
    ]
    # Every line, including the last, is newline-terminated.
    return "\n".join(report_lines) + "\n"

# Show the OpenAI function definition generated from the tool
print(convert_to_openai_function(calculate_ltv))

# Troubleshooting methods for optimization

## Callbacks for troubleshooting

In [None]:
# Complete the CallingItIn class to return the prompt, model_name, and temperature
class CallingItIn(BaseCallbackHandler):
    """Callback handler that prints the prompts and key model settings when an LLM call starts."""

    def on_llm_start(self, serialized, prompts, invocation_params=None, **kwargs):
        """Print the prompts, then the model name and temperature.

        ``invocation_params`` is optional and is read with ``.get()``, so a
        missing parameter dict or a missing key prints ``None`` instead of
        raising inside the callback.
        """
        params = invocation_params or {}
        print(prompts)
        print(params.get("model_name"))
        print(params.get("temperature"))

# Completion model with streaming enabled
llm = OpenAI(
    openai_api_key=openai_api_key,
    model_name="gpt-3.5-turbo-instruct",
    streaming=True,
)

# Build the prompt from a template with a single {animal} placeholder
prompt_template = "What do {animal} like to eat?"
prompt = PromptTemplate.from_template(prompt_template)

# Pipe the formatted prompt into the model
chain = prompt | llm

# The callback handler is supplied through the config passed to invoke()
config = dict(callbacks=[CallingItIn()])

# Call the model with the parameters needed by the prompt
output = chain.invoke({"animal": "wombats"}, config=config)
print(output)

## Callbacks for troubleshooting

In [None]:
# Complete the PerformanceMonitoringCallback class to return the token and time
class PerformanceMonitoringCallback(BaseCallbackHandler):
    """Callback handler that prints each new token together with a timestamp."""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Print the token's repr and the current ``time.time()`` value."""
        message = f"Token: {repr(token)} generated at time: {time.time()}"
        print(message)

# Deterministic (temperature 0) completion model with streaming enabled
llm = OpenAI(
    openai_api_key=openai_api_key,
    model_name="gpt-3.5-turbo-instruct",
    temperature=0,
    streaming=True,
)

# This template has no placeholders, so the chain is invoked with an empty dict
prompt_template = "Describe the process of photosynthesis."
prompt = PromptTemplate.from_template(prompt_template)

chain = prompt | llm

# The callback handler is supplied through the config passed to invoke()
config = dict(callbacks=[PerformanceMonitoringCallback()])

# Call the chain with the callback
output = chain.invoke({}, config=config)
print("Final Output:", output)

# Evaluating model output in LangChain

## Built-in evaluation criteria

In [None]:
# Load a criteria evaluator that judges relevance, using a chat model as the grader
evaluator = load_evaluator(
    "criteria",
    llm=ChatOpenAI(openai_api_key=openai_api_key),
    criteria="relevance",
)

# Evaluate the prediction against the input question
eval_result = evaluator.evaluate_strings(
    input="What is the answer to the ultimate question of life, the universe, and everything?",
    prediction="42",
)

print(eval_result)

## Custom evaluation criteria

In [None]:
# Custom criteria: each name maps to the question the evaluator is asked
# (includes the scalability criterion)
custom_criteria = dict(
    market_potential="Does the suggestion effectively assess the market potential of the startup?",
    innovation="Does the suggestion highlight the startup's innovation and uniqueness in its sector?",
    risk_assessment="Does the suggestion provide a thorough analysis of potential risks and mitigation strategies?",
    scalability="Does the suggestion address the startup's scalability and growth potential?",
)

# Create an evaluator from custom_criteria
evaluator = load_evaluator(
    "criteria",
    llm=ChatOpenAI(openai_api_key=openai_api_key),
    criteria=custom_criteria,
)

# Evaluate the input and prediction
eval_result = evaluator.evaluate_strings(
    prediction="No, that is ridiculous.",
    input="Should I invest in a startup focused on flying cars? The CEO won't take no for an answer from anyone.",
)

print(eval_result)

## Evaluation chains

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.evaluation.qa.eval_chain import QAEvalChain

# Load the PDF into documents
loader = PyPDFLoader('documents/attention-is-all-you-need.pdf')
data = loader.load()
chunk_size = 200
chunk_overlap = 50

# Split the loaded document into overlapping chunks using RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_overlap=chunk_overlap, chunk_size=chunk_size)
docs = splitter.split_documents(data)

# Questions paired with their expected (ground-truth) answers
question_set = [
    {
        "question": "What is the primary architecture presented in the document?",
        "answer": "The Transformer.",
    },
    {
        "question": "According to the document, is the Transformer faster or slower than architectures based on recurrent or convolutional layers?",
        "answer": "The Transformer is faster.",
    },
    {
        "question": "Who is the primary author of the document?",
        "answer": "Ashish Vaswani.",
    },
]

# Embed the chunks into a Chroma vector store and set up the completion model
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
docstorage = Chroma.from_documents(docs, embedding)
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", openai_api_key=openai_api_key)

# Retrieval QA chain that reads the query from the "question" key of its input
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docstorage.as_retriever(),
    input_key="question",
)

# Generate the model responses using the RetrievalQA chain and question_set
predictions = qa.batch(question_set)

# Define the evaluation chain
eval_chain = QAEvalChain.from_llm(llm)

# Evaluate the ground truth against the answers that are returned
results = eval_chain.evaluate(
    question_set,
    predictions,
    question_key="question",
    answer_key='answer',
    prediction_key="result",
)

# Print each question with its expected answer and the model's prediction,
# followed by a blank line
for i, q in enumerate(question_set):
    print(
        f"Question {i+1}: {q['question']}",
        f"Expected Answer: {q['answer']}",
        f"Model Prediction: {predictions[i]['result']}\n",
        sep="\n",
    )

print(results)