In [36]:
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain

In [37]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = 'langchain_api_key'

In [38]:
os.environ['OPENAI_API_KEY'] = 'openai_api_key'

In [47]:
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

In [48]:
prompt_template = PromptTemplate(
    input_variables=["question", "answer"],
    template="""
You are an evaluator for a knowledge model.  
Your task is to judge the RELEVANCE of the ANSWER to the USER QUESTION.  

### Evaluation Goal:
- Check how well the answer focuses on the user's question.
- Penalize answers that contain unnecessary or unrelated details.

### USER QUESTION:
{question}

### ANSWER:
{answer}

### Scoring Criteria:
1 – Very irrelevant. The answer does not address the question or contains mostly unrelated content.  
2 – Somewhat irrelevant. The answer touches on the question but is filled with unnecessary or unrelated details.  
3 – Moderately relevant. The answer addresses the question but includes some off-topic information.  
4 – Mostly relevant. The answer is focused on the question with only minor irrelevant parts.  
5 – Fully relevant. The answer directly addresses the question without any irrelevant details.

Respond only in JSON:
{{
  "score": <number>,
  "justification": "<brief reason>"
}}

"""
)

In [49]:
evaluation_chain = (prompt_template | llm)

In [50]:
question = """
what is health according to the author?
"""

In [51]:
answer_text = """
According to the author, overall health of citizens could be addressed by other methods because health is influenced by many factors, not just exercise. 
"""

# answer_text = """
# According to the author, the overall health of citizens can be addressed by methods other than launching more sports facilities because health depends on multiple factors, not solely on access to exercise. While the author acknowledges that increasing sports facilities can encourage physical activity and improve concentration, optimism, and productivity—as seen in Hanoi, where citizens became healthier due to local exercise equipment—this approach only partially improves public health. 

# """

In [52]:
result = evaluation_chain.invoke({"question": question, "answer": answer_text})

In [53]:
print(result.content)

```json
{
  "score": 1,
  "justification": "The answer does not directly address the user's question about the author's definition of health. It instead discusses methods to address health and factors influencing it, which is unrelated to the specific question asked."
}
```
