In [7]:
import pandas as pd
import numpy as np
import os
from io import StringIO 
import json
from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import JsonOutputParser
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS, Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import ChatPromptTemplate, PromptTemplate




In [4]:
from openai import OpenAI

# --- Notebook configuration ---
# BASE_FOLDER is a path relative to the working directory; QUESTION_FILE is
# the workbook name joined onto it when the questions are loaded.
BASE_FOLDER = "./test_data"
QUESTION_FILE = "document_questions.xlsx"
RAW_DATA_FOLDER = "raw_text"

# Model identifier constant.
MODEL = "gpt-3.5-turbo-0301"

# Raw OpenAI client. The key comes from the OPENAI_API_KEY environment
# variable; when it is unset, the placeholder string below is passed instead.
# NOTE(review): prefer exporting OPENAI_API_KEY over editing the placeholder,
# so that no real key ends up saved inside the notebook.
client = OpenAI(
    api_key=os.getenv(
        "OPENAI_API_KEY",
        "<your OpenAI API key if not set as env var>",
    )
)

In [5]:
# Load the question workbook from BASE_FOLDER.
# Uses the QUESTION_FILE constant: the lowercase name `question_file` is not
# defined anywhere, so the original line raised NameError on a fresh kernel.
df = pd.read_excel(os.path.join(BASE_FOLDER, QUESTION_FILE))

In [11]:
# Sample question/answer pair used to exercise both evaluation options below.
question = 'What is meant by "computational finance"?'
# Adjacent string literals are concatenated into one single-line answer.
answer = (
    "Computational finance refers to the use of computational tools to craft models "
    "that provide insights into investor heterogeneity and dynamics in financial settings. "
    "Agent-based computational models view financial markets as interacting groups of "
    "learning, boundedly-rational agents. "
    "These models require computational tools when analytic solutions are impossible."
)

# Chat model used by both evaluation chains below; temperature=0 requests the
# least random sampling so repeated grading runs stay as stable as possible.
# NOTE(review): the MODEL constant from the configuration cell is not used
# here — confirm which model identifier is intended.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Option 1

- Uses LangChain's built-in JSON output parser (`JsonOutputParser`)
- Declares the grade as an Enum type, so the generated format instructions list the accepted labels

In [27]:
from enum import Enum

class gradeEnum(str, Enum):
    # Grade labels an evaluation may carry. Canonical values stay lowercase;
    # lookups by value additionally ignore letter case and surrounding
    # whitespace, so gradeEnum("Correct") resolves to gradeEnum.correct
    # instead of raising ValueError.
    # Documented with comments rather than a docstring so __doc__ is unchanged
    # (NOTE(review): schema generators may surface a class docstring in the
    # prompt — confirm before converting this to a docstring).
    correct = "correct"
    incorrect = "incorrect"

    @classmethod
    def _missing_(cls, value):
        # Enum calls this hook only after an exact value lookup has failed.
        if isinstance(value, str):
            normalized = value.strip().lower()
            for member in cls:
                if member.value == normalized:
                    return member
        # Returning None lets Enum raise its usual ValueError.
        return None
    
class LLMEvalResult(BaseModel):
    # Schema of one evaluation result: a grade label plus a short justification.
    # This model is handed to the JSON output parser as its pydantic_object, so
    # the Field descriptions below presumably end up in the prompt's format
    # instructions; treat them as prompt text, not only as documentation.
    # NOTE(review): the grade description advertises capitalised labels while
    # gradeEnum's values are lowercase — confirm which casing is intended.
    grade: gradeEnum = Field(description="Final grade label. Accepted labels : Correct, Incorrect")
    description: str = Field(description="Explanation of why the specific grade was assigned. Must be concise. Not more than 2 sentences")

In [28]:
# Parser that turns the model's reply into JSON; LLMEvalResult supplies the
# schema used for its format instructions.
json_parser = JsonOutputParser(pydantic_object=LLMEvalResult)

# Grading prompt. {format_instructions} is pre-filled when the template is
# built; {question} and {answer} are supplied on every invocation.
qa_eval_prompt_text = """
You are a teacher evaluating a test. 
You are provided with a question along with an answer for the question written by a student. Evaluate the question-answer pair and provide feedback.
{format_instructions}
Question : {question}
Answer : {answer}
"""

qa_eval_prompt = PromptTemplate(
    input_variables=["question", "answer"],
    partial_variables=dict(format_instructions=json_parser.get_format_instructions()),
    template=qa_eval_prompt_text,
)

In [12]:
# Option 1 pipeline: fill the prompt, call the chat model, then parse the
# reply with the JSON output parser.
llm_eval_chain = qa_eval_prompt | llm | json_parser

In [13]:
# Grade the sample question/answer pair; the parsed result is shown as the
# cell output.
llm_eval_chain.invoke({'question':question, 'answer':answer})

{'grade': 'Correct',
 'description': 'The answer provides a clear and concise explanation of computational finance, highlighting the use of computational tools in crafting models for financial settings.'}

In [33]:
# Character count of the rendered prompt with empty question and answer,
# i.e. the fixed size of the template once its format instructions are filled in.
# NOTE(review): qa_eval_prompt is rebound by the Option 2 cell further down, so
# this measures the Option 1 prompt only when cells are run top to bottom.
len(qa_eval_prompt.invoke({'question':"", 'answer':""}).text)

1165

# Option 2 
- No explicit constraint on grade value output

In [14]:
# Option 2 prompt: the expected JSON layout is written out by hand rather than
# generated from a schema. Doubled braces {{ }} stand for literal braces in the
# template; {question} and {answer} are the input variables.
# NOTE(review): this rebinds qa_eval_prompt_text and qa_eval_prompt, replacing
# the Option 1 objects of the same names.
qa_eval_prompt_text = """
You are a teacher evaluating a test. 
You are provided with a question along with an answer for the question written by a student. Evaluate the question-answer pair and provide feedback in the following JSON format :
{{"grade" : Final grade label. Accepted labels : Correct, Incorrect,
"description" : Explanation of why the specific grade was assigned. Must be concise. Not more than 2 sentences
}}


Question : {question}
Answer : {answer}
"""

qa_eval_prompt = ChatPromptTemplate.from_template(qa_eval_prompt_text)
    

In [15]:
# Option 2 pipeline: prompt followed by the chat model, with no output parser
# attached, so the caller has to parse the reply itself.
llm_eval_chain = qa_eval_prompt | llm 

In [16]:
# Run the Option 2 chain on the sample pair and keep the reply so its content
# can be parsed separately.
llm_eval_output = llm_eval_chain.invoke({'question':question, 'answer':answer})

In [18]:
# Parse the reply text as JSON. json.loads reads the string directly, so no
# StringIO wrapper is needed.
# NOTE(review): this raises json.JSONDecodeError if the model returns anything
# other than bare JSON, since Option 2 has no parser that tolerates extra text.
json.loads(llm_eval_output.content)

{'grade': 'Correct',
 'description': 'The answer provides a clear and accurate definition of computational finance, explaining its use of computational tools in financial settings and the concept of agent-based computational models.'}

In [26]:
# Character count of the first message of the rendered Option 2 prompt with
# empty question and answer, i.e. the fixed size of the hand-written template.
len(qa_eval_prompt.invoke({'question':"", 'answer':""}).messages[0].content)

423

# Bulk Eval Function