# Baseline without RAG.

Import packages.

In [111]:
import os
import pandas as pd

# from datetime import datetime
from dotenv import load_dotenv
from json import loads, dumps
from langchain.tools import tool
# from langchain.tools.retriever import create_retriever_tool

# from pydantic import BaseModel, Field
from typing import Annotated, Sequence, TypedDict, Literal

from IPython.display import Image, display

# from langchain_chroma import Chroma
# from langchain_community.document_loaders import PyPDFLoader

from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

from langchain_openai import OpenAIEmbeddings, ChatOpenAI
# from langchain_text_splitters import RecursiveCharacterTextSplitter

from langfuse import Langfuse, get_client
from langfuse.langchain import CallbackHandler
from langfuse.openai import openai  # Don't delete this line.

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, END, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import tools_condition, ToolNode

Set up environment variables.

In [3]:
# Load environment variables (via python-dotenv) and read the OpenAI key.
load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')

# Report whether the key is available; only its first 8 characters are echoed.
if not openai_api_key:
    print("OpenAI API Key not set - please head to the troubleshooting guide in the setup folder")
else:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")

OpenAI API Key exists and begins sk-proj-


In [4]:
# Read the Langfuse credentials from the environment.
langfuse_public_key = os.getenv('LANGFUSE_PUBLIC_KEY')
langfuse_secret_key = os.getenv('LANGFUSE_SECRET_KEY')

# Construct a client explicitly from those credentials ...
langfuse = Langfuse(
    public_key=langfuse_public_key,
    secret_key=langfuse_secret_key,
    host="https://cloud.langfuse.com",
    # host = "https://us.cloud.langfuse.com"
)

# ... then rebind the name to whatever get_client() returns.
# NOTE(review): presumably this is the shared client configured by the call above - confirm.
langfuse = get_client()

# Verify connection
print(
    "Langfuse client is authenticated and ready!"
    if langfuse.auth_check()
    else "Authentication failed. Please check your credentials and host."
)

Langfuse client is authenticated and ready!


Define supporting variables and functions.

In [None]:
# Notebook-wide settings. Every entry below is commented out, so CONFIG is
# currently an empty dict; the commented keys are vector-store / chunking
# options that this baseline does not use.
CONFIG = {
    # "vector_store_directory": "./chroma_db",
    # "vector_store_collection_name": "langchain_rag",
    # "chunk_size": 500,
    # "chunk_overlap": 50
}

In [115]:
def format_job_descriptions(jd_dataframe):
    """
    Serialize job descriptions to a JSON string in 'records' orientation.

    Args:
        jd_dataframe: A pandas DataFrame (one row per job) or a single
            pandas Series, which is treated as a one-row DataFrame.

    Returns:
        str: A JSON array with one object per row.
    """
    # A Series holds a single row; transpose it into a one-row frame first.
    frame = (
        jd_dataframe.to_frame().T
        if isinstance(jd_dataframe, pd.Series)
        else jd_dataframe
    )
    return frame.to_json(orient='records')

In [112]:
# Load the test-set CSV and keep its first two rows as a small sample.
df = pd.read_csv("data/X_test.csv")
# A slice-based .iloc selection always yields a DataFrame (never a Series),
# so no Series -> DataFrame conversion is needed here.
test = df.iloc[0:2, :]

In [114]:
# Serialize the sample through the shared helper so this preview uses the
# same 'records' JSON format as the rest of the notebook.
json_records = format_job_descriptions(test)
parsed = loads(json_records)
# Pretty-print the parsed records for a quick visual check.
print(dumps(parsed, indent=4))

[
    {
        "title": "Driver / Tour Guide for Coach & 4WD Coach Tours",
        "abstract": "This role includes conducting 1-20 Day Tours to a range of locations all over Australia with mature passengers for a well established family company.",
        "job_description_clean": "Casey Australia Tours require a experienced Coach Drivers / Tour Guide for the rest of the 2019 season of extended motel and/or camping tours starting mid/late June The role requires driving, minor coach maintenance, cleaning, guiding and commentating plus some cooking. The ideal person should have; 1. A sense of humour and engaging personality 2. Initiative and problem solving skills 3. Confidence and be a good public speaker 4. A current manual HR drivers licence with F endorsement. (or ability to get one quickly) 5. A safe driving record 6. Clear Criminal History 7. Experience in some field that can be related 8. First Aid Training 9. Good Navigation in remote and city areas Handy assets to have; 1. Mecha

In [None]:
@tool
def check_basic_information(job_descriptions) -> dict:
    """
    Return the basic information of each job advertisement as structured records.

    job_descriptions may be a JSON string (one object, or an array with one
    object per job advertisement), a dict, a list of dicts, or a pandas
    DataFrame/Series.
    """
    # NOTE: the docstring above is what the @tool decorator exposes to the
    # model as the tool description, so it is kept short and input-oriented.

    # When the model issues the tool call, the argument arrives as
    # JSON-compatible data rather than a DataFrame; decode JSON text first.
    if isinstance(job_descriptions, str):
        job_descriptions = loads(job_descriptions)

    # A Series is a single job; turn it into a one-row DataFrame.
    if isinstance(job_descriptions, pd.Series):
        job_descriptions = job_descriptions.to_frame().T

    if isinstance(job_descriptions, pd.DataFrame):
        list_of_dicts = job_descriptions.to_dict(orient='records')
    elif isinstance(job_descriptions, dict):
        # A single job given as one mapping.
        list_of_dicts = [job_descriptions]
    else:
        # Already a sequence of per-job records.
        list_of_dicts = list(job_descriptions)

    jobs_info = [
        {"job_index": i, "fields": job_dict}
        for i, job_dict in enumerate(list_of_dicts)
    ]

    return {
        "summary": "The information of each job advertisement is as follows.",
        "jobs": jobs_info,
    }

In [101]:
# Tools made available to the model; host() binds this list via bind_tools().
tools = [check_basic_information]

Define Graph State.

In [102]:
class AgentState(TypedDict):
    """
    State carried through the graph.

    Attributes:
        messages: The conversation so far, including user input and agent outputs.

    Notes:
        The second argument of ``Annotated`` tells the graph how to merge an
        update into the existing value. Without it an update replaces the old
        value; ``add_messages`` appends the new messages instead.
    """
    messages: Annotated[Sequence[BaseMessage], add_messages]

Define Basic Nodes.

Input: job description strings.

Host node

Conditional Edge: whether the question is about checking job descriptions.

If not, return generate.

If yes, go to next node.

Get industry information and professional skills.

Get general requirements.

Evaluate the job advertisement.

Refine the job advertisement.



In [None]:
def host(state):
    """
    Entry node of the workflow: send the conversation to the chat model.

    The model is bound to the module-level ``tools`` list, so its response
    may be either a tool call or a direct answer.

    Args:
        state (messages): The current state; only ``state["messages"]`` is read.

    Returns:
        dict: ``{"messages": [response]}``, a state update holding the
        model's response.
    """
    print("---CALL AGENT---")
    conversation = state["messages"]

    # Build the chat model and attach the tools in a single expression.
    llm_with_tools = ChatOpenAI(
        temperature=0, streaming=True, model="gpt-4-turbo"
    ).bind_tools(tools)

    return {"messages": [llm_with_tools.invoke(conversation)]}