In [1]:
'''
RAG - Job Requirements Validation Against a Resume

Instead of a simple question, this sample program validates if the resume of a candidate 
can satisfy the necessary job requirements.

1. The logic starts with Multi-Query translation of each Job Requirement to multiple sub-questions.
2. The logic queries each sub-question against the resume and aggregates 
all the answers from sub-questions as the aggregated context. (similar to RAG Decomposition) 
3. The final logic queries the aggregated context with the requirement and provides a summary to 
identify if this candidate meets this job requirements. 


Input arguments to the step_to_step_fusion are
1. Template: how to generate the sub-questions from a requirement
2. Template2: How to generate the summary of each requirement
3. Requirements: The requirement list
4. Resume: The resume of a candidate. 

'''

import os
from dotenv import load_dotenv
load_dotenv()

#if (GCP_PROJECT_ID == None): print ("Not set")
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = 'false'  #true for trace
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ["LANGCHAIN_PROJECT"] = "RAG - Multi-Query-Translation"

In [28]:

#Without using the Chain/Pipeline but same processes

from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.load import dumps, loads
from PyPDF2 import PdfReader
from langchain.prompts import ChatPromptTemplate
#Using CharaterTextSplitter maybe have better results than RecursiveCharacterTextSplitter
#text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=40)
text_splitter = CharacterTextSplitter()
#keep the default separator ('\n\n') is better than separator="\n"

def extract_pdf_text(file_path):
    pdf_file = PdfReader(file_path)
    text_data = ''
    for pg in pdf_file.pages:
        text_data += pg.extract_text()
    return text_data


def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

    
def step_by_step_query(template, template2, requirements, resume):
    from langchain_core.output_parsers import CommaSeparatedListOutputParser
    #question = "List all the companies Sam have worked for, please also list the year he worked for those companies"
# Multi Query: Different Perspectives
 
    pdf_text = extract_pdf_text(resume)
    pdf_texts = text_splitter.split_text(pdf_text)
    split_docs = text_splitter.create_documents(pdf_texts)
    ''' Don't use OpenAIEmbeddings
    client = OpenAI()
    embedding_repsonse = client.embeddings.create(input=sentences, 
                                      model="text-embedding-3-large")
     '''
    
    vectorstore = Chroma.from_documents(documents=split_docs, 
                                        embedding=OpenAIEmbeddings(model="text-embedding-3-small"))
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4}, max_tokens_limit=10000)
    
    requirements_list = requirements.split('\n\n\n\n')
    print (requirements_list)
    prompt_perspectives = ChatPromptTemplate.from_template(template)
    for requirement in requirements_list:
        #print ("internal_loop...", requirement)
        prompt_perspectives_output =  prompt_perspectives.invoke(requirement)
        #print("Output after prompt_perspectives:", prompt_perspectives_output)

        chat_openai_output = ChatOpenAI(temperature=0)(prompt_perspectives_output)
        ##print("Output after ChatOpenAI:", chat_openai_output)
        #output_parser = CommaSeparatedListOutputParser()
        parser_output = StrOutputParser().invoke(chat_openai_output)
        #print("output after StrOutputParser:", parser_output)
        #str_output_parser_output = output_parser(chat_openai_output)
        #print("Output after StrOutputParser:", str_output_parser_output)
        generate_queries= (lambda x: x.split("\n"))(parser_output)

        #print ("gen...", generate_queries)

        #retrieval_chain = generate_queries | retriever.map() | get_unique_union
 
        #docs = retrieval_chain.invoke({"question":question})

        retriever_output = retriever.map().invoke(generate_queries)
        get_union_doc_output = get_unique_union(retriever_output)

        #print("output after get_unique_union", get_union_doc_output[0])

        #docs = retrieval_chain.invoke({"question":question})
        #docs = get_union_doc_output

        #print("output after retriever map()", retriever_output)
        internal_loop (get_union_doc_output, generate_queries, template2, requirement, resume)
        
        
def internal_loop (get_union_doc_output, generate_queries, template2, requirement, resume):

    prompt = ChatPromptTemplate.from_template(template2)

    llm = ChatOpenAI(temperature=0)
    answers_pair = ""
    for question in generate_queries:
        #print("question... ", question)
        prompt_outputf =  prompt.invoke({"context": get_union_doc_output[0], 
                                        "question": question})
        #print("output from prompt again", prompt_outputf)
        llm_output = llm(prompt_outputf)
        subquestion_summary= StrOutputParser().invoke(llm_output)
        #print ("sub_summary...", subquestion_summary)  
        #based on decomposition we collect results of all subquestions  
        answers_pair = answers_pair + subquestion_summary
    
    #Final Logic Step (3)
    templatef = """Here is a set of subqestion and answer pairs:
    
    {answers_pair}
    Use the subquestion and answer pairs above to summary the similarity, in precetage, 
    if the candidate can satisfy this requirements: 
    {requirement}"""
    
    promptf = ChatPromptTemplate.from_template(templatef)
    final_prompt = promptf.invoke({"answers_pair": answers_pair, "requirement": requirement})
    final_answers = llm(final_prompt)
    print("\n\n\nRequirement....", requirement)
    print ("\nValidation Summary ...", final_answers)
    

In [29]:

resume='.\docs\samcyangResume_Gen123.pdf'

#Requirements are gathered from a job postion on job board.
requirements="""Develop and coordinate logistics CDRLs.\n\n\n\n
Must have experience with LORA, SERD, LMI and/or spare analysis\n\n\n\n
Must have experience with business rules, TMCR, IETM and/or CSDB\n\n\n\n
Must have experience with US military program office is desired\n\n\n\n
Must have Government proposal experience is desired\n\n\n\n
Must be able to obtain and hold a US secret security clearance\n\n\n\n
Have Experience manage all phases of the program from conceptualization through completion, including acquisition, planning, and managing technical performance to ensure quality, business, and financial objectives are attained.\n\n\n\n
Participate with engineering to establish design concepts, criteria, and engineering efforts for product research, development, testing, and integration.\n\n\n\n
Interact with internal and external representatives at various levels concerning resolution of technical and scheduling issues. Act as primary contact for Logistics/Sustainment program issues. \n\n\n\n
Influence, and coordinate the preparation of proposals, business plans, proposal work statements and specifications, operating budgets, and financial terms/conditions of contract(s). Influence contract acquisitions, negotiations, and modifications.\n\n\n\n
"""

template = """
You are an AI language model assistant. Your task is to tell me how a
resume is closely matching (in percentage) to the requirements:  

{requirements} 

given by the user. In order to help to better validate the similarity between
the requirement and candidate resume, this task, you can generate 2 sub-questions 
from each given requirement.  

By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 

Also, please provide recommendations if this candidate can meet the requirement 
before moving into next stage
"""


template2 = """Your task is to identify the similarity of a resume to the 
job requirement: {question}.  Based on the requirement, please rank the 
similarity, in percentage, against the context listed below, and also provide
a short summary with recommendations. 

{context}.  

Please note, if a technical keyword listed in the requirement can't be identified in 
the context above then, the similarity will be low in percentage, such as 0%, 

"""




step_by_step_query(template, template2, requirements, resume)

['Develop and coordinate logistics CDRLs.', '\nMust have experience with LORA, SERD, LMI and/or spare analysis', '\nMust have experience with business rules, TMCR, IETM and/or CSDB', '\nMust have experience with US military program office is desired', '\nMust have Government proposal experience is desired', '\nMust be able to obtain and hold a US secret security clearance', '\nHave Experience manage all phases of the program from conceptualization through completion, including acquisition, planning, and managing technical performance to ensure quality, business, and financial objectives are attained.', '\nParticipate with engineering to establish design concepts, criteria, and engineering efforts for product research, development, testing, and integration.', '\nInteract with internal and external representatives at various levels concerning resolution of technical and scheduling issues. Act as primary contact for Logistics/Sustainment program issues. ', '\nInfluence, and coordinate the

In [None]:
'''requirements="""Manage all phases of the program from conceptualization through completion, including acquisition, planning, and managing technical performance to ensure quality, business, and financial objectives are attained.\n\n\n\n
Develop and coordinate logistics CDRLs.\n\n\n\n
Participate with engineering to establish design concepts, criteria, and engineering efforts for product research, development, testing, and integration.\n\n\n\n
Interact with internal and external representatives at various levels concerning resolution of technical and scheduling issues. Act as primary contact for Logistics/Sustainment program issues. \n\n\n\n
Influence, and coordinate the preparation of proposals, business plans, proposal work statements and specifications, operating budgets, and financial terms/conditions of contract(s). Influence contract acquisitions, negotiations, and modifications.\n\n\n\n
Ensure technical leadership and excellence is maintained by participating in the planning, attraction, selection, retention, and development of the required management, professional, and technical talent.\n\n\n\n
Act as the primary customer contact for Logistics/Sustainment program activities and may lead program review sessions with customers to discuss cost, schedule, and technical performance.\n\n\n\n
Manage the development of and ensure the security of proprietary technology and maintain the strict confidentiality of sensitive information.\n\n\n\n
Typically requires a Bachelors, Masters or PhD in engineering, science or a related technical field as well as five or more years of progressively complex program or logistics administration experience. May substitute equivalent experience in lieu of education.\n\n\n\n
Demonstrates technical expertise and application of logistics management principles, concepts, and practice as well as leadership skills including organizing, planning, scheduling, and coordinating workloads to meet established deadlines or milestones.\n\n\n\n
Must be able to develop solutions to problems that require coordination of technologies across program requirements.\n\n\n\n
Strong communication, leadership, presentation, and interpersonal skills are required to enable an effective interface with other departments, all levels of management, professional and support staff, customers, potential customers, and government representatives.\n\n\n\n
Customer focused, must be able to work on a self-initiated basis and in a team environment, and able to work extended hours and travel as required.\n\n\n\n
Experience with LORA, SERD, LMI and/or spare analysis\n\n\n\n
Experience with business rules, TMCR, IETM and/or CSDB\n\n\n\n
Experience with US military program office is desired\n\n\n\n
Government proposal experience is desired\n\n\n\n
Must be able to obtain and hold a US secret security clearance\n\n\n\n"""
'''