In [1]:
from langchain.vectorstores.chroma import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import HumanMessage, AIMessage
from dotenv import load_dotenv
import time

In [11]:
def make_chain():
    """Build a conversational retrieval chain over the persisted resume collection.

    Creates a ``gpt-3.5-turbo`` chat model, opens the Chroma collection
    ``resume_sample_1`` persisted under ``data/chroma/resume`` using OpenAI
    embeddings, and plugs that collection's retriever into a
    ``ConversationalRetrievalChain``.

    Returns:
        The chain object returned by ``ConversationalRetrievalChain.from_llm``.
        It is invoked with a dict holding ``"question"`` and ``"chat_history"``.
    """
    model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        # Pass a number, not the string "0", so the value does not depend on
        # implicit string-to-float coercion by the model wrapper.
        temperature=0,
    )
    embedding = OpenAIEmbeddings()

    vector_store = Chroma(
        collection_name="resume_sample_1",
        embedding_function=embedding,
        persist_directory="data/chroma/resume",
    )

    return ConversationalRetrievalChain.from_llm(
        model,
        retriever=vector_store.as_retriever(),
    )

In [13]:
# Load environment variables from a local .env file before any client is built.
load_dotenv()

chain = make_chain()
chat_history = []

# Prompt sent to the chain. A backslash only continues the line when it is the
# very last character on that line; a space after it turns it into an invalid
# "\ " escape that leaves a stray backslash in the prompt, so none of the
# continuation backslashes below may be followed by whitespace.
query = """
    "This is a resume. Can you give me the name, phone, email,\
    previous working experience includes companies, title, working dates, \
    and college, major in colleges? \
    Give the answer in json format. If not available, please put 'null' \
    For example, {"name": <name>, \
                "phone": <phone>, \
                "email": <email>, \
                "working_experiences": [
                    {"company": <company>, \
                    "title": <title>, \
                    "working_date": <working_dates> },\
                    {"company": <company>, \
                    "title": <title>, \
                    "working_date": <working_dates> }\
                    ],
                "college": <college>, \
                "major": <major> \
                }
    "
    """

# Time only the chain call so printing and bookkeeping do not skew the figure.
start_time = time.perf_counter()
response = chain({"question": query, "chat_history": chat_history})
execution_time = time.perf_counter() - start_time

answer = response["answer"]

# Record the exchange so a follow-up question can reuse it as chat history.
chat_history.append(HumanMessage(content=query))
chat_history.append(AIMessage(content=answer))

print(f"Answer: {answer}")
print(f"Execution time: {execution_time} seconds")

Answer: {
  "name": "Do Ngoc Tan",
  "phone": "+84 772954324",
  "email": "darkknightkhtn2008@gmail.com",
  "working_experiences": [
    {
      "company": "A Startup in Data Privacy",
      "title": "Lead Engineer",
      "working_date": "April 2023 - Now"
    },
    {
      "company": "WorldQuant, Vietnam",
      "title": "Senior Engineer/Lead Engineer (C++)",
      "working_date": "May 2020 - April 2023"
    },
    {
      "company": "Codix, Vietnam",
      "title": "C&SQL Developer",
      "working_date": "July 2013 - February 2014"
    },
    {
      "company": "Renesas, Vietnam",
      "title": "Embedded Software Engineer",
      "working_date": "March 2012 - July 2013"
    },
    {
      "company": "MaunaKea Technologies, Vietnam and Paris",
      "title": "R&D Software Engineer",
      "working_date": "February 2014 - February 2018"
    },
    {
      "company": "Grasshopper Asia, Vietnam and Singapore",
      "title": "High Frequency Trading Software Engineer (C++/Rust)",
    

In [29]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain

def make_chain_retrieval():
    """Build a ``RetrievalQA`` chain over the locally stored FAISS resume index.

    Loads the index saved at ``data/faiss/resume_sample_1`` using OpenAI
    embeddings and combines its retriever with a ``gpt-3.5-turbo`` model
    through the ``"stuff"`` chain type.

    Returns:
        The chain object returned by ``RetrievalQA.from_chain_type``.
    """
    # gpt-3.5-turbo is a chat model, so use the chat wrapper (ChatOpenAI,
    # imported in the notebook's first cell) rather than the completion-style
    # OpenAI class.
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0,
    )
    embedding = OpenAIEmbeddings()

    # NOTE(review): FAISS.load_local appears to unpickle part of the saved
    # index, so only load directories you created yourself. Newer LangChain
    # releases reportedly require allow_dangerous_deserialization=True here --
    # confirm against the installed version before upgrading.
    vector_store = FAISS.load_local("data/faiss/resume_sample_1", embedding)

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )


In [30]:
qa = make_chain_retrieval()

# Prompt sent to the chain. A backslash only continues the line when it is the
# very last character on that line; a space after it turns it into an invalid
# "\ " escape that leaves a stray backslash in the prompt, so none of the
# continuation backslashes below may be followed by whitespace.
query = """
    This is a resume. Can you give me the name, phone, email,\
    previous working experience includes companies, title, working dates, \
    and college, major in colleges? \
    Give the answer in json format. If not available, please put 'null' \
    For example, {"name": <name>, \
                "phone": <phone>, \
                "email": <email>, \
                "working_experiences": [
                    {"company": <company>, \
                    "title": <title>, \
                    "working_date": <working_dates> },\
                    {"company": <company>, \
                    "title": <title>, \
                    "working_date": <working_dates> }\
                    ],
                "college": <college>, \
                "major": <major> \
                }
    """

# Time only the chain call so printing does not skew the figure.
start_time = time.perf_counter()
answer = qa.run(query)
execution_time = time.perf_counter() - start_time

print(answer)
print(f"Execution time: {execution_time} seconds")

{
    "name": "Do Ngoc Tan",
    "phone": "+84 772954324",
    "email": "darkknightkhtn2008@gmail.com",
    "working_experiences": [
        {
            "company": "A Startup in Data Privacy",
            "title": "Lead Engineer",
            "working_date": "April 2023 - Now"
        },
        {
            "company": "WorldQuant, Vietnam",
            "title": "Senior Engineer/Lead Engineer (C++)",
            "working_date": "Unknown"
        },
        {
            "company": "Codix, Vietnam",
            "title": "C&SQL Developer",
            "working_date": "July 2013 - Feb 2014"
        },
        {
            "company": "Renesas, Vietnam",
            "title": "Embedded Software Engineer",
            "working_date": "March 2012 - July 2013"
        },
        {
            "company": "MaunaKea Technologies, Vietnam and Paris",
            "title": "R&D Software Engineer",
            "working_date": "Feb 2014 - Feb 2018"
        },
        {
            "company": "World