In [58]:
import os

# Creating the LLM


In [59]:
api_key = os.getenv("AZURE_OPENAI_API_KEY")
endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(
    openai_api_version="2024-02-01",
    deployment_name="gpt-35-turbo-16k",
    openai_api_key=api_key,
    openai_api_type=endpoint,
)

# Loading our doucments


In [60]:
from langchain_community.document_loaders import PyPDFLoader

document_paths = ["content/resume.pdf"]

data = []

for path in document_paths:
    loader = PyPDFLoader(path)
    page = loader.load_and_split()
    data.extend(page)

# Split the documents into smaller chunk


In [61]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)

texts = text_splitter.split_documents(data)

# Set up the Vector Database and Embeddings


In [62]:
from langchain_openai import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS

embeddings = AzureOpenAIEmbeddings()

document_search = FAISS.from_documents(texts, embeddings)
retriever = document_search.as_retriever()

# Set up the LLM Chain


In [63]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

template = """"
    Answer the question based on the context: {context}
    
    Question: {question}
    
    You are a helpful assistant that helps to give recruiters or manager answers on my capabilities.
    You are to answer their questions that give them more insights on my skill set and past experiences
    When the user refer You, it is refering to the candidate.
    
    Example:
    
    Q:Where are you studying now?
    
    A:I study at NUS.
"""

prompt = ChatPromptTemplate.from_template(template)

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [64]:
sample_query = "What is the purpose of the guide?"

response = chain.invoke(sample_query)

print(response)

The purpose of the guide is to provide recruiters or managers with more insights on the candidate's skill set and past experiences.


In [65]:
import pandas as pd


def similar_dist(query: str, vectorstore):
    """
    Return Dataframe that consist of the text and the its distance from the query vector
    """

    distance_list = []

    similar_score = vectorstore.similarity_search_with_score(query)

    for n in range(0, len(similar_score)):
        text = vectorstore.similarity_search_with_score(query)[n][0].page_content
        distance = vectorstore.similarity_search_with_score(query)[n][1]

        distance_list.append((text, distance))

    return pd.DataFrame(distance_list, columns=["text", "distance to query"])

In [66]:
# This function uses as similarity score to find the most similar text to the query
# Can think of other methods to evaulate the similarity between the query and the text
similar_dist(sample_query, document_search)

Unnamed: 0,text,distance to query
0,and enhancing the interview selection process.,0.479918
1,requirements gathering and documentation proce...,0.480015
2,efficiency.,0.491519
3,allow users to book reservations for various f...,0.49191


In [67]:
import gradio as gr

In [68]:
def source_question_answer(
    query: str, vectorstore: FAISS = document_search, chain=chain
):
    """
    Return answer to the query
    """

    response = chain.invoke(query)
    return response

In [69]:
def chat(chat_history, user_input):

    # input_docs = vectorstore.similarity_search(user_input,k=3)
    # qa_chain = load_qa_chain(llm, chain_type="stuff")
    # chain_response = qa_chain.run(input_documents=input_docs, question=user_input)

    chain_response = source_question_answer(user_input)

    print(chain_response)
    response = ""
    for letter in "".join(
        chain_response
    ):  # [bot_response[i:i+1] for i in range(0, len(bot_response), 1)]:
        response += letter + ""

        yield chat_history + [(user_input, response)]

In [78]:
with gr.Blocks() as demo:
    gr.Markdown("""
    # Chatbot Demo
    **Welcome!** Chat with my assistant to get answers on my skills and past experiences.
    
    ### Example Questions
    - What previous experience do you have?
    - What is your availability for internship?
    """)
    
    examples = [
        ["What previous experience do you have?"],
        ["What is your availability for internship?"],
    ]
    
    chatbot = gr.Chatbot()
    message = gr.Textbox(placeholder="Type your message here...", show_label=False)
    message.submit(chat, [chatbot, message], chatbot)
    gr.Examples(examples, inputs=message)


demo.queue().launch(debug=True, share=True)

Running on local URL:  http://127.0.0.1:7865
Running on public URL: https://d9595427e41f21083a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Based on the provided context, the candidate's availability for an internship is as follows:

1. Hologi c Business Analytics Intern: May 2023 - Nov 2023
2. TikTok AI Data Operations Intern: Dec 2023 - Apr 2024
3. AI Singapore AI Engineer Intern: May 2024 - Present
4. NUS Business Analytics Centre Data Analyst: Oct 2023 - Feb 2024

Please note that the availability mentioned above is based on the internship durations mentioned in the context.
