In [1]:
import openai
import os
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import AzureChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain


# Load key/value pairs from a local .env file into the process environment
# so the lookups below can see them.
load_dotenv()

# Credentials and endpoint. Each value is None when its variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_VERSION = os.environ.get("OPENAI_DEPLOYMENT_VERSION")

# Chat model deployment.
OPENAI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")

# Embedding model deployment.
OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_EMBEDDING_MODEL_NAME = os.environ.get("OPENAI_EMBEDDING_MODEL_NAME")

# Point the module-level openai settings at the Azure deployment.
openai.api_type = "azure"
openai.api_key = OPENAI_API_KEY
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_version = OPENAI_DEPLOYMENT_VERSION


In [3]:
# Gather the PDF files in the 'resume' directory (relative to the working
# directory) and load every page into `docs`.
#
# Only regular files whose name ends in .pdf (case-insensitive) are kept:
# sub-directories and stray files (e.g. notebook checkpoints, .DS_Store) are
# not PDFs and should not be handed to PyPDFLoader. Paths are sorted because
# os.scandir yields entries in arbitrary order, which would otherwise make the
# document order differ between runs or machines.
with os.scandir('resume') as pdfs:  # context manager closes the directory handle
    pdf_paths = sorted(
        entry.path
        for entry in pdfs
        if entry.is_file() and entry.name.lower().endswith('.pdf')
    )

# One loader per PDF file.
loaders = [PyPDFLoader(pdf_path) for pdf_path in pdf_paths]

# Flatten the documents returned by every loader into a single list.
docs = []
for loader in loaders:
    docs.extend(loader.load())


In [6]:
# Cut the loaded documents into overlapping chunks for embedding; the overlap
# keeps text that straddles a chunk boundary present in both neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Display the number of chunks produced.
len(splits)

33

In [7]:
# Embedding client bound to the embedding deployment configured above.
# NOTE(review): chunk_size=1 presumably limits each embedding request to a
# single text (some Azure deployments require this) - confirm.
embedding = OpenAIEmbeddings(
    deployment=OPENAI_EMBEDDING_DEPLOYMENT_NAME,
    model=OPENAI_EMBEDDING_MODEL_NAME,
    chunk_size=1,
)

In [8]:
# Embed every chunk and build a FAISS index over the resulting vectors.
vectordb = FAISS.from_documents(splits, embedding)

# Persist the index to disk under dbs/documentation/faiss_index.
vectordb.save_local("dbs/documentation/faiss_index")

100%|██████████| 33/33 [00:08<00:00,  3.68it/s]


In [9]:

# Chat model served by the Azure OpenAI deployment configured above.
# temperature=0.0 requests the least random sampling the service offers.
llm = AzureChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    openai_api_base=OPENAI_DEPLOYMENT_ENDPOINT,
    openai_api_version=OPENAI_DEPLOYMENT_VERSION,
    deployment_name=OPENAI_DEPLOYMENT_NAME,
    model=OPENAI_MODEL_NAME,
    temperature=0.0,
)

In [10]:
# Build the answering prompt. The template has two placeholders:
#   {context}  - filled with the retrieved document text
#   {question} - filled with the question being answered
# NOTE(review): the template text misspells "recommending" as "reccomending"
# and reads "You are Talent Acquisition bot". It is left untouched here because
# the string is sent to the model at runtime and changing it may change answers.
template = """You are Talent Acquisition bot, you will be reccomending people based on their skills, use the given context to answer the question below. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
# Wrap the raw string in a PromptTemplate for use by the chain.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [11]:
# Conversation memory exposed to the chain under the "chat_history" key,
# returning the history as message objects rather than one string.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [12]:
# Expose the FAISS index as a retriever with its default search settings.
retriever = vectordb.as_retriever()

# Conversational retrieval chain: retrieves chunks for each question, answers
# with the custom prompt, and keeps the dialogue in `memory`.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT},
    memory=memory,
)

In [13]:
# Example query against the indexed documents.
# NOTE(review): the chain was built with conversation memory, so re-running
# this cell presumably appends another turn to the chat history - confirm.
question = "give me a list of candidates with python as a skill"

result = qa.run(question)

# print() shows the answer as plain text rather than as a quoted repr.
print(result)

Ankit Kamanalli is a highly skilled Full-stack Developer with expertise in Python3, along with Java, React.js, and Azure DevOps. Sanya Garg also has experience with Python as a secondary skill, along with Java Full Stack development and AWS DevOps tools. Thanks for asking!
