In [None]:
#Install dependencies
!pip install langchain InstructorEmbedding sentence_transformers faiss-gpu pypdf

In [None]:
#pdf loader
from langchain.document_loaders import PyPDFLoader
#textsplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Embeddings
from langchain.embeddings import HuggingFaceEmbeddings
from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings import HuggingFaceInstructEmbeddings
#storing vector embeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub #to connect llm models from huggingface
from langchain.chains.question_answering import load_qa_chain #to creating q&a chain from langchain

In [None]:
# Hugging Face Hub credentials: reuse a token already present in the environment,
# otherwise prompt for one without echoing it, so that no secret is stored in the
# notebook and an existing valid token is never overwritten with an empty string.
import os
from getpass import getpass

if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass('Hugging Face API token: ')

In [None]:
# Read the resume PDF into LangChain documents
PDF_PATH = "/content/Shaik_Asheesh_Resume.pdf"
loader = PyPDFLoader(PDF_PATH)
pages = loader.load_and_split()

In [None]:
# Cut the loaded pages into overlapping chunks with LangChain's recursive splitter
CHUNK_SIZE = 500
CHUNK_OVERLAP = 200
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(pages)

In [None]:
# Number of chunks produced by the text splitter (displayed as the cell output)
len(docs)

22

In [None]:
# Initialise the Instructor embedding model.
# Use the GPU when one is available and fall back to CPU otherwise, instead of
# failing outright on a CPU-only runtime.
import torch

embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-xl",
    model_kwargs={"device": embedding_device},
)

Downloading (…)7f436/.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

Downloading (…)/2_Dense/config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/3.15M [00:00<?, ?B/s]

Downloading (…)0daf57f436/README.md:   0%|          | 0.00/66.3k [00:00<?, ?B/s]

Downloading (…)af57f436/config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)7f436/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.40k [00:00<?, ?B/s]

Downloading (…)f57f436/modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

load INSTRUCTOR_Transformer
max_seq_length  512


In [None]:
# Build a FAISS vector store from the chunks in `docs`, embedding them with
# `instructor_embeddings`
db = FAISS.from_documents(docs, instructor_embeddings)

In [None]:
# Connect to the google/flan-t5-xxl model through the Hugging Face Hub wrapper
llm_settings = {"temperature": 0.5, "max_length": 200}
llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs=llm_settings)

In [None]:
# Build the QA chain (chain_type="stuff"), retrieve the chunks most similar to the
# question from the vector store, and run the chain over them.
# The retrieved chunks get their own name so the full chunk list in `docs` (the
# input used to build the FAISS index) is not overwritten by a query.
chain = load_qa_chain(llm, chain_type="stuff")
query = "What is the Name of the person in Resume provided"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)  # cell output is the answer

'Shaik Asheesh'

In [None]:
# Ask about total experience.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "What is his total experience?"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'2.6 years'

In [None]:
# Ask whether statistics is among the candidate's skills.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "Does he know statistics?"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'Yes'

In [None]:
# Ask which statistics concepts are listed.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "what are the statistics concepts?"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'Hypothesis Testi ng, Z -test, T -test, ANOVA,'

In [None]:
# Ask about professional experience.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "tell me about professional experience?"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'Associate Data Scientist, TheMathCompany, Hyderabad, Feb 2023 - present'

In [None]:
# Ask for the previous employer.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "what is the previous company he worked for"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'Accenture'

In [None]:
# Ask for the previous employer together with the time period.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "what is the previous company he worked for along with time period"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'Accenture, Hyderabad, March 2021 - Jan 2023'

In [None]:
# Ask which roles were held at Accenture.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "what are the different roles he worked in Accenture"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'Application Development Analyst'

In [None]:
# Ask for the highest education qualification.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "what is the highest education qualification ?"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'B. Tech'

In [None]:
# Ask whether a Data Science certification is listed.
# Retrieval results are kept in `matched_docs` so that `docs` (the chunk list the
# FAISS index was built from) is not overwritten.
chain = load_qa_chain(llm, chain_type="stuff")
query = "Does he have certification in Data Science ?"
matched_docs = db.similarity_search(query)
chain.run(input_documents=matched_docs, question=query)

'Yes .'

In [None]:
# Optional: a simple interactive chatbot over the resume

In [None]:
from langchain.chains import ConversationalRetrievalChain

In [None]:
from IPython.display import display
import ipywidgets as widgets

# Conversational chain that uses the vector store as its retriever and accepts
# the chat history alongside each question
retriever = db.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever)

In [None]:
# Running list of (question, answer) tuples; passed to the chain on every turn
# and appended to by on_submit
chat_history = []

def on_submit(_):
    """Handle one submission from the chatbot input box.

    Reads the text from the module-level ``input_box``, clears the box, and,
    unless the text is blank or the word ``exit``, sends it to the ``qa`` chain
    together with ``chat_history``. The exchange is appended to ``chat_history``
    and both the question and the answer are displayed as HTML widgets.

    Args:
        _: Unused; presumably the widget passed in by the submit callback.

    Returns:
        None.
    """
    import html  # function-scope import keeps this cell self-contained

    # Surrounding whitespace is not part of the question (and would defeat 'exit').
    query = input_box.value.strip()
    input_box.value = ""

    # A blank submission has nothing to answer; skip the LLM call.
    if not query:
        return

    if query.lower() == 'exit':
        print("Thank you for using the Resume chatbot!")
        return

    result = qa({"question": query, "chat_history": chat_history})
    answer = result['answer']
    chat_history.append((query, answer))

    # Escape both sides of the exchange: they are free text and would otherwise
    # be interpreted as markup by the HTML widget.
    display(widgets.HTML(f'<b>User:</b> {html.escape(query)}'))
    display(widgets.HTML(f'<b><font color="blue">Chatbot:</font></b> {html.escape(str(answer))}'))

print("Welcome to the Resume chatbot! Type 'exit' to stop.")

# Text field for the user's question; on_submit is registered as its submit callback.
# NOTE(review): Text.on_submit appears to be deprecated in newer ipywidgets releases
# in favour of observe() — confirm against the installed version.
input_box = widgets.Text(placeholder='Please enter your question:')
input_box.on_submit(on_submit)

# Render the input box as the cell output
display(input_box)