In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.output_parsers import JsonOutputParser
from IPython.display import JSON
from dotenv import load_dotenv

# Pull settings from a .env file into the environment before any client is built.
# NOTE(review): presumably this is where the Groq API key used by ChatGroq
# comes from — confirm the expected variable name against the .env file.
load_dotenv()

True

In [4]:
# Model identifiers live in one place so they are easy to swap.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
LLM_MODEL_NAME = "Gemma2-9b-It"

# Embedding model used to vectorise the resume chunks (and, later, queries).
embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# temperature=0: this notebook does structured field extraction, not creative
# writing, so sampling randomness only makes the JSON keys/values drift
# between runs. Pinning it keeps re-runs as repeatable as the backend allows.
llm = ChatGroq(model=LLM_MODEL_NAME, temperature=0)

  from tqdm.autonotebook import tqdm, trange


In [5]:
# Input file and chunking parameters, named so a reader can tune them.
RESUME_PDF_PATH = "Ranjan_Padhi_Resume.pdf"
CHUNK_SIZE = 700
CHUNK_OVERLAP = 100

# 1. Read the resume PDF into LangChain documents.
loader = PyPDFLoader(RESUME_PDF_PATH)
documents = loader.load()

# 2. Cut the documents into overlapping chunks so each piece is small enough
#    to embed and retrieve on its own.
sp = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = sp.split_documents(documents)

# 3. Embed every chunk and index the vectors in an in-memory FAISS store.
vector_store = FAISS.from_documents(chunks, embedding)

In [14]:
# Template for the answer-generation step. It exposes two slots:
#   {context} - filled with the resume text handed to the documents chain
#   {input}   - filled with the instruction passed to chain.invoke(...)
RESUME_QA_TEMPLATE = """
    You are an assistant who can retrieve information from given PDF Resume/CV context:                            
        <context>
        {context}
        </context>
        {input}
    """

prompt = PromptTemplate.from_template(RESUME_QA_TEMPLATE)

# Extraction instruction sent to the chain as the value of the "input" key.
#
# This text is substituted into the prompt's {input} slot as a plain value; it
# is never parsed as a template itself. Braces are therefore written singly -
# doubled braces ("{{ ... }}") would reach the model verbatim and muddy the
# JSON example. The example also needs a comma between every field.
#
# NOTE: the name `input` shadows Python's built-in input() for the rest of the
# notebook. It is kept because the invoke cell refers to it by this name.
input = """
    Extract Employee's name, years of experience, technical skills and other skills and return in below JSON format

        {
            "empName" : Employee's name as string,
            "yoe" : Years of experience as number,
            "techSkills" : [list of technical skills],
            "otherSkills" : [list of other skills]
        }
"""

In [15]:
# Retrieval side: expose the FAISS index through the retriever interface.
retriever = vector_store.as_retriever()

# Generation side: put the retrieved documents into `prompt`, call the LLM,
# and parse the model's reply as JSON.
json_parser = JsonOutputParser()
doc_chain = create_stuff_documents_chain(
    llm,
    prompt,
    output_parser=json_parser,
)

# Full pipeline: retrieve for the given input, then generate from the results.
chain = create_retrieval_chain(retriever, doc_chain)

In [18]:
# Run the retrieval chain on the extraction instruction. Only the "answer"
# entry of the returned mapping is shown as the cell output.
result = chain.invoke({"input": input})
result["answer"]

{'empName': 'Ranjan Padhi',
 'yoe': 8,
 'skills': ['Java',
  'JavaScript',
  'HTML',
  'CSS',
  'Hibernate/RACLES',
  'Cloud',
  'Software Development',
  'System Design',
  'Problem Solving',
  'Design Patterns',
  'Dev/Team Management',
  'Agile',
  'React.js',
  'Spring Boot',
  'Micro Services']}