# Resume Data Extractor
> In this agent we will be generating the resume data in our normalized format!

In [23]:
from langchain_community.document_loaders import PyPDFLoader

file_path = './sample-resumes/suyash-resume.pdf'
loader = PyPDFLoader(file_path)
documents = loader.load()

In [24]:
# Create a LLM instance
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name="gpt-3.5-turbo")

In [25]:
from langchain import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser

prompt = PromptTemplate(
    input_variables=["format","text"],
    template="""
You're a very good document parser can you parse the following text into a resume. Please make sure to follow the following format:
```
{format}
```
====RESUME===
{text}

Give me output *STRICTLY* in JSON FORMAT
""")

format = """
# Sample Data
sample_resume: Resume = {
    "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
    "profile_name": "Backend Developer Resume",

    "ResumeAdditionalAward": [
        {"title": "Best Software Engineer Award 2023"},
        {"title": "Top Contributor - Open Source"}
    ],

    "ResumeCertification": [
        {
            "certificate_name": "AWS Certified Solutions Architect",
            "certificate_link": "https://aws.amazon.com/certification/",
            "issued_by": "Amazon Web Services"
        },
        {
            "certificate_name": "Google Cloud Professional Developer",
            "certificate_link": "https://cloud.google.com/certification",
            "issued_by": "Google Cloud"
        }
    ],

    "ResumeEducation": [
        {
            "institution": "Indian Institute of Technology Bombay",
            "location": "Mumbai, India",
            "degree_type": "B.Tech",
            "field_of_study": "Computer Science",
            "start_month_year": "Aug 2017",
            "end_month_year": "May 2021",
            "score_metric": "CGPA",
            "score": "8.7"
        }
    ],

    "ResumeExperience": [
        {
            "employer": "Coditas",
            "Job_title": "Software Engineer",
            "location": "Pune, India",
            "start_month_year": "Jun 2021",
            "end_month_year": "Aug 2023"
        },
        {
            "employer": "Jash Datasciences",
            "Job_title": "Backend Developer",
            "location": "Remote",
            "start_month_year": "Sep 2023",
            "end_month_year": "Present"
        }
    ],

    "ResumePersonalInfo": {
            "first_name": "Suyash",
            "last_name": "Lawand",
            "email": "suyash@example.com",
            "phone": "+91-9876543210",
            "address": "Pune, Maharashtra, India",
            "dob": "1999-04-15",
            "job_title": "Backend Developer",
            "git_link": "https://github.com/suyashlawand",
            "linkedin_profile": "https://linkedin.com/in/suyashlawand",
            "portfolio_link": "https://suyashlawand.dev"
        },

    "ResumeProject": [
        {
            "project_name": "Menu-Card",
            "technologies_used": "NestJS, PostgreSQL, Docker",
            "project_link": "https://menyokard.com",
            "description": "Real-time café management platform with QR-based ordering and billing."
        },
        {
            "project_name": "AI Resume Evaluator",
            "technologies_used": "Next.js, LangChain, Python",
            "project_link": "https://aireview.ai",
            "description": "An AI-driven resume analyzer with mock interview functionality."
        }
    ],

    "ResumeSkillSet": [
        {
            "programming_languages": ["Python", "Java", "TypeScript"],
            "libraries_and_frameworks": ["NestJS", "React", "Spring Boot"],
            "tools_and_platforms": ["Docker", "Kubernetes", "GitHub Actions"],
            "databases": ["PostgreSQL", "MySQL", "MongoDB"],
            "concepts": ["Microservices", "REST APIs", "Distributed Systems"]
        }
    ],

    "InterviewQuestion": []
}
"""

chain = prompt | llm | JsonOutputParser()

resume = chain.invoke({'format': format, 'text': documents[0].page_content + '\n' + documents[1].page_content})

In [26]:
resume

{'id': 'a1b2c3d4-e5f6-7890-abcd-ef1234567890',
 'profile_name': 'Backend Developer Resume',
 'ResumeAdditionalAward': [{'title': 'Best Software Engineer Award 2023'},
  {'title': 'Top Contributor - Open Source'}],
 'ResumeCertification': [{'certificate_name': 'Build apps using NextJS v14 using App Router, Next Auth, Next UI, and TailwindCSS! Learn the latest version of NextJS! - Udemy'},
  {'certificate_name': 'Build ASP.NET Core Web API - Scratch To Finish (.NET 8 API) - Udemy'},
  {'certificate_name': "NestJS: The Complete Developer's Guide - Udemy"},
  {'certificate_name': 'Build a Backend REST API with Python & Django - Advanced - Udemy'}],
 'ResumeEducation': [{'institution': 'TSSM’s BHAIRABAI SAWANT COLLEGE OF ENGINEERING',
   'location': 'Pune, Maharashtra, India',
   'degree_type': "Bachelor's",
   'field_of_study': 'Computer Science Engineering',
   'start_month_year': '2021',
   'end_month_year': '2024',
   'score_metric': 'CGPA',
   'score': '8.3'},
  {'institution': 'DR. D.