In [None]:
! pip install langchain langchain-community langchain-openai pypdf

In [1]:
from pydantic import BaseModel, Field, HttpUrl
from typing import Optional, List

from langchain.prompts import PromptTemplate
from langchain.chat_models import init_chat_model
from langchain.output_parsers import PydanticOutputParser
from langchain_community.document_loaders import PyPDFLoader

from dotenv import load_dotenv, find_dotenv
import os

# Load variables from the .env file located by find_dotenv() into os.environ.
load_dotenv(find_dotenv())

# `api_key` is not passed explicitly to the model in the visible cells, so the
# OpenAI client presumably reads OPENAI_API_KEY from the environment itself.
# Fail fast here with an actionable message instead of a less obvious error
# when the model is constructed or first invoked.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it "
        "in the environment before running this notebook."
    )

In [2]:
class ContactInfo(BaseModel):
    # Contact details for the candidate. Every field is optional and defaults
    # to None, so a resume that omits an item still validates.
    # NOTE: documented with `#` comments rather than a docstring so the JSON
    # schema generated from this model stays exactly as it was.

    # Free-form text fields.
    name: Optional[str] = Field(
        None,
        description="Full candidate name",
    )
    email: Optional[str] = Field(
        None,
        description="Primary email address",
    )
    phone: Optional[str] = Field(
        None,
        description="Primary phone number",
    )
    location: Optional[str] = Field(
        None,
        description="City, State or City, Country",
    )

    # URL fields are validated by pydantic's HttpUrl type.
    linkedin: Optional[HttpUrl] = Field(
        None,
        description="LinkedIn profile URL",
    )
    github: Optional[HttpUrl] = Field(
        None,
        description="GitHub profile URL",
    )
    website: Optional[HttpUrl] = Field(
        None,
        description="Personal website or portfolio URL",
    )


class Education(BaseModel):
    """One education entry (degree or programme) listed on the resume."""

    # Every field is optional so partially specified entries still validate.
    # Dates and GPA stay as strings because no parsing or normalisation is
    # applied to them in this notebook.
    institution: Optional[str] = Field(default=None, description="Name of the school, college, or university")
    degree: Optional[str] = Field(default=None, description="Degree name e.g., Bachelor of Science")
    field_of_study: Optional[str] = Field(default=None, description="Major/Concentration")
    start_date: Optional[str] = Field(default=None, description="Start date, free-form string")
    end_date: Optional[str] = Field(default=None, description="End date or Present")
    gpa: Optional[str] = Field(default=None, description="GPA as written on the resume, e.g., 3.8/4.0")


class Experience(BaseModel):
    """One work-experience entry (a single role at a single employer)."""

    # Field descriptions mirror the wording used by ContactInfo and Education
    # so the extraction schema describes equivalent fields the same way.
    company: Optional[str] = Field(default=None, description="Employer or organization name")
    title: Optional[str] = Field(default=None, description="Job title or role")
    start_date: Optional[str] = Field(default=None, description="Start date, free-form string")
    end_date: Optional[str] = Field(default=None, description="End date or Present")
    location: Optional[str] = Field(default=None, description="City, State or City, Country")
    description: Optional[str] = Field(default=None, description="Bullets or paragraph of responsibilities/impact")


class Certification(BaseModel):
    """One certification or license listed on the resume."""

    # `name` is the only required field; `...` marks it as required while still
    # allowing a description to be attached.
    name: str = Field(..., description="Certification or license name")
    issuer: Optional[str] = Field(default=None, description="Issuing organization")
    date: Optional[str] = Field(default=None, description="Issue date, free-form string")
    credential_id: Optional[str] = Field(default=None, description="Credential ID or license number")
    credential_url: Optional[HttpUrl] = Field(default=None, description="Credential verification URL")


# Top-level schema handed to `with_structured_output(Resume, ...)`. The class
# docstring and field descriptions are part of the schema generated from this
# model, so they are written as guidance for the extracting model.
class Resume(BaseModel):
    """Structured information extracted from a single resume."""

    # Every section has a default (empty ContactInfo / empty list), so a resume
    # that lacks a section still validates.
    contact: ContactInfo = Field(default_factory=ContactInfo, description="Candidate contact details")
    education: List[Education] = Field(default_factory=list, description="Education entries, one per degree or programme")
    experience: List[Experience] = Field(default_factory=list, description="Work experience entries, one per role")
    skills: List[str] = Field(default_factory=list, description="Skills listed on the resume, one skill per item")
    certifications: List[Certification] = Field(default_factory=list, description="Certifications or licenses, one per item")

In [3]:
# Prompt text for the extraction call. `{resume_text}` is the only placeholder.
# The instructions spell out two things the original wording left ambiguous:
# stick to the fields the schema defines, and leave a field empty when the
# resume does not state it instead of guessing.
resume_template = """
You are an AI assistant tasked with extracting structured information from a resume.

Extract only the fields defined in the Resume schema, using only information that appears in the resume text below.
If a field is not stated in the resume, leave it empty (null or an empty list). Never guess or invent values.

Resume Context:
{resume_text}
"""

# Wrap the text in a PromptTemplate so it can be rendered with `.invoke(...)`.
prompt_template = PromptTemplate(
    template=resume_template,
    input_variables=["resume_text"],
)

In [4]:
# Output parser bound to the Resume schema.
# NOTE(review): not referenced by any other visible cell; the model cell uses
# `with_structured_output` instead. Confirm whether it is still needed.
parser = PydanticOutputParser(
    pydantic_object=Resume,
)

In [5]:
# Base chat model. temperature=0 keeps extraction as repeatable as the API
# allows, so re-running the notebook on the same resume gives stable output.
chat_model = init_chat_model(
    model="gpt-4o-mini",
    model_provider="openai",
    temperature=0,
)

# Bind the Resume schema so responses come back as structured output via
# function calling. The name `modle` (sic) is kept because the invocation cell
# refers to it by that name.
modle = chat_model.with_structured_output(Resume, method="function_calling")

In [6]:
# Path to the resume PDF, relative to the notebook's working directory.
file_path = './resume/ResumeEx2.pdf'
loader = PyPDFLoader(file_path)

# Presumably one Document per PDF page; each exposes its text as `page_content`.
docs = loader.load()

# Concatenate the text of all loaded documents, separated by newlines.
resume_text = "\n".join(doc.page_content for doc in docs)

# An image-only (scanned) PDF can yield no text at all; stop here rather than
# sending an empty prompt to the model and getting an empty/invented result.
if not resume_text.strip():
    raise ValueError(
        f"No extractable text found in {file_path!r}; "
        "the PDF may be scanned or image-only."
    )

# Displayed as the cell output: number of characters extracted.
len(resume_text)

6047

In [None]:
# Show only a short preview. The full text is a person's resume (name, email,
# phone, ...), and printed output is saved inside the notebook file.
PREVIEW_CHARS = 500  # raise this locally if more context is needed
print(resume_text[:PREVIEW_CHARS])

In [None]:
# Fill the prompt's single placeholder with the extracted resume text.
prompt_inputs = {"resume_text": resume_text}
promp = prompt_template.invoke(prompt_inputs)

# Send the rendered prompt to the structured-output model; the result is
# presumably a Resume instance, since that is the schema bound to `modle`.
response = modle.invoke(promp)

# Display the extracted data as a plain dict.
response.model_dump()