In [1]:
!pip install -qU "langchain[google-vertexai]"

  You can safely remove it manually.[0m[33m
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-api-python-client 1.8.0 requires google-api-core<2dev,>=1.13.0, but you have google-api-core 2.24.1 which is incompatible.
kfp 2.5.0 requires requests-toolbelt<1,>=0.8.0, but you have requests-toolbelt 1.0.0 which is incompatible.[0m[31m
[0m

In [1]:
from typing import Optional, List
from pydantic import BaseModel, Field
from enum import Enum

import pandas as pd
from langchain.chat_models import init_chat_model

In [2]:
# Load the whole source document as one string. The context manager closes the
# file handle as soon as the text is read instead of leaving it open for the
# lifetime of the kernel.
with open("./data/linkedin_cv.txt", "r") as f:
    linkedin_target_resume = f.read()

In [3]:
# Chat model handle; the extraction chain further down wraps it with a
# structured-output schema.
llm = init_chat_model(
    "gemini-2.0-flash-001",
    model_provider="google_vertexai",
    temperature=0.3,
)

In [35]:
# Pydantic models that define the structured-output schema

class SkillType(str, Enum):
    """ SKill type"""
    # The str mix-in makes each member a plain string as well as an enum member.
    TECHNICAL = "technical"
    SOFT_SKILL = "soft-skill"

class Skill(BaseModel):
    """Skill required for the job"""

    # The field descriptions belong to the schema handed to the LLM, so their
    # text is kept verbatim.
    skill: str = Field(description="Required or preferred skill in candidate. This can be technical or soft skill ")
    skill_type: SkillType = Field(description="type of skill in candidate.")
    # The default is None, so the annotation has to admit None as well: with a
    # bare `int`, an explicit null supplied for this field fails validation.
    experience: Optional[int] = Field(default=None, description="Experience associated the skill.")
    # Defaults to True, i.e. a skill counts as required unless stated otherwise.
    is_required: bool = Field(default=True, description="Is this skill required or preferred")

class Skills(BaseModel):
    """Extracted skills from resume"""
    # Wrapper around a list of Skill entries. In this cell it is only
    # referenced by the commented-out structured-output experiment.
    skills: List[Skill]

class Experience(BaseModel):
    """Required years of experience and the specific areas of experience sought"""
    # Adjacent string literals are concatenated with nothing in between, so
    # every example carries its own trailing ", " separator; without it the
    # examples run together ("...development,3+ years...industryExperience...").
    experience: str = Field(description=(
        "Detail the required years of experience and the specific areas of experience sought (e.g., "
        "5+ years of experience in software development, "
        "3+ years of experience in project management within the healthcare industry, "
        "Experience with testing frameworks"
        "). Note the context of the experience.  "
    ))
    # Defaults to True, i.e. an entry counts as required unless stated otherwise.
    is_required: bool = Field(default=True, description="Is this required or preferred")

class Qualification(BaseModel):
    """Extract qualifications/ education from resume"""
    # One degree, certification or licence per instance. The description is
    # split across literals purely for line length; the joined text is unchanged.
    degree: str = Field(
        description=(
            "Specify the necessary educational background "
            "(e.g., Bachelor's degree in Computer Science, Master's degree in Business Administration) "
            "or any required certifications or licenses "
            "(e.g., PMP, CPA, AWS Certified Solutions Architect). Be exact"
        ),
    )
    # True unless the source marks the qualification as merely preferred.
    is_required: bool = Field(
        default=True,
        description="Is this qualification required or preferred",
    )

class PersonalityTrait(BaseModel):
    """Specific personality trait present in job description"""
    # A single free-text trait per instance.
    trait: str = Field(
        description="Specific personality traits present in job description",
    )

class ResumeFields(BaseModel):
    # Top-level extraction result: one list per category. This is the schema
    # given to the structured-output call later in the notebook.
    skills: List[Skill] = Field(
        description="List of all the skills",
    )
    experiences: List[Experience] = Field(
        description="List of all the experiences",
    )
    qualifications: List[Qualification] = Field(
        description="List of all the qualifications",
    )
    personality_traits: List[PersonalityTrait] = Field(
        description="List of all the personality traits",
    )
    
# structured_llm = llm.with_structured_output(Skills)
# structured_llm.invoke(linkedin_target_resume)

In [36]:
# System message for the extraction chain; it is inserted as the "system" entry
# of the chat prompt template built in the next cell.
# NOTE(review): the wording targets a *job description*, while the input loaded
# earlier comes from "linkedin_cv.txt" and is named `linkedin_target_resume` --
# confirm which kind of document this prompt is meant for.
system_prompt = """
You are a highly skilled AI assistant specializing in analyzing job descriptions and extracting key requirements. Your goal is to meticulously identify and categorize the essential skills, qualifications, experience, and other attributes sought by employers.

**Your Task:**

Given a job description as input, extract the required fields based on schema into a structured format. Be comprehensive and specific, avoiding vague or generic terms.

**Instructions:**

1.  **Read the job description carefully and thoroughly.**
2.  **Focus on explicitly stated requirements.**  Infer requirements only when strongly implied and directly supported by the text.
3.  **Be specific and detailed.** Avoid generalizations. For example, instead of "programming skills," specify "Proficiency in Python and Java."
4.  **Prioritize "must-have" requirements.**  Distinguish between essential requirements and desired or "nice-to-have" qualifications if the job description makes that distinction. 
5.  **Maintain accuracy.**  Do not add information that is not present in the job description.
6.  **Conciseness is important,** but don't sacrifice accuracy or completeness.
"""

In [37]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Two-message chat template: the fixed system instructions, then a human
# message whose body is filled from the "text" input variable.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{text}"),
])

In [38]:
# Pipe the prompt into the model, with the output constrained to ResumeFields.
runnable = prompt | llm.with_structured_output(schema=ResumeFields)

# Send the document followed by the extraction instruction. The blank line
# between them keeps the instruction from being glued onto the last word of
# the document when the file does not end with a newline.
fields = runnable.invoke({
    "text": (
        f"{linkedin_target_resume}\n\n"
        "Extract all the relevant fields from the text given"
    )
})

Key '$defs' is not supported in schema, ignoring


In [39]:
# Pretty-print the extracted object. This import rebinds the name `print` to
# rich's version, so the builtin is shadowed from this cell onward.
from rich import print 
print(fields)

In [52]:
import os
import pickle

# Persist the extracted fields for later reuse. The output directory is created
# first so a fresh checkout without ./outputs does not fail with
# FileNotFoundError; exist_ok makes the cell safe to re-run.
os.makedirs('./outputs', exist_ok=True)
with open('./outputs/linkedin_cv.pkl', 'wb') as f:
    pickle.dump(fields, f)

In [53]:
# with open('./outputs/linkedin_cv.pkl', 'rb') as f:
#     tmp = pickle.load(f)