In [1]:
import os
import sys

# Make the notebook's parent directory importable; presumably this is where the
# local `recruitair` package imported further down lives.
parent_dir = os.path.abspath(os.pardir)
sys.path.append(parent_dir)

In [2]:
import dagshub

# Attach this session to the DagsHub repository; mlflow=True asks DagsHub to set
# up MLflow for that repository as well (the run link printed by a later cell
# points at this repository's MLflow server).
dagshub.init(
    repo_owner="danirc2",
    repo_name="MLOps_RecruitAIr",
    mlflow=True,
)

In [4]:
import mlflow

# Turn on MLflow's automatic logging for LangChain before any model call is made.
mlflow.langchain.autolog()

# Select the experiment that subsequent runs are recorded under. The call is kept
# as the last expression so the returned Experiment object is displayed.
experiment_name = "job-offer-criteria-extraction"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='mlflow-artifacts:/071af919fef5423e9de028385f02d35a', creation_time=1758834223847, experiment_id='2', last_update_time=1758834223847, lifecycle_stage='active', name='job-offer-criteria-extraction', tags={}>

In [9]:
import string


# Prompt used to ask an LLM for the key criteria of a job offer.
#
# The text is rendered with string.Template: `${job_offer_text}` is the only
# placeholder, and because string.Template only treats `$` specially, the literal
# JSON braces in the worked examples need no escaping.
#
# The prompt asks for a JSON object with a single "key_criteria" list whose items
# carry "title", "description" and "importance" (1-5). This presumably mirrors
# the KeyCriteriaResponse model used when the prompt is invoked - confirm against
# recruitair.job_offers.models if either side changes.
prompt_template = """The following text is a job offer. From it, extract all the key requirements or
criteria that an applicant to this job offer must or should fulfill. Criteria can
be either explicit requirements that are clearly stated in the job offer, implicit
requirements that are not clearly stated but can be inferred from the job offer,
desirable qualities that are not strictly required but would make an applicant
more competitive, or any other relevant criteria that can be derived from the job offer.

For each criterion, provide:
- A title in one or two words at most.
- A brief but exhaustive description, which will be used later for evaluation, so
    it must be clear, precise, and unambiguous.
- An importance rating from 1 to 5, where 1 means "it would be okay for an applicant
    to have this criterion, but it's not bad if they don't" and 5 means "an applicant
    must absolutely have this criterion to be considered for the job".

Provide the output as a JSON object with a single field "key_criteria", which is
a list of dictionaries, each representing a key requirement or criterion with
the fields "title", "description", and "importance".

For example, if the job offer has the following text:

```
We are looking for a Junior Machine Learning Engineer with expertise and passion
in the Gen AI space.
```

You might extract the following key criteria:

```json
{
    "key_criteria": [
        {
            "title": "GenAI Expertise",
            "description": "Expertise and passion in the field of Generative AI.",
            "importance": 5
        },
        {
            "title": "ML Background",
            "description": "Background in Machine Learning engineering.",
            "importance": 3
        }
        ...
    ]
}
```

Or if the job offer has the following text:

```
BSc or MSc in Computer Science, Artificial Intelligence, Mathematics, Data Science,
or any other related discipline or commensurate work experience or demonstrated
competence. MSc related to Generative AI would be a plus.
Between 0 - 3 years of work experience, internships related to the job content
would also be valuable. Exposure to Gen AI in previous projects / internships.
```

You might extract the following key criteria:

```json
{
    "key_criteria": [
        {
            "title": "Educational Background",
            "description": "BSc or MSc in Computer Science, Artificial Intelligence, Mathematics, Data Science, or any other related discipline or commensurate work experience or demonstrated competence.",
            "importance": 5
        },
        {
            "title": "Generative AI Education",
            "description": "MSc related to Generative AI would be a plus.",
            "importance": 3
        },
        {
            "title": "Work Experience",
            "description": "Between 0 - 3 years of work experience, internships related to the job content would also be valuable.",
            "importance": 4
        },
        {
            "title": "Gen AI Exposure",
            "description": "Exposure to Generative AI in previous projects / internships.",
            "importance": 4
        }
        ...
    ]
}
```

Here's the job offer text:
${job_offer_text}
Provide the output as a JSON object only, without any additional text or explanation."""

In [10]:
sample_job_offer = """
We are looking for a Software Engineer with experience in Python and machine
learning. The ideal candidate should have at least 3 years of experience in
software development, a strong understanding of algorithms and data structures,
and the ability to work in a fast-paced environment. Familiarity with cloud
platforms like AWS or GCP is a plus. Excellent communication skills and the
ability to work in a team are essential.
"""

In [11]:
from recruitair.job_offers.models import KeyCriteriaResponse
from langchain_ollama import ChatOllama

# Settings for this trial, named once so the values logged to MLflow are the
# same ones handed to the model.
model_name = "dolphin3"
temperature = 0

# Run the extraction prompt once on the sample offer inside a tracked MLflow run.
with mlflow.start_run():
    mlflow.log_param("model", model_name)
    mlflow.log_param("temperature", temperature)

    llm = ChatOllama(model=model_name, temperature=temperature)
    # Ask the model for output shaped by the KeyCriteriaResponse schema.
    structured_llm = llm.with_structured_output(KeyCriteriaResponse, method="json_schema")

    # Fill the single ${job_offer_text} placeholder with the sample offer.
    prompt = string.Template(prompt_template).substitute(job_offer_text=sample_job_offer)
    response = structured_llm.invoke(prompt)

    # Validate against the schema before printing the parsed result.
    print(KeyCriteriaResponse.model_validate(response))

key_criteria=[KeyCriterion(title='Python Experience', description='Experience in Python programming.', importance=5), KeyCriterion(title='Machine Learning Knowledge', description='Knowledge and experience in machine learning.', importance=5), KeyCriterion(title='Software Development Experience', description='At least 3 years of experience in software development.', importance=5), KeyCriterion(title='Algorithms & Data Structures', description='Strong understanding of algorithms and data structures.', importance=4), KeyCriterion(title='Cloud Platforms Familiarity', description='Familiarity with cloud platforms like AWS or GCP.', importance=3), KeyCriterion(title='Communication Skills', description='Excellent communication skills.', importance=5), KeyCriterion(title='Teamwork Ability', description='Ability to work in a team.', importance=4)]
🏃 View run vaunted-zebra-320 at: https://dagshub.com/danirc2/MLOps_RecruitAIr.mlflow/#/experiments/2/runs/e231c3c548484b7ba123d11534c65b94
🧪 View exp

In [None]:
import mlflow

from recruitair.job_offers.models import KeyCriteriaResponse

# Register the extraction prompt in the MLflow prompt registry together with the
# response schema it is meant to be used with.
#
# Fix: the saved cell contained `template=,`, which is a SyntaxError; the prompt
# text defined earlier in the notebook is now passed explicitly.
#
# NOTE(review): prompt_template uses the `${job_offer_text}` placeholder of
# string.Template, while MLflow's own prompt formatting expects `{{variable}}`.
# Consumers therefore have to render the stored text with string.Template, as the
# trial run in this notebook does - confirm this is the intended format before
# relying on MLflow-side formatting.
prompt = mlflow.genai.register_prompt(
    name="job-offer-criteria-extraction",
    template=prompt_template,
    response_format=KeyCriteriaResponse,
    commit_message="Add job offer criteria extraction prompt",
    tags={"module": "job-offers"},
)

2025/09/25 22:38:23 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for prompt version to finish creation. Prompt name: job-offer-criteria-extraction, version 1
