In [1]:
import os
import sys
import mlflow

sys.path.append(os.path.abspath(".."))

from recruitair.job_offers.models import KeyCriteriaResponse

mlflow.set_tracking_uri("http://nattech.fib.upc.edu:40380/")

In [2]:
mlflow.langchain.autolog()
mlflow.set_experiment("criteria-extraction")

<Experiment: artifact_location='mlflow-artifacts:/900495739893713381', creation_time=1759680270113, experiment_id='900495739893713381', last_update_time=1759680270113, lifecycle_stage='active', name='criteria-extraction', tags={'domain': 'recruitair',
 'mlflow.experimentKind': 'custom_model_development',
 'task': 'criteria-extraction'}>

In [3]:
MLFLOW_PROMPT_NAME = "criteria-extraction"
MLFLOW_PROMPT_VERSION = 3
OLLAMA_MODEL = "dolphin3"
OLLAMA_MODEL_VERSION = "8b"

In [4]:
import pandas as pd
import json

with open("../data/interim/preprocessed_jobs.jsonl", "r") as f:
    lines = f.readlines()

job_offers = []
criteria = []
for line in lines:
    job_offer = json.loads(line)
    job_offers.append({"job_offer_text": job_offer["job_description"]})
    criteria.append({"key_criteria": job_offer["criteria"]})

data = pd.DataFrame({"inputs": job_offers, "expectations": criteria})

In [5]:
from langchain_ollama import ChatOllama
from codecarbon import EmissionsTracker


@mlflow.trace
def predict(job_offer_text: str) -> KeyCriteriaResponse:
    tracker = EmissionsTracker(measure_power_secs=1, tracking_mode="process", save_to_file=False)
    tracker.start()
    llm = ChatOllama(model=f"{OLLAMA_MODEL}:{OLLAMA_MODEL_VERSION}", temperature=0)
    prompt = mlflow.genai.load_prompt(f"prompts:/{MLFLOW_PROMPT_NAME}/{MLFLOW_PROMPT_VERSION}")
    response = llm.with_structured_output(prompt.response_format, method="json_schema").invoke(
        prompt.format(job_offer_text=job_offer_text)
    )
    tracker.stop()
    mlflow.log_param("emissions-tracker/measure-power-secs", 1)
    mlflow.log_param("emissions-tracker/tracking-mode", "process")
    mlflow.log_metrics(tracker.final_emissions_data.__dict__)
    return KeyCriteriaResponse.model_validate(response)

In [6]:
import ollama
from functools import lru_cache
import numpy as np


@lru_cache
def get_embedding(text: str) -> np.ndarray:
    return np.array(ollama.embed("mxbai-embed-large:335m", input=text)["embeddings"][0])

In [7]:
from mlflow.entities import Feedback


@mlflow.genai.scorer(name="target-recall/embedding/mxbai-embed-large:335m")
def target_recall(outputs: KeyCriteriaResponse, expectations: KeyCriteriaResponse) -> Feedback:
    expectations = KeyCriteriaResponse.model_validate(expectations)
    # Compute the embeddings of the names of the all the extracted and target criteria:
    target_embeddings = []
    for target_criterion in expectations.key_criteria:
        target_embeddings.append(get_embedding(target_criterion.name))
    response_embeddings = []
    for response_criterion in outputs.key_criteria:
        response_embeddings.append(get_embedding(response_criterion.name))
    # Compute the cosine similarity matrix between the two sets of embeddings:
    similarity_matrix = np.inner(np.array(response_embeddings), np.array(target_embeddings))

    # We'll score as follows: For each target criterion, we'll find the most similar
    # response criterion, thus we'll have, for each target criterion, a score
    # between 0 and 1 representing how well it was matched. We'll then floor
    # everything below 0.8 to 0, and average the rest.
    # This means that if a target criterion was not matched with at least 0.8,
    # it will contribute 0 to the average. We'll call this "target recall score".
    # It can be interpreted as the "rich fraction" of target criteria that were well matched.
    scores = similarity_matrix.max(axis=0)
    target_recall_score = float(np.where(scores < 0.8, 0, scores).mean())
    return Feedback(value=target_recall_score)

In [8]:
from typing import List


@mlflow.genai.scorer(name="importance-mse/embedding/mxbai-embed-large:335m")
def importance_mse(outputs: KeyCriteriaResponse, expectations: KeyCriteriaResponse) -> Feedback:
    expectations = KeyCriteriaResponse.model_validate(expectations)
    # Compute the embeddings of the names of the all the extracted and target criteria:
    target_embeddings = []
    for target_criterion in expectations.key_criteria:
        target_embeddings.append(get_embedding(target_criterion.name))
    response_embeddings = []
    for response_criterion in outputs.key_criteria:
        response_embeddings.append(get_embedding(response_criterion.name))
    # Compute the cosine similarity matrix between the two sets of embeddings:
    similarity_matrix = np.inner(np.array(response_embeddings), np.array(target_embeddings))

    # Find which target is more likely to be for each response:
    response_to_target: List[int] = similarity_matrix.argmax(axis=1)
    response_to_target[similarity_matrix.max(axis=1) < 0.5] = -1  # Consider as unmatched if similarity < 0.5
    # Compute the mean square error of matched importance:
    mse = 0
    for i in range(len(outputs.key_criteria)):
        if response_to_target[i] == -1:
            mse += 100**2  # Penalize completely unmatched criteria
            continue
        response_criterion = outputs.key_criteria[i]
        target_criterion = expectations.key_criteria[response_to_target[i]]
        mse += (response_criterion.importance - target_criterion.importance) ** 2
    mse /= len(outputs.key_criteria)

    return Feedback(value=mse)

In [None]:
with mlflow.start_run(run_name="prompt-evaluation"):
    mlflow.log_param("ollama_model", OLLAMA_MODEL)
    mlflow.log_param("ollama_model_version", OLLAMA_MODEL_VERSION)
    mlflow.log_param("temperature", 0)
    mlflow.log_param("mlflow_prompt_name", MLFLOW_PROMPT_NAME)
    mlflow.log_param("mlflow_prompt_version", MLFLOW_PROMPT_VERSION)

    mlflow.genai.evaluate(
        predict_fn=predict,
        data=data,
        scorers=[
            target_recall,
            importance_mse,
        ],
    )

2025/10/06 12:39:18 INFO mlflow.models.evaluation.utils.trace: Auto tracing is temporarily enabled during the model evaluation for computing some metrics and debugging. To disable tracing, call `mlflow.autolog(disable=True)`.
2025/10/06 12:39:18 INFO mlflow.genai.utils.data_validation: Testing model prediction with the first sample in the dataset.
[codecarbon INFO @ 12:39:18] [setup] RAM Tracking...
[codecarbon INFO @ 12:39:18] [setup] CPU Tracking...
 Windows OS detected: Please install Intel Power Gadget to measure CPU

[codecarbon INFO @ 12:39:19] CPU Model on constant consumption mode: Intel(R) Core(TM) Ultra 7 255H
[codecarbon INFO @ 12:39:19] [setup] GPU Tracking...
[codecarbon INFO @ 12:39:19] No GPU found.
[codecarbon INFO @ 12:39:19] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: Unspecified
            
[codecarbon INFO @ 12:3

🏃 View run prompt-evaluation at: http://nattech.fib.upc.edu:40380/#/experiments/900495739893713381/runs/b63220abfabc469c950d821fc04ce803
🧪 View experiment at: http://nattech.fib.upc.edu:40380/#/experiments/900495739893713381


[codecarbon INFO @ 12:39:23] Energy consumed for RAM : 0.000008 kWh. RAM Power : 10.0 W
[codecarbon INFO @ 12:39:23] Delta energy consumed for CPU with constant : 0.000012 kWh, power : 42.5 W
[codecarbon INFO @ 12:39:23] Energy consumed for All CPU : 0.000036 kWh
[codecarbon INFO @ 12:39:23] 0.000045 kWh of electricity used since the beginning.
[codecarbon INFO @ 12:39:24] Energy consumed for RAM : 0.000011 kWh. RAM Power : 10.0 W
[codecarbon INFO @ 12:39:24] Delta energy consumed for CPU with constant : 0.000013 kWh, power : 42.5 W
[codecarbon INFO @ 12:39:24] Energy consumed for All CPU : 0.000049 kWh
[codecarbon INFO @ 12:39:24] 0.000060 kWh of electricity used since the beginning.


MlflowException: Failed to run the prediction function specified in the `predict_fn` parameter. Input: {'job_offer_text': '### Business Development Executive\n\n**Company:** Innovate Dynamics Ltd.\n\n**Location:** Hyderabad, India\n\n**Role Overview:**\nJoin Innovate Dynamics Ltd. as a Business Development Executive, where you will play a pivotal role in driving business growth by generating leads and managing key client accounts. Leverage your expertise in economic analysis and marketing strategies to enhance our market presence and build strong client relationships.\n\n**Responsibilities:**\n- Develop and execute comprehensive business development plans to achieve company objectives.\n- Identify and engage prospective clients across diverse niches, including software development and outsourcing.\n- Manage and grow relationships with key accounts to drive long-term success.\n- Utilize digital marketing strategies to boost client engagement and retention.\n- Collaborate with cross-functional teams to develop tailored client solutions.\n- Utilize LinkedIn Sales Navigator to identify and approach new business leads.\n\n**Qualifications:**\n- Bachelorâ€™s degree in Business Administration, Economics, or related field.\n- 3-5 years of experience in business development or related fields.\n- Strong proficiency with MS Office Suite and data analysis tools like Stata.\n- Excellent verbal and written communication skills.\n- Ability to multitask and manage multiple projects simultaneously.'}. Error: [WinError 10061] No se puede establecer una conexión ya que el equipo de destino denegó expresamente dicha conexión



[codecarbon INFO @ 12:39:25] Energy consumed for RAM : 0.000014 kWh. RAM Power : 10.0 W
[codecarbon INFO @ 12:39:25] Delta energy consumed for CPU with constant : 0.000011 kWh, power : 42.5 W
[codecarbon INFO @ 12:39:25] Energy consumed for All CPU : 0.000060 kWh
[codecarbon INFO @ 12:39:25] 0.000074 kWh of electricity used since the beginning.
[codecarbon INFO @ 12:39:26] Energy consumed for RAM : 0.000016 kWh. RAM Power : 10.0 W
[codecarbon INFO @ 12:39:26] Delta energy consumed for CPU with constant : 0.000012 kWh, power : 42.5 W
[codecarbon INFO @ 12:39:26] Energy consumed for All CPU : 0.000072 kWh
[codecarbon INFO @ 12:39:26] 0.000088 kWh of electricity used since the beginning.
[codecarbon INFO @ 12:39:27] Energy consumed for RAM : 0.000019 kWh. RAM Power : 10.0 W
[codecarbon INFO @ 12:39:27] Delta energy consumed for CPU with constant : 0.000012 kWh, power : 42.5 W
[codecarbon INFO @ 12:39:27] Energy consumed for All CPU : 0.000084 kWh
[codecarbon INFO @ 12:39:27] 0.000103 kWh 