<div style="text-align: center; background-color: #b1d1ff; font-family: 'Trebuchet MS', Arial, sans-serif; color: white; padding: 20px; font-size: 40px; font-weight: bold; border-radius: 0 0 0 0; box-shadow: 0px 6px 8px rgba(0, 0, 0, 0.2);">
  Stage - Evaluate model
</div>

In [12]:
import os
import sys

# Put the parent directory on sys.path (presumably so project packages that
# live one level up can be imported -- confirm against the repo layout).
# The membership check keeps the cell idempotent: re-running it no longer
# stacks duplicate entries on sys.path.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [13]:
import ast
import json
import os
import re
from difflib import SequenceMatcher

import pandas as pd
import requests
import scipy.stats as stats
from dotenv import load_dotenv
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics.pairwise import cosine_similarity

from process_cv.info_extract import extract_info, extract_text_from_pdf

In [14]:
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# SECURITY: the key must come from the environment / .env only. A literal key
# used to be committed here as the getenv() fallback; that key should be
# treated as leaked and revoked/rotated with the provider.
API_KEY = os.getenv("MISTRAL_API_KEY")
if not API_KEY:
    # Fail fast with a clear message instead of sending unauthenticated requests later.
    raise RuntimeError(
        "MISTRAL_API_KEY is not set. Define it in your environment or in a .env file."
    )

# Chat-completions endpoint and model used by the LLM-based matching section.
API_URL = "https://api.mistral.ai/v1/chat/completions"
MODEL_NAME = "mistral-medium"

## I. Human-Based Matching

Nhóm lựa chọn ngẫu nhiên 23 CV và 10 job từ cơ sở dữ liệu. Số cặp kết hợp sẽ là 23 × 10 = 230 cặp (CV, job).

Người đánh giá được yêu cầu chấm điểm mức độ phù hợp giữa CV và công việc trên thang điểm từ 0 đến 100, dựa trên các tiêu chí:

| Match Score | Mức độ phù hợp       | Ý nghĩa                                                                 |
|-------------|-----------------------|------------------------------------------------------------------------|
| 0 – 10      | Không phù hợp       | Không đúng chuyên ngành / khác lĩnh vực                               |
| 11 – 30     | Khá xa tiêu chí     | Có kỹ năng nền, nhưng thiếu nhiều yêu cầu trọng yếu                   |
| 31 – 60     | Phù hợp cơ bản      | Có thể làm được nếu được đào tạo thêm                                |
| 61 – 80     | Tương đối phù hợp   | Có nền tảng tốt, thiếu 1 vài kỹ năng cụ thể                           |
| 81 – 100    | Rất phù hợp         | Rất đúng yêu cầu, có kinh nghiệm, kỹ năng khớp rõ ràng               |


In [15]:
# Human-assigned match scores: one row per CV, one column per job.
path_eval = '../database/survey.csv'
data_eval = pd.read_csv(filepath_or_buffer=path_eval)
df = pd.DataFrame(data=data_eval)
df

Unnamed: 0,CV,Job1,Job2,Job3,Job4,Job5,Job6,Job7,Job8,Job9,Job10
0,Cv1,60,43,20,55,30,68,65,40,42,71
1,Cv2,41,50,35,31,30,52,40,62,66,83
2,Cv3,52,20,30,31,80,61,55,35,38,50
3,Cv4,39,90,40,11,30,88,90,75,87,75
4,Cv5,45,80,75,31,30,92,95,78,81,80
5,Cv6,35,65,45,81,31,70,40,58,63,70
6,Cv7,33,95,20,11,36,92,30,92,95,65
7,Cv8,47,90,25,11,63,88,80,89,88,80
8,Cv9,55,25,35,20,90,75,45,15,50,60
9,Cv10,60,70,80,65,80,85,80,75,70,55


In [16]:
# Flatten the score table row by row (all jobs of the first CV, then the
# second CV, ...) into one flat list, after dropping the CV label column.
matching_score_human = df.drop(columns=['CV']).to_numpy().ravel().tolist()
print("Matching score human:", matching_score_human)

Matching score human: [60, 43, 20, 55, 30, 68, 65, 40, 42, 71, 41, 50, 35, 31, 30, 52, 40, 62, 66, 83, 52, 20, 30, 31, 80, 61, 55, 35, 38, 50, 39, 90, 40, 11, 30, 88, 90, 75, 87, 75, 45, 80, 75, 31, 30, 92, 95, 78, 81, 80, 35, 65, 45, 81, 31, 70, 40, 58, 63, 70, 33, 95, 20, 11, 36, 92, 30, 92, 95, 65, 47, 90, 25, 11, 63, 88, 80, 89, 88, 80, 55, 25, 35, 20, 90, 75, 45, 15, 50, 60, 60, 70, 80, 65, 80, 85, 80, 75, 70, 55, 15, 50, 20, 30, 35, 15, 45, 65, 60, 30, 80, 75, 90, 50, 85, 95, 65, 70, 75, 90, 15, 50, 20, 30, 35, 15, 45, 65, 60, 30, 25, 40, 20, 55, 45, 15, 20, 50, 45, 55, 80, 85, 75, 80, 95, 90, 85, 90, 75, 90, 10, 40, 50, 40, 70, 100, 60, 50, 40, 60, 10, 40, 40, 40, 40, 40, 40, 40, 40, 80, 10, 40, 40, 40, 60, 60, 70, 40, 30, 40, 10, 40, 40, 40, 50, 5, 30, 20, 10, 20, 10, 40, 40, 40, 50, 30, 40, 40, 30, 80, 10, 30, 70, 40, 40, 30, 30, 30, 30, 30, 10, 40, 40, 40, 40, 30, 40, 40, 80, 40, 90, 10, 10, 10, 10, 10, 10, 10, 10, 10]


## II. LLM-Based Matching

In [17]:
# Prompt sent to the LLM for every (CV, job) pair. It is filled with
# str.format(), so {cv_json} and {job_json} are the two placeholders and the
# doubled braces {{ }} render as literal braces in the example JSON.
prompt_template = """
        You are a professional career advisor. Based on the candidate's CV and the job description, analyze and return structured feedback:

        CV:
        {cv_json}

        Job Description:
        {job_json}

        Return a JSON response in this format:
        {{
        "match_score": 0-100,
        "missing_skills": ["skill1", "skill2", "..."],
        "recommendations": [
            {{
            "skill": "skill name",
            "course": "course name",
            "link": "course url"
            }}
        ],
        "summary": "Short advice summary (3-4 lines)"
        }}

        Only return valid JSON. Do not include explanation or code block markers.
    """

def matching_score_llm(cv_info: dict, job_info: dict, timeout: float = 120.0) -> dict:
    """Ask the LLM to score how well a CV matches a job posting.

    The CV and job dicts are serialised to JSON, inserted into
    ``prompt_template`` and sent to the chat-completions endpoint at
    ``API_URL`` using ``MODEL_NAME`` and ``API_KEY``.

    Args:
        cv_info: Structured CV data; must be JSON-serialisable.
        job_info: Structured job data; must be JSON-serialisable.
        timeout: Seconds to wait for the HTTP call. Without a timeout,
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        On success, the JSON object returned by the model (the prompt asks
        for ``match_score``, ``missing_skills``, ``recommendations`` and
        ``summary``). On any failure, a dict with ``error`` (the exception
        text) and ``raw_response`` (the HTTP body, or ``None`` if no response
        was received). This function never raises for request/parse errors.
    """
    prompt = prompt_template.format(
        cv_json=json.dumps(cv_info, ensure_ascii=False),
        job_json=json.dumps(job_info, ensure_ascii=False)
    )

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3
    }

    # Bound before the try so the error path can inspect it without locals() tricks.
    response = None
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        content = result["choices"][0]["message"]["content"].strip()
        # Models sometimes wrap the JSON in ``` / ```json fences despite the prompt.
        content = re.sub(r"^```(?:json)?|```$", "", content.strip())
        parsed = json.loads(content)
        if not isinstance(parsed, dict):
            # Honour the declared dict return type; callers index by key.
            raise ValueError(f"Expected a JSON object, got {type(parsed).__name__}")
        return parsed
    except Exception as e:
        # Deliberate best-effort: report the failure as data so a batch run
        # can decide whether to retry or abort.
        return {
            "error": str(e),
            "raw_response": response.text if response is not None else None
        }


Chuẩn bị dữ liệu CV

In [18]:
# Folder holding the CV PDFs. Set the CV_FOLDER environment variable to run
# on another machine; the default is the original author's local path.
cv_folder = os.getenv("CV_FOLDER", r"E:\ML-Final-Project\app\backend\database")

cvs_info = []
# sorted(): os.listdir() returns entries in arbitrary order, but the CV order
# matters -- the LLM scores built from cvs_info are compared element-wise with
# the flattened survey scores, so file order has to be deterministic.
# NOTE(review): assumes sorted file names (01.pdf, 02.pdf, ...) follow the
# same order as the survey rows (Cv1, Cv2, ...) -- confirm.
for filename in sorted(os.listdir(cv_folder)):
    if filename.lower().endswith(".pdf"):
        filepath = os.path.join(cv_folder, filename)
        print(f"Đang xử lý: {filepath}")
        text = extract_text_from_pdf(filepath)
        info = extract_info(text)
        cvs_info.append(info)

cvs_info[0]

Đang xử lý: E:\ML-Final-Project\app\backend\database\01.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\02.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\03.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\04.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\05.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\06.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\07.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\08.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\09.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\10.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\11.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\12.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\13.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\14.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\15.pdf
Đang xử lý: E:\ML-Final-Project\app\backend\database\16.pdf
Đang xử lý: E:\ML-Final-Project\app\back

{'full_name': 'JR Sabado Tweetie',
 'email': 'sabadotweetie@gmail.com',
 'phone': '+639 17887 1043',
 'job_title': 'Software Engineer',
 'education': [{'degree': 'Bachelor of Science in Information Systems',
   'university': 'University of Santo Tomas'}],
 'experience': [{'job_title': 'Software Engineer',
   'company': 'Infor, PSSC, Inc.',
   'description': 'Maintained current Homepages and developed additional Widgets App. Working on H5A, a migration of their H5 Classic App which was built using Java, to Angular. Primary Component Owner of the following components under H5A: Focus, Charting, Auto scroll, Split Screen.'},
  {'job_title': 'Application Development Analyst',
   'company': 'Accenture, Inc.',
   'description': "Responsible for the development of web portal for one of Accenture's diamond clients. Deliver front-end development tasks using AngularJS, HTML and CSS while assisting and helping fellow developers in maintaining a clean source code. Performance improvement and Code 

Chuẩn bị dữ liệu Job

In [25]:
# Job postings used for the evaluation.
# NOTE(review): this rebinds `df`, which an earlier cell used for the survey
# table; re-running the survey-flattening cell after this one would read the
# job table instead.
csv_path = r"../database/job_evaluate.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

# Columns whose cell text is parsed with ast.literal_eval() into Python
# objects (lists in the data shown in this notebook).
list_fields = [
    "address", "date_posted", "industry", "company_size", "company_nationality",
    "experience_years", "position_level", "employment_type", "contract_type",
    "technologies_used", "job_description"
]

def normalize_job_row(row):
    """Convert one job record into a plain dict ready for JSON serialisation.

    Args:
        row: A mapping-like record supporting ``row.keys()`` and ``row[col]``
            (a pandas Series from ``DataFrame.iterrows()`` or a plain dict).

    Returns:
        dict: Every column of the record except ``job_url`` and
        ``company_url``. Columns named in ``list_fields`` are parsed with
        ``ast.literal_eval``; a cell that cannot be parsed (malformed text,
        NaN, ...) becomes an empty list.
    """
    job = {}
    # Iterate over the row's own keys rather than a global DataFrame, so the
    # result depends only on the argument.
    for col in row.keys():
        if col in list_fields:
            try:
                job[col] = ast.literal_eval(row[col])
            except (ValueError, SyntaxError, TypeError):
                # Only parse failures are swallowed; KeyboardInterrupt and
                # other unrelated errors now propagate.
                job[col] = []
        elif col not in ['job_url', 'company_url']:
            job[col] = row[col]
    return job

# One normalised dict per job row, in file order.
jobs_info = list(map(normalize_job_row, (pair[1] for pair in df.iterrows())))
jobs_info[0]

{'job_title': 'UX/UI Designer (Game)',
 'company_name': 'CÔNG TY CỔ PHẦN SUNTEK',
 'salary': 'Thương lượng',
 'address': ['Thành phố Thủ Đức, Hồ Chí Minh'],
 'date_posted': ['Đăng 1 giờ trước'],
 'industry': ['Giải trí/ Game'],
 'company_size': ['25-99 Nhân viên'],
 'company_nationality': ['Thailand'],
 'experience_years': ['Từ 2 năm'],
 'position_level': ['Junior', 'Middle'],
 'employment_type': ['In Office'],
 'contract_type': ['Fulltime'],
 'technologies_used': ['UX/UI Design', 'HTML & CSS', 'UI/UX'],
 'job_description': ['Trách nhiệm công việc\n1. General task\nHave aesthetic thinking, color coordination and layout...\nCan use one of the product design tools such as: Figma, Adobe Illustrator, Photoshop...\nAbility to take clear notes in design files.\nAbility to organize design documents scientifically.\nDesign the interface of menus, buttons, tabs, pop-ups, and graphical user interface elements.\nCreate user interface mockups and prototypes that clearly demonstrate how the website

Tạo ra 23 x 10 = 230 cặp (cv, job) để đưa vào LLM

In [26]:
# Cartesian product in CV-major order: every job for the first CV, then every
# job for the second CV, and so on (23 CVs x 10 jobs expected).
cv_job_pairs = [(cv, job) for cv in cvs_info for job in jobs_info]

print(f"Tổng số cặp CV - Job: {len(cv_job_pairs)}")
print(cv_job_pairs[0])

Tổng số cặp CV - Job: 230
({'full_name': 'JR Sabado Tweetie', 'email': 'sabadotweetie@gmail.com', 'phone': '+639 17887 1043', 'job_title': 'Software Engineer', 'education': [{'degree': 'Bachelor of Science in Information Systems', 'university': 'University of Santo Tomas'}], 'experience': [{'job_title': 'Software Engineer', 'company': 'Infor, PSSC, Inc.', 'description': 'Maintained current Homepages and developed additional Widgets App. Working on H5A, a migration of their H5 Classic App which was built using Java, to Angular. Primary Component Owner of the following components under H5A: Focus, Charting, Auto scroll, Split Screen.'}, {'job_title': 'Application Development Analyst', 'company': 'Accenture, Inc.', 'description': "Responsible for the development of web portal for one of Accenture's diamond clients. Deliver front-end development tasks using AngularJS, HTML and CSS while assisting and helping fellow developers in maintaining a clean source code. Performance improvement and 

In [27]:
# matching_score_llm() reports failures as a dict without "match_score".
# Retry a few times, then stop with a clear message instead of a bare KeyError
# that hides which pair failed and why.
LLM_MAX_ATTEMPTS = 3

match_llm_results = []
for pair_idx, (cv, job) in enumerate(cv_job_pairs):
    for _attempt in range(LLM_MAX_ATTEMPTS):
        result = matching_score_llm(cv, job)
        if isinstance(result, dict) and "match_score" in result:
            break
    else:
        # All attempts failed for this pair.
        raise RuntimeError(
            f"LLM scoring failed for pair #{pair_idx} after "
            f"{LLM_MAX_ATTEMPTS} attempts: {result!r}"
        )
    match_llm_results.append(result["match_score"])

In [28]:
# Show the LLM scores in the same pair order as the human scores.
print("Matching scores LLM:", match_llm_results)

Matching scores LLM: [30, 30, 40, 40, 60, 75, 75, 40, 40, 70, 20, 60, 60, 30, 40, 60, 40, 40, 65, 75, 40, 30, 40, 40, 65, 60, 60, 40, 40, 50, 20, 70, 20, 20, 85, 85, 75, 60, 70, 60, 20, 65, 30, 20, 70, 85, 85, 60, 70, 60, 20, 60, 30, 60, 60, 60, 30, 60, 65, 30, 10, 85, 30, 20, 60, 30, 30, 60, 85, 60, 10, 70, 40, 30, 75, 60, 60, 60, 75, 70, 30, 60, 60, 30, 85, 75, 75, 40, 70, 60, 30, 60, 60, 20, 40, 60, 75, 40, 60, 60, 20, 60, 20, 30, 65, 30, 30, 40, 80, 30, 20, 85, 20, 20, 60, 60, 60, 60, 85, 85, 20, 60, 20, 20, 65, 30, 30, 40, 75, 30, 20, 30, 40, 75, 60, 30, 30, 40, 40, 30, 20, 85, 60, 20, 75, 85, 85, 60, 85, 60, 60, 30, 30, 30, 75, 85, 85, 30, 40, 40, 20, 20, 40, 60, 40, 40, 50, 40, 30, 70, 20, 10, 30, 40, 30, 30, 50, 30, 20, 60, 40, 20, 60, 40, 60, 65, 75, 30, 30, 40, 20, 20, 75, 40, 40, 40, 40, 30, 20, 40, 30, 20, 30, 30, 30, 30, 40, 30, 30, 30, 10, 30, 30, 60, 50, 30, 30, 40, 40, 40, 70, 10, 10, 20, 20, 30, 10, 10, 10, 10]


## III. Phân tích độ chính xác (MAE, MSE, hệ số tương quan)

In [29]:
# Mean absolute error: human scores are the reference, LLM scores the prediction.
mae = mean_absolute_error(y_true=matching_score_human, y_pred=match_llm_results)
print(f"MAE: {mae:.2f}")

MAE: 15.50


In [30]:
# Mean squared error: human scores are the reference, LLM scores the prediction.
mse = mean_squared_error(y_true=matching_score_human, y_pred=match_llm_results)
print(f"MSE: {mse:.2f}")

MSE: 429.02


In [31]:
# Pearson correlation between human and LLM scores; the p-value is discarded.
# `stats` is scipy.stats, imported in the notebook's top import cell.
corr, _ = stats.pearsonr(matching_score_human, match_llm_results)
print(f"Correlation: {corr:.2f}")

Correlation: 0.62


**Nhận xét**

LLM chưa tái hiện đầy đủ đánh giá của con người:
 - Một số cặp điểm có sự chênh lệch khá lớn, thể hiện ở MSE cao (≈ 429, tương ứng RMSE ≈ 20.7 điểm).
 
 - MAE ở mức ~15.5 nghĩa là trung bình điểm của LLM lệch khoảng 15.5 điểm so với điểm của con người; ví dụ, với một CV được con người chấm 80 điểm, LLM thường cho khoảng 65 hoặc 95, gây sai lệch đáng kể trong phân loại.


Tuy nhiên vẫn có xu hướng khớp tổng thể:
 - Hệ số tương quan 0.62 cho thấy LLM nắm được xu hướng chung (CV tốt thì điểm vẫn cao, CV yếu thì vẫn thấp), nhưng chưa định lượng sát như con người.