## Install dependencies

In [None]:
# gradio backs the optional web UI cell at the end of this notebook.
%pip install gradio
# bitsandbytes / accelerate: presumably needed by the 4-bit quantized model load
# (BitsAndBytesConfig with device_map="auto") further down — confirm before removing.
%pip install bitsandbytes accelerate



## Log in to Hugging Face

In [1]:
from getpass import getpass
import os

from huggingface_hub import login

# Prefer a token supplied through the environment (for example a Colab secret
# exported as HUGGINGFACE_TOKEN) so it never has to be typed into the notebook.
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

if not HF_TOKEN:
    # Fall back to an interactive prompt. getpass() hides what is typed, so the
    # token is not echoed into the saved cell output the way input() would be.
    HF_TOKEN = getpass("🔑 Enter your Hugging Face token: ").strip()

login(token=HF_TOKEN)


## Create test data file

In [None]:
import os
from pathlib import Path

# Directory layout: every scored JSON lives in the scoring directory, the subset
# used for fine-tuning is mirrored in FT_data, and the held-out list is written
# under non-ft-test-data.
JSON_OUTPUT_SCORING_DIR = Path("json_outputs_all_data/scoring")
JSON_OUTPUT_SCORING_FT_DATA = Path("json_outputs_all_data/scoring/FT_data")
JSON_OUTPUT_FINE_TUNE_TEST_DATA = Path("json_outputs_all_data/scoring/non-ft-test-data")

# 🔍 Get all scored filenames (ending in .json); glob("*.json") is non-recursive,
# so files inside the sub-directories are not picked up here.
all_scored_files = {
    f.name for f in JSON_OUTPUT_SCORING_DIR.glob("*.json")
}

# 📂 Get all filenames used in fine-tuning (assuming full file names are used)
used_in_ft_files = {
    f.name for f in JSON_OUTPUT_SCORING_FT_DATA.glob("*.json")
}

# 🚫 Get files not used in FT (sorted so the output file is stable across runs)
unused_files = sorted(all_scored_files - used_in_ft_files)

# 💾 Save list to file. open(..., "w") does not create missing parent folders,
# so make sure the destination directory exists first.
JSON_OUTPUT_FINE_TUNE_TEST_DATA.mkdir(parents=True, exist_ok=True)
output_file = JSON_OUTPUT_FINE_TUNE_TEST_DATA / "unused_for_ft_test_files.txt"
with open(output_file, "w", encoding="utf-8") as f:
    f.write("\n".join(unused_files))

print(f"✅ Found {len(unused_files)} unused files. List saved to {output_file}")


# Step-by-Step Implementation

### Setup Dependencies

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from pathlib import Path
from IPython.display import display, Markdown
import gradio as gr  # Optional interactive UI


### Load Fine-Tuned Model

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

MODEL_PATH = "rubsj/Qwen2-Resume-ATS"  # Hub repo id; a local path works too

# Tokenizer: loaded as-is, quantization only applies to the model weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

# 4-bit quantization settings handed to the model loader below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",  # "fp4" is the alternative
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Model: quantized on load, with automatic GPU/CPU placement.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
# Switch to inference mode; left as the last expression so the cell still
# displays the module tree.
model.eval()


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=3584, out_features=3584, bias=True)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=3584, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=3584, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): Linear4bit(in_features=3584, out_features=512, bias=True)
          (v_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=3584, ou

### Define Inference Function

In [4]:
# Prompt template for the scoring model, filled in with str.format().
# - The only substitution fields are {resume_text} and {jd_text}.
# - The schema's literal braces are doubled ({{ ... }}) so that format() emits
#   them as single braces instead of treating them as placeholders.
# - Turns are delimited with <|im_start|>/<|im_end|> markers and the template
#   ends on an open assistant turn, so generation continues from there.
SCORING_PROMPT_TEMPLATE = """
<|im_start|>system
You are an ATS scoring assistant. Your job is to evaluate how well a resume matches a job description using the JSON schema provided below.

- Respond with valid JSON only.
- All float values must be between 0.0 and 1.0.
- Include all fields, even if the score is 0.0.
- Do not explain, translate, or add anything outside the JSON.

<|im_end|>
<|im_start|>user
Evaluate the resume against the job description using this schema:

Schema:
{{
  "final_ats_score": float,
  "certifications": float,
  "education": float,
  "experience": float,
  "grammar_cleanliness": float,
  "leadership": float,
  "responsibilities": float,
  "skills": float,
  "soft_skills": float,
  "tools": float,
  "transferable_skills": float
}}

<RESUME>
{resume_text}
</RESUME>

<JD>
{jd_text}
</JD>
<|im_end|>
<|im_start|>assistant
"""


In [5]:
# Score fields every result must contain, in output order: the overall score
# first, then the per-category scores. Mirrors the schema in the prompt.
EXPECTED_KEYS = [
    "final_ats_score",
    "certifications",
    "education",
    "experience",
    "grammar_cleanliness",
    "leadership",
    "responsibilities",
    "skills",
    "soft_skills",
    "tools",
    "transferable_skills",
]


In [16]:
import json
import regex  # ✅ Must use `regex` for recursive pattern support
from typing import Union, Dict

def extract_and_parse_json(text: str) -> Union[Dict, None]:
    """
    Extract the first valid JSON object embedded in ``text``.

    Every ``{`` in the text is tried, left to right, as the possible start of
    a JSON object. Parsing is delegated to ``json.JSONDecoder.raw_decode``,
    which stops at the end of the object and ignores whatever follows, so
    surrounding chatter does not matter. Because the real JSON parser decides
    where the object ends, braces inside string values (``{"a": "}"}``) are
    handled correctly, and a valid object nested inside an invalid outer
    ``{...}`` span is still found.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The first successfully parsed object as a dict, or None if the text
        contains no parseable JSON object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass  # not a valid object at this brace; try the next one
        else:
            if isinstance(parsed, dict):
                return parsed
        start = text.find("{", start + 1)
    return None


In [17]:
from typing import Dict, Tuple

def fill_missing_scores(score_dict: Dict, expected_keys=None) -> Dict:
    """Return a dict with every expected score field present as a float.

    Args:
        score_dict: Parsed model output; may lack fields or hold values that
            are not numbers (e.g. ``None`` or ``"n/a"``).
        expected_keys: Optional iterable of field names to emit. Defaults to
            the module-level ``EXPECTED_KEYS``.

    Returns:
        A new dict containing exactly the expected keys, in order. A field
        that is missing, or whose value cannot be converted with ``float()``,
        is reported as 0.0 instead of raising.
    """
    keys = EXPECTED_KEYS if expected_keys is None else expected_keys
    filled = {}
    for key in keys:
        try:
            filled[key] = float(score_dict.get(key, 0.0))
        except (TypeError, ValueError):
            # Model emitted null / text for this field; treat it as unscored.
            filled[key] = 0.0
    return filled



def score_resume_vs_jd(resume_text: str, jd_text: str) -> Tuple[str, str]:
    """Score a resume against a job description with the loaded model.

    Args:
        resume_text: Plain-text resume, inserted into the <RESUME> section.
        jd_text: Plain-text job description, inserted into the <JD> section.

    Returns:
        A ``(output_text, status)`` pair. When a JSON object can be extracted
        from the model's reply, ``output_text`` is that object patched to the
        full score schema and pretty-printed; otherwise it is the raw reply.
        ``status`` is a short human-readable message saying which case applied.
    """
    prompt = SCORING_PROMPT_TEMPLATE.format(resume_text=resume_text, jd_text=jd_text)

    # NOTE(review): with the tokenizer's default truncation side (presumably
    # "right"), prompts longer than 2048 tokens lose their tail, including the
    # closing tags and the assistant marker. Confirm long inputs are acceptable.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id
        )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Decode only the continuation so that JSON pasted into the
    # resume/JD can never be mistaken for the model's answer, and so the
    # fallback below shows the reply rather than the whole prompt.
    generated_tokens = outputs[0][prompt_length:]
    decoded = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    json_result = extract_and_parse_json(decoded)
    if isinstance(json_result, dict):
        # patch partial output into full ATS schema
        patched_result = fill_missing_scores(json_result)
        return json.dumps(patched_result, indent=2), "✅ Valid or partial JSON parsed"

    # fallback for complete failure
    return decoded, "⚠️ JSON decode failed (invalid structure)"


### Paste-In Interface (Notebook Widgets)

In [None]:
import ipywidgets as widgets

# Two free-text inputs, a trigger button, and an output pane for the result.
resume_box = widgets.Textarea(placeholder='Paste resume text here', layout=widgets.Layout(width='100%', height='200px'))
jd_box = widgets.Textarea(placeholder='Paste job description text here', layout=widgets.Layout(width='100%', height='200px'))
run_button = widgets.Button(description="Score Resume")
output_area = widgets.Output()

def on_run_clicked(b):
    """Score the pasted resume/JD and render the result in ``output_area``.

    ``b`` is the argument ipywidgets passes to click handlers; it is unused.
    """
    output_area.clear_output()
    with output_area:
        # score_resume_vs_jd returns (output_text, status); unpack it so the
        # text is rendered with real line breaks instead of a tuple repr.
        scores_text, status = score_resume_vs_jd(resume_box.value, jd_box.value)
        display(Markdown(f"### 🔍 ATS Score Output\n\n{status}\n\n```\n{scores_text}\n```"))

run_button.on_click(on_run_clicked)
display(resume_box, jd_box, run_button, output_area)


### Optional: Gradio Interface (if you prefer a web-style UI)

In [8]:
# Sanity check: report which checkpoint names the model config and the tokenizer carry.
print(f"Model loaded: {model.config.name_or_path}")
print(f"Tokenizer loaded: {tokenizer.name_or_path}")


Model loaded: Qwen/Qwen2-7B-Instruct
Tokenizer loaded: rubsj/Qwen2-Resume-ATS


In [18]:
def gradio_interface(resume, jd):
    """Gradio callback: forward both text boxes to the scorer.

    Returns whatever score_resume_vs_jd returns, which feeds the two output
    text boxes.
    """
    scored = score_resume_vs_jd(resume, jd)
    return scored

# Shut down any servers left over from earlier runs of this cell.
gr.close_all()

demo = gr.Interface(
    fn=gradio_interface,
    inputs=["textbox", "textbox"],
    outputs=["textbox", "textbox"],
    title="Resume vs JD Scorer (with Debug Info)",
    description="Paste resume and JD, see model output + debug tokens info",
)
demo.launch()


Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
