In [18]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import pandas as pd
import spacy
from transformers import pipeline
from flair.models import TARSClassifier
from flair.data import Sentence
from tqdm import tqdm
from transformers import pipeline

In [19]:
#question-answering pipeline 
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

qa_pipeline({
    "context": "This job requires 5 to 7 years of experience in Python and data science.",
    "question": "What is the experience required?"
})

{'score': 0.7795524597167969,
 'start': 48,
 'end': 71,
 'answer': 'Python and data science'}

In [20]:
#Loading NLP Models
nlp = spacy.load("en_core_web_sm")
bert_qa = pipeline("question-answering", model="deepset/bert-base-cased-squad2")
roberta_qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
distilbert_qa = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

In [21]:
#TARS Model (zero-shot classifier)
tars = TARSClassifier.load("tars-base")

2025-05-06 16:41:12,383 TARS initialized without a task. You need to call .add_and_switch_to_new_task() before training this model


In [22]:
#Loading Excel File
df = pd.read_excel("Job Descriptions.xlsx")
df["JD"] = df["Job Descriptions"].astype(str)  

In [23]:
#Helper function to extract experience with spaCy
def extract_with_spacy(text):
    doc = nlp(text)
    experience_phrases = []
    for ent in doc.ents:
        if ent.label_ == "DATE" and "year" in ent.text.lower():
            experience_phrases.append(ent.text)
    return ", ".join(experience_phrases) if experience_phrases else None

In [24]:
#Used QA models to extract experience
def extract_with_qa(text, model, model_name):
    try:
        result = model({
            'context': text,
            'question': 'What is the total experience required for this job?'
        })
        return result['answer']
    except:
        return None

In [25]:
#Used TARS model
def extract_with_tars(text):
    try:
        sentence = Sentence(text)
        tars.predict_zero_shot(sentence, 
            candidate_labels=[
                "0-1 years", "1-3 years", "3-5 years", 
                "5-7 years", "7+ years", "10+ years", "experience not specified"
            ],
            multi_label=False)
        return sentence.labels[0].value
    except:
        return None

In [26]:
#output columns
tqdm.pandas()
df["spaCy_Experience"] = df["JD"].progress_apply(extract_with_spacy)
df["BERT_Experience"] = df["JD"].progress_apply(lambda x: extract_with_qa(x, bert_qa, "BERT"))
df["RoBERTa_Experience"] = df["JD"].progress_apply(lambda x: extract_with_qa(x, roberta_qa, "RoBERTa"))
df["DistilBERT_Experience"] = df["JD"].progress_apply(lambda x: extract_with_qa(x, distilbert_qa, "DistilBERT"))
df["TARS_Experience"] = df["JD"].progress_apply(extract_with_tars)

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:08<00:00, 24.08it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:16<00:00, 12.34it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:21<00:00,  9.26it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:08<00:00, 22.63it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 330.69it/s]


In [27]:
#saved output to Excel
df.to_excel("Extracted_Experience_Comparison.xlsx", index=False)
print("Output saved to Extracted_Experience_Comparison.xlsx")

Output saved to Extracted_Experience_Comparison.xlsx
