In [1]:
!pip install gradio
!pip install python-docx
!pip install PyPDF2
!pip install sentence-transformers


Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu1

In [2]:
import os

import docx
import gradio as gr
import pandas as pd
import PyPDF2
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model, loaded once at import time and shared by every
# match_resumes call.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

def extract_text(file):
    """Return the plain text of an uploaded PDF, DOCX, or TXT file.

    Args:
        file: Either a filesystem path (``str``) or an object whose ``name``
            attribute holds the path of the uploaded file. The file type is
            chosen from the path's extension, compared case-insensitively.

    Returns:
        The extracted text, or ``""`` when the extension is not one of
        ``.pdf``, ``.docx`` or ``.txt``.
    """
    # Always work from the on-disk path: a plain path string has no ``read()``,
    # and reading by path does not depend on the position of an open handle.
    path = str(getattr(file, "name", file))
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        reader = PyPDF2.PdfReader(path)
        # Extract each page exactly once and skip pages that yield no text.
        page_texts = (page.extract_text() for page in reader.pages)
        # Join with a newline so the last word of one page does not fuse with
        # the first word of the next.
        return "\n".join(text for text in page_texts if text)

    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    if lowered.endswith(".txt"):
        with open(path, "rb") as handle:
            # utf-8-sig drops a leading BOM; undecodable bytes are replaced so
            # a single badly encoded file does not abort the whole batch.
            return handle.read().decode("utf-8-sig", errors="replace")

    return ""

def match_resumes(jd_file, resume_files):
    """Rank uploaded resumes by embedding similarity to a job description.

    Args:
        jd_file: Uploaded job description (anything ``extract_text`` accepts),
            or ``None`` when nothing was uploaded.
        resume_files: Iterable of uploaded resumes, or ``None``/empty when
            nothing was uploaded.

    Returns:
        A ``(scoreboard, summary)`` tuple. ``scoreboard`` is a DataFrame with
        the columns ``"Candidate Name"`` and ``"Match Score (%)"`` sorted by
        score, highest first. ``summary`` is a Markdown string naming the top
        candidate. When there is nothing to rank, the DataFrame is empty (but
        keeps both columns) and the summary explains why.
    """
    columns = ["Candidate Name", "Match Score (%)"]

    def nothing_to_rank(reason):
        # Empty scoreboard plus a readable message instead of a traceback.
        return pd.DataFrame(columns=columns), f"⚠️ {reason}"

    # Either upload may be missing when the form is submitted incomplete.
    if jd_file is None or not resume_files:
        return nothing_to_rank("Please upload a job description and at least one resume.")

    jd_text = extract_text(jd_file)
    if not jd_text.strip():
        return nothing_to_rank("No text could be extracted from the job description.")
    jd_embedding = model.encode(jd_text, convert_to_tensor=True)

    scores = []
    for resume_file in resume_files:
        resume_text = extract_text(resume_file)
        stripped = resume_text.strip()

        if stripped:
            # First non-blank line is treated as the candidate's name.
            candidate_name = stripped.splitlines()[0].strip()
            resume_embedding = model.encode(resume_text, convert_to_tensor=True)
            similarity = util.pytorch_cos_sim(jd_embedding, resume_embedding)
            score = round(similarity.item() * 100, 2)
        else:
            # Nothing could be extracted: keep the candidate visible under the
            # file name, with a zero score rather than the similarity of an
            # empty string.
            candidate_name = os.path.basename(str(getattr(resume_file, "name", resume_file)))
            score = 0.0

        scores.append({"Candidate Name": candidate_name, "Match Score (%)": score})

    # Create and sort DataFrame
    df = pd.DataFrame(scores, columns=columns)
    df = df.sort_values(by="Match Score (%)", ascending=False).reset_index(drop=True)

    # Top match
    top = df.iloc[0]
    top_summary = f"""
## 🏆 Most Eligible Candidate

**{top['Candidate Name']}**
✔️ Match Score: **{top['Match Score (%)']}%**

---

## 📊 All Candidate Scores:
"""

    return df, top_summary

# Gradio UI: one job-description upload and one multi-file resume upload feed
# match_resumes; its results are shown as a table plus a Markdown summary.
jd_upload = gr.File(
    label="📄 Upload Job Description",
    file_types=[".pdf", ".docx", ".txt"],
)
resumes_upload = gr.File(
    label="📁 Upload Multiple Resumes",
    file_types=[".pdf", ".docx", ".txt"],
    file_count="multiple",
)
scoreboard_table = gr.Dataframe(label="Candidate Scoreboard")
summary_panel = gr.Markdown(label="Top Match Summary")

demo = gr.Interface(
    fn=match_resumes,
    inputs=[jd_upload, resumes_upload],
    outputs=[scoreboard_table, summary_panel],
    title="💼 AI Resume Matcher with Real Names",
    description="Upload a Job Description and up to 10 Resumes. Candidate names are extracted from resumes and ranked by match score.",
)
demo.launch()





The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://dea3f7ed8d3278df12.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


