<a href="https://colab.research.google.com/github/satvikipandey10-cpu/HR-hiring-project/blob/main/HRHriningcode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# ===============================
# STEP 1: Install dependencies
# ===============================
# The leading "!" is a notebook shell escape, so this line only runs inside
# IPython/Jupyter/Colab and is not valid in a plain Python script.
# Packages map to the imports below: google-generativeai -> genai,
# PyPDF2 -> PdfReader, python-docx -> docx, gitpython -> git, rapidfuzz -> fuzz.
# NOTE(review): python-dotenv is installed but not imported in the code shown here.
!pip install -q google-generativeai PyPDF2 python-docx pandas python-dotenv requests gitpython rapidfuzz

# ===============================
# STEP 2: Imports
# ===============================
import os
import re
import requests
import pandas as pd
from PyPDF2 import PdfReader
import docx
from google.colab import files
import google.generativeai as genai
import tempfile
import git
from pathlib import Path
from rapidfuzz import fuzz
import json

# ===============================
# STEP 3: Gemini API Setup
# ===============================
# Secrets must never be committed with the notebook. The key is read from the
# GOOGLE_API_KEY environment variable when it is already set; otherwise it is
# requested interactively (getpass does not echo the typed value).
# NOTE(review): the key that used to be hard-coded on this line has been
# published with the notebook and should be revoked and rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Gemini API key: ")
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# ===============================
# STEP 4: Upload resumes
# ===============================
# Opens the Colab upload widget; the returned mapping is keyed by filename.
uploaded_files = files.upload()

# ===============================
# STEP 5: Read uploaded resumes
# ===============================
# Maps each supported uploaded filename to the plain text extracted from it.
resumes_text = {}

for filename in uploaded_files:
    # Compare extensions case-insensitively so "CV.PDF" is not silently dropped.
    lowered_name = filename.lower()
    if lowered_name.endswith('.pdf'):
        reader = PdfReader(filename)
        # "or ''" guards against a page that yields no text (e.g. a scanned
        # image); join avoids repeated string concatenation.
        resumes_text[filename] = "".join(
            page.extract_text() or "" for page in reader.pages
        )
    elif lowered_name.endswith('.docx'):
        doc = docx.Document(filename)
        resumes_text[filename] = "\n".join(para.text for para in doc.paragraphs)
    else:
        # Report the skip instead of dropping the file without a trace.
        print(f"Skipping {filename}: unsupported file type (expected .pdf or .docx)")

# ===============================
# STEP 6: Take Job Description input
# ===============================
# Free-text job description that every resume is compared against later on.
job_description = input("Enter the Job Description: ")

# ===============================
# STEP 7: Helper Functions
# ===============================

def extract_github_links(text):
    """Extract GitHub repo links from resume text.

    Args:
        text: Resume text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of unique ``http(s)://github.com/<owner>/<repo>`` URLs in the
        order they first appear. Profile-only links (no repo segment) are not
        matched.
    """
    if not text:
        return []
    # The repo segment must end on a non-dot character so that a
    # sentence-ending period ("...github.com/foo/bar.") is not swallowed into
    # the repo name, which would make the later clone fail.
    pattern = r'https?://github\.com/[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]*[A-Za-z0-9_-]'
    links = re.findall(pattern, text)
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # same repository is not cloned and analysed twice.
    return list(dict.fromkeys(links))

def list_code_files(repo_dir, exts=(".py", ".java", ".cpp", ".js", ".ts")):
    """Collect the source files under ``repo_dir`` whose extension is in ``exts``.

    The tree is walked recursively. Extensions are compared in lower case, and
    only regular files are returned, so a directory named like ``pkg.py`` is
    excluded. The result is a list of ``pathlib.Path`` objects in traversal order.
    """
    root = Path(repo_dir)
    return [
        entry
        for entry in root.rglob("*")
        if entry.suffix.lower() in exts and entry.is_file()
    ]

def github_code_search(snippet, language=None, per_page=3):
    """Search GitHub for similar code.

    Args:
        snippet: Text searched for as an exact phrase.
        language: Optional value for the ``language:`` search qualifier.
        per_page: Maximum number of hits requested.

    Returns:
        The ``items`` list of the search response, or ``[]`` when the request
        fails or GitHub answers with a non-200 status.

    GitHub's code search endpoint rejects anonymous requests, so a token is
    taken from the ``GITHUB_TOKEN`` environment variable when it is set.
    Without it every call ends up in the "no matches" branch.
    """
    q = f'"{snippet}"'
    if language:
        q += f' language:{language}'
    url = "https://api.github.com/search/code"
    headers = {"Accept": "application/vnd.github.v3+json"}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(
            url,
            params={"q": q, "per_page": per_page},
            headers=headers,
            timeout=15,
        )
    except requests.RequestException as exc:
        print(f"GitHub code search failed: {exc}")
        return []
    if r.status_code == 200:
        return r.json().get("items", [])
    # Surface auth / rate-limit / validation failures instead of silently
    # reporting "no matches".
    print(f"GitHub code search returned HTTP {r.status_code}; treating as no matches")
    return []

def normalize_code(code):
    """Collapse every run of whitespace in ``code`` to a single space.

    Leading and trailing whitespace is removed as well, so the result is a
    single-line, single-spaced version of the input.
    """
    tokens = code.split()
    return " ".join(tokens)

def _parse_judge_response(raw_text):
    """Turn the model's raw reply into a verdict dict with all expected keys."""
    fallback = {"verdict": "Unknown", "confidence": 0.0, "top_evidence": [], "reasoning": raw_text}
    cleaned = raw_text.strip()
    # Models frequently wrap JSON in a Markdown code fence; unwrap it first.
    fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", cleaned, re.DOTALL | re.IGNORECASE)
    if fenced:
        cleaned = fenced.group(1)
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        return fallback
    if not isinstance(parsed, dict):
        return fallback
    # Callers index "verdict" and "confidence" directly, so make sure every
    # expected key exists even when the model omitted it.
    parsed.setdefault("verdict", "Unknown")
    parsed.setdefault("confidence", 0.0)
    parsed.setdefault("top_evidence", [])
    parsed.setdefault("reasoning", "")
    return parsed


def call_gemini_judge(candidate_snippet, top_matches_info):
    """Call Gemini LLM to decide originality.

    Args:
        candidate_snippet: Code taken from the candidate's repository; only the
            first 2000 characters are sent to the model.
        top_matches_info: JSON-serialisable list describing the public matches.

    Returns:
        A dict with the keys ``verdict``, ``confidence``, ``top_evidence`` and
        ``reasoning``. When the reply is not usable JSON the verdict is
        ``"Unknown"`` with confidence ``0.0`` and the raw reply (or the reason
        no text was available) is placed in ``reasoning``.
    """
    model = genai.GenerativeModel("gemini-1.5-flash")
    prompt = f"""
You are a code originality analyst.

Candidate code snippet:
\"\"\"{candidate_snippet[:2000]}\"\"\"

Top matches from public repos:
{json.dumps(top_matches_info, indent=2)}

Based on this, return JSON:
{{
"verdict": "Copied"|"Possibly Copied"|"No Match",
"confidence": 0.0-1.0,
"top_evidence": [{{"url","similarity_token"}}],
"reasoning": "short explanation"
}}
"""
    resp = model.generate_content(prompt)
    try:
        # Reading .text can itself fail when the response carries no text part
        # (for example a blocked reply), so read it exactly once, guarded.
        raw_text = resp.text
    except ValueError as exc:
        return {
            "verdict": "Unknown",
            "confidence": 0.0,
            "top_evidence": [],
            "reasoning": f"Gemini returned no text: {exc}",
        }
    return _parse_judge_response(raw_text)

# ===============================
# STEP 8: Analyze resumes & GitHub repos
# ===============================
# One entry per resume: filename, Gemini's resume-vs-JD text, per-repo analysis.
results = []


def _fetch_match_text(match):
    """Return the normalized text of a GitHub search hit, or None if unavailable."""
    html_url = match.get("html_url") or ""
    # Assumes the usual https://github.com/<owner>/<repo>/blob/<ref>/<path>
    # layout, which maps onto raw.githubusercontent.com by dropping "/blob".
    raw_url = html_url.replace(
        "https://github.com/", "https://raw.githubusercontent.com/", 1
    ).replace("/blob/", "/", 1)
    if raw_url == html_url:
        return None
    try:
        resp = requests.get(raw_url, timeout=15)
    except requests.RequestException:
        return None
    if resp.status_code != 200:
        return None
    return normalize_code(resp.text)


# The same model handle serves every resume, so build it once outside the loop.
model = genai.GenerativeModel("gemini-1.5-flash")

for filename, resume_text in resumes_text.items():
    github_links = extract_github_links(resume_text)

    print(f"\n--- Analysis for {filename} ---")

    # Gemini evaluation for resume vs JD
    resume_prompt = f"""
Compare the following resume with the job description.

Job Description:
{job_description}

Resume:
{resume_text}

Provide:
1. Suitability score out of 10
2. Key strengths
3. Missing skills/weaknesses
"""
    resume_response = model.generate_content(resume_prompt)
    print(resume_response.text)

    repo_results = []
    for repo_url in github_links:
        print(f"\n🔗 Checking GitHub repo: {repo_url}")
        try:
            with tempfile.TemporaryDirectory() as tmpdir:
                repo_dir = Path(tmpdir) / "repo"
                # Shallow clone: only the latest snapshot is inspected.
                git.Repo.clone_from(repo_url, repo_dir, depth=1)

                # Named code_files so the imported google.colab "files" module
                # is not rebound at module level.
                code_files = list_code_files(repo_dir)
                repo_verdicts = []

                for fpath in code_files:
                    code = fpath.read_text(errors="ignore")
                    # Fingerprint the file by its first 10 non-blank lines.
                    snippet = "\n".join([line for line in code.splitlines() if line.strip()][:10])
                    snippet_norm = normalize_code(snippet)
                    lang = fpath.suffix.lstrip(".")

                    # GitHub search (an empty snippet cannot match anything,
                    # so skip the request for empty files)
                    matches = github_code_search(snippet_norm, language=lang) if snippet_norm else []
                    top_matches_info = []
                    for m in matches:
                        # Score the snippet against the matched file's own
                        # text; comparing the snippet with itself always
                        # produced 1.0. None marks "could not be fetched".
                        match_text = _fetch_match_text(m)
                        similarity = (
                            fuzz.token_set_ratio(snippet_norm, match_text) / 100.0
                            if match_text is not None
                            else None
                        )
                        top_matches_info.append({
                            "url": m.get("html_url"),
                            "similarity_token": similarity
                        })

                    # Call Gemini only if matches found
                    if top_matches_info:
                        verdict_json = call_gemini_judge(snippet_norm, top_matches_info)
                    else:
                        verdict_json = {"verdict":"No Match", "confidence":1.0, "top_evidence":[], "reasoning":"No similar code found."}

                    # .get with defaults: one reply missing a key must not
                    # throw away the analysis of the whole repository.
                    repo_verdicts.append({
                        "file": str(fpath.relative_to(repo_dir)),
                        "verdict": verdict_json.get("verdict", "Unknown"),
                        "confidence": verdict_json.get("confidence", 0.0),
                        "top_evidence": verdict_json.get("top_evidence", []),
                        "reasoning": verdict_json.get("reasoning", "")
                    })

                repo_results.append({
                    "repo_url": repo_url,
                    "analysis": repo_verdicts
                })
        except Exception as e:
            # Per-repo boundary: record the failure and continue with the
            # next repository instead of aborting the whole run.
            repo_results.append({"repo_url": repo_url, "error": str(e)})

    results.append({
        "Filename": filename,
        "Resume Analysis": resume_response.text,
        "GitHub Analysis": repo_results
    })

# ===============================
# STEP 9: Save results to CSV
# ===============================
# One row per analysed resume; the columns come from the keys of each result dict.
df = pd.DataFrame(data=results)
df.to_csv(
    path_or_buf="resume_analysis_with_github.csv",
    index=False,
)
print("\nResults saved to resume_analysis_with_github.csv")


Saving RESUMEE1.pdf to RESUMEE1.pdf
Saving RESUMEE2.pdf to RESUMEE2.pdf
Saving RESUMEE3.pdf to RESUMEE3.pdf
Saving RESUMEE4.pdf to RESUMEE4.pdf
Saving RESUMEE5.pdf to RESUMEE5.pdf
Enter the Job Description: Skilled Full Stack Developer with experience in building and maintaining web applications across front-end and back-end technologies. Proficient in developing responsive interfaces, server-side logic, and database integration while optimizing performance and ensuring scalability. Strong problem-solving, collaboration, and communication skills.

--- Analysis for RESUMEE1.pdf ---
## Resume 1 Evaluation

**1. Suitability Score: 6/10**

**2. Key Strengths:**

* **Relevant Backend Skills:** Priya possesses demonstrable skills in backend technologies like Python, Django, PostgreSQL, and Flask, which align well with the job description's requirement for server-side logic and database integration.  The quantifiable achievement of reducing latency by 30% is a strong point.
* **Experience:** 