In [1]:
import google.generativeai as genai
import os
import PyPDF2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer 

In [3]:
genai.configure(api_key=os.environ["GOOGLE_API"])

In [4]:
gemini_model = genai.GenerativeModel(model_name="gemini-1.5-flash")

In [5]:
emb_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')



In [6]:
def extract_text_from_pdf(pdf_path):
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            text += page.extract_text()
    return text

In [7]:
def analyze_resume_with_gemini(resume_text,model):
    prompt = f"""
    You are analyzing a candidate's resume. Identify the key skills and domains based on their resume. Do not include any other information. Also make sure you only highlight prominent skills.

    Output only the keywords for:
    Skills: (e.g., Object Detection, OpenCV, Python)
    Domains: (e.g., Computer Vision, Machine Learning)

    You are supposed to follow these instructions while giving the output strictly.
    1. Don't write anything else apart from skills and domain.
    2. Use only ',' for separating two skills or domains.
    3. Strictly follow the output format of the sample output.

    Sample output:
    Object Detection, OpenCV, Python
    Computer Vision, Machine Learning
    """
    
    response = model.generate_content([prompt, resume_text])
    output = response.text.strip().split('\n')
    return output

### Navya-Sufyan

In [8]:
pdf_path = "candidate/Sufyan_Resume.pdf"
resume_text = extract_text_from_pdf(pdf_path)
result = analyze_resume_with_gemini(resume_text,gemini_model)
result

['Python, C, R, Git, GitHub, PyCharm, NumPy, Pandas, Power BI, Scikit-learn, YOLO, CNN, GPT, Gemini, Gemma, LLAMA3',
 'Machine Learning, Deep Learning, Data Analysis, Generative AI']

In [25]:
pdf_path = "Resume_Navya.pdf"
resume_text = extract_text_from_pdf(pdf_path)
result1 = analyze_resume_with_gemini(resume_text,gemini_model)
result1

['Python, C, R, Git, GitHub, PyCharm, NumPy, Pandas, Power BI, Scikit-learn, YOLO, CNN, GPT, Gemini, Gemma, LLAMA3',
 'Machine Learning, Deep Learning, Computer Vision, Generative AI']

In [17]:
#skills
emb = emb_model.encode([result[0],
                      result[0]])
#cosine_similarity([emb[0]], [emb[1]])

(384,)

In [31]:
#domain
emb = emb_model.encode([result[1],
                      result1[1]])
cosine_similarity([emb[0]], [emb[1]])

array([[0.8515991]], dtype=float32)

### Sambit-Snigdha

In [32]:
pdf_path = "Sambit Mallick Resume.pdf"
resume_text = extract_text_from_pdf(pdf_path)
result = analyze_resume_with_gemini(resume_text,gemini_model)
result

['Python, C++, C, Scikit-learn, Keras, JupyterNotebooks, Git, Github, Huggingface, CNN, YOLO, GAN, Transformer, EfficientNet, DenseNet, Gemini, Gemma, StableDiffusion, Llama3, Pandas, NumPy, Matplotlib, Plotly, Flask, OpenCV, DeepSort, LightGBM, XgBoost, YOLOv8',
 'Machine Learning, Deep Learning, Computer Vision, Generative AI, Data Analysis']

In [34]:
pdf_path = "resume_snigdha_paul.pdf-1.pdf"
resume_text = extract_text_from_pdf(pdf_path)
result1 = analyze_resume_with_gemini(resume_text,gemini_model)
result1

['Python, C++, C, Git, Github, PyCharm, Scikit-learn, YOLO, CNN, DenseNet, GAN, GPT, Gemini, Gemma, LLAMA3',
 'Machine Learning, Deep Learning, Computer Vision, Generative AI, Quantum Machine Learning']

In [35]:
#skills
emb = emb_model.encode([result[0],
                      result1[0]])
cosine_similarity([emb[0]], [emb[1]])

array([[0.88260144]], dtype=float32)

In [36]:
#domain
emb = emb_model.encode([result[1],
                      result1[1]])
cosine_similarity([emb[0]], [emb[1]])

array([[0.81917477]], dtype=float32)

### Ayush-Sufyan

In [37]:
pdf_path = "Ayush_resume.pdf"
resume_text = extract_text_from_pdf(pdf_path)
result = analyze_resume_with_gemini(resume_text,gemini_model)
result

['Python, NumPy, Pandas, Matplotlib, Scikit-Learn, OpenCV, Dlib, pyAudioAnalysis, HTML, CSS, JavaScript, PHP, R',
 'Machine Learning, Computer Vision, Data Science, Web Development, Data Analytics']

In [38]:
pdf_path = "Sufyan_Resume.pdf"
resume_text = extract_text_from_pdf(pdf_path)
result1 = analyze_resume_with_gemini(resume_text,gemini_model)
result1

['Python, C, R, Git, GitHub, PyCharm, NumPy, Pandas, Power BI, Scikit-learn, YOLO, CNN, GPT, Gemini, Gemma, LLAMA3',
 'Machine Learning, Deep Learning, Data Analysis, Generative AI']

In [62]:
#skills
emb = emb_model.encode([result[0],
                      result1[0]])
cosine_similarity([emb[0]], [emb[1]])

array([[0.73804843]], dtype=float32)

In [64]:
#domain
emb = emb_model.encode([result[1],
                      result1[1]])
cosine_similarity([emb[0]], [emb[1]])

array([[0.6133897]], dtype=float32)

### Compiling everything

In [70]:
def resume_similarity(pdf1,pdf2,extracting_model,embedding_model):
    resume1_text=extract_text_from_pdf(pdf1)
    resume2_text=extract_text_from_pdf(pdf2)

    resume1_details=analyze_resume_with_gemini(resume1_text,extracting_model)
    resume2_details=analyze_resume_with_gemini(resume2_text,extracting_model)

    skill_emb = embedding_model.encode([resume1_details[0], resume2_details[0]])
    domain_emb = embedding_model.encode([resume1_details[1], resume2_details[1]])

    skill_similarity = cosine_similarity([skill_emb[0]], [skill_emb[1]])
    domain_similarity = cosine_similarity([domain_emb[0]], [domain_emb[1]])

    similarity = (0.4 * skill_similarity) + (0.6 * domain_similarity)

    return similarity

In [92]:
resume_similarity('Resume_Navya.pdf', 'Sufyan_Resume.pdf', gemini_model, emb_model)

array([[0.9499607]], dtype=float32)

In [87]:
resume_similarity('Sambit Mallick Resume.pdf', 'resume_snigdha_paul.pdf-1.pdf', gemini_model, emb_model)

array([[0.8757193]], dtype=float32)

In [48]:
resume_similarity('Sambit Mallick Resume.pdf', 'Sufyan_Resume.pdf', gemini_model, emb_model)

array([[0.8812917]], dtype=float32)

In [55]:
resume_similarity('Resume_Navya.pdf', 'resume_snigdha_paul.pdf-1.pdf', gemini_model, emb_model)

array([[0.84288]], dtype=float32)

In [73]:
resume_similarity('Resume_Navya.pdf', 'resume_snigdha_paul.pdf-1.pdf', gemini_model, emb_model)

array([[0.87421536]], dtype=float32)

In [85]:
resume_similarity('resume_snigdha_paul.pdf-1.pdf', 'Pratik_resume_v2.pdf', gemini_model, emb_model)

array([[0.47892556]], dtype=float32)

In [16]:
def files(db):
    for filename in os.listdir(db):
        print(filename)

files('experts')

engr (1).pdf
engr (2).pdf
engr (3).pdf
engr (4).pdf
HR (1).pdf
HR (2).pdf
HR (3).pdf
Profile (1).pdf
sales (1).pdf
sales (2).pdf
sales (3).pdf


In [35]:
def expertmatch(db,cand,extracting_model,embedding_model):
    lst=[]
    max=0
    for filename in os.listdir(db):
        f = os.path.join(db, filename)
        text1=extract_text_from_pdf(f)
        analysis1=analyze_resume_with_gemini(text1,extracting_model)
        for cand_file in os.listdir(cand):
            c = os.path.join(cand, cand_file)
            text2=extract_text_from_pdf(c)
            analysis2=analyze_resume_with_gemini(text2,extracting_model)
            skill_emb = embedding_model.encode([analysis1[0], analysis2[0]])
            domain_emb = embedding_model.encode([analysis1[1], analysis2[1]])
            skill_similarity = cosine_similarity([skill_emb[0]], [skill_emb[1]])
            domain_similarity = cosine_similarity([domain_emb[0]], [domain_emb[1]])
            similarity = (0.4 * skill_similarity) + (0.6 * domain_similarity)
            lst.append(similarity)
            if max<similarity:
                max=similarity
                file=filename
    return max,file 

In [36]:
expertmatch('experts','candidate',gemini_model, emb_model)

(array([[0.77082264]], dtype=float32), 'Akash Kundu.pdf')

In [11]:
def skill_domain_from_job_des(text,model=gemini_model):
    prompt = f"""
    You are analyzing a job. Identify the key skills and domains based on the description. Do not include any other information. Also make sure you only highlight prominent skills.

    Output only the keywords for:
    Skills: (e.g., Object Detection, OpenCV, Python)
    Domains: (e.g., Computer Vision, Machine Learning)

    You are supposed to follow these instructions while giving the output strictly.
    1. Don't write anything else apart from skills and domain.
    2. Use only ',' for separating two skills or domains.
    3. Strictly follow the output format of the sample output.

    Sample output:
    Object Detection, OpenCV, Python
    Computer Vision, Machine Learning
    """
    
    response = model.generate_content([prompt, text])
    output = response.text.strip().split('\n')
    return output

In [12]:
pdf_path = "Data_Scientist_Job_Description.pdf"
jd_text = extract_text_from_pdf(pdf_path)
result = analyze_resume_with_gemini(jd_text,gemini_model)
result

['Python, SQL, Pandas, NumPy, Scikit-learn, TensorFlow, PyTorch, Git, AWS, Azure, GCP',
 'Machine Learning, Data Science, Data Engineering, Cloud Computing']