In [19]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import AutoModel
from transformers import AutoModel, AutoTokenizer
from peft import LoraConfig, get_peft_model
from sentence_transformers import util
import google.generativeai as genai
import os

In [None]:

# Read the resume dataset from the CSV file under dataset/
resume_csv_path = "dataset/Resume.csv"
df = pd.read_csv(resume_csv_path)

# Preview the first rows to check which columns are present
print(df.head())


         ID                                         Resume_str  \
0  16852973           HR ADMINISTRATOR/MARKETING ASSOCIATE\...   
1  22323967           HR SPECIALIST, US HR OPERATIONS      ...   
2  33176873           HR DIRECTOR       Summary      Over 2...   
3  27018550           HR SPECIALIST       Summary    Dedica...   
4  17812897           HR MANAGER         Skill Highlights  ...   

                                         Resume_html Category  
0  <div class="fontsize fontface vmargins hmargin...       HR  
1  <div class="fontsize fontface vmargins hmargin...       HR  
2  <div class="fontsize fontface vmargins hmargin...       HR  
3  <div class="fontsize fontface vmargins hmargin...       HR  
4  <div class="fontsize fontface vmargins hmargin...       HR  


In [None]:
# Keep the identifier, the plain-text resume and the category label;
# every other column (e.g. Resume_html) is dropped from df.
columns_to_keep = ['ID', 'Resume_str', 'Category']
df = df[columns_to_keep]

In [None]:
# Load pre-trained embedding model
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every resume in ONE batched call instead of one encode() call per row:
# the model batches the texts internally, which avoids a separate forward pass
# (and Python-level overhead) for each resume.
resume_embeddings = embed_model.encode(
    df['Resume_str'].tolist(),
    convert_to_tensor=True,
)

# encode() returns a single 2-D tensor for a list input; list() splits it along
# the first dimension so each row of df gets its own 1-D embedding tensor,
# matching the per-row layout the column had before.
df['resume_embedding'] = list(resume_embeddings)









In [None]:
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = AutoModel.from_pretrained(model_name)

# Show the dotted name of every submodule, one per line
# (handy when choosing LoRA target modules).
module_names = [module_name for module_name, _ in model.named_modules()]
print(*module_names, sep="\n")



embeddings
embeddings.word_embeddings
embeddings.position_embeddings
embeddings.token_type_embeddings
embeddings.LayerNorm
embeddings.dropout
encoder
encoder.layer
encoder.layer.0
encoder.layer.0.attention
encoder.layer.0.attention.self
encoder.layer.0.attention.self.query
encoder.layer.0.attention.self.key
encoder.layer.0.attention.self.value
encoder.layer.0.attention.self.dropout
encoder.layer.0.attention.output
encoder.layer.0.attention.output.dense
encoder.layer.0.attention.output.LayerNorm
encoder.layer.0.attention.output.dropout
encoder.layer.0.intermediate
encoder.layer.0.intermediate.dense
encoder.layer.0.intermediate.intermediate_act_fn
encoder.layer.0.output
encoder.layer.0.output.dense
encoder.layer.0.output.LayerNorm
encoder.layer.0.output.dropout
encoder.layer.1
encoder.layer.1.attention
encoder.layer.1.attention.self
encoder.layer.1.attention.self.query
encoder.layer.1.attention.self.key
encoder.layer.1.attention.self.value
encoder.layer.1.attention.self.dropout
encoder.

In [24]:
# Load base model & tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Target the query/key/value projections of every encoder layer. The layer
# count is read from the model config instead of being hard-coded, so the list
# stays valid if model_name is switched to a checkpoint with a different depth.
num_layers = model.config.num_hidden_layers
target_modules = [
    f"encoder.layer.{i}.attention.self.{projection}"
    for projection in ("query", "key", "value")
    for i in range(num_layers)
]

# AutoModel loads the bare encoder (no classification head), so the adapter is
# declared as a feature-extraction task. "SEQ_CLS" is meant for models that
# carry a sequence-classification head.
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=target_modules,
    lora_dropout=0.1,
    bias="none",
    task_type="FEATURE_EXTRACTION",
)

# Integrate PEFT into the model
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Save the adapter (not yet trained at this point) and the tokenizer side by side
model.save_pretrained("saved_model")
tokenizer.save_pretrained("saved_model")





trainable params: 110,592 || all params: 22,823,808 || trainable%: 0.4845


('saved_model\\tokenizer_config.json',
 'saved_model\\special_tokens_map.json',
 'saved_model\\vocab.txt',
 'saved_model\\added_tokens.json',
 'saved_model\\tokenizer.json')

In [None]:
def match_resume_to_job(resume_text, job_description):
    """Score how closely a resume matches a job description.

    Both texts are embedded with the notebook-level ``embed_model`` and
    compared with cosine similarity.

    Args:
        resume_text: Text of the resume.
        job_description: Text of the job posting.

    Returns:
        The cosine similarity of the two embeddings as a Python scalar.
    """
    # Encode the resume first, then the job description (same order as two
    # separate calls would run).
    resume_vec, job_vec = (
        embed_model.encode(text, convert_to_tensor=True)
        for text in (resume_text, job_description)
    )
    similarity = util.pytorch_cos_sim(resume_vec, job_vec)
    return similarity.item()

# Example: score the first resume in the dataset against a sample job description
job_desc = "Experienced Data Scientist with a strong background in Python, Machine Learning, and Data Analytics. Proficient in developing, training, and deploying machine learning models to drive business insights and automation. Skilled in handling large datasets, feature engineering, and implementing supervised and unsupervised learning algorithms. Adept at using libraries such as Pandas, NumPy, Scikit-learn, TensorFlow, and PyTorch to build scalable solutions. Experienced in working with cloud platforms like AWS and GCP, as well as SQL for data extraction and analysis. Passionate about transforming raw data into actionable insights through advanced analytics, statistical modeling, and visualization. Strong understanding of MLOps principles to streamline deployment and monitoring of models in production. Proven ability to collaborate with cross-functional teams, communicate findings effectively, and translate business problems into data-driven solutions. Holds a Bachelor’s or Master’s degree in Computer Science, Data Science, or a related field, with hands-on experience in AI research and real-world machine learning applications. Always eager to stay up-to-date with the latest advancements in artificial intelligence, deep learning, and big data technologies. Looking to leverage expertise in data science to solve complex problems and drive innovation in a dynamic, growth-oriented environment. Recognized for analytical problem-solving, creativity, and ability to deliver high-impact solutions under tight deadlines. Enthusiastic about contributing to data-driven decision-making and shaping the future of AI-driven solutions. Open to exciting opportunities in AI, analytics, and machine learning engineering roles."
resume_text = df.loc[0, 'Resume_str']
match_score = match_resume_to_job(resume_text, job_desc)
print(f"Match Score: {match_score}")


Match Score: 0.27143406867980957


In [None]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so the API key
# does not have to be written in the notebook itself.
load_dotenv()

# Register the key with the Gemini client; a missing GEMINI_API_KEY raises
# KeyError here rather than failing later on the first request.
genai.configure(
    api_key=os.environ["GEMINI_API_KEY"],
)

def improve_resume(resume_text, model_name="gemini-1.5-flash"):
    """Ask a Gemini model for improvement suggestions on a resume.

    Args:
        resume_text: Plain-text resume to analyze. Must be a non-blank string.
        model_name: Model identifier passed to ``genai.GenerativeModel``.
            Defaults to "gemini-1.5-flash"; pass another id to switch models
            without editing this function.

    Returns:
        The ``text`` attribute of the generation response.

    Raises:
        ValueError: If ``resume_text`` is not a string or is only whitespace.
    """
    # Reject blank / non-text input up front instead of spending an API call
    # on a prompt that contains no resume.
    if not isinstance(resume_text, str) or not resume_text.strip():
        raise ValueError("resume_text must be a non-empty string")

    # Named gemini_model so it is not confused with the notebook-level `model`.
    gemini_model = genai.GenerativeModel(model_name)
    prompt = f"Analyze this resume and suggest improvements:\n{resume_text}"
    response = gemini_model.generate_content(prompt)
    return response.text

# Try the helper on the first resume in the dataset
sample_resume = df.loc[0, 'Resume_str']
print(improve_resume(sample_resume))


This resume needs significant restructuring and refinement to be more effective.  The current version is cluttered and doesn't effectively highlight the candidate's key skills and accomplishments. Here's a suggested improvement plan:

**1.  Rewrite the Summary:**

The current summary focuses on hospitality, which isn't directly relevant to the target HR Administrator/Marketing Associate role.  It needs to be concise and highlight relevant skills and experience for the desired position.  For example:

> *Highly accomplished and results-oriented HR Administrator and Marketing Associate with 15+ years of experience in driving operational efficiency and enhancing brand presence. Proven ability to manage employee relations, administer benefits programs, develop marketing campaigns, and build strong customer relationships.  Seeking a challenging role leveraging expertise in both HR and Marketing to contribute to organizational growth.*


**2.  Consolidate Experience:**

The resume lists many