In [3]:
import pandas as pd
import random

# Parameters
num_records = 1000
skills_list = ['Python', 'Data Analysis', 'Machine Learning', 'Communication', 'Leadership', 'Java', 'Project Management', 'SQL', 'Deep Learning', 'Data Visualization']
education_list = ['Bachelors', 'Masters', 'PhD', 'Diploma']

# Generate synthetic data
data = []
for _ in range(num_records):
    resume_text = f"Experienced professional with skills in {', '.join(random.sample(skills_list, 3))}."
    experience = random.randint(0, 10)  # Years of experience
    skills = random.sample(skills_list, random.randint(3, 6))
    education = random.choice(education_list)

    data.append([resume_text, experience, skills, education])

# Create DataFrame
columns = ['Resume Text', 'Experience', 'Skills', 'Education']
df = pd.DataFrame(data, columns=columns)

# Display sample data
print(df.head())

# Save to CSV
df.to_csv('synthetic_resumes.csv', index=False)


                                         Resume Text  Experience  \
0  Experienced professional with skills in Leader...           2   
1  Experienced professional with skills in Projec...           0   
2  Experienced professional with skills in Deep L...          10   
3  Experienced professional with skills in Machin...           4   
4  Experienced professional with skills in Commun...           1   

                                              Skills  Education  
0   [Communication, Machine Learning, Data Analysis]  Bachelors  
1  [Project Management, Communication, Java, Deep...        PhD  
2           [Project Management, SQL, Deep Learning]  Bachelors  
3  [Data Visualization, Machine Learning, Java, D...        PhD  
4  [Communication, SQL, Data Visualization, Pytho...        PhD  


In [4]:
import pandas as pd

# Load resumes into a DataFrame
resumes = pd.read_csv('synthetic_resumes.csv')

# Preprocess resumes (e.g., parsing, cleaning)
# Example: Extracting skills
def extract_skills(resume_text):
    # Dummy implementation for skill extraction
    skills = ['Python', 'Data Analysis', 'Machine Learning', 'Communication', 'Leadership']
    return [skill for skill in skills if skill in resume_text]

resumes['Skills'] = resumes['Resume Text'].apply(lambda x: extract_skills(x))
# Extract experience using a regular expression
resumes['Experience'] = resumes['Resume Text'].str.extract(r'(\d+)\s*years?')
# Fill NaN values with 0 and convert to numeric type
resumes['Experience'] = pd.to_numeric(resumes['Experience'], errors='coerce').fillna(0).astype(int)


In [5]:
# Define job requirements
job_requirements = {
    'skills': ['Python', 'Data Analysis', 'Machine Learning'],
    'experience': 2  # Minimum years of experience
}


In [6]:
def calculate_score(resume, job_requirements):
    skills = resume['Skills']
    experience = resume['Experience']

    skill_score = sum(1 for skill in job_requirements['skills'] if skill in skills) / len(job_requirements['skills'])
    experience_score = min(experience / job_requirements['experience'], 1)

    total_score = (skill_score + experience_score) / 2
    return total_score

resumes['Score'] = resumes.apply(calculate_score, job_requirements=job_requirements, axis=1)


In [7]:
def provide_suggestions(resume, job_requirements):
    suggestions = []
    skills = resume['Skills']
    experience = resume['Experience']

    for skill in job_requirements['skills']:
        if skill not in skills:
            suggestions.append(f"Consider adding more experience or projects involving {skill}.")

    if experience < job_requirements['experience']:
        suggestions.append(f"Consider gaining more experience, at least {job_requirements['experience']} years.")

    return suggestions

resumes['Suggestions'] = resumes.apply(provide_suggestions, job_requirements=job_requirements, axis=1)


In [8]:
# Example resume data
data = {
    'Resume': [
        'Experienced data scientist with skills in Python, Data Analysis, and Machine Learning.',
        'Software engineer with 1 year of experience in Communication and Leadership.',
        'Analyst with expertise in Data Analysis and 3 years of experience.'
    ],
    'Experience': ['3 years', '1 year', '3 years']
}

resumes = pd.DataFrame(data)

# Apply the preprocessing, scoring, and suggestions
resumes['Skills'] = resumes['Resume'].apply(lambda x: extract_skills(x))
resumes['Experience'] = resumes['Experience'].apply(lambda x: int(x.split()[0]))
resumes['Score'] = resumes.apply(calculate_score, job_requirements=job_requirements, axis=1)
resumes['Suggestions'] = resumes.apply(provide_suggestions, job_requirements=job_requirements, axis=1)

# Display the results
print(resumes[['Resume', 'Score', 'Suggestions']])


                                              Resume     Score  \
0  Experienced data scientist with skills in Pyth...  1.000000   
1  Software engineer with 1 year of experience in...  0.250000   
2  Analyst with expertise in Data Analysis and 3 ...  0.666667   

                                         Suggestions  
0                                                 []  
1  [Consider adding more experience or projects i...  
2  [Consider adding more experience or projects i...  
