# Importing Libraries and Loading Data 

In [1]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import re


# Read the jobs dataset from its CSV export into a DataFrame.
# NOTE(review): this is a machine-specific absolute path — confirm whether a
# project-relative or configurable location should be used instead.
dataset_path = r"C:\Users\icham\OneDrive\Desktop\clean_resume_data.csv\jobs_dataset_with_features.csv (1)\jobs_dataset_with_features.csv"
df = pd.read_csv(dataset_path)

# Report the (rows, columns) shape first, then preview the leading rows
print(df.shape, df.head(), sep="\n")


(1615940, 2)
                        Role  \
0       Social Media Manager   
1     Frontend Web Developer   
2    Quality Control Manager   
3  Wireless Network Engineer   
4         Conference Manager   

                                            Features  
0  5 to 15 Years Digital Marketing Specialist M.T...  
1  2 to 12 Years Web Developer BCA HTML, CSS, Jav...  
2  0 to 12 Years Operations Manager PhD Quality c...  
3  4 to 11 Years Network Engineer PhD Wireless ne...  
4  1 to 12 Years Event Manager MBA Event planning...  


# Filtering Out Roles with Fewer Than 8000 Instances

In [2]:
# Roles that occur fewer times than this are removed from the data
min_count = 8000

# Tally the number of rows recorded for each role
role_counts = df['Role'].value_counts()

# Collect the under-represented roles, then keep every row whose role is
# not in that set (the index is rebuilt so it runs 0..n-1 again)
dropped_classes = role_counts.loc[role_counts.lt(min_count)].index
is_rare_role = df['Role'].isin(dropped_classes)
filtered_df = df.loc[~is_rare_role].reset_index(drop=True)

# Print the per-role counts that survive the filter, followed by the
# number of distinct roles that remain
remaining_counts = filtered_df['Role'].value_counts()
print(remaining_counts)
print(len(remaining_counts))


Interaction Designer         20580
Network Administrator        17470
User Interface Designer      14036
Social Media Manager         13945
User Experience Designer     13935
Procurement Analyst          13757
Social Media Analyst         10659
Quality Assurance Analyst    10541
SEO Specialist               10512
Executive Assistant          10496
Database Administrator       10482
Procurement Manager          10407
Data Analyst                 10406
Backend Developer            10404
Demand Planner               10362
Office Manager               10361
Customer Success Manager     10308
Frontend Developer           10308
Retirement Planner           10305
Name: Role, dtype: int64
19


# Sampling and Displaying the Data

In [3]:
# Sample 10,000 instances from the filtered dataset.
# random_state pins the draw so that re-running the notebook selects the same
# rows; without it every run trains and evaluates on a different subset.
# Note: this rebinds `df`, which previously referred to the full loaded frame,
# so the filtering cell must not be re-run after this one without reloading.
df = filtered_df.sample(n=10000, random_state=42)

# Display the first few rows of the sampled dataset
print(df.head())

                           Role  \
75142       Procurement Analyst   
172569  User Interface Designer   
213834           Demand Planner   
123164      Executive Assistant   
2418             Demand Planner   

                                                 Features  
75142   2 to 8 Years Procurement Manager B.Com Procure...  
172569  3 to 14 Years UX/UI Designer MCA UI design pri...  
213834  3 to 11 Years Supply Chain Manager B.Com Deman...  
123164  3 to 8 Years Administrative Assistant B.Com Ca...  
2418    3 to 8 Years Supply Chain Manager BCA Demand f...  


# Splitting Data and Vectorization

In [4]:
# Separate the model input text (X) from the role labels to predict (y)
X, y = df['Features'], df['Role']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): if identical Features strings occur on both sides of the split,
# the held-out score will be optimistic — consider checking for duplicates.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Learn the TF-IDF vocabulary and weights from the training text only, then
# encode the held-out text with that same fitted vectorizer
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Training and Evaluating the Model

In [5]:
# Initialize and train the RandomForest classifier.
# random_state fixes the forest's bootstrap sampling and feature selection so
# the fitted model (and the artifact pickled later) is the same on every run.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_tfidf, y_train)

# Predict on the test set
y_pred = rf_classifier.predict(X_test_tfidf)

# Calculate and print the accuracy (fraction of held-out rows labelled correctly).
# NOTE(review): a perfect or near-perfect score here is worth double-checking
# against duplicated Features rows shared between the train and test sets.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


# Helper Functions for Cleaning and Prediction

In [6]:
# Function to clean the resume text
def cleanResume(txt):
    """Normalize raw resume text before it is vectorized.

    The following substitutions are applied in order:

    1. URLs (any run of non-space characters starting with "http") become a space.
    2. The standalone tokens "RT" and "cc" become a space. They are matched on
       word boundaries so that words merely containing those letters
       (for example "Success" or "account") are left intact.
    3. Hashtags ("#" followed by non-space characters) become a space.
    4. Mentions ("@" followed by non-space characters) become spaces.
    5. ASCII punctuation characters become spaces.
    6. Non-ASCII characters become spaces.
    7. Every run of whitespace is collapsed to a single space.

    Args:
        txt (str): The raw resume text.

    Returns:
        str: The cleaned text. Leading/trailing whitespace is collapsed to a
        single space but not stripped.
    """
    # Raw strings keep the regex escapes from being read as (invalid) string escapes.
    # URL and hashtag patterns do not require trailing whitespace, so a token at
    # the very end of the text is removed as well.
    cleanText = re.sub(r'http\S+', ' ', txt)
    cleanText = re.sub(r'\b(?:RT|cc)\b', ' ', cleanText)
    cleanText = re.sub(r'#\S+', ' ', cleanText)
    cleanText = re.sub(r'@\S+', '  ', cleanText)
    cleanText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText

# Function to recommend a job role based on the resume text
def job_recommendation(resume_text):
    """Return the role the trained classifier predicts for one resume.

    The text is passed through cleanResume, encoded with the already-fitted
    tfidf_vectorizer, and classified by rf_classifier.

    Args:
        resume_text (str): The resume content as plain text.

    Returns:
        The predicted label for the single resume supplied.
    """
    # The vectorizer expects an iterable of documents, hence the one-item list
    encoded = tfidf_vectorizer.transform([cleanResume(resume_text)])
    predictions = rf_classifier.predict(encoded)
    # Exactly one document went in, so the first prediction is the answer
    return predictions[0]


# Example Usage

In [7]:
# Example usage: classify a banking / finance resume.
# Despite its name, `resume_file` holds the resume text itself (a string),
# not a path to a file.
resume_file = """Objective:
Dedicated and results-oriented Banking professional with a strong background in financial analysis and customer service seeking opportunities to contribute to a reputable financial institution. Eager to leverage expertise in risk management, investment strategies, and relationship building to drive business growth and client satisfaction.

Education:
- Bachelor of Business Administration in Finance, XYZ University, GPA: 3.8/4.0
- Certified Financial Analyst (CFA) Level I Candidate

Skills:
- Proficient in financial modeling and analysis using Excel, Bloomberg Terminal, and other financial software
- Extensive knowledge of banking products and services, including loans, mortgages, and investment products
- Strong understanding of regulatory compliance and risk management practices in the banking industry
- Excellent communication and interpersonal skills, with a focus on building rapport with clients and colleagues
- Ability to work efficiently under pressure and adapt to changing market conditions

Experience:
Financial Analyst | ABC Bank
- Conducted financial analysis and risk assessment for corporate clients, including credit analysis, financial statement analysis, and cash flow modeling
- Developed customized financial solutions to meet clients' needs and objectives, resulting in increased revenue and client retention
- Collaborated with cross-functional teams to identify new business opportunities and optimize existing processes

Customer Service Representative | DEF Bank
- Provided exceptional customer service to bank clients, addressing inquiries, resolving issues, and promoting banking products and services
- Processed transactions accurately and efficiently, including deposits, withdrawals, and account transfers
- Educated customers on various banking products and services, helping them make informed financial decisions

Internship | GHI Investments
- Assisted portfolio managers with investment research and analysis, including industry and company-specific research, financial modeling, and performance analysis
- Prepared investment presentations and reports for clients, highlighting investment opportunities and performance metrics
- Conducted market research and analysis to identify trends and opportunities in the financial markets

Certifications:
- Certified Financial Planner (CFP)
- Series 7 and Series 63 Securities Licenses

Languages:
- English (Native)
- Spanish (Proficient)

"""
# Clean, vectorize and classify the text, then print the predicted role.
# The classifier can only return a role it was trained on, so a resume from an
# unrepresented field is still assigned one of those labels.
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: Customer Success Manager


In [8]:
# Example usage: classify a graphic / web designer resume.
# Despite its name, `resume_file` holds the resume text itself (a string),
# not a path to a file.
resume_file = """Objective:
A creative and detail-oriented Designer with a passion for visual communication and brand identity seeking opportunities to leverage design skills in a dynamic and collaborative environment.

Education:
- Bachelor of Fine Arts in Graphic Design, XYZ College, GPA: 3.7/4.0
- Diploma in Web Design, ABC Institute, GPA: 3.9/4.0

Skills:
- Proficient in Adobe Creative Suite (Photoshop, Illustrator, InDesign)
- Strong understanding of typography, layout, and color theory
- Experience in both print and digital design
- Ability to conceptualize and execute design projects from concept to completion
- Excellent attention to detail and time management skills

Experience:
Graphic Designer | XYZ Design Studio
- Created visually appealing graphics for various marketing materials, including brochures, flyers, and social media posts
- Collaborated with clients to understand their design needs and deliver creative solutions that align with their brand identity
- Worked closely with the marketing team to ensure consistency in brand messaging across all platforms

Freelance Designer
- Designed logos, branding materials, and website layouts for small businesses and startups
- Managed multiple projects simultaneously while meeting tight deadlines and maintaining quality standards
- Established and maintained strong client relationships through clear communication and exceptional service

Projects:
- Rebranding Campaign for XYZ Company: Led a team to redesign the company's logo, website, and marketing collateral, resulting in a 30% increase in brand recognition
- Packaging Design for ABC Product Launch: Developed eye-catching packaging designs for a new product line, contributing to a successful launch and positive customer feedback

Certifications:
- Adobe Certified Expert (ACE) in Adobe Illustrator
- Responsive Web Design Certification from Udemy

Languages:
- English (Native)
- Spanish (Intermediate)
"""
# Clean, vectorize and classify the text, then print the predicted role
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: User Interface Designer


# Saving the Model and Vectorizer


In [9]:
# Save the trained model and the TF-IDF vectorizer.
# Each file is opened in a `with` block so its handle is flushed and closed as
# soon as the dump finishes, even if pickling raises; the bare open(...) calls
# used before left both handles open.
# NOTE(review): these are machine-specific absolute paths and the target
# directory is assumed to exist already — confirm before running elsewhere.
with open(r"C:\Users\icham\OneDrive\Desktop\python\resume_job_matching_screening\models\rf_classifier_job_recommendation.pkl", 'wb') as model_file:
    pickle.dump(rf_classifier, model_file)
with open(r"C:\Users\icham\OneDrive\Desktop\python\resume_job_matching_screening\models\tfidf_vectorizer_job_recommendation.pkl", 'wb') as vectorizer_file:
    pickle.dump(tfidf_vectorizer, vectorizer_file)