In [2]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from html import unescape
import unicodedata
import re
import warnings

In [3]:
data = pd.read_csv("data/fake_job_postings.csv", delimiter=",")
pd.options.display.max_colwidth = None
data[11:12]["requirements"]

11    We’re looking for someone who:Proven track record in sourcing across marketing, banking &amp; building a strong, steady pipelineStrong knowledge of internet sourcingFluent in converting passive candidates into new hiresExperience with using recruitment toolsAbility to multi-task and prioritise workloadPrior Applicant Tracking Systems (ATS) experienceWhat you’ll be doing:Manage your recruitment pipeline and find new innovative ways to source candidates   Utilise recruitment tools to handpick &amp; engage passive candidatesWork side-by-side with our Hiring Managers &amp; Recruiter to understand the needs of the businessTailor/master your pitch of the TransferWise vision to attract candidates of varied seniorityProduce reporting metrics
Name: requirements, dtype: object

In [6]:
def beautify(s):
    s = str(s)
    s = BeautifulSoup(unescape(s), 'html').text
    s = unicodedata.normalize('NFKC', s)
    s = re.sub(r"([a-z:])([A-Z])", r"\1 \2", s)
    return s

In [7]:
s = beautify(float("NaN"))
s

'nan'

In [8]:
df = pd.DataFrame(data = data)
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')
for col in ["company_profile", "description", "requirements", "benefits"]:
    for i in range(len(df[col])):
        df.loc[i, col] = beautify(df.loc[i, col])
df["requirements"][11]

'We’re looking for someone who: Proven track record in sourcing across marketing, banking & building a strong, steady pipeline Strong knowledge of internet sourcing Fluent in converting passive candidates into new hires Experience with using recruitment tools Ability to multi-task and prioritise workload Prior Applicant Tracking Systems (ATS) experience What you’ll be doing: Manage your recruitment pipeline and find new innovative ways to source candidates   Utilise recruitment tools to handpick & engage passive candidates Work side-by-side with our Hiring Managers & Recruiter to understand the needs of the business Tailor/master your pitch of the Transfer Wise vision to attract candidates of varied seniority Produce reporting metrics'

In [14]:
df.to_csv("./Data/data_first_clean.csv", encoding='utf-8', index=False)

In [24]:
df = df.drop(columns=['job_id', 'title'])

In [35]:
df.columns

Index(['location', 'department', 'salary_range', 'company_profile',
       'description', 'requirements', 'benefits', 'telecommuting',
       'has_company_logo', 'has_questions', 'employment_type',
       'required_experience', 'required_education', 'industry', 'function',
       'fraudulent'],
      dtype='object')

In [36]:
df.loc[df.description.duplicated()][['description', 'fraudulent']]

Unnamed: 0,description,fraudulent
40,"Play with kids, get paid for it Love travel? Jobs in Asia$1,500+ USD monthly ($200 Cost of living)Housing provided (Private/Furnished)Airfare Reimbursed Excellent for student loans/credit cards Gabriel Adkins : #URL_ed9094c60184b8a4975333957f05be37e69d3cdb68decc9dd9a4242733cfd7f7##URL_75db76d58f7994c7db24e8998c2fc953ab9a20ea9ac948b217693963f78d2e6b#12 month contract : Apply today",0
48,"Play with kids, get paid for it Love travel? Jobs in Asia$1,500+ USD monthly ($200 Cost of living)Housing provided (Private/Furnished)Airfare Reimbursed Excellent for student loans/credit cards Gabriel Adkins : #URL_ed9094c60184b8a4975333957f05be37e69d3cdb68decc9dd9a4242733cfd7f7##URL_75db76d58f7994c7db24e8998c2fc953ab9a20ea9ac948b217693963f78d2e6b#12 month contract : Apply today",0
56,"As an Outside Sales Representative, you must have excellent sales talents as well as the willingness to learn the ABC Supply sales approach. It will also be very helpful to your role as an Outside Sales Representative if you have existing knowledge of exterior building product lines such as siding, roofing, and windows. This will not only to lend credibility to your sales presentations, but also to assist the Branch Manager in the selection of new product lines. It is also vital for your role as an Outside Sales Representative that you have empathy for your customers and their needs, and always provide them with the best possible service.",0
62,"Play with kids, get paid for it Love travel? Jobs in Asia$1,500+ USD monthly ($200 Cost of living)Housing provided (Private/Furnished)Airfare Reimbursed Excellent for student loans/credit cards Gabriel Adkins : #URL_ed9094c60184b8a4975333957f05be37e69d3cdb68decc9dd9a4242733cfd7f7##URL_75db76d58f7994c7db24e8998c2fc953ab9a20ea9ac948b217693963f78d2e6b#12 month contract : Apply today",0
80,"Play with kids, get paid for it Love travel? Jobs in Asia$1,500+ USD monthly ($200 Cost of living)Housing provided (Private/Furnished)Airfare Reimbursed Excellent for student loans/credit cards Gabriel Adkins : #URL_ed9094c60184b8a4975333957f05be37e69d3cdb68decc9dd9a4242733cfd7f7##URL_75db76d58f7994c7db24e8998c2fc953ab9a20ea9ac948b217693963f78d2e6b#12 month contract : Apply today",0
...,...,...
17847,"Perform basic workstation trouble analysis to include PC and cabling Maintain Windows Desktops, focusing on functionality, security, and patches Perform basic installation and troubleshooting of computer and associated operating systems, operating environments, application software, cable and peripheral equipment Create basic accounts to rollout new computers and services (new employee setup)Assist end users with general computer operation and desktop application software questions and problems Identify and resolve customer or internal system access issues (account lockouts, password resets, access setup, etc.)Assist in maintaining a network free of computer viruses, to include scanning and eliminating viruses on desktop computers Assist with hardware and software upgrades Assist with IT projects as required Administer hardware repairs Manage phone system – moves, adds and changes.Oversee printer maintenance and repair Other IT related responsibilities and tasks as they arise",0
17853,At Command we care enough to consistently place the right candidates in the right jobs. We have more than 50 locally-managed branches throughout the United States that serve as trusted partners to businesses and job seekers.,0
17854,"(We have more than 1500+ Job openings in our website and some of them are relevant to this job. Feel free to search it in the website and apply directly. Just Click the “Apply Now” and you will redirect to our main website where you can search for the other jobs.)Job Responsibilities: The TMO Manager will be a “Hands on” Manager responsible for: Designing, building and automating talent management processes, metrics, tracking, and reporting capabilities.Developing rigorous analytical models that provide structure to ambiguous, complex issues.Reviewing data and communicating insights as a basis for talent management action to be utilized with key stakeholders including c-suite executive management.Evaluating Talent data collection process to ensure efficiency and high level of data integrity.Comparing the Company’s talent metrics against relevant industry benchmarks and identifying key areas for improvement.Conducting research, externally and internally to identify trends/benchmarks, implications for talent management analytics and recommend responses.Providing project and thought leadership in areas of subject matter expertise.Integrating analytics across multiple HR groups to ensure synergies in measurement and reporting.Managing annual TMO cycle.Managing highly confidential information.Overseeing and training junior analyst to ensure data accuracy and integrity.Education & Experience: 5+ to 7 years of experience Management Experience Required - No Minimum Education - Bachelor's Degree Willingness to Travel - Occasionally Visit - #URL_ec64af2b4fe2ca316e828f93b0cd098c22f8beba98dcac09d4dd7384b221a5e8#-#URL_2954b76adf23051d4bc35fc729f5a18c5de715ed82dfaee24b3aabbf3874ca57#",0
17874,"Sr, Java Script Developer Experience : 4-10 years Location : New York Experience in front end development Experience in using Javascript, HTML, HTML5, AJAX, JSON, javascript and CSS3. Familiar with at least one Javascript MVC framework like backbone, #URL_5543aa46fe37875fcc9e8b278ee02d4a18479d6d208cad289401987349eea211# , Canjs, or Ember JS. Strong experience accessing REST API's including third parties Facebook, Twitter, Pinterest etc... Knowledge of PHP and SQL databases",0


In [37]:
df2 = pd.read_csv("./Data/data_first_clean.csv", encoding='utf-8')

In [39]:
df2.loc[df2.description.duplicated()][['title','description','fraudulent']]

Unnamed: 0,title,description,fraudulent
40,English Teacher Abroad,"Play with kids, get paid for it Love travel? Jobs in Asia$1,500+ USD monthly ($200 Cost of living)Housing provided (Private/Furnished)Airfare Reimbursed Excellent for student loans/credit cards Gabriel Adkins : #URL_ed9094c60184b8a4975333957f05be37e69d3cdb68decc9dd9a4242733cfd7f7##URL_75db76d58f7994c7db24e8998c2fc953ab9a20ea9ac948b217693963f78d2e6b#12 month contract : Apply today",0
48,English Teacher Abroad,"Play with kids, get paid for it Love travel? Jobs in Asia$1,500+ USD monthly ($200 Cost of living)Housing provided (Private/Furnished)Airfare Reimbursed Excellent for student loans/credit cards Gabriel Adkins : #URL_ed9094c60184b8a4975333957f05be37e69d3cdb68decc9dd9a4242733cfd7f7##URL_75db76d58f7994c7db24e8998c2fc953ab9a20ea9ac948b217693963f78d2e6b#12 month contract : Apply today",0
56,Outside Sales Professional-Oronoco,"As an Outside Sales Representative, you must have excellent sales talents as well as the willingness to learn the ABC Supply sales approach. It will also be very helpful to your role as an Outside Sales Representative if you have existing knowledge of exterior building product lines such as siding, roofing, and windows. This will not only to lend credibility to your sales presentations, but also to assist the Branch Manager in the selection of new product lines. It is also vital for your role as an Outside Sales Representative that you have empathy for your customers and their needs, and always provide them with the best possible service.",0
62,English Teacher Abroad,"Play with kids, get paid for it Love travel? Jobs in Asia$1,500+ USD monthly ($200 Cost of living)Housing provided (Private/Furnished)Airfare Reimbursed Excellent for student loans/credit cards Gabriel Adkins : #URL_ed9094c60184b8a4975333957f05be37e69d3cdb68decc9dd9a4242733cfd7f7##URL_75db76d58f7994c7db24e8998c2fc953ab9a20ea9ac948b217693963f78d2e6b#12 month contract : Apply today",0
80,English Teacher Abroad,"Play with kids, get paid for it Love travel? Jobs in Asia$1,500+ USD monthly ($200 Cost of living)Housing provided (Private/Furnished)Airfare Reimbursed Excellent for student loans/credit cards Gabriel Adkins : #URL_ed9094c60184b8a4975333957f05be37e69d3cdb68decc9dd9a4242733cfd7f7##URL_75db76d58f7994c7db24e8998c2fc953ab9a20ea9ac948b217693963f78d2e6b#12 month contract : Apply today",0
...,...,...,...
17847,User Support Technician,"Perform basic workstation trouble analysis to include PC and cabling Maintain Windows Desktops, focusing on functionality, security, and patches Perform basic installation and troubleshooting of computer and associated operating systems, operating environments, application software, cable and peripheral equipment Create basic accounts to rollout new computers and services (new employee setup)Assist end users with general computer operation and desktop application software questions and problems Identify and resolve customer or internal system access issues (account lockouts, password resets, access setup, etc.)Assist in maintaining a network free of computer viruses, to include scanning and eliminating viruses on desktop computers Assist with hardware and software upgrades Assist with IT projects as required Administer hardware repairs Manage phone system – moves, adds and changes.Oversee printer maintenance and repair Other IT related responsibilities and tasks as they arise",0
17853,Call Center/Customer Service,At Command we care enough to consistently place the right candidates in the right jobs. We have more than 50 locally-managed branches throughout the United States that serve as trusted partners to businesses and job seekers.,0
17854,Talent Management Process Manager,"(We have more than 1500+ Job openings in our website and some of them are relevant to this job. Feel free to search it in the website and apply directly. Just Click the “Apply Now” and you will redirect to our main website where you can search for the other jobs.)Job Responsibilities: The TMO Manager will be a “Hands on” Manager responsible for: Designing, building and automating talent management processes, metrics, tracking, and reporting capabilities.Developing rigorous analytical models that provide structure to ambiguous, complex issues.Reviewing data and communicating insights as a basis for talent management action to be utilized with key stakeholders including c-suite executive management.Evaluating Talent data collection process to ensure efficiency and high level of data integrity.Comparing the Company’s talent metrics against relevant industry benchmarks and identifying key areas for improvement.Conducting research, externally and internally to identify trends/benchmarks, implications for talent management analytics and recommend responses.Providing project and thought leadership in areas of subject matter expertise.Integrating analytics across multiple HR groups to ensure synergies in measurement and reporting.Managing annual TMO cycle.Managing highly confidential information.Overseeing and training junior analyst to ensure data accuracy and integrity.Education & Experience: 5+ to 7 years of experience Management Experience Required - No Minimum Education - Bachelor's Degree Willingness to Travel - Occasionally Visit - #URL_ec64af2b4fe2ca316e828f93b0cd098c22f8beba98dcac09d4dd7384b221a5e8#-#URL_2954b76adf23051d4bc35fc729f5a18c5de715ed82dfaee24b3aabbf3874ca57#",0
17874,JavaScript Developer,"Sr, Java Script Developer Experience : 4-10 years Location : New York Experience in front end development Experience in using Javascript, HTML, HTML5, AJAX, JSON, javascript and CSS3. Familiar with at least one Javascript MVC framework like backbone, #URL_5543aa46fe37875fcc9e8b278ee02d4a18479d6d208cad289401987349eea211# , Canjs, or Ember JS. Strong experience accessing REST API's including third parties Facebook, Twitter, Pinterest etc... Knowledge of PHP and SQL databases",0


In [46]:
df2.loc[40]

job_id                                                                                                                                                                                                                                                                                                                                                                                                              41
title                                                                                                                                                                                                                                                                                                                                                                                          English Teacher Abroad 
location                                                                                                                                                                                  

In [47]:
df2.loc[48]

job_id                                                                                                                                                                                                                                                                                                                                                                                                              49
title                                                                                                                                                                                                                                                                                                                                                                                          English Teacher Abroad 
location                                                                                                                                                                                  