## Cleaning Resume and Job Posts Data for use by Job Recommender System 

### Datasets
* https://www.kaggle.com/maitrip/resumes; 1219 resumes; ID, category, text description
* https://www.kaggle.com/samdeeplearning/deepnlp; 125 resumes; may drop this one
* https://www.kaggle.com/madhab/jobposts/data; 19,000 job posts from 2004-2015.




Imports

In [87]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import nltk
import spacy
import string

First, start with resume dataset

In [88]:
#resume_path = '/Users/richardkuzma/coding/NLP_projects/job_recommender_project/data/resume_dataset_maitrip.csv'
# that's my home path

lightsail_resume_path = '/home/ec2-user/NLP_projects/job_recommender_project/data/resume_dataset_maitrip.csv'
local_resume_path = '/Users/richardkuzma/coding/NLP_projects/job_recommender_project/data/resume_dataset_maitrip.csv'


resume_path = local_resume_path



In [89]:
df = pd.read_csv(resume_path)
df.head()

Unnamed: 0,ID,Category,Resume
0,1,HR,"b'John H. Smith, P.H.R.\n800-991-5187 | PO Box..."
1,2,HR,b'Name Surname\nAddress\nMobile No/Email\nPERS...
2,3,HR,b'Anthony Brown\nHR Assistant\nAREAS OF EXPERT...
3,4,HR,b'www.downloadmela.com\nSatheesh\nEMAIL ID:\nC...
4,5,HR,"b""HUMAN RESOURCES DIRECTOR\n\xef\x82\xb7Expert..."


In [90]:
#what does one resume look like
df.Resume[0]

"b'John H. Smith, P.H.R.\\n800-991-5187 | PO Box 1673 | Callahan, FL 32011 | info@greatresumesfast.com\\n\\nApproachable innovator with a passion for Human Resources.\\n\\nSENIOR HUMAN RESOURCES PROFESSIONAL\\nPersonable, analytical, flexible Senior HR Professional with multifaceted expertise. Seasoned Benefits Administrator with\\nextensive experience working with highly paid professionals in client-relationship-based settings. Dynamic team leader\\ncapable of analyzing alternatives and identifying tough choices while communicating the total value of benefit and\\ncompensation packages to senior level executives and employees.\\n\\nCORE COMPETENCIES\\nBenefits Administration \\xe2\\x80\\x93 Customer Service \\xe2\\x80\\x93 Cost Control \\xe2\\x80\\x93 Recruiting \\xe2\\x80\\x93 Acquisition Management \\xe2\\x80\\x93 Compliance Reporting\\nRetention \\xe2\\x80\\x93 Professional Services \\xe2\\x80\\x93 Domestic & International Benefits \\xe2\\x80\\x93 Collaboration \\xe2\\x80\\x93 Adap

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

## Text Preprocessing

https://www.kaggle.com/sudalairajkumar/getting-started-with-text-preprocessing

In [91]:
df["resume"] = df["Resume"]
df.drop(["Resume"], axis=1, inplace=True)
df.head()

Unnamed: 0,ID,Category,resume
0,1,HR,"b'John H. Smith, P.H.R.\n800-991-5187 | PO Box..."
1,2,HR,b'Name Surname\nAddress\nMobile No/Email\nPERS...
2,3,HR,b'Anthony Brown\nHR Assistant\nAREAS OF EXPERT...
3,4,HR,b'www.downloadmela.com\nSatheesh\nEMAIL ID:\nC...
4,5,HR,"b""HUMAN RESOURCES DIRECTOR\n\xef\x82\xb7Expert..."


In [92]:
df.head()

Unnamed: 0,ID,Category,resume
0,1,HR,"b'John H. Smith, P.H.R.\n800-991-5187 | PO Box..."
1,2,HR,b'Name Surname\nAddress\nMobile No/Email\nPERS...
2,3,HR,b'Anthony Brown\nHR Assistant\nAREAS OF EXPERT...
3,4,HR,b'www.downloadmela.com\nSatheesh\nEMAIL ID:\nC...
4,5,HR,"b""HUMAN RESOURCES DIRECTOR\n\xef\x82\xb7Expert..."


In [93]:
#remove unicode
def unicodetoascii(text):

    TEXT = (text.
    		replace('\\xe2\\x80\\x99', "'").
            replace('\\xc3\\xa9', 'e').
            replace('\\xe2\\x80\\x90', '-').
            replace('\\xe2\\x80\\x91', '-').
            replace('\\xe2\\x80\\x92', '-').
            replace('\\xe2\\x80\\x93', '-').
            replace('\\xe2\\x80\\x94', '-').
            replace('\\xe2\\x80\\x94', '-').
            replace('\\xe2\\x80\\x98', "'").
            replace('\\xe2\\x80\\x9b', "'").
            replace('\\xe2\\x80\\x9c', '"').
            replace('\\xe2\\x80\\x9c', '"').
            replace('\\xe2\\x80\\x9d', '"').
            replace('\\xe2\\x80\\x9e', '"').
            replace('\\xe2\\x80\\x9f', '"').
            replace('\\xe2\\x80\\xa6', '...').
            replace('\\xe2\\x80\\xb2', "'").
            replace('\\xe2\\x80\\xb3', "'").
            replace('\\xe2\\x80\\xb4', "'").
            replace('\\xe2\\x80\\xb5', "'").
            replace('\\xe2\\x80\\xb6', "'").
            replace('\\xe2\\x80\\xb7', "'").
            replace('\\xe2\\x81\\xba', "+").
            replace('\\xe2\\x81\\xbb', "-").
            replace('\\xe2\\x81\\xbc', "=").
            replace('\\xe2\\x81\\xbd', "(").
            replace('\\xe2\\x81\\xbe', ")").replace('\\xef\\x81\\xb3', "").replace('\\n', ' ').replace('xefxxb', ' ')
             )
    return TEXT

In [94]:
#remove punctuation
PUNCT_TO_REMOVE = string.punctuation
def remove_punctuation(text):
    """custom function to remove the punctuation"""
    return text.translate(str.maketrans('', '', PUNCT_TO_REMOVE))

In [95]:
#remove stopwords
nltk.download('stopwords')
from nltk.corpus import stopwords
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    """custom function to remove the stopwords"""
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/richardkuzma/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [96]:
#remove URLs
def remove_urls(text):
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    return url_pattern.sub(r'', text)

In [97]:
#remove HTML
def remove_html(text):
    html_pattern = re.compile('<.*?>')
    return html_pattern.sub(r'', text)

In [98]:
#remove digits
DIGITS_TO_REMOVE = "0123456789"
def remove_digits(text):
    return text.translate(str.maketrans('', '', DIGITS_TO_REMOVE))

In [99]:
#remove endlines
END_LINES = '\r\n'
def remove_endlines(text):
    """custom function to remove \r\n for end of line"""
    return text.replace(END_LINES, ' ')

In [100]:
#remove hyphens
#use this before remove_punctuation, becuase words like 'detail-oriented' get mushed
#together by remove_punctuation, which replaces '-' with ''
def remove_hyphens(text):
    """custom function to remove \r\n for end of line"""
    return text.replace('-', ' ')

In [101]:
# df["resume"] = df["resume"].astype(str).str.lower()
# df['resume'] = df['resume'].apply(remove_urls)
# df['resume'] = df['resume'].apply(remove_html)
# df['resume'] = df["resume"].apply(unicodetoascii)
# df['resume'] = df['resume'].apply(remove_hyphens)
# df['resume'] = df['resume'].apply(remove_endlines)
# df['resume'] = df['resume'].apply(remove_punctuation)
# df['resume'] = df['resume'].apply(remove_stopwords)
# df['resume'] = df['resume'].apply(remove_digits)
# df['resume'] = df['resume'].str.slice_replace(0,1, '') #remove the b at the beginning

In [102]:
# one cleaning method to rule them all
def full_clean(dataframe, string):
    dataframe[string] = dataframe[string].astype(str).str.lower()
    dataframe[string] = dataframe[string].apply(remove_urls)
    dataframe[string] = dataframe[string].apply(remove_html)
    dataframe[string] = dataframe[string].apply(unicodetoascii)
    dataframe[string] = dataframe[string].apply(remove_hyphens)
    dataframe[string] = dataframe[string].apply(remove_endlines)
    dataframe[string] = dataframe[string].apply(remove_punctuation)
    dataframe[string] = dataframe[string].apply(remove_stopwords)
    dataframe[string] = dataframe[string].apply(remove_digits)
    
    return dataframe

In [103]:
df = full_clean(df, 'resume')

In [105]:
# get rid of the byte signifier 'b' at beginning
df['resume'] = df['resume'].str.slice_replace(0,1, '') 

In [106]:
df.head()

Unnamed: 0,ID,Category,resume
0,1,HR,john h smith phr po box callahan fl infog...
1,2,HR,name surname address mobile noemail personal p...
2,3,HR,anthony brown hr assistant areas expertise per...
3,4,HR,id career objective pursue growth oriented ca...
4,5,HR,human resources director xefxxbexpert organiza...


<br>
<br>
<br>
<br>
<br>
<br>

# Clean Job Posts Data

In [107]:
lightsail_jobs_path = '/home/ec2-user/NLP_projects/job_recommender_project/data/job_posts_madhab.csv'
local_jobs_path = '/Users/richardkuzma/coding/NLP_projects/job_recommender_project/data/job_posts_madhab.csv'



jobs_path = local_jobs_path

In [108]:
#load and take first look at job data
jobs = pd.read_csv(jobs_path)
jobs.head()

Unnamed: 0,jobpost,date,Title,Company,AnnouncementCode,Term,Eligibility,Audience,StartDate,Duration,...,Salary,ApplicationP,OpeningDate,Deadline,Notes,AboutC,Attach,Year,Month,IT
0,AMERIA Investment Consulting Company\r\nJOB TI...,"Jan 5, 2004",Chief Financial Officer,AMERIA Investment Consulting Company,,,,,,,...,,"To apply for this position, please submit a\r\...",,26 January 2004,,,,2004,1,False
1,International Research & Exchanges Board (IREX...,"Jan 7, 2004",Full-time Community Connections Intern (paid i...,International Research & Exchanges Board (IREX),,,,,,3 months,...,,Please submit a cover letter and resume to:\r\...,,12 January 2004,,The International Research & Exchanges Board (...,,2004,1,False
2,Caucasus Environmental NGO Network (CENN)\r\nJ...,"Jan 7, 2004",Country Coordinator,Caucasus Environmental NGO Network (CENN),,,,,,Renewable annual contract\r\nPOSITION,...,,Please send resume or CV toursula.kazarian@......,,20 January 2004\r\nSTART DATE: February 2004,,The Caucasus Environmental NGO Network is a\r\...,,2004,1,False
3,Manoff Group\r\nJOB TITLE: BCC Specialist\r\n...,"Jan 7, 2004",BCC Specialist,Manoff Group,,,,,,,...,,Please send cover letter and resume to Amy\r\n...,,23 January 2004\r\nSTART DATE: Immediate,,,,2004,1,False
4,Yerevan Brandy Company\r\nJOB TITLE: Software...,"Jan 10, 2004",Software Developer,Yerevan Brandy Company,,,,,,,...,,Successful candidates should submit\r\n- CV; \...,,"20 January 2004, 18:00",,,,2004,1,True


In [109]:
#far more columns than we need
jobs.columns

Index(['jobpost', 'date', 'Title', 'Company', 'AnnouncementCode', 'Term',
       'Eligibility', 'Audience', 'StartDate', 'Duration', 'Location',
       'JobDescription', 'JobRequirment', 'RequiredQual', 'Salary',
       'ApplicationP', 'OpeningDate', 'Deadline', 'Notes', 'AboutC', 'Attach',
       'Year', 'Month', 'IT'],
      dtype='object')

In [110]:
#drop columns. Kept Title, Description, Requirement, and Quals
#19001 rows x 5 cols
jobs.drop(['jobpost', 'date', 'AnnouncementCode', 'Term', 'Eligibility',
       'Audience', 'StartDate', 'Duration', 'Location',
       'Salary', 'ApplicationP', 'OpeningDate', 'Deadline', 'Notes', 'AboutC', 
       'Attach', 'Year', 'Month', 'IT'], axis=1, inplace=True)

#renaming 
jobs['JobRequirement'] = jobs['JobRequirment']
jobs.drop(['JobRequirment'], axis=1, inplace=True)
jobs

Unnamed: 0,Title,Company,JobDescription,RequiredQual,JobRequirement
0,Chief Financial Officer,AMERIA Investment Consulting Company,AMERIA Investment Consulting Company is seekin...,"To perform this job successfully, an\r\nindivi...",- Supervises financial management and administ...
1,Full-time Community Connections Intern (paid i...,International Research & Exchanges Board (IREX),,- Bachelor's Degree; Master's is preferred;\r\...,
2,Country Coordinator,Caucasus Environmental NGO Network (CENN),Public outreach and strengthening of a growing...,"- Degree in environmentally related field, or ...",- Working with the Country Director to provide...
3,BCC Specialist,Manoff Group,The LEAD (Local Enhancement and Development fo...,"- Advanced degree in public health, social sci...",- Identify gaps in knowledge and overseeing in...
4,Software Developer,Yerevan Brandy Company,,- University degree; economical background is ...,- Rendering technical assistance to Database M...
...,...,...,...,...,...
18996,Senior Creative UX/ UI Designer,Technolinguistics NGO,A tech startup of Technolinguistics based in N...,- At least 5 years of experience in Interface/...,- Work closely with product and business teams...
18997,Category Development Manager,"""Coca-Cola Hellenic Bottling Company Armenia"" ...",,"- University degree, ideally business related;...",- Establish and manage Category Management dev...
18998,Operational Marketing Manager,"""Coca-Cola Hellenic Bottling Company Armenia"" ...",,"- Degree in Business, Marketing or a related f...","- Develop, establish and maintain marketing st..."
18999,Head of Online Sales Department,San Lazzaro LLC,San Lazzaro LLC is looking for a well-experien...,- At least 1 year of experience in online sale...,- Handle the project activites of the online s...


In [111]:
#drop any rows with NaN
#13124 rows x 5 cols from 19001 rows x 5 cols
jobs.dropna(inplace=True)
jobs

Unnamed: 0,Title,Company,JobDescription,RequiredQual,JobRequirement
0,Chief Financial Officer,AMERIA Investment Consulting Company,AMERIA Investment Consulting Company is seekin...,"To perform this job successfully, an\r\nindivi...",- Supervises financial management and administ...
2,Country Coordinator,Caucasus Environmental NGO Network (CENN),Public outreach and strengthening of a growing...,"- Degree in environmentally related field, or ...",- Working with the Country Director to provide...
3,BCC Specialist,Manoff Group,The LEAD (Local Enhancement and Development fo...,"- Advanced degree in public health, social sci...",- Identify gaps in knowledge and overseeing in...
13,"Community Development, Capacity Building and C...",Food Security Regional Cooperation and Stabili...,Food Security Regional Cooperation and Stabili...,- Higher Education and/or professional experie...,- Assist the Tavush Marz communities and commu...
17,Country Economist (NOB),"United Nations Development Programme, Armenia",The United Nations Development Programme in Ar...,- Minimum Masters Degree in Economics;\r\n- Mi...,The incumbent under direct supervision of UNDP...
...,...,...,...,...,...
18992,.NET Developer,Macadamian AR CJSC,The incumbent will develop software applicatio...,- 2 - 5 years of experience in software develo...,- Participate in all the steps of the software...
18995,Deputy Director,"""Transport PIU"" State Institution of the RA Mi...",The incumbent will be responsible for supporti...,"- University degree in Civil Engineering, Econ...",- Support the Director in organizing the activ...
18996,Senior Creative UX/ UI Designer,Technolinguistics NGO,A tech startup of Technolinguistics based in N...,- At least 5 years of experience in Interface/...,- Work closely with product and business teams...
18999,Head of Online Sales Department,San Lazzaro LLC,San Lazzaro LLC is looking for a well-experien...,- At least 1 year of experience in online sale...,- Handle the project activites of the online s...


In [112]:
# reset the indexes after dropping NaN values
# give explicit column called 'label' for index
jobs.set_index(np.arange(len(jobs['Title'])), inplace=True)
for i in jobs_copy['Title']:
    jobs['label'] = jobs['Title'].index

Descriptions, Requirements, Required Quals

In [113]:
#what does a job description look like
jobs['JobDescription'][0]

"AMERIA Investment Consulting Company is seeking a\r\nChief Financial Officer. This position manages the company's fiscal and\r\nadministrative functions, provides highly responsible and technically\r\ncomplex staff assistance to the Executive Director. The work performed\r\nrequires a high level of technical proficiency in financial management\r\nand investment management, as well as management, supervisory, and\r\nadministrative skills."

In [114]:
#what does a job requirement look like
jobs['JobRequirement'][0]

"- Supervises financial management and administrative staff, including\r\nassigning responsibilities, reviewing employees' work processes and\r\nproducts, counseling employees, giving performance evaluations, and\r\nrecommending disciplinary action;\r\n- Serves as member of management team participating in both strategic\r\nand operational planning for the company;\r\n- Directs and oversees the company's financial management activities,\r\nincluding establishing and monitoring internal controls, managing cash\r\nand investments, and managing the investment portfolio in collaboration\r\nwith the Investment team leader. This includes, but is not limited to,\r\nevaluation of investment risk, concentration risk, fund deployment\r\nlevels, adequacy of loss and liquidity reserves Assists investment team\r\nin development of proper documentation and internal systems;\r\n- Directs and oversees the annual budgeting process, including\r\ndeveloping projections for financial planning, and prepari

In [115]:
#what do the required quals look like?
jobs['RequiredQual'][0]

"To perform this job successfully, an\r\nindividual must be able to perform each essential duty satisfactorily.\r\nThe requirements listed below are representative of the knowledge,\r\nskill, and/or ability required.\r\nKnowledge of:\r\n- Generally accepted accounting principles;\r\n- Local accounting standards and legislation;\r\n- State reporting requirements pertaining to accounting;\r\n- Principles and practices of financial management and budgeting;\r\n- Principles and practices of financial systems design and analysis;\r\n- Principles and practices of contract management, records management,\r\nand risk management;\r\n- Principles and practices of management and supervision;\r\n- Principles and practices of information systems management.\r\nAbility to:\r\n- Apply sound fiscal and administrative practices to the company's\r\nactivities;\r\n- Plan, organize and supervise the work of subordinate employees,\r\nincluding training them, assigning and evaluating their work, and\r\nprov

In [116]:
#make new 'combined' column, concatenate strings from 
# Title, JobDescription, JobRequirement, RequiredQuals
jobs['combined'] = jobs['Title'].str.cat(jobs['JobDescription'], sep=' ').str.cat(jobs['JobRequirement'], sep=' ').str.cat(jobs['RequiredQual'], sep =' ')



In [117]:
jobs['combined'][0]

"Chief Financial Officer AMERIA Investment Consulting Company is seeking a\r\nChief Financial Officer. This position manages the company's fiscal and\r\nadministrative functions, provides highly responsible and technically\r\ncomplex staff assistance to the Executive Director. The work performed\r\nrequires a high level of technical proficiency in financial management\r\nand investment management, as well as management, supervisory, and\r\nadministrative skills. - Supervises financial management and administrative staff, including\r\nassigning responsibilities, reviewing employees' work processes and\r\nproducts, counseling employees, giving performance evaluations, and\r\nrecommending disciplinary action;\r\n- Serves as member of management team participating in both strategic\r\nand operational planning for the company;\r\n- Directs and oversees the company's financial management activities,\r\nincluding establishing and monitoring internal controls, managing cash\r\nand investments,

In [118]:
# jobs["combined"] = jobs["combined"].astype(str).str.lower()
# jobs['combined'] = jobs['combined'].apply(remove_urls)
# jobs['combined'] = jobs['combined'].apply(remove_html)
# jobs['combined'] = jobs["combined"].apply(unicodetoascii)
# jobs['combined'] = jobs['combined'].apply(remove_hyphens)
# jobs['combined'] = jobs['combined'].apply(remove_endlines)
# jobs['combined'] = jobs['combined'].apply(remove_punctuation)
# jobs['combined'] = jobs['combined'].apply(remove_stopwords)
# jobs['combined'] = jobs['combined'].apply(remove_digits)

In [119]:
jobs = full_clean(jobs, 'combined')

In [120]:
jobs.head()

Unnamed: 0,Title,Company,JobDescription,RequiredQual,JobRequirement,label,combined
0,Chief Financial Officer,AMERIA Investment Consulting Company,AMERIA Investment Consulting Company is seekin...,"To perform this job successfully, an\r\nindivi...",- Supervises financial management and administ...,0,chief financial officer ameria investment cons...
1,Country Coordinator,Caucasus Environmental NGO Network (CENN),Public outreach and strengthening of a growing...,"- Degree in environmentally related field, or ...",- Working with the Country Director to provide...,1,country coordinator public outreach strengthen...
2,BCC Specialist,Manoff Group,The LEAD (Local Enhancement and Development fo...,"- Advanced degree in public health, social sci...",- Identify gaps in knowledge and overseeing in...,2,bcc specialist lead local enhancement developm...
3,"Community Development, Capacity Building and C...",Food Security Regional Cooperation and Stabili...,Food Security Regional Cooperation and Stabili...,- Higher Education and/or professional experie...,- Assist the Tavush Marz communities and commu...,3,community development capacity building confli...
4,Country Economist (NOB),"United Nations Development Programme, Armenia",The United Nations Development Programme in Ar...,- Minimum Masters Degree in Economics;\r\n- Mi...,The incumbent under direct supervision of UNDP...,4,country economist nob united nations developme...


In [121]:
jobs['combined'][0]

'chief financial officer ameria investment consulting company seeking chief financial officer position manages companys fiscal administrative functions provides highly responsible technically complex staff assistance executive director work performed requires high level technical proficiency financial management investment management well management supervisory administrative skills supervises financial management administrative staff including assigning responsibilities reviewing employees work processes products counseling employees giving performance evaluations recommending disciplinary action serves member management team participating strategic operational planning company directs oversees companys financial management activities including establishing monitoring internal controls managing cash investments managing investment portfolio collaboration investment team leader includes limited evaluation investment risk concentration risk fund deployment levels adequacy loss liquidity

<br>
<br>
<br>
<br>
<br>

## Save cleaned data to CSV

In [125]:
df.head()

Unnamed: 0,ID,Category,resume
0,1,HR,john h smith phr po box callahan fl infog...
1,2,HR,name surname address mobile noemail personal p...
2,3,HR,anthony brown hr assistant areas expertise per...
3,4,HR,id career objective pursue growth oriented ca...
4,5,HR,human resources director xefxxbexpert organiza...


In [137]:
jobs.head()

Unnamed: 0,Title,Company,JobDescription,RequiredQual,JobRequirement,label,combined
0,Chief Financial Officer,AMERIA Investment Consulting Company,AMERIA Investment Consulting Company is seekin...,"To perform this job successfully, an\r\nindivi...",- Supervises financial management and administ...,1,chief financial officer ameria investment cons...
1,Country Coordinator,Caucasus Environmental NGO Network (CENN),Public outreach and strengthening of a growing...,"- Degree in environmentally related field, or ...",- Working with the Country Director to provide...,2,country coordinator public outreach strengthen...
2,BCC Specialist,Manoff Group,The LEAD (Local Enhancement and Development fo...,"- Advanced degree in public health, social sci...",- Identify gaps in knowledge and overseeing in...,3,bcc specialist lead local enhancement developm...
3,"Community Development, Capacity Building and C...",Food Security Regional Cooperation and Stabili...,Food Security Regional Cooperation and Stabili...,- Higher Education and/or professional experie...,- Assist the Tavush Marz communities and commu...,4,community development capacity building confli...
4,Country Economist (NOB),"United Nations Development Programme, Armenia",The United Nations Development Programme in Ar...,- Minimum Masters Degree in Economics;\r\n- Mi...,The incumbent under direct supervision of UNDP...,5,country economist nob united nations developme...


In [None]:
# already ran this, but deleted cell
# used to make sure job labels start at 1 not 0
jobs['label'] = jobs['label'] +1 

In [126]:
# save resumes to csv
lightsail_resume_cleaned_path = r'/home/ec2-user/NLP_projects/job_recommender_project/data/cleaned_resume_dataset_maitrip.csv'
local_resume_cleaned_path = r'/Users/richardkuzma/coding/NLP_projects/job_recommender_project/data/cleaned_resume_dataset_maitrip.csv'

df.to_csv(local_resume_cleaned_path, index=False)


In [139]:
# save jobs to csv
lightsail_jobs_cleaned_path = '/home/ec2-user/NLP_projects/job_recommender_project/data/cleaned_job_posts_madhab.csv'
local_jobs_cleaned_path = '/Users/richardkuzma/coding/NLP_projects/job_recommender_project/data/cleaned_job_posts_madhab.csv'

jobs.to_csv(local_jobs_cleaned_path, index=False)


#### Still need to get rid of the 'xefxxb' in resumes