# MFDS_RS_Jobs

In [1]:
#!/usr/bin/env python
# coding: utf-8

In [2]:
import pandas as pd
import numpy as np
from datetime import date
import glob
import os
from os import path, listdir

import re
import nltk
#nltk.download('stopwords')
#nltk.download('wordnet')
from nltk.stem.porter import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [3]:
MFDS_dir = 'data/mfds/ds/'

In [4]:
tfdif = TfidfVectorizer(stop_words='english')

def recommend_jobs(search: str, item_count: int = 30) -> pd.DataFrame:
    jobs_list = pd.concat(
        [pd.Series([search]), data['Summary']],
        ignore_index=True
    )
    description_matrix = tfdif.fit_transform(jobs_list)
    similarity_matrix = linear_kernel(description_matrix)

    job_index = 0

    similarity_score = list(enumerate(similarity_matrix[job_index]))
    similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)
    similarity_score = similarity_score[1:item_count + 1]

    job_indices = [i[0] for i in similarity_score]
    return data.iloc[job_indices]

In [5]:
def load_data() -> pd.DataFrame:
    csv_files = [path.join(MFDS_dir, csv) for csv in listdir(MFDS_dir)]
    df = pd.concat(
        map(lambda csv: pd.read_csv(csv, index_col=0, encoding='latin1'), csv_files),
        #map(lambda csv: pd.read_csv(csv,  encoding='latin1'), csv_files),
        ignore_index=True
    )

    df['Summary'] = df['Summary'].fillna('')

    df.drop('PostDate', inplace=True, axis=1)

    return df

In [6]:
data = load_data()

data

Unnamed: 0,JobTitle,Company,Location,Summary,url,category
0,Becario (a) Tableau Data Science,Turing Inteligencia Artificial,Mexico,DESCRIPCIÃâN DEL EMPLEO Somos una empresa C...,https://mx.indeed.com/company/Turing-Inteligen...,datascience
1,Data Scientist / Intern to Full time,Framework Science,Mexico,Interest in starting a professional career as ...,https://mx.indeed.com/rc/clk?jk=99945198460f78...,datascience
2,Data Science Graduate (Mexico),Novo Nordisk,Mexico,About the Data Science Graduate Programme*.\nT...,https://mx.indeed.com/rc/clk?jk=902e67a80cbd4d...,datascience
3,Data Analyst-1,S&P Global,Mexico,"3+ years of analytics, business intelligence, ...",https://mx.indeed.com/rc/clk?jk=902e67a80cbd4d...,datascience
4,Account Manager,Club Feast,Mexico,Create personas through user research and data...,https://mx.indeed.com/rc/clk?jk=902e67a80cbd4d...,datascience
...,...,...,...,...,...,...
1717,Systems Security Engineer,LOCKHEED MARTIN CORPORATION,"Moorestown, NJ",Active DoD security clearance at the TOP SECRE...,https://www.indeed.com/rc/clk?jk=05b9fb746a946...,cybersecurity
1718,"Internal Audit Manager, IT","DeKalb County, GA","DeKalb County, GA",This is a Merit-Exempt position and serves at ...,https://www.indeed.com/rc/clk?jk=e3f0c47104471...,cybersecurity
1719,Cloud Security Controls Engineer - Associate,Morgan Stanley,"Baltimore, MD",The candidate will be involved in multiple asp...,https://www.indeed.com/rc/clk?jk=abeba15d9461e...,cybersecurity
1720,Enterprise Security - Audit and Compliance Ana...,MIB Group,"Braintree, MA",Review purchase requests for security implicat...,https://www.indeed.com/rc/clk?jk=fcd395fa340c7...,cybersecurity


In [7]:
results = recommend_jobs("security")

results

Unnamed: 0,JobTitle,Company,Location,Summary,url,category
1706,"Application Security Engineer, Enterprise Engi...",Facebook,"New York, NY",Knowledge of internet security issues. Conduct...,https://www.indeed.com/rc/clk?jk=6d5ae6f611fb6...,cybersecurity
1548,Cyber Security Officer,Leidos,"Hampton, VA",Assessing system security threats/risks. Valid...,https://www.indeed.com/rc/clk?jk=b5ce377a98f80...,cybersecurity
1332,ICBM Mid-Level Cyber Security Engineer,BOEING,"Hill AFB, UT",Perform and publish security vulnerability ass...,https://www.indeed.com/rc/clk?jk=4ddb8e9bc5cdb...,cybersecurity
1397,Cyber Information Systems Security Officer,LOCKHEED MARTIN CORPORATION,"Annapolis Junction, MD",Evaluate security solutions to ensure they mee...,https://www.indeed.com/rc/clk?jk=6f2c3141d7e7f...,cybersecurity
1335,SOC Analyst - Philadelphia area,Accenture,"Washington, DC",You have the experience to analyze a clientsÃ¢...,https://www.indeed.com/rc/clk?jk=398d4cb2b8bcc...,cybersecurity
1410,Sr. Incident Handler (Remote US Available) - 1...,Splunk,"San Francisco, CA",2 years or more of experience leading the resp...,https://www.indeed.com/rc/clk?jk=8ea9700a08c61...,cybersecurity
1447,Cyber Watch Analyst - 1st/2nd shift,Cornerstone Defense,United States,An additional 4 years of experience in a cyber...,https://www.indeed.com/rc/clk?jk=a3b2c4983bad2...,cybersecurity
1320,IT Analyst I,Red Robin,"Greenwood Village, CO",Red Robin is taking extra measures to keep our...,https://www.indeed.com/rc/clk?jk=62cac36e7f0b0...,cybersecurity
1685,IT Governance Analyst,Alliance Data,"Columbus, OH",The Retail IT Governance is a group responsibl...,https://www.indeed.com/rc/clk?jk=05cee348d8561...,cybersecurity
1714,Sr Information Security Specialist (US),TD Bank,"New York, NY",Advanced and highly-specialized knowledge of t...,https://www.indeed.com/rc/clk?jk=acbf52e59cd75...,cybersecurity
