In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import numpy as np
import pandas as pd

In [None]:
# User preferences applied as inclusive [low, high] filters on the ranked jobs.
# The budget bounds are compared against the raw 'Budget' column; the
# experience bounds are compared against the numeric experience levels.
min_budget, max_budget = 60, 150
min_experience, max_experience = 1, 3

salary_range = [min_budget, max_budget]
experience_range = [min_experience, max_experience]

In [5]:
# Load the freelance project listings and preview the first three rows.
projects_csv = './datasets/freelance-projects.csv'
df = pd.read_csv(projects_csv)
df.head(3)

Unnamed: 0,Title,Category Name,Experience,Sub Category Name,Currency,Budget,Location,Freelancer Preferred From,Type,Date Posted,Description,Duration,Client Registration Date,Client City,Client Country,Client Currency,Client Job Title
0,Banner images for web desgin websites,Design,Entry ($),Graphic Design,EUR,60.0,remote,ALL,fixed_price,2023-04-29 18:06:39,We are looking to improve the banner images on...,,2010-11-03,Dublin,Ireland,EUR,PPC Management
1,Make my picture a solid silhouette,"Video, Photo & Image",Entry ($),Image Editing,GBP,20.0,remote,ALL,fixed_price,2023-04-29 17:40:28,Hello \n\nI need a quick designer to make 4 pi...,,2017-02-21,London,United Kingdom,GBP,Office manager
2,Bookkeeper needed,Business,Entry ($),Finance & Accounting,GBP,12.0,remote,ALL,fixed_price,2023-04-29 17:40:06,Hi - I need a bookkeeper to assist with bookke...,,2023-04-09,London,United Kingdom,GBP,Paralegal


In [7]:
# Ordinal encoding for the textual experience labels; labels that are not
# keys of this mapping become NaN after `.map`.
experience_mapping = {'Entry ($)': 1, 'Intermediate ($$)': 2, 'Expert ($$$)': 3}

# Remove the two unused columns, then keep only the columns whose name does
# not contain "Client" (the regex is a negative lookahead over the whole name).
df = (
    df
    .drop(columns=['Date Posted', 'Freelancer Preferred From'])
    .filter(regex=r'^(?!.*Client).*$', axis=1)
)

# Replace the experience labels with their numeric level.
df['Experience'] = df['Experience'].map(experience_mapping)

df.head(3)

Unnamed: 0,Title,Category Name,Experience,Sub Category Name,Currency,Budget,Location,Type,Description,Duration
0,Banner images for web desgin websites,Design,1,Graphic Design,EUR,60.0,remote,fixed_price,We are looking to improve the banner images on...,
1,Make my picture a solid silhouette,"Video, Photo & Image",1,Image Editing,GBP,20.0,remote,fixed_price,Hello \n\nI need a quick designer to make 4 pi...,
2,Bookkeeper needed,Business,1,Finance & Accounting,GBP,12.0,remote,fixed_price,Hi - I need a bookkeeper to assist with bookke...,


In [10]:
# Keywords describing what the user is looking for; they are matched against
# the job descriptions further down.
input_words = [
    "python",
    "SQL",
    "sk-learn",
]

In [11]:
# English stop words as a set, for fast membership tests.
stop_words = set(stopwords.words('english'))

# Lowercase the query keywords and discard any that are stop words.
input_words = [
    keyword
    for keyword in map(str.lower, input_words)
    if keyword not in stop_words
]

In [12]:
def _normalize_description(text):
    """Tokenize ``text``, lowercase each token, and drop stop words.

    Returns the surviving tokens joined by single spaces. Reads the
    notebook-level ``stop_words`` set at call time.
    """
    lowered = (token.lower() for token in word_tokenize(text))
    return " ".join(token for token in lowered if token not in stop_words)


# A missing description is read by pandas as a float NaN, which word_tokenize
# rejects with a TypeError. Treat missing values as empty text instead of
# dropping them: the list must keep one entry per row of `df` so that the
# TF-IDF matrix rows stay aligned with the DataFrame rows.
processed_job_descriptions = [
    _normalize_description(description)
    for description in df['Description'].fillna('').astype(str)
]

In [None]:
# The query is the keywords joined into one space-separated document.
query_text = " ".join(input_words)

# Fit the TF-IDF vocabulary on the job descriptions, then project the query
# with the same fitted vectorizer so both use the same feature space.
vectorizer = TfidfVectorizer()
job_vectors = vectorizer.fit_transform(processed_job_descriptions)
input_vector = vectorizer.transform([query_text])

In [None]:
# Cosine similarity between the single query vector and every job vector;
# row 0 of the result holds the scores for the query.
similarities = cosine_similarity(input_vector, job_vectors)

# argsort is ascending, so take the last 10 entries and reverse them to get
# the best-matching jobs first.
top_indices = np.argsort(similarities[0])[-10:][::-1]

# argsort yields row *positions*, so select positionally with .iloc.
# Label-based .loc only gives the same rows while df keeps a default
# 0..n-1 index, and silently picks wrong rows (or raises) otherwise.
top_jobs = df.iloc[top_indices]
top_jobs

In [None]:
# Keep only the ranked jobs whose budget lies inside the requested range
# (both bounds inclusive). Budgets are compared as stored, with no currency
# conversion.
budget_in_range = top_jobs['Budget'].between(salary_range[0], salary_range[1])
top_jobs = top_jobs[budget_in_range]
top_jobs

In [None]:
# Keep only the ranked jobs whose numeric experience level lies inside the
# requested range (both bounds inclusive).
experience_in_range = top_jobs['Experience'].between(experience_range[0], experience_range[1])
top_jobs = top_jobs[experience_in_range]
top_jobs