In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import numpy as np
import pandas as pd

In [2]:
# Search preferences, each expressed as an inclusive [minimum, maximum] pair.
# The salary bounds are compared against the `Budget` column as-is; the
# experience bounds use the numeric levels 1-3 that `Experience` is mapped to.
min_budget, max_budget = 60, 150
min_experience, max_experience = 1, 3

salary_range = [min_budget, max_budget]
experience_range = [min_experience, max_experience]

In [11]:
# Read the raw project listings into a DataFrame and preview the first three rows.
dataset_path = './datasets/freelance-projects.csv'
df = pd.read_csv(dataset_path)
df.head(n=3)

Unnamed: 0,Title,Category Name,Experience,Sub Category Name,Currency,Budget,Location,Freelancer Preferred From,Type,Date Posted,Description,Duration,Client Registration Date,Client City,Client Country,Client Currency,Client Job Title
0,Banner images for web desgin websites,Design,Entry ($),Graphic Design,EUR,60.0,remote,ALL,fixed_price,2023-04-29 18:06:39,We are looking to improve the banner images on...,,2010-11-03,Dublin,Ireland,EUR,PPC Management
1,Make my picture a solid silhouette,"Video, Photo & Image",Entry ($),Image Editing,GBP,20.0,remote,ALL,fixed_price,2023-04-29 17:40:28,Hello \n\nI need a quick designer to make 4 pi...,,2017-02-21,London,United Kingdom,GBP,Office manager
2,Bookkeeper needed,Business,Entry ($),Finance & Accounting,GBP,12.0,remote,ALL,fixed_price,2023-04-29 17:40:06,Hi - I need a bookkeeper to assist with bookke...,,2023-04-09,London,United Kingdom,GBP,Paralegal


In [12]:
# Trim the frame to the columns used for matching and encode experience numerically.
# The cell is written to be safely re-runnable on an already-processed `df`:
# a plain drop would raise KeyError on a second run, and re-applying `.map`
# to already-numeric values would turn every Experience value into NaN.

# errors='ignore' makes the drop a no-op once these columns are already gone.
df = df.drop(columns=['Date Posted', 'Freelancer Preferred From'], errors='ignore')
# Negative look-ahead: keep only the columns whose name does not contain "Client".
df = df.filter(regex=r'^(?!.*Client).*$', axis=1)

experience_mapping = {'Entry ($)': 1, 'Intermediate ($$)': 2, 'Expert ($$$)': 3}
# Map the text labels only while the column still holds them; labels missing
# from the mapping become NaN, exactly as with an unguarded `.map`.
if not pd.api.types.is_numeric_dtype(df['Experience']):
    df['Experience'] = df['Experience'].map(experience_mapping)

df.head(3)

Unnamed: 0,Title,Category Name,Experience,Sub Category Name,Currency,Budget,Location,Type,Description,Duration
0,Banner images for web desgin websites,Design,1,Graphic Design,EUR,60.0,remote,fixed_price,We are looking to improve the banner images on...,
1,Make my picture a solid silhouette,"Video, Photo & Image",1,Image Editing,GBP,20.0,remote,fixed_price,Hello \n\nI need a quick designer to make 4 pi...,
2,Bookkeeper needed,Business,1,Finance & Accounting,GBP,12.0,remote,fixed_price,Hi - I need a bookkeeper to assist with bookke...,


In [5]:
# List the distinct values of the `Category Name` column.
category_column = df['Category Name']
unique_categories = category_column.unique()
print(unique_categories)

['Design' 'Video, Photo & Image' 'Business' 'Digital Marketing'
 'Technology & Programming' 'Music & Audio' 'Social Media'
 'Marketing, Branding & Sales' 'Writing & Translation']


In [6]:
# Collect the distinct sub-categories; only the shape of the result is printed,
# i.e. how many different values the `Sub Category Name` column holds.
sub_category_column = df['Sub Category Name']
unique_sub_categories = sub_category_column.unique()
print(unique_sub_categories.shape)

(107,)


In [9]:
# List the distinct values of the `Type` column.
type_column = df['Type']
unique_type = type_column.unique()
print(unique_type)

['fixed_price' 'hourly']


In [13]:
# Skills the user is searching for.
input_words = ["python", 'SQL', 'sk-learn']

# English stop words as a set, for fast membership checks (reused when the
# job descriptions are cleaned).
stop_words = set(stopwords.words('english'))

# Lower-case every query term, then drop the ones that are stop words.
lowered_terms = (term.lower() for term in input_words)
input_words = [term for term in lowered_terms if term not in stop_words]

In [14]:
def _strip_stop_words(text):
    """Tokenize `text`, lower-case each token and drop English stop words.

    Returns the surviving tokens joined by single spaces. Relies on the
    module-level `stop_words` set.
    """
    lowered_tokens = (token.lower() for token in word_tokenize(text))
    return " ".join(token for token in lowered_tokens if token not in stop_words)


# One cleaned string per job description, in the same row order as `df`.
processed_job_descriptions = [_strip_stop_words(desc) for desc in df['Description']]

In [15]:
# Learn the TF-IDF vocabulary from the cleaned job descriptions and vectorize them.
vectorizer = TfidfVectorizer()
job_vectors = vectorizer.fit_transform(processed_job_descriptions)

# Project the query into the same vector space: the search terms are joined
# into a single space-separated "document" and transformed with the fitted model.
query_text = " ".join(input_words)
input_vector = vectorizer.transform([query_text])

In [16]:
# Rank every job by cosine similarity to the query and keep the best matches.
TOP_N = 10  # number of highest-scoring jobs to keep

# One row (the single query) by one column per job description.
similarities = cosine_similarity(input_vector, job_vectors)
# argsort is ascending: take the last TOP_N positions and reverse them so the
# best match comes first.
top_indices = np.argsort(similarities[0])[-TOP_N:][::-1]
# argsort yields row *positions*, so select with .iloc. Label-based .loc only
# agrees with positions while `df` keeps its default 0..n-1 index and would
# pick wrong (or missing) rows after any row filtering or re-indexing.
top_jobs = df.iloc[top_indices]
top_jobs

Unnamed: 0,Title,Category Name,Experience,Sub Category Name,Currency,Budget,Location,Type,Description,Duration
330,Connection between Python code and Laravel Pro...,Technology & Programming,1,Website Development,USD,10.0,remote,fixed_price,I need a web developer who can connect python ...,
12219,Simple SQL Query,Technology & Programming,1,Data Science & Analysis,GBP,50.0,remote,fixed_price,I need someone to write a quick SQL query on a...,
10378,PYTHON PROJECT,Technology & Programming,1,Data Science & Analysis,USD,30.0,remote,fixed_price,Hello\nHaving work related to python. There ar...,
4541,Convert VBA to Python,Technology & Programming,1,Programming & Coding,GBP,153.0,remote,fixed_price,HI \n\nI have a VBA script that I would like c...,
5552,Need to parameterize sql statements in website...,Technology & Programming,1,Programming & Coding,USD,140.0,remote,fixed_price,Hello all. I have a Sports website that is wri...,
4565,M2 interacting with an external SQL database,Technology & Programming,3,eCommerce CMS Development,GBP,264.0,remote,fixed_price,I would like to send all invoices from magento...,
6757,Got a complex system need a long term helper V...,Technology & Programming,1,Programming & Coding,USD,10.0,remote,hourly,We have a custom system built on VB and hosted...,
231,Python Django developer,Technology & Programming,3,Website Development,USD,320.0,remote,fixed_price,"Hi, I'm looking for a Python Django developer ...",
12216,Simple SQL Query,Technology & Programming,1,Databases,GBP,30.0,remote,fixed_price,I need someone to write a quick SQL query base...,
9831,Software developer for indian stock market,Technology & Programming,1,Programming & Coding,USD,50.0,remote,fixed_price,I am looking for freelance who can get tick by...,


In [17]:
# Keep only the ranked jobs whose Budget lies inside salary_range (bounds inclusive).
# NOTE(review): Budget is compared as a raw number although the dataset mixes
# EUR, GBP and USD budgets and both fixed_price and hourly listings - confirm
# that this is intended.
budget_low = salary_range[0]
budget_high = salary_range[1]
budget = top_jobs['Budget']
within_budget = (budget >= budget_low) & (budget <= budget_high)
top_jobs = top_jobs[within_budget]
top_jobs

Unnamed: 0,Title,Category Name,Experience,Sub Category Name,Currency,Budget,Location,Type,Description,Duration
5552,Need to parameterize sql statements in website...,Technology & Programming,1,Programming & Coding,USD,140.0,remote,fixed_price,Hello all. I have a Sports website that is wri...,


In [18]:
# Keep only the jobs whose numeric Experience level lies inside experience_range
# (bounds inclusive). Rows whose Experience is NaN fail both comparisons and
# are therefore dropped as well.
experience_low = experience_range[0]
experience_high = experience_range[1]
experience = top_jobs['Experience']
within_experience = (experience >= experience_low) & (experience <= experience_high)
top_jobs = top_jobs[within_experience]
top_jobs

Unnamed: 0,Title,Category Name,Experience,Sub Category Name,Currency,Budget,Location,Type,Description,Duration
5552,Need to parameterize sql statements in website...,Technology & Programming,1,Programming & Coding,USD,140.0,remote,fixed_price,Hello all. I have a Sports website that is wri...,
