## Import dependencies

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ast 
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.model_selection import train_test_split

import warnings; warnings.simplefilter('ignore')

## Load dataset

In [3]:
!ls naukri_com-job_sample.csv

naukri_com-job_sample.csv


In [4]:
data = pd.read_csv('naukri_com-job_sample.csv')

In [5]:
data.head()

Unnamed: 0,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id
0,MM Media Pvt Ltd,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,0 - 1 yrs,Media / Entertainment / Internet,Job Description Send me Jobs like this Quali...,210516002263,Chennai,Walkin Data Entry Operator (night Shift),,"1,50,000 - 2,25,000 P.A",2016-05-21 19:30:00 +0000,,ITES,43b19632647068535437c774b6ca6cf8
1,find live infotech,UG: B.Tech/B.E. - Any Specialization PG:MBA/PG...,0 - 0 yrs,Advertising / PR / MR / Event Management,Job Description Send me Jobs like this Quali...,210516002391,Chennai,Work Based Onhome Based Part Time.,60.0,"1,50,000 - 2,50,000 P.A. 20000",2016-05-21 19:30:00 +0000,,Marketing,d4c72325e57f89f364812b5ed5a795f0
2,Softtech Career Infosystem Pvt. Ltd,UG: Any Graduate - Any Specialization PG:Any P...,4 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this - as ...,101016900534,Bengaluru,Pl/sql Developer - SQL,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,c47df6f4cfdf5b46f1fd713ba61b9eba
3,Onboard HRServices LLP,UG: Any Graduate - Any Specialization PG:CA Do...,11 - 15 yrs,Banking / Financial Services / Broking,Job Description Send me Jobs like this - Inv...,81016900536,"Mumbai, Bengaluru, Kolkata, Chennai, Coimbator...",Manager/ad/partner - Indirect Tax - CA,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,Accounts,115d28f140f694dd1cc61c53d03c66ae
4,Spire Technologies and Solutions Pvt. Ltd.,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,6 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Pleas...,120916002122,Bengaluru,JAVA Technical Lead (6-8 yrs) -,4.0,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,a12553fc03bc7bcced8b1bb8963f97b4


In [6]:
data.columns

Index(['company', 'education', 'experience', 'industry', 'jobdescription',
       'jobid', 'joblocation_address', 'jobtitle', 'numberofpositions',
       'payrate', 'postdate', 'site_name', 'skills', 'uniq_id'],
      dtype='object')

In [7]:
data.shape

(22000, 14)

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22000 entries, 0 to 21999
Data columns (total 14 columns):
company                21996 non-null object
education              20004 non-null object
experience             21996 non-null object
industry               21995 non-null object
jobdescription         21996 non-null object
jobid                  22000 non-null int64
joblocation_address    21499 non-null object
jobtitle               22000 non-null object
numberofpositions      4464 non-null float64
payrate                21903 non-null object
postdate               21977 non-null object
site_name              3987 non-null object
skills                 21472 non-null object
uniq_id                22000 non-null object
dtypes: float64(1), int64(1), object(12)
memory usage: 2.3+ MB


## Pre-processing data

In [9]:
df = data[data['industry'] == "IT-Software / Software Services"]

In [19]:
df

Unnamed: 0,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id
2,Softtech Career Infosystem Pvt. Ltd,UG: Any Graduate - Any Specialization PG:Any P...,4 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this - as ...,101016900534,Bengaluru,Pl/sql Developer - SQL,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,c47df6f4cfdf5b46f1fd713ba61b9eba
4,Spire Technologies and Solutions Pvt. Ltd.,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,6 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Pleas...,120916002122,Bengaluru,JAVA Technical Lead (6-8 yrs) -,4.0,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,a12553fc03bc7bcced8b1bb8963f97b4
5,PFS Web Global Services Pvt Ltd,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,2 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this We ar...,131016005070,Bengaluru,WALK IN - As400 Developer - Pfsweb Global Serv...,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,8c3af9062ea835b0965779e2c7faac76
6,Kinesis Management Consultant Pvt. Ltd,,1 - 3 yrs,IT-Software / Software Services,Job Description Send me Jobs like this exper...,131016004451,"Delhi NCR, Mumbai, Bengaluru, Kochi, Greater N...",PHP Developer,2.0,"3,00,000 - 6,50,000 P.A",2016-10-13 16:20:55 +0000,,IT Software - Application Programming,98f84958cd6409386e7f0c9e447b8510
9,Accenture,UG: Any Graduate - Any Specialization PG:Any P...,1 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Overa...,121016901354,Bengaluru,German Translator,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,ITES,7774df1c672c0b92486da8b36a721638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21990,Link Tree Technologies PVT LTD hiring for CMM ...,UG: Any Graduate - Any Specialization PG:Post ...,7 - 12 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Looki...,261116001071,Bengaluru,Urgent Placement for SAP ABAP with HCL Bangalore,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,9810d3c9e451e7374871ec1927d59111
21992,Confidential,UG: Any Graduate - Any Specialization Doctorat...,12 - 18 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Solut...,261116900077,Bengaluru,Advisory Solution Architect,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,0eb70f84a1d491fb2ad7332241475741
21996,Careernet Technologies Pvt Ltd hiring for Client,UG: B.Tech/B.E. - Any Specialization PG:M.Tech...,3 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Looki...,241116001104,"Bengaluru, Gurgaon",Java Backend Developers for a Product Company,,"8,50,000 - 14,00,000 P.A",2016-11-24 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,acb80b7bb109fa76d22d1f11c6e16c6a
21998,Confidential,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,7 - 12 yrs,IT-Software / Software Services,Job Description Send me Jobs like this We ar...,231116901329,"Delhi NCR, Bengaluru",Sr UI Developer/ Technical Lead - Html/ CSS/ J...,,Not Disclosed by Recruiter,2016-11-27 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,c6f6538cb1007dd02c1b4b77909b3d38


In [26]:
df.shape

(9216, 14)

In [39]:
replacements = {
   'joblocation_address': {
      r'(Bengaluru/Bangalore)': 'Bangalore',
      r'Bengaluru': 'Bangalore',
      r'Hyderabad / Secunderabad': 'Hyderabad',
      r'Mumbai , Mumbai': 'Mumbai',
      r'Noida': 'NCR',
      r'Delhi': 'NCR',
      r'Gurgaon': 'NCR', 
      r'Delhi/NCR(National Capital Region)': 'NCR',
      r'Delhi , Delhi': 'NCR',
      r'Noida , Noida/Greater Noida': 'NCR',
      r'Ghaziabad': 'NCR',
      r'Delhi/NCR(National Capital Region) , Gurgaon': 'NCR',
      r'NCR , NCR': 'NCR',
      r'NCR/NCR(National Capital Region)': 'NCR',
      r'NCR , NCR/Greater NCR': 'NCR',
      r'NCR/NCR(National Capital Region) , NCR': 'NCR', 
      r'NCR , NCR/NCR(National Capital Region)': 'NCR', 
      r'Bangalore , Bangalore / Bangalore': 'Bangalore',
      r'Bangalore , karnataka': 'Bangalore',
      r'NCR/NCR(National Capital Region)': 'NCR',
      r'NCR/Greater NCR': 'NCR',
      r'NCR/NCR(National Capital Region) , NCR': 'NCR'
       
   }
}

df.replace(replacements, regex=True, inplace=True)

In [40]:
df['split'] = np.random.randn(df.shape[0], 1)

msk = np.random.rand(len(df)) <= 0.7

train = df[msk]
test = df[~msk]

In [41]:
train

Unnamed: 0,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id,split
2,Softtech Career Infosystem Pvt. Ltd,UG: Any Graduate - Any Specialization PG:Any P...,4 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this - as ...,101016900534,Bangalore,Pl/sql Developer - SQL,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,c47df6f4cfdf5b46f1fd713ba61b9eba,0.647748
4,Spire Technologies and Solutions Pvt. Ltd.,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,6 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Pleas...,120916002122,Bangalore,JAVA Technical Lead (6-8 yrs) -,4.0,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,a12553fc03bc7bcced8b1bb8963f97b4,-0.114305
5,PFS Web Global Services Pvt Ltd,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,2 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this We ar...,131016005070,Bangalore,WALK IN - As400 Developer - Pfsweb Global Serv...,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,8c3af9062ea835b0965779e2c7faac76,-0.638890
9,Accenture,UG: Any Graduate - Any Specialization PG:Any P...,1 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Overa...,121016901354,Bangalore,German Translator,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,ITES,7774df1c672c0b92486da8b36a721638,-0.266100
11,Convate hiring for Retail/e-commerce domain,UG: Any Graduate - Any Specialization,4 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Greet...,61016003187,Bangalore,Opening for Android Developer-bangalore-4-8 yrs,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,2657f737cb46cf289a05d6577a746976,-0.878506
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21986,Peopleton Solutions,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,4 - 9 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Stron...,261116002974,"Bangalore, NCR",SDE 2 (contact at Priyank.pal@peopleton.in),,"15,00,000 - 30,00,000 P.A. Salary is open",2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - eCommerce,ca542ff8b113744468b677f061bde0ec,-0.169210
21988,Microsoft India (R and D) Pvt Ltd,UG: B.Tech/B.E. - Computers PG:MS/M.Sc(Science...,6 - 7 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Job D...,241116901005,Bangalore,SDE2,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - eCommerce,a225337270ab5f885ee8af83e7b04865,1.172678
21990,Link Tree Technologies PVT LTD hiring for CMM ...,UG: Any Graduate - Any Specialization PG:Post ...,7 - 12 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Looki...,261116001071,Bangalore,Urgent Placement for SAP ABAP with HCL Bangalore,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,9810d3c9e451e7374871ec1927d59111,-1.368424
21992,Confidential,UG: Any Graduate - Any Specialization Doctorat...,12 - 18 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Solut...,261116900077,Bangalore,Advisory Solution Architect,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,0eb70f84a1d491fb2ad7332241475741,1.138126


In [42]:
test

Unnamed: 0,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id,split
6,Kinesis Management Consultant Pvt. Ltd,,1 - 3 yrs,IT-Software / Software Services,Job Description Send me Jobs like this exper...,131016004451,"NCR NCR, Mumbai, Bangalore, Kochi, Greater NCR...",PHP Developer,2.0,"3,00,000 - 6,50,000 P.A",2016-10-13 16:20:55 +0000,,IT Software - Application Programming,98f84958cd6409386e7f0c9e447b8510,-0.068122
23,OKDA Solutions,UG: Any Graduate - Any Specialization,6 - 10 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Job D...,110116900920,Bangalore,Sr iOS Developer - Objective C / Cocoa,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,bf23fa0322c4af0f4d24ae7241a33d99,-0.751303
28,Careernet Technologies Pvt Ltd hiring for A US...,UG: B.Tech/B.E. - Any Specialization PG:M.Tech...,3 - 6 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Posit...,121016004415,Bangalore,"Senio QA with a Product Based Startup, Bangalore",,"9,00,000 - 14,00,000 P.A",2016-10-13 16:20:56 +0000,,IT Software - Application Programming,3382a9c15d825cb9382dc5f3a1f99ce3,0.493978
30,The HRism hiring for US based Product Client,UG: Any Graduate - Any Specialization PG:Any P...,3 - 7 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Candi...,131016007104,"Ahmedabad, Bangalore, Pune",Test Automation Engineer ( Selenium + API Test...,2.0,Not Disclosed by Recruiter,2016-10-13 16:20:56 +0000,,IT Software - QA & Testing,fb748fb77d37c9cdd15daaa68f77f95a,-1.350865
37,SLK Software Services Pvt. Ltd.,UG: B.Tech/B.E. - Computers PG:Post Graduation...,3 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Job P...,121016901125,Bangalore,Senior Developer-bi,,Not Disclosed by Recruiter,2016-10-13 16:20:56 +0000,,IT Software - Application Programming,0f469db6708503116bf996c325047bc1,-1.292006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21945,Career Trackers and Consulting hiring for A ve...,"UG: Any Graduate, Graduation Not Required PG:A...",5 - 10 yrs,IT-Software / Software Services,Job Description Send me Jobs like this We ha...,70916000307,"Bangalore, Pune, Mumbai","Network Security Specialist for Pune, Mumbai &...",10.0,Best In The Industry,2016-11-25 22:47:03 +0000,www.naukri.com,IT Software - Network Administration,c5a9b84defcce6ec9702743b348fba7b,-0.142197
21955,Cambio Consulting India Pvt Ltd,,6 - 11 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Job D...,251116006262,"Bangalore, Hyderabad, Chennai, Pune",Java/j2ee with Spring Architects--bangalore,,"8,00,000 - 18,00,000 P.A",2016-11-24 22:47:03 +0000,www.naukri.com,IT Software - eCommerce,25f9b33a61fc19803163399a221d9f05,-1.694985
21968,HR ACCESS,UG: B.Tech/B.E. - Any Specialization PG:MBA/PG...,10 - 14 yrs,IT-Software / Software Services,Job Description Send me Jobs like this The A...,251116900026,Bangalore,Application Services Global Program Office Lea...,,Not Disclosed by Recruiter,2016-11-25 22:47:04 +0000,www.naukri.com,Sales,44dcc9527994ac2914af93e38dd85aff,0.490008
21996,Careernet Technologies Pvt Ltd hiring for Client,UG: B.Tech/B.E. - Any Specialization PG:M.Tech...,3 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Looki...,241116001104,"Bangalore, NCR",Java Backend Developers for a Product Company,,"8,50,000 - 14,00,000 P.A",2016-11-24 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,acb80b7bb109fa76d22d1f11c6e16c6a,0.453978


In [43]:
test.shape

(2829, 15)

In [44]:
test.head()

Unnamed: 0,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id,split
6,Kinesis Management Consultant Pvt. Ltd,,1 - 3 yrs,IT-Software / Software Services,Job Description Send me Jobs like this exper...,131016004451,"NCR NCR, Mumbai, Bangalore, Kochi, Greater NCR...",PHP Developer,2.0,"3,00,000 - 6,50,000 P.A",2016-10-13 16:20:55 +0000,,IT Software - Application Programming,98f84958cd6409386e7f0c9e447b8510,-0.068122
23,OKDA Solutions,UG: Any Graduate - Any Specialization,6 - 10 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Job D...,110116900920,Bangalore,Sr iOS Developer - Objective C / Cocoa,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,bf23fa0322c4af0f4d24ae7241a33d99,-0.751303
28,Careernet Technologies Pvt Ltd hiring for A US...,UG: B.Tech/B.E. - Any Specialization PG:M.Tech...,3 - 6 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Posit...,121016004415,Bangalore,"Senio QA with a Product Based Startup, Bangalore",,"9,00,000 - 14,00,000 P.A",2016-10-13 16:20:56 +0000,,IT Software - Application Programming,3382a9c15d825cb9382dc5f3a1f99ce3,0.493978
30,The HRism hiring for US based Product Client,UG: Any Graduate - Any Specialization PG:Any P...,3 - 7 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Candi...,131016007104,"Ahmedabad, Bangalore, Pune",Test Automation Engineer ( Selenium + API Test...,2.0,Not Disclosed by Recruiter,2016-10-13 16:20:56 +0000,,IT Software - QA & Testing,fb748fb77d37c9cdd15daaa68f77f95a,-1.350865
37,SLK Software Services Pvt. Ltd.,UG: B.Tech/B.E. - Computers PG:Post Graduation...,3 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Job P...,121016901125,Bangalore,Senior Developer-bi,,Not Disclosed by Recruiter,2016-10-13 16:20:56 +0000,,IT Software - Application Programming,0f469db6708503116bf996c325047bc1,-1.292006


In [45]:
train.head(5).transpose()

Unnamed: 0,2,4,5,9,11
company,Softtech Career Infosystem Pvt. Ltd,Spire Technologies and Solutions Pvt. Ltd.,PFS Web Global Services Pvt Ltd,Accenture,Convate hiring for Retail/e-commerce domain
education,UG: Any Graduate - Any Specialization PG:Any P...,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,UG: Any Graduate - Any Specialization PG:Any P...,UG: Any Graduate - Any Specialization
experience,4 - 8 yrs,6 - 8 yrs,2 - 5 yrs,1 - 5 yrs,4 - 8 yrs
industry,IT-Software / Software Services,IT-Software / Software Services,IT-Software / Software Services,IT-Software / Software Services,IT-Software / Software Services
jobdescription,Job Description Send me Jobs like this - as ...,Job Description Send me Jobs like this Pleas...,Job Description Send me Jobs like this We ar...,Job Description Send me Jobs like this Overa...,Job Description Send me Jobs like this Greet...
jobid,101016900534,120916002122,131016005070,121016901354,61016003187
joblocation_address,Bangalore,Bangalore,Bangalore,Bangalore,Bangalore
jobtitle,Pl/sql Developer - SQL,JAVA Technical Lead (6-8 yrs) -,WALK IN - As400 Developer - Pfsweb Global Serv...,German Translator,Opening for Android Developer-bangalore-4-8 yrs
numberofpositions,,4,,,
payrate,Not Disclosed by Recruiter,Not Disclosed by Recruiter,Not Disclosed by Recruiter,Not Disclosed by Recruiter,Not Disclosed by Recruiter


In [46]:
train

Unnamed: 0,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id,split
2,Softtech Career Infosystem Pvt. Ltd,UG: Any Graduate - Any Specialization PG:Any P...,4 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this - as ...,101016900534,Bangalore,Pl/sql Developer - SQL,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,c47df6f4cfdf5b46f1fd713ba61b9eba,0.647748
4,Spire Technologies and Solutions Pvt. Ltd.,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,6 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Pleas...,120916002122,Bangalore,JAVA Technical Lead (6-8 yrs) -,4.0,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,a12553fc03bc7bcced8b1bb8963f97b4,-0.114305
5,PFS Web Global Services Pvt Ltd,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,2 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this We ar...,131016005070,Bangalore,WALK IN - As400 Developer - Pfsweb Global Serv...,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,8c3af9062ea835b0965779e2c7faac76,-0.638890
9,Accenture,UG: Any Graduate - Any Specialization PG:Any P...,1 - 5 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Overa...,121016901354,Bangalore,German Translator,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,ITES,7774df1c672c0b92486da8b36a721638,-0.266100
11,Convate hiring for Retail/e-commerce domain,UG: Any Graduate - Any Specialization,4 - 8 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Greet...,61016003187,Bangalore,Opening for Android Developer-bangalore-4-8 yrs,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,2657f737cb46cf289a05d6577a746976,-0.878506
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21986,Peopleton Solutions,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,4 - 9 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Stron...,261116002974,"Bangalore, NCR",SDE 2 (contact at Priyank.pal@peopleton.in),,"15,00,000 - 30,00,000 P.A. Salary is open",2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - eCommerce,ca542ff8b113744468b677f061bde0ec,-0.169210
21988,Microsoft India (R and D) Pvt Ltd,UG: B.Tech/B.E. - Computers PG:MS/M.Sc(Science...,6 - 7 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Job D...,241116901005,Bangalore,SDE2,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - eCommerce,a225337270ab5f885ee8af83e7b04865,1.172678
21990,Link Tree Technologies PVT LTD hiring for CMM ...,UG: Any Graduate - Any Specialization PG:Post ...,7 - 12 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Looki...,261116001071,Bangalore,Urgent Placement for SAP ABAP with HCL Bangalore,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,9810d3c9e451e7374871ec1927d59111,-1.368424
21992,Confidential,UG: Any Graduate - Any Specialization Doctorat...,12 - 18 yrs,IT-Software / Software Services,Job Description Send me Jobs like this Solut...,261116900077,Bangalore,Advisory Solution Architect,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,0eb70f84a1d491fb2ad7332241475741,1.138126


## Job Recommendation Based on Job Title

In [48]:
train.columns

Index(['company', 'education', 'experience', 'industry', 'jobdescription',
       'jobid', 'joblocation_address', 'jobtitle', 'numberofpositions',
       'payrate', 'postdate', 'site_name', 'skills', 'uniq_id', 'split'],
      dtype='object')

In [49]:
train.head().transpose()

Unnamed: 0,2,4,5,9,11
company,Softtech Career Infosystem Pvt. Ltd,Spire Technologies and Solutions Pvt. Ltd.,PFS Web Global Services Pvt Ltd,Accenture,Convate hiring for Retail/e-commerce domain
education,UG: Any Graduate - Any Specialization PG:Any P...,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,UG: Any Graduate - Any Specialization PG:Any P...,UG: Any Graduate - Any Specialization
experience,4 - 8 yrs,6 - 8 yrs,2 - 5 yrs,1 - 5 yrs,4 - 8 yrs
industry,IT-Software / Software Services,IT-Software / Software Services,IT-Software / Software Services,IT-Software / Software Services,IT-Software / Software Services
jobdescription,Job Description Send me Jobs like this - as ...,Job Description Send me Jobs like this Pleas...,Job Description Send me Jobs like this We ar...,Job Description Send me Jobs like this Overa...,Job Description Send me Jobs like this Greet...
jobid,101016900534,120916002122,131016005070,121016901354,61016003187
joblocation_address,Bangalore,Bangalore,Bangalore,Bangalore,Bangalore
jobtitle,Pl/sql Developer - SQL,JAVA Technical Lead (6-8 yrs) -,WALK IN - As400 Developer - Pfsweb Global Serv...,German Translator,Opening for Android Developer-bangalore-4-8 yrs
numberofpositions,,4,,,
payrate,Not Disclosed by Recruiter,Not Disclosed by Recruiter,Not Disclosed by Recruiter,Not Disclosed by Recruiter,Not Disclosed by Recruiter


In [52]:
train['jobtitle'] = train['jobtitle'].fillna('')
train['jobdescription'] = train['jobdescription'].fillna('')
#jobs_US_base_line['Requirements'] = jobs_US_base_line['Requirements'].fillna('')

train['jobdescription'] = train['jobtitle'] + train['jobdescription']

In [53]:
tf = TfidfVectorizer(ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(train['jobdescription'])

In [54]:
tfidf_matrix.shape

(6387, 410269)

In [55]:
# http://scikit-learn.org/stable/modules/metrics.html#linear-kernel
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [56]:
cosine_sim[0]

array([1.        , 0.02118194, 0.01565881, ..., 0.0276533 , 0.02820985,
       0.01674215])

In [58]:
train = train.reset_index()
titles = train['jobtitle']
indices = pd.Series(train.index, index=train['jobtitle'])
#indices.head(2)


In [59]:
def get_recommendations(title):
    idx = indices[title]
    #print (idx)
    sim_scores = list(enumerate(cosine_sim[idx]))
    #print (sim_scores)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    job_indices = [i[0] for i in sim_scores]
    return titles.iloc[job_indices]

In [61]:
get_recommendations('Pl/sql Developer - SQL').head(10)

0                         Pl/sql Developer - SQL
3773                   Pl/sql Developer - Mumbai
1369                   ETL Tester - Sql/teradata
3072    Oracle BRM Developer - C/shell Scripting
72      Salesforce Developer - Apex/visual Force
3079                Hadoop Developer - Java/j2ee
6350                 Windows Engineer - Embedded
3111      Java/j2ee Developer - Spring/hibernate
3094                                 UX Designer
1567          UI Developer - Html/css/javascript
Name: jobtitle, dtype: object

In [63]:
get_recommendations('German Translator').head(10)

3                                       German Translator
481                          Salesforce.com Professionals
7                                       Revenue Assurance
15                                           Call Quality
11                                              Analytics
285     Sap-extended Wareh.&logis.- Logis. Serv. Provi...
6308                    Java Enterprise Edition Developer
347              PTC (parametric Technology Corporation)-
398                            Human Performance Lab Lead
469                         Recruitment Associate Manager
Name: jobtitle, dtype: object

In [64]:
get_recommendations('Opening for Android Developer-bangalore-4-8 yrs').head(10)

4         Opening for Android Developer-bangalore-4-8 yrs
179     Immediate Joinee for WAS for top Retail Domain...
28      Opening for Hadoop Developers-product Developm...
253                   Android Developer,3.5yrs+,bangalore
904     Immediate Opening for Android Application Deve...
6213    Android Developer -2+ Years Experience (locati...
852                                     Android Developer
2552                Android Developer - Immediate Joining
4930                             Project Engineer C#, WPF
2341    Urgent :-immediate Req- Android Developer,good...
Name: jobtitle, dtype: object

In [66]:
get_recommendations('JAVA Technical Lead (6-8 yrs) -').head(10)

1                         JAVA Technical Lead (6-8 yrs) -
984                                   Java Technical Lead
2037                    Big Data Technical Architect/Lead
5870           Senior Tech Lead/architect - Big Data/java
3354                                     Product Engineer
6301                                  Java Technical lead
3372                                 Sr. Product Engineer
4163    Hiring Big Data Experts for a Product Dev Co -...
90                                          Lead Engineer
5208                                 Angular.js Developer
Name: jobtitle, dtype: object

## Alternative approach

#### Find out similar users -- Find out for which jobs they have applied -- suggest those job to the other users who shared similar user profile.

We are finding put similar user profile based on their education and total years of experience. 
* We will get to 10 similar users.
* We will find our which are the jobs for which these users have applied
* We take an union of these jobs and recommend the jobs all these user base

In [67]:
train.head()

Unnamed: 0,index,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id,split
0,2,Softtech Career Infosystem Pvt. Ltd,UG: Any Graduate - Any Specialization PG:Any P...,4 - 8 yrs,IT-Software / Software Services,Pl/sql Developer - SQLJob Description Send m...,101016900534,Bangalore,Pl/sql Developer - SQL,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,c47df6f4cfdf5b46f1fd713ba61b9eba,0.647748
1,4,Spire Technologies and Solutions Pvt. Ltd.,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,6 - 8 yrs,IT-Software / Software Services,JAVA Technical Lead (6-8 yrs) -Job Description...,120916002122,Bangalore,JAVA Technical Lead (6-8 yrs) -,4.0,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,a12553fc03bc7bcced8b1bb8963f97b4,-0.114305
2,5,PFS Web Global Services Pvt Ltd,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,2 - 5 yrs,IT-Software / Software Services,WALK IN - As400 Developer - Pfsweb Global Serv...,131016005070,Bangalore,WALK IN - As400 Developer - Pfsweb Global Serv...,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,8c3af9062ea835b0965779e2c7faac76,-0.63889
3,9,Accenture,UG: Any Graduate - Any Specialization PG:Any P...,1 - 5 yrs,IT-Software / Software Services,German TranslatorJob Description Send me Job...,121016901354,Bangalore,German Translator,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,ITES,7774df1c672c0b92486da8b36a721638,-0.2661
4,11,Convate hiring for Retail/e-commerce domain,UG: Any Graduate - Any Specialization,4 - 8 yrs,IT-Software / Software Services,Opening for Android Developer-bangalore-4-8 yr...,61016003187,Bangalore,Opening for Android Developer-bangalore-4-8 yrs,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,2657f737cb46cf289a05d6577a746976,-0.878506


In [70]:
user_based_approach_B = train.loc[train['joblocation_address']=='Bangalore']

In [71]:
user_based_approach = user_based_approach_B.iloc[0:10000,:]

In [72]:
user_based_approach.head()

Unnamed: 0,index,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id,split
0,2,Softtech Career Infosystem Pvt. Ltd,UG: Any Graduate - Any Specialization PG:Any P...,4 - 8 yrs,IT-Software / Software Services,Pl/sql Developer - SQLJob Description Send m...,101016900534,Bangalore,Pl/sql Developer - SQL,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,c47df6f4cfdf5b46f1fd713ba61b9eba,0.647748
1,4,Spire Technologies and Solutions Pvt. Ltd.,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,6 - 8 yrs,IT-Software / Software Services,JAVA Technical Lead (6-8 yrs) -Job Description...,120916002122,Bangalore,JAVA Technical Lead (6-8 yrs) -,4.0,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,a12553fc03bc7bcced8b1bb8963f97b4,-0.114305
2,5,PFS Web Global Services Pvt Ltd,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,2 - 5 yrs,IT-Software / Software Services,WALK IN - As400 Developer - Pfsweb Global Serv...,131016005070,Bangalore,WALK IN - As400 Developer - Pfsweb Global Serv...,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,8c3af9062ea835b0965779e2c7faac76,-0.63889
3,9,Accenture,UG: Any Graduate - Any Specialization PG:Any P...,1 - 5 yrs,IT-Software / Software Services,German TranslatorJob Description Send me Job...,121016901354,Bangalore,German Translator,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,ITES,7774df1c672c0b92486da8b36a721638,-0.2661
4,11,Convate hiring for Retail/e-commerce domain,UG: Any Graduate - Any Specialization,4 - 8 yrs,IT-Software / Software Services,Opening for Android Developer-bangalore-4-8 yr...,61016003187,Bangalore,Opening for Android Developer-bangalore-4-8 yrs,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,2657f737cb46cf289a05d6577a746976,-0.878506


In [75]:
user_based_approach['education'] = user_based_approach['education'].fillna('')
user_based_approach['experience'] = str(user_based_approach['experience'].fillna(''))

user_based_approach['education'] = user_based_approach['education'] + user_based_approach['experience']


In [76]:
tf = TfidfVectorizer(ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(user_based_approach['education'])

In [77]:
tfidf_matrix.shape

(1677, 417)

In [78]:
# http://scikit-learn.org/stable/modules/metrics.html#linear-kernel
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [79]:
cosine_sim[0]

array([1.        , 0.63033656, 0.46554876, ..., 0.87851187, 0.93595402,
       0.61883365])

In [104]:
userid = user_based_approach['level_0']
indices = pd.Series(user_based_approach.index, index=user_based_approach['level_0'])
#indices.head(2)

In [108]:
def get_recommendations_userwise(userid):
    idx = indices[userid]
    #print (idx)
    sim_scores = list(enumerate(cosine_sim[idx]))
    #print (sim_scores)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    user_indices = [i[0] for i in sim_scores]
    #print (user_indices)
    return user_indices[0:11]

In [109]:
print ("-----Top 10 Similar users with userId: 123------")
get_recommendations_userwise(123)


-----Top 10 Similar users with userId: 123------


[9, 14, 23, 25, 31, 36, 37, 38, 42, 45, 47]

In [115]:
train

Unnamed: 0,index,company,education,experience,industry,jobdescription,jobid,joblocation_address,jobtitle,numberofpositions,payrate,postdate,site_name,skills,uniq_id,split
0,2,Softtech Career Infosystem Pvt. Ltd,UG: Any Graduate - Any Specialization PG:Any P...,4 - 8 yrs,IT-Software / Software Services,Pl/sql Developer - SQLJob Description Send m...,101016900534,Bangalore,Pl/sql Developer - SQL,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,c47df6f4cfdf5b46f1fd713ba61b9eba,0.647748
1,4,Spire Technologies and Solutions Pvt. Ltd.,UG: B.Tech/B.E. - Any Specialization PG:Any Po...,6 - 8 yrs,IT-Software / Software Services,JAVA Technical Lead (6-8 yrs) -Job Description...,120916002122,Bangalore,JAVA Technical Lead (6-8 yrs) -,4.0,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,a12553fc03bc7bcced8b1bb8963f97b4,-0.114305
2,5,PFS Web Global Services Pvt Ltd,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,2 - 5 yrs,IT-Software / Software Services,WALK IN - As400 Developer - Pfsweb Global Serv...,131016005070,Bangalore,WALK IN - As400 Developer - Pfsweb Global Serv...,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,8c3af9062ea835b0965779e2c7faac76,-0.638890
3,9,Accenture,UG: Any Graduate - Any Specialization PG:Any P...,1 - 5 yrs,IT-Software / Software Services,German TranslatorJob Description Send me Job...,121016901354,Bangalore,German Translator,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,ITES,7774df1c672c0b92486da8b36a721638,-0.266100
4,11,Convate hiring for Retail/e-commerce domain,UG: Any Graduate - Any Specialization,4 - 8 yrs,IT-Software / Software Services,Opening for Android Developer-bangalore-4-8 yr...,61016003187,Bangalore,Opening for Android Developer-bangalore-4-8 yrs,,Not Disclosed by Recruiter,2016-10-13 16:20:55 +0000,,IT Software - Application Programming,2657f737cb46cf289a05d6577a746976,-0.878506
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6382,21986,Peopleton Solutions,UG: B.Tech/B.E. - Any Specialization PG:MCA - ...,4 - 9 yrs,IT-Software / Software Services,SDE 2 (contact at Priyank.pal@peopleton.in)Job...,261116002974,"Bangalore, NCR",SDE 2 (contact at Priyank.pal@peopleton.in),,"15,00,000 - 30,00,000 P.A. Salary is open",2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - eCommerce,ca542ff8b113744468b677f061bde0ec,-0.169210
6383,21988,Microsoft India (R and D) Pvt Ltd,UG: B.Tech/B.E. - Computers PG:MS/M.Sc(Science...,6 - 7 yrs,IT-Software / Software Services,SDE2Job Description Send me Jobs like this J...,241116901005,Bangalore,SDE2,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - eCommerce,a225337270ab5f885ee8af83e7b04865,1.172678
6384,21990,Link Tree Technologies PVT LTD hiring for CMM ...,UG: Any Graduate - Any Specialization PG:Post ...,7 - 12 yrs,IT-Software / Software Services,Urgent Placement for SAP ABAP with HCL Bangalo...,261116001071,Bangalore,Urgent Placement for SAP ABAP with HCL Bangalore,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,9810d3c9e451e7374871ec1927d59111,-1.368424
6385,21992,Confidential,UG: Any Graduate - Any Specialization Doctorat...,12 - 18 yrs,IT-Software / Software Services,Advisory Solution ArchitectJob Description S...,261116900077,Bangalore,Advisory Solution Architect,,Not Disclosed by Recruiter,2016-11-25 22:47:05 +0000,www.naukri.com,IT Software - Application Programming,0eb70f84a1d491fb2ad7332241475741,1.138126


In [120]:
def get_job_id(usrid_list):
    jobs_userwise = train['index'].isin(usrid_list) #
    df1 = pd.DataFrame(data = train[jobs_userwise], columns=['jobid'])
    joblist = df1['jobid'].tolist()
    Job_list = train['jobid'].isin(joblist) #[1083186, 516837, 507614, 754917, 686406, 1058896, 335132])
    df_temp = pd.DataFrame(data = train[Job_list], columns=['jobid','jobtitle','jobdescription','joblocation_address'])
    return df_temp

In [121]:
get_job_id(get_recommendations_userwise(10))

Unnamed: 0,jobid,jobtitle,jobdescription,joblocation_address
12,160316902471,Java Developer - Spring/mvc/hibernate,Java Developer - Spring/mvc/hibernateJob Descr...,Bangalore
41,150716001591,Associate Software Engineer And Testing Jobs I...,Associate Software Engineer And Testing Jobs I...,"Bangalore, Hyderabad"
51,200716005010,Available Part time / full time work at Top M....,Available Part time / full time work at Top M....,"NCR, Hyderabad, Dehradun, Mumbai, Bangalore, J..."


In [122]:
print ("-----Top 10 Similar users with userId: 47------")
get_recommendations_userwise(47)

-----Top 10 Similar users with userId: 47------


[9, 14, 23, 25, 31, 36, 37, 38, 42, 45, 47]

In [123]:
get_job_id(get_recommendations_userwise(47))

Unnamed: 0,jobid,jobtitle,jobdescription,joblocation_address
3,121016901354,German Translator,German TranslatorJob Description Send me Job...,Bangalore
7,121016901343,Revenue Assurance,Revenue AssuranceJob Description Send me Job...,Bangalore
14,131016004798,Good Opportunity for Oracle DBA - Bangalore Lo...,Good Opportunity for Oracle DBA - Bangalore Lo...,Bangalore
17,81016900152,WLAN Device Driver Development Engineer - Linux,WLAN Device Driver Development Engineer - Linu...,Bangalore
20,220116901982,Developer - SQL Bi/cosmos/sql Azure,Developer - SQL Bi/cosmos/sql AzureJob Descrip...,Bangalore
