The purpose of this notebook is to build a recommendation engine that lets a person with disability (PWD) get the top 5 relevant jobs based on their skills and disability.

In [1]:
#Libraries
import math
from collections import Counter
from nltk import cluster
from scipy import spatial
import operator
import pandas as pd
import numpy as np 
import re

In [2]:
# Load the job table; the rendered output shows the columns
# jobTitle, company, desc, Skills and Disability.
jdskill=pd.read_csv('JDToSkills.csv',encoding='latin-1')
# Bare last expression so the notebook renders the frame.
jdskill

Unnamed: 0,jobTitle,company,desc,Skills,Disability
0,Data Scientist,Express Scripts,"position summary, the business analyst role is...","['a critical requirement', 'product aesthetics...","Spinal Cord Injury, Leprosoy Cured persons, Au..."
1,Data Scientist,Money Mart Financial Services,"what do we need?, you to have an amazing perso...","['fosters employee growth', 'segmentation', 'r...","Spinal Cord Injury, Leprosoy Cured persons, Au..."
2,Data Scientist,comScore,"validate, analyze, and conduct statistical ana...","['analytical software excel', 'sql', 'effectiv...","Spinal Cord Injury, Leprosoy Cured persons, Au..."
3,Graduate Studies Program - Data Scientist,Central Intelligence Agency,"full time, washington, dc metro area, starting...","['shape technology', 'advanced hardware', 'com...","Parkinson's disease, Multiple Sclerosis, Muscu..."
4,Data Scientist I,Federal Reserve Bank of Dallas,assist in consultations with business partners...,"['internal/external hr', 'hr dashboards', 'met...","Spinal Cord Injury, Schizophrenia"
...,...,...,...,...,...
1042,Accessibility Consultant,"Oracle, SAP Labs",Ensure that software developed meets the acces...,"['accessibility', 'of accessibility', 'quality...","Physical Disability,Low Vision,Hearing Impairm..."
1043,Customer Support Executive,IBM,Talk to customers and clients to fulfil given ...,"['customer handling', 'ms excel', 'reading', '...","Physical Disability,Low Vision,Hearing Impairm..."
1044,Customer Support Executive - Email Support,Allsec,Comprehend the customer's mail and respond acc...,"['computer skills','typing','ms excel','custom...","Physical Disability,Low Vision,Hearing Impairm..."
1045,Customer Support Executive - Grievance Redressal,Hyundai,"Understand customer complaints, capture missin...","['computer skills','typing','communication ski...","Physical Disability,Low Vision,Hearing Impairm..."


In [3]:
# Load the user-profile table; the rendered output shows the columns
# name, disability, bio and Skills.
profileskill=pd.read_csv('ProfileToSkills.csv',encoding='cp1252')
# Bare last expression so the notebook renders the frame.
profileskill

Unnamed: 0,name,disability,bio,Skills
0,Ratul Arora,Blindness,ratul arora is from delhi. he has blindness. h...,"['the accessibility', 'domain', 'a test engine..."
1,Aman Verma,Low Vision,"aman verma is from nalasopara, maharashtra. he...","['aman verma', 'an accessibility tester', 'an ..."
2,Sampa Gupta,Blindness,"sampa gupta is from mumbai, maharashtra. she h...","['a certified dhs', 'tester', 'accessibility s..."
3,Monu Varma,Blindness,monu varma is a 29-year-old visually impaired ...,"['computer applications', 'hcl technologies', ..."
4,Jayant Singh Raghav,Blindness,jayant singh raghav believes that the position...,"['time management', 'team leadership', 'a stro..."
...,...,...,...,...
1052,Resham Talwar,Blindness,reshams life has not been easy. she thinks tha...,"['braille', 'a born-to-be voice-over artist', ..."
1053,Sachin J Shetty,Muscular Dystrophy,sachin shetty belongs to the city of dreams an...,"['stories', 'a good listener', 'digital conten..."
1054,Chaitanya Purushottam Kayande,Blindness,chaitanya was born with complete blindness and...,"['a tourist guide', 'co-curricular skills', 'm..."
1055,Pulkit Sharma,Cerebral Palsy,pulkit sharma is a fun-loving person who is di...,"['a fun-loving person', 'voiceover', 'mimicry'..."


In [5]:
#function to clean data
def clean_text(raw):
    '''Normalise one raw text fragment; case specific, to be used with the pandas apply method.

    Steps, in order: drop carriage returns and new lines, drop square and round
    brackets, replace HTML-like tags with a space, collapse runs of spaces,
    strip both ends, remove the spaces around commas, and turn '.,' into '.'.

    Parameters
    ----------
    raw : str
        Text to clean. Any value that is not a ``str`` (for example the float
        NaN found in empty cells) is treated as missing.

    Returns
    -------
    str or float
        The cleaned text, or ``np.nan`` when ``raw`` is not a string.
    '''
    # Missing cells are the only expected failure, so test for them directly
    # instead of hiding every possible error behind a bare ``except``.
    if not isinstance(raw, str):
        return np.nan

    # remove carriage returns and new lines
    cleaned = raw.replace('\r', '').replace('\n', '')

    # brackets appear in all instances
    for bracket in '[]()':
        cleaned = cleaned.replace(bracket, '')

    # removing html tags (non-greedy, so each tag is replaced separately)
    cleaned = re.sub('<.*?>', ' ', cleaned)

    # removing duplicate whitespace in between words
    cleaned = re.sub(" +", " ", cleaned)

    # stripping first and last white space
    cleaned = cleaned.strip()

    # commas had multiple spaces before and after in each instance
    cleaned = cleaned.replace(" , ", ", ")
    cleaned = cleaned.replace(", ", ",")

    # eliminating the extra comma after a period
    return cleaned.replace('.,', '.')

In [6]:
#get array of skills from String
def getSkillArray(arr):
    '''Split a comma-separated skills string and clean every piece.

    Parameters
    ----------
    arr : str
        Skills text; it is split on "," before any cleaning happens.

    Returns
    -------
    list
        One ``clean_text`` result per comma-separated piece, in input order.
    '''
    return [clean_text(piece) for piece in arr.split(",")]

In [7]:
#Get jobs based on disability of the user
def returnDF(userProfile,df):
    '''Keep only the rows of ``df`` whose 'Disability' text mentions the user's disability.

    Parameters
    ----------
    userProfile : mapping or pandas.Series
        Must provide a 'disability' entry.
    df : pandas.DataFrame
        Job table with a 'Disability' column.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df``, with original order, index and columns.
        Empty when nothing matches or when the user's disability is not a string.

    Notes
    -----
    The match is a plain substring test, so a disability name also matches any
    longer label that contains it.
    '''
    disability = userProfile['disability']
    if not isinstance(disability, str):
        # A missing disability cannot match any job row.
        return df.iloc[0:0]

    # Non-string cells (e.g. NaN) and empty strings never match.
    matches = [
        isinstance(listed, str) and len(listed) > 0 and disability in listed
        for listed in df['Disability']
    ]
    # A positional boolean mask on the frame that was passed in: no dependence
    # on a global table and no confusion between index labels and positions.
    return df.loc[np.array(matches, dtype=bool)]

In [8]:
#Recommendation Engine
class remommendationModel:
    '''Scores job rows against one user's skills with a bag-of-skills cosine similarity.

    Parameters
    ----------
    skills : str
        The user's comma-separated skills text.
    df : pandas.DataFrame
        Job rows with 'jobTitle', 'company', 'Skills' and 'Disability' columns.
    '''
    def __init__(self, skills, df):
        self.dataframe = df
        self.skill_set = skills

    def recommendation_vectors(self):
        '''Return every job that shares at least one skill token with the user.

        Returns
        -------
        list of list
            One ``[jobTitle, company, Skills, Disability, similarity]`` entry per
            job whose similarity is greater than 0, in dataframe order (unsorted).
            Empty when the user's skills are not a string.
        '''
        final_list = []
        if not isinstance(self.skill_set, str):
            # Missing user skills (e.g. NaN): nothing to compare against.
            return final_list

        # The user's token counts are the same for every row, so build them once.
        user_counts = Counter(getSkillArray(self.skill_set))

        for index, row in self.dataframe.iterrows():
            job_skills = row['Skills']
            # Skip jobs without usable skills text; a NaN cell has no len().
            if not isinstance(job_skills, str) or len(job_skills) == 0:
                continue

            job_counts = Counter(getSkillArray(job_skills))
            # Both vectors are laid out over the same union of tokens.
            all_items = set(user_counts.keys()).union(set(job_counts.keys()))
            vector1 = [user_counts[k] for k in all_items]
            vector2 = [job_counts[k] for k in all_items]

            # scipy returns a cosine *distance*; convert it to a similarity.
            similarity = 1 - spatial.distance.cosine(vector1, vector2)
            if similarity > 0:
                final_list.append(
                    [row['jobTitle'], row['company'], row['Skills'], row['Disability'], similarity]
                )
        return final_list



In [9]:
#Loading user profile and getting jobs based on disability type of the user
# Select one user by row position. The trailing "429,54" looks like other
# positions that were tried -- TODO confirm and move the position into a named constant.
userProfile=profileskill.iloc[515]#429,54
# Narrow the job table to the rows returnDF selects for this user's disability.
finalDF=returnDF(userProfile,jdskill)

In [10]:
#Getting top 5 relevant jobs based of Disability type and Skills of the user
# Score the disability-filtered jobs against the user's skills.
model_class = remommendationModel(userProfile['Skills'], finalDF)
recommendation_df = model_class.recommendation_vectors()

# Each entry is [jobTitle, company, Skills, Disability, similarity];
# rank on the last field, highest similarity first.
recommendation_df = sorted(recommendation_df, key=operator.itemgetter(4), reverse=True)
filtered_profiles = pd.DataFrame(recommendation_df, columns=['jobTitle', 'company', 'Skills','Disability','similarity'])

# The score is only needed for ranking, so it is left out of the displayed table.
relevent_profiles = filtered_profiles.drop(columns=['similarity'])

print("Recommendation for",userProfile['name'])
print("Skills for",userProfile['Skills'])
relevent_profiles.head(5)

Recommendation for Chain Singh Rajpurohit
Skills for ['customer handling', 'ms excel', 'good communication skills', 'hindi', 'rajasthani.chain singh', 'a quick learner', 'of arts', 'of customer handling bills', 'good communication skills', 'hindi', 'rajasthani.chain singh', 'quick learner', 'on new responsibilities', 'computer skills','domain knowledge','typing']


Unnamed: 0,jobTitle,company,Skills,Disability
0,Customer Support Executive,IBM,"['customer handling', 'ms excel', 'reading', '...","Physical Disability,Low Vision,Hearing Impairm..."
1,Customer Support Executive - Email Support,Allsec,"['computer skills','typing','ms excel','custom...","Physical Disability,Low Vision,Hearing Impairm..."
2,Customer Support (Chat),VMWare,"['computer skills',typing','domain knowledge',...","Physical Disability,Low Vision,Hearing Impairm..."
3,Customer Support Executive - Grievance Redressal,Hyundai,"['computer skills','typing','communication ski...","Physical Disability,Low Vision,Hearing Impairm..."
4,Data Entry Operator,ANZ Bank,"['data entry', 'content writing', 'ms office'o...","Low Vision,Hearing Impairment (Deaf and Hard o..."
