#### Importing the needed libraries

In [407]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
import re
from nltk.util import ngrams
import os
import docx2txt
from geopy.geocoders import Nominatim
from geotext import GeoText
from geopy.distance import great_circle

#### Skills data

In [408]:
#Reading skills csv file
skill_data=pd.read_csv("skills_lower1 - Sheet1.csv")

#Collecting skills
skill_list=[]
for i in range(2262):
    if(skill_data['skill_or_not'][i]=="skill"):
        skill_list.append(skill_data['essential:'][i])
                
#Converting skills data into unique list of skills
unique_skill_list=set(skill_list)
unique_skill_list=list(unique_skill_list)
unique_skill_list

['cad',
 'svn',
 'optimization techniques',
 'rdbms',
 'product management',
 'back-end development',
 'power pivot',
 'php',
 'firewall',
 'application development',
 'elastic',
 'adobe illustrator',
 'react',
 'hpalm',
 'pfmea',
 'image recognition systems',
 'ai',
 'machine learning',
 'ux',
 'ms sql',
 'koin',
 'gcp',
 'tableau',
 'neural networks',
 'tornado',
 'circleci',
 'jenkins',
 'spring',
 'non aws',
 'ios',
 'scala',
 'pl/sql',
 'sas',
 'zeplin',
 'microsoft excel',
 'process modelling',
 'artifactory',
 'supervised and unsupervised',
 'matlab',
 'modelling',
 'functional analysis',
 'ant',
 'mvp',
 'solution design',
 'sea',
 'vmware',
 'perl',
 'redshift',
 'c#',
 'hbase',
 'django',
 'quantitative analysis',
 'saas',
 'figma',
 'data regression analysis',
 'nodejs',
 'react js',
 'numpy',
 'sbt',
 'graph theory',
 'angularjs',
 'itil',
 'snowflake',
 'prototype design',
 'jax-rs',
 'abode suite',
 'agile',
 'html',
 'office 365',
 'wcf',
 'no sql',
 'scrum',
 'confluenc

#### Languages data

In [409]:
#Reading languages csv file
language_data=pd.read_csv("languages.csv")

#Collecting languages and converting to list
language_list=language_data['Language']
language_list=list(language_list)

#All the languages in the list are converted to lower case (normalization)
for i in range(len(language_list)):
    language_list[i]=language_list[i].lower()
language_list

['danish',
 'dutch',
 'french',
 'italian',
 'norwegian',
 'portuguese',
 'romanian',
 'spanish',
 'swedish',
 'german',
 'haitian creole',
 'indonesian',
 'malay',
 'swahili',
 'albanian',
 'amharic',
 'armenian',
 'azerbaijani',
 'bengali',
 'bulgarian',
 'burmese',
 'czech',
 'dari',
 'estonian',
 'farsi',
 'finnish',
 'georgian',
 'greek',
 'gujarati',
 'hausa',
 'hebrew',
 'hindi',
 'hungarian',
 'icelandic',
 'kazakh',
 'khmer',
 'kurdish',
 'kyrgyz',
 'lao',
 'latvian',
 'lithuanian',
 'macedonian',
 'mongolian',
 'nepali',
 'pashto',
 'polish',
 'russian',
 'serbo-croatian',
 'sinhala',
 'slovak',
 'slovenian',
 'somali',
 'tagalog',
 'tajiki',
 'tamil',
 'telugu',
 'thai',
 'tibetan',
 'turkish',
 'turkmen',
 'ukranian',
 'urdu',
 'uzbek',
 'vietnamese',
 'arabic',
 'chinese - cantonese',
 'chinese - mandarin',
 'japanese',
 'korean']

#### Reading the data of different names of the skills

In [410]:
#Reading the csv file of different namings for the skills
diff_naming=pd.read_csv("skill_naming1.csv")

#Converted into a dataframe
diff_naming_df=pd.DataFrame(diff_naming)

#The incorrect naming is stored in wrong 
wrong=diff_naming_df['wrong']

#The proper naming is stored in correct for the corresponding skills
correct=diff_naming_df['correct']

#### Reading the Job information Data

In [411]:
#Reading the job information file scrapped from linkedin
job_information_data = pd.read_csv("Data jobs.csv")
job_information_dataframe=pd.DataFrame(job_information_data)

#Can enter a particular row for the corresponding job informations 
single_company_data=job_information_dataframe.iloc[1637]

#Job Description
job_description_data=single_company_data['Description']

#Location of the company
company_location=single_company_data['Location']

#Industry type
industry=single_company_data['Industry']

#Seniority level
level=single_company_data['Level']

#job_title
title=single_company_data['Post']
print(title)

Lead Data Scientist


#### Text processing

In [412]:
def processing(data):
    
    #Removing punctuations from the text
    cleaned_data = re.sub(r'[,.;'':@#?!&$()/|]', ' ', data)
    
    #tokenization of the text
    tokenized_data = nltk.word_tokenize(cleaned_data)
    
    #Removing Stop words
    filtered_words = [word for word in tokenized_data if word not in stopwords.words('english')]
    
    #All the unigram words are converted to lower case
    for i in range(len(filtered_words)):
        filtered_words[i] = filtered_words[i]. lower()
        
    final_words=list()   
    for i in filtered_words:    
        final_words.append(i)
        
    #Bigram of words
    bigram_data=list(nltk.bigrams(tokenized_data))
    for i in bigram_data:
        test_string=''
        test_string=' '.join(i)
        final_words.append(test_string)

    #Trigram of words
    trigram_data=list(nltk.trigrams(tokenized_data))
    for i in trigram_data:
        test_string=''
        test_string=' '.join(i)
        final_words.append(test_string)

    #Fourgram of words
    fourgram_data=list(nltk.ngrams(tokenized_data,4))
    for i in fourgram_data:
        test_string=''
        test_string=' '.join(i)
        final_words.append(test_string)
    
    #All words are converted to lower case
    for i in range(len(final_words)):
        final_words[i] = final_words[i]. lower()
    return final_words

#### Finding different types of analytics 

In [413]:
def diff_types_analytics(final_words):
    
    #Collects the previous word of "analytics" in the list of words
    string=''
    for i in range(len(final_words)):
        if(final_words[i]=="analytics"):
            string=string+' '+(str(final_words[i-1]))
    
    #tokenization
    text = nltk.word_tokenize(string)
                              
    #POS_tagging
    #Tells for all the words, whether it is verb, noun, adjective, etc
    pos_tagged_text=nltk.pos_tag(text)
    tagged_words = nltk.ConditionalFreqDist((tag, word) for (word, tag) in pos_tagged_text)
    
    #Mentioned the types of noun taggings
    noun=['NN','NN$','NN$-HL','NN$-TL','NN-HL','NN-NC','NN-TL','NN-TL-HL','NNS','NNS$','NNS$-HL','NNS$-TL','NNS-HL','NNS-TL','NNS-TL-HL']
    
    #Collects the previous words of 'analytics' which are noun
    final_analytics=[]
    for i in range(len(noun)):
        for key in tagged_words[noun[i]].keys():
            final_analytics.append(key)
    
    #Appending those noun words like 'data', 'predictive' with 'analytics' ==> 'data analytics', 'predictive analytics'
    for i in range(len(final_analytics)):
        final_analytics[i]=str(final_analytics[i])+' '+"analytics"
    return final_analytics

#### Collecting languages from a list of words

In [414]:
def languages(final_words):
    #from the list of words, collects the languages
    language=[]
    for i in range(len(final_words)):
        for j in range(len(language_list)):
            if(final_words[i]==language_list[j]):
                language.append(language_list[j])
                
    #converting it into unique list
    language=set(language)
    language=list(language)
    #if(len(language)==0):
    #    return "No languages mentioned"
    #else:
    return language

#### Finding difference between R language and R&D

In [415]:
def diffbet_Rlang_RndD(final_words):
    r_count=0
    rd_count=0
    for i in range(len(final_words)):
        if(final_words[i]=="r"):
            r_count=r_count+1
            if(i==len(final_words)-1):
                break;
            elif(final_words[i+1]=='d'):
                rd_count=rd_count+1
    if(rd_count>0):
        if(r_count==rd_count):
            final_words.remove("r")
    return final_words

#### Collecting list of skills from the list of words

In [416]:
def matching_skill_list(concluded_words,unique_skill_list):
    matching_skills=[]
    for i in range(len(concluded_words)):
        for j in range(len(unique_skill_list)):
            if(concluded_words[i]==unique_skill_list[j]):
                matching_skills.append(concluded_words[i])
    
    matching_skills=set(matching_skills)
    matching_skills=list(matching_skills)
    return matching_skills

#### Removing the incorrectly mentioned skill and appending the skill list with correct name

In [417]:
def remove_wrong_namings(matching_skills,wrong,correct):
    for i in range(len(matching_skills)):
        for j in range(len(wrong)):
            if(matching_skills[i]==wrong[j]):
                matching_skills.remove(wrong[j])
                matching_skills.append(correct[j])
    
    matching_skills=set(matching_skills)
    matching_skills=list(matching_skills)
    return matching_skills

#### Finding difference between 'analytics' and different types of analytics like 'data analytics'

In [418]:
def processing_skills(matching_skills,corrected_skills,final_analytics):
    diff_analytics_count=0
    analytics_count=0
    for i in range(len(matching_skills)):
        if(matching_skills[i]=="analytics"):
            analytics_count=analytics_count+1
        for j in range(len(final_analytics)):
            if(matching_skills[i]==final_analytics[j]):
                diff_analytics_count=diff_analytics_count+1
    if(diff_analytics_count>0):
        if(analytics_count==diff_analytics_count):
            corrected_skills.remove("analytics")
    return corrected_skills

#### Concluding informations from job data by calling the above functions

In [419]:
#calling the above functions
words_job_description=processing(job_description_data)

final_analytics_jd=diff_types_analytics(words_job_description)    
final_languages_jd=languages(words_job_description)
concluded_words_jd=diffbet_Rlang_RndD(words_job_description)

matching_skills_jd=matching_skill_list(concluded_words_jd,unique_skill_list)
corrected_skills_jd=remove_wrong_namings(matching_skills_jd,wrong,correct)
processed_skills_jd=processing_skills(matching_skills_jd,corrected_skills_jd,final_analytics_jd)
company_score=3
company_score=company_score+len(final_languages_jd)
company_score=company_score+len(processed_skills_jd)
print(company_score)

18


#### Industry Type 

In [420]:
#reading industries names file
names_data=pd.read_csv("industries1.csv")
names_dataframe=pd.DataFrame(names_data)
one=names_dataframe['one']
two=names_dataframe['two']
three=names_dataframe['three']
four=names_dataframe['four']

In [421]:
#making changes in the words which are wrongly displayed
#eg.: SoftwareStaffingRecruiting => Software Staffing Recruting
ind=[]

string=''
for j in industry:
    if(j==j.lower()):
        string=string+j
    if(j==j.upper()):
        string = string+" "+j
ind.append(string)

for i in range(len(ind)):
    if("& &" in ind[i]):
        ind[i]=ind[i].replace(" & ",' ')
    if(", ," in ind[i]):
        ind[i]=ind[i].replace(", ,",' ')
    if("/" in ind[i]):
        ind[i]=ind[i].replace("/",' ')
print(ind)

[' Financial    Services Insurance']


In [422]:
#tokenization        
for i in range(len(ind)):
    test=nltk.word_tokenize(ind[i])

    #finds the industry type of the company    
    if(len(test)==1):
        for l in range(len(one)):
            if(test[0]==one[l]):
                found_industry=str(test[0]).lower()

    elif(len(test)==2):
        for l in range(len(one)):
            if(test[0]==one[l] and test[1]==two[l]):
                found_industry=str(test[0]+' '+test[1]).lower()
            elif(test[1]==one[l] and two[l]==' '):
                found_industry=str(test[1]).lower()

    elif(len(test)==3):
        for l in range(len(one)):
            if(test[0]==one[l] and test[1]==two[l] and test[2]==three[l]):
                found_industry=str(test[0]+' '+test[1]+' '+test[2]).lower()
            elif(test[1]==one[l] and test[2]==two[l]):
                found_industry=str(test[1]+' '+test[2]).lower()
            elif(test[2]==one[l]):
                found_industry=str(test[2]).lower()

    elif(len(test)==0):
        continue;

    else:
        ind1=[]
        for j in range(len(test),len(test)-4,-1):
            ind1.append(test[j-1])
        ind1=ind1[::-1]
        for l in range(len(one)):
            if(ind1[0]==one[l] and ind1[1]==two[l] and ind1[2]==three[l] and ind1[3]==four[l]):
                found_industry=str(ind1[0]+' '+ind1[1]+' '+ind1[2]+' '+ind1[3]).lower()
            elif(ind1[1]==one[l] and ind1[2]==two[l] and ind1[3]==three[l]):
                found_industry=str(ind1[1]+' '+ind1[2]+' '+ind1[3]).lower()
            elif(ind1[2]==one[l] and ind1[3]==two[l]):
                found_industry=str(ind1[2]+' '+ind1[3]).lower()
            elif(ind1[3]==one[l]):
                found_industry=str(ind1[3]).lower()


#### Outputting informations from the resumes

In [424]:
#skills and languages are collected from the job description
print(single_company_data)
print("\nRequired languages:")
print(final_languages_jd)
print("\nRequired skills:")
print(processed_skills_jd)
print("\n")

score=[]
industries3=[]
languages3=[]
locations3=[]
years3=[]
candidate_skills=[]
candidate={}
a,b,c,p,z=0,0,0,0,0
name=[]

cities_data=pd.read_csv("worldcities.csv")
cities=cities_data['city_ascii']
for i in range(len(cities)):
    cities[i]=cities[i].lower()
#print(cities)

#Reading all the resumes from the 'resumes' folder
entries = os.listdir('resumes')

#for loop for each resume
for entry in entries:
    
    #converts docs file to text
    resume_data = docx2txt.process(entry)
    
    #name of the candidate is appended in the list 'name'
    name.append(entry)
    
    #calling the above functions
    words_resume=processing(resume_data)
    
    final_analytics_resume=diff_types_analytics(words_resume)
    final_languages_resume=languages(words_resume)
    concluded_words_resume=diffbet_Rlang_RndD(words_resume)        
    
    matching_skills_resume=matching_skill_list(concluded_words_resume,unique_skill_list)
    corrected_skills_resume=remove_wrong_namings(matching_skills_resume,wrong,correct)
    processed_skills_resume=processing_skills(matching_skills_resume,corrected_skills_resume,final_analytics_resume)
    
    #skills are collected from the resume
    candidate_skills.append(processed_skills_resume)
    candidate[name[a]]=processed_skills_resume
    a=a+1
    
    places2=[]
    #location of the candidate
    for i in range(len(cities)):
        for j in range(len(words_resume)):
            if(cities[i]==words_resume[j]):
                places2.append(words_resume[j])
                print(places2)
                if(len(places2)==0):
                    print("goto")
                    continue;
                else:
                    break;
                break;
            else: 
                continue; 
            break;
        else: 
            continue; 
        break;
    candidate_location=places2[0]
    candidate_location=str(candidate_location)
    print("Candidate location:",candidate_location)
    
    places=[]
    places.append(company_location)
    places.append(candidate_location)
    latitude=[]
    longitude=[]
      
    #finding the distance between the candidate location and the company's location
    geolocator = Nominatim(user_agent="http")
    for i in range(len(places)):
        locate = geolocator.geocode(places[i])
        latitude.append(locate.latitude)
        longitude.append(locate.longitude)
           
    first = (latitude[0], longitude[0])
    second = (latitude[1], longitude[1])
    print("Distance in km:")
    print(name[b],":",great_circle(first, second).km)
    location_string=candidate_location+' '+str(great_circle(first, second).km)
    scores=0
    if(int(great_circle(first, second).km)<=50):
        scores=scores+1
    elif(int(great_circle(first, second).km)>50):
        scores=scores
    print(scores)
    score.append(scores)
    print(score)
    b=b+1   
    locations3.append(location_string)
    
    #collects the languages known by the candidate
    #if(len(final_languages_resume)==0):
    #    print("No languages mentioned")
    #else:
    print("Languages known :",final_languages_resume)    
    
    #finds the matching languages with the job description and the candidate's resume
    matching_languages=[]
    for i in range(len(final_languages_jd)):
        for j in range(len(final_languages_resume)):
            if(final_languages_jd[i]==final_languages_resume[j]):
                matching_languages.append(final_languages_jd[i])
    print("Matching languages : ")
    print(name[c],":",matching_languages)
    
    language_string=''
    for lang in range(len(matching_languages)):
        language_string=language_string+' '+str(matching_languages[lang])
    print(len(matching_languages))
    score[c]=score[c]+len(matching_languages)
    print(score)
    c=c+1
    if language_string=='':
        language_string="none"
    languages3.append(language_string)
    
    #industry type matching
    industry_score=0
    industri=' '
    for i in range(len(words_resume)):
        if(words_resume[i]==found_industry):
            industry_score=1
            industri=found_industry
            #print(industri)
            
    if(industri==' '):
        industri="industry not found"
        industry_score=0
        
    if(industry_score==1):
        score[z]=score[z]+1
    elif(industry_score==0):
        score[z]=score[z]
    print(industry_score)
    print(score)
    z=z+1
    industries3.append(industri)
    
    
    #level of experience matching
    years=[]
    for j in range(len(words_resume)):
        if(words_resume[j]=='years'):
            try:
                years.append(int(words_resume[j-1]))
            except:
                years.append(0)
                
    years1=[]
    
    if(len(years)>0):
        for y in range(len(years)):
            if(years[y]<=20):
                years1.append(years[y])
        
        if(len(years1)>0):           
            maxx=years1[0]
            for i in range(0, len(years1)):        
                if(years1[i] > maxx):    
                    maxx = years1[i] 
            #year = sum(filter(lambda m: isinstance(m, int), years))
            year=maxx
            #year=int(''.join(list(filter(lambda c: c.isdigit(), year))))
            print("Experience level :",year)

        else:
            year=0
            print("Experience level : Not mentioned")
    else:
        year=0
        print("Experience level : Not mentioned")
        
    
    # Job position's needed level        candidate's years of experience  
    #-------------------------------------------------------------------
    # Not Applicable                =>   greater than or equal to 0
    # Entry level or Associate      =>   greater than or equal to 0
    # Mid-Senior level              =>   greater than 3
    # Executive                     =>   greater than 5
    # Director                      =>   greater than 7
    
    if(type(level)==str):
        if(level=="Not Applicable"):
            if(year>=0):
                print("level matches")
                level_score=1

        elif(level=="Entry level" or level=="Associate"):
            if(3>year>=0):
                print("level matches")
                level_score=1
            else:
                print("level does not match")
                level_score=0

        elif(level=="Mid-Senior level"):
            if(5>=year>=3):
                print("level matches")
                level_score=1
            else:
                print("level does not match")
                level_score=0

        elif(level=="Executive"):
            if(7>=year>=5):
                print("level matches")
                level_score=1
            else:
                print("level does not match")  
                level_score=0

        elif(level=="Director"):
            if(year>=7):
                print("level matches")
                level_score=1
            else:
                print("level does not match") 
                level_score=0
        #print("\n")
        
    elif(type(level)==int):
        if(level==' '):
            if(year>=0):
                print("level matches")
                level_score=1

        elif(3>int(level)>=0):
            if(3>year>=0):
                print("level matches")
                level_score=1
            else:
                print("level does not match")
                level_score=0

        elif(5>=int(level)>=3):
            if(5>=year>=3):
                print("level matches")
                level_score=1
            else:
                print("level does not match")
                level_score=0

        elif(7>=int(level)>=5):
            if(7>=year>=5):
                print("level matches")
                level_score=1
            else:
                print("level does not match")  
                level_score=0

        elif(int(level)>=7):
            if(year>=7):
                print("level matches")
                level_score=1
            else:
                print("level does not match") 
                level_score=0
                
    if(level_score==1):
        level_string="level matches"
    elif(level_score==0):
        level_string="level not matches"       
    years3.append(level_string)  
    print(level_score)
    score[p]=score[p]+level_score
    print(score)
    p=p+1
    print("\n")
print(score)        
print(industries3)
print(locations3)
print(languages3)
print(years3)

Job ID                                                     1.9976e+09
Date                                                       18-08-2020
Company Name                                                   Hiscox
Post                                              Lead Data Scientist
Location                              London, England, United Kingdom
No.of Applicants                                                   25
Description         Job Description  About the Group Claims Analyt...
Level                                                  Not Applicable
Type                                                        Full-time
Function                                                        Other
Industry                                  Financial ServicesInsurance
Link                                                                 
Review                                                               
Name: 1637, dtype: object

Required languages:
[]

Required skills:
['statistics', 'python

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


['manila']
Candidate location: manila
Distance in km:
2020_CV_Ronn_Kevin_Santos.docx : 10735.678852673145
0
[0]
Languages known : []
Matching languages : 
2020_CV_Ronn_Kevin_Santos.docx : []
0
[0]
0
[0]
Experience level : Not mentioned
level matches
1
[1]


['dubai']
Candidate location: dubai
Distance in km:
Aaditya_CV.docx : 5478.478415221937
0
[1, 0]
Languages known : []
Matching languages : 
Aaditya_CV.docx : []
0
[1, 0]
0
[1, 0]
Experience level : Not mentioned
level matches
1
[1, 1]


['dubai']
Candidate location: dubai
Distance in km:
Abdelrahman-CV-N.docx : 5478.478415221937
0
[1, 1, 0]
Languages known : ['arabic']
Matching languages : 
Abdelrahman-CV-N.docx : []
0
[1, 1, 0]
0
[1, 1, 0]
Experience level : Not mentioned
level matches
1
[1, 1, 1]


['dubai']
Candidate location: dubai
Distance in km:
Abdullah_Alattar_2020.docx : 5478.478415221937
0
[1, 1, 1, 0]
Languages known : ['arabic']
Matching languages : 
Abdullah_Alattar_2020.docx : []
0
[1, 1, 1, 0]
0
[1, 1, 1, 0]
Experienc

['atlanta']
Candidate location: atlanta
Distance in km:
Balgopal_Sabat CV.docx : 6770.1074696358255
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 0]
Languages known : ['hindi']
Matching languages : 
Balgopal_Sabat CV.docx : []
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 0]
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 0]
Experience level : Not mentioned
level matches
1
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1]


['kano']
Candidate location: kano
Distance in km:
Bamidele Olanrewaju Ajayi-converted.docx : 259.0420608764735
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 0]
Languages known : []
Matching languages : 
Bamidele Olanrewaju Ajayi-converted.docx : []
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 0]
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 0]
Experience level : Not mentioned
level matches
1
[1, 1, 1, 1

['enterprise']
Candidate location: enterprise
Distance in km:
Dheemantha Wijesinghe - CV.docx : 7059.359483858228
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 0]
Languages known : []
Matching languages : 
Dheemantha Wijesinghe - CV.docx : []
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 0]
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 0]
Experience level : 3
level matches
1
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1]


['dubai']
Candidate location: dubai
Distance in km:
DIALA ALMALIK CV.docx : 5478.478415221937
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 0]
Languages known : ['arabic']
Matching languages : 
DIALA ALMALIK CV.docx : []
0
[1, 1, 1, 1, 1, 1, 1, 

['lahore']
Candidate location: lahore
Distance in km:
Resume Sajjad Tahir Nov 2019.docx : 6282.352078939211
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0]
Languages known : []
Matching languages : 
Resume Sajjad Tahir Nov 2019.docx : []
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0]
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0]
Experience level : 9
level matches
1
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1]


['hyderabad']
Candidate location: hyderabad
Distance in km:
Shireen-CV-2020.docx : 7719.7640521729845
0
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 1,

#### Matching skills with the job description and the candidate's resume

In [425]:
final_rank={}
matching={}
def fun(candidate,comp,st_n):

    set1=set(candidate)
    set2=set(comp)
    
    set3=set1.intersection(set2)
    
    final_rank.update({st_n:len(set3)})

    matching.update({st_n:set3})
    
for i,j in candidate.items():
    fun(processed_skills_jd,j,i)
     
#Final ranking tells the count of the matching skills 
#print("\nFinal ranking : " ,final_rank)
print("Final ranking : \n")
i=0
for key, value in final_rank.items():
    print(key, ' : ', value)
    score[i]=score[i]+value
    i=i+1

#Matching skills
#print('Matching skills : ' , matching)
print('\nMatching skills : \n')
for key1, value1 in matching.items():
    print(key1, ' : ', value1)
print(score)

Final ranking : 

2020_CV_Ronn_Kevin_Santos.docx  :  3
Aaditya_CV.docx  :  6
Abdelrahman-CV-N.docx  :  2
Abdullah_Alattar_2020.docx  :  5
Abukersh_Jun2020.docx  :  3
AdityaM.docx  :  5
Afra Yaqoob CV.docx  :  2
Agnel Mamachan CV & Cover Letter.docx  :  6
Ahmed El Chafei Resume.docx  :  0
Ahmed Hassan-CV-Resume-August 2020.docx  :  4
Ahmed Nurullah_BI and Data Lead.docx  :  10
Ahmed_Abdelkader_CV-2.docx  :  0
Aizaz CV 2.01 (4).docx  :  7
ajmal_resume.docx  :  3
Alia_cv_final.docx  :  1
Alma-Resume.docx  :  4
Amine CV-9.docx  :  3
Antonio Bastidas_Resume_Aug20.docx  :  10
Ashley_Choy_CV.docx  :  8
Atif_Ahmad_CV.docx  :  9
Ayesha Cv -.docx  :  6
Bachir Barry Data Science CV.docx  :  7
Balgopal_Sabat CV.docx  :  5
Bamidele Olanrewaju Ajayi-converted.docx  :  1
Beatriz Manzano CV.docx  :  7
BI Developer- Anupam Parti.docx  :  6
Bushra's Resume.docx  :  4
CV - DA.docx  :  8
CV - Shalaka Kumar (1).docx  :  2
CV-2020 Mohammed Al Balushi.docx  :  0
CV-John-Richard-Gonzales-n.docx  :  2
CV-Moham

In [427]:
names1=[]
for key1, value1 in matching.items():
    names1.append(key1)
#print(names1)
ski1=[]
for key1, value1 in matching.items():
    ski1.append(value1)
for i in range(len(ski1)):
    ski1[i]=list(ski1[i])
print(ski1)

[['python', 'sql', 'data analytics'], ['python', 'data analytics', 'machine learning', 'azure', 'cloud', 'r'], ['python', 'data analytics'], ['python', 'problem solving', 'data analytics', 'machine learning', 'data visualization'], ['problem solving', 'analytics', 'data analytics'], ['python', 'bi', 'data analytics', 'machine learning', 'data visualization'], ['python', 'sql'], ['python', 'problem solving', 'data analytics', 'machine learning', 'analytics', 'sql'], [], ['r', 'problem solving', 'python', 'data analytics'], ['statistics', 'python', 'bi', 'data analytics', 'go', 'business intelligence', 'azure', 'cloud', 'data visualization', 'sql'], [], ['statistics', 'machine learning', 'analytics', 'cloud', 'google cloud', 'data visualization', 'sql'], ['analytics', 'sql', 'data analytics'], ['analytics'], ['python', 'sql', 'data analytics', 'bi'], ['python', 'data analytics', 'machine learning'], ['statistics', 'python', 'bi', 'data analytics', 'business intelligence', 'machine learni

In [436]:
zipped = list(zip(*sorted(zip(score,industries3,locations3,languages3,years3,ski1,names1))))
score10,industries10,locations10,languages10,years10,ski10,names10 = [ list(tuple) for tuple in zipped]

score10.reverse()
industries10.reverse()
locations10.reverse()
languages10.reverse()
years10.reverse()
ski10.reverse()
names10.reverse()
print(score10,industries10,locations10,languages10,years10,ski10,names10)

[14, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 1, 1, 1] ['industry not found', 'industry not found', 'industry not found', 'insurance', 'insurance', 'industry not found', 'industry not found', 'insurance', 'industry not found', 'industry not found', 'industry not found', 'insurance', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'insurance', 'insurance', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'insurance', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'industry not found', 'insurance', 'insurance', 'industry not found'

In [437]:
companies2=[]
companies2.append(title)
companies2.append(1639)
companies2.append(found_industry)
companies2.append(final_languages_jd)
companies2.append(company_location)
companies2.append(level)
companies2.append(processed_skills_jd)
comp_len=len(companies2)
for i in range(len(scores2)-comp_len):
    companies2.append(' ')
#print(companies2)

properties2=[]
properties2.append(company_score)
for i in range(len(scores2)-1):
    properties2.append(' ')
#print(properties2)

In [438]:
resumes3=pd.DataFrame({'Company':companies2,
                      'Property':properties2,
                      'Names':names10,
                      'Score':score10,
                      'Industry':industries10,
                      'Language':languages10,
                      'Location':locations10,
                      'Experience level':years10,
                      'Skills':ski10})
      
#storing in csv file
resumes3.to_csv("Output3.csv", mode='a', header=False, index=0)