# Web Scrapping

In [716]:
from bs4 import BeautifulSoup
import requests
import os
import PyPDF2 

In [717]:
def get_job_links(url_search,url_base):
    """ 
    Workopolis
    Return a list of links to jobs returned from the url_search link
    """
    # use requests to get the search url contents, convert to a soup object
    r = requests.get(url_search)
    soup = BeautifulSoup(r.content)
    
    # get pagination
    pages = get_job_pages(url_search, url_base, soup)
    
    job_links = []
    for page in pages:
        r = requests.get(page)
        soup = BeautifulSoup(r.content)
        
        # extract the links from the job titles
        jobs = soup.find_all("a",{"class":"SerpJob-titleLink"})    
        
        # get jobs from each page
        for job in jobs:
            link = f"{url_base}{job.get('href')}"
            job_links.append(link)
            
    return set(job_links)

In [718]:
def get_job_pages(url_search,url_base,soup):
    """ 
    Workopolis
    Return a list of links to jobs returned from the url_search link
    """
    
    # extract the links from the job titles
    pages = soup.find_all("a",{"class":"Pagination-link"})    
    
    # extract page links
    page_links = [url_search]
    for page in pages:
        p = page.get('href')
        page_links.append(f"{url_base}{p}")
        
    return set(page_links)

In [719]:
def get_job_details(urls):
    """
    Workopolis
    """
    print("Scrapping RSS Job Search Feed")
    print("="*100)
    job_detail = []
    for url in urls:
        try:
            # get url content, convert to soup
            r = requests.get(url)
            soup = BeautifulSoup(r.content)

            # extract content
            title = soup.find("div",{'class':'ViewJobHeaderTitle'}).text
            company = soup.find("div",{'class':'ViewJobHeaderCompany'}).text
            location = soup.find("span",{'class':'ViewJobHeaderPropertiesLocation'}).text
            desc = soup.find("div",{'class':'viewjob-description'}).text


            # extract key words (skills, education, benefits)
            keys = soup.find_all('li',{'class':'nav-item'})
            key_words = [w.text for w in keys]

            # build result-set
            job_detail.append(
                {'title':title,'company':company,'location':location,'desc':desc,'keywords':key_words}
            )
            # print progress
            print(f"{title[:18]:{18}}| {company[:15]:{15}}| {location[:7]:{7}} | {key_words[:3]}...")
        except Exception as e:
            print(f"   =>Skipping Job...{e}")
            continue
    print("="*100)
    return job_detail

In [859]:
def get_linkedin_jobs(urls):
    """
    LinkedIn
    Parse linkedin job link and extract the description and job title
    """
    
    start_text = 'Submit\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n \n\n\n\n\n\n\n\n\n\n\n\n\n'
    stop_text = 'Show more\n\n'
    
    print("Scrapping RSS Job Search Feed")
    print("="*100)
    jobs = []
    for url in urls:
        try:
            # get content
            r = requests.get(url)
            soup = BeautifulSoup(r.content)
            jd = soup.text

            # parse the text
            start = jd.find(start_text)
            stop = jd.find(stop_text)
            desc = jd[start:stop]
            desc = desc.replace(start_text,'').replace(stop_text,'').strip()

            # get the job title
            title = soup.find('h1').text

            jobs.append({'title':title,'company':'unknown','location':'unknown','desc':desc,'keywords':[]})

            # print progress
            print(f"{title[:30]:{30}}| {desc[:60]}...")
        except Exception as e:
            print(f"Error scrapping LinkedIn:{e}")
    return jobs
        

# NLP

In [139]:
# Sumy
import sumy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import nltk # required for sumy
nltk.download('punkt') # required for sumy

# spacy
import spacy
from spacy import displacy
NLP = spacy.load('en_core_web_lg')
from spacy.matcher import PhraseMatcher
# from spacy.matcher import Matcher

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


from tabulate import tabulate
from collections import Counter
import docx2txt
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

[nltk_data] Downloading package punkt to /home/ken/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Resume Scrapping

In [634]:
def get_resume_text(path,print_text=True):
    """ 
    Read resume text content from file (pdf or docx)
    Params:
        path: string- path to the resume file
        print_text: boolean.  Prints the text
    Returns: A text object with the resume file content
    """
    
    # open file and get file extension
    file = open(path,'rb')
    file_name, file_ext = os.path.splitext(path)
    
    # extract text content
    if file_ext=='.pdf':
        # read pdf
        pdf = PyPDF2.PdfFileReader(file)
        
        text = """"""
        for page_num in range(pdf.numPages):
            page = pdf.getPage(page_num)
            content = page.extract_text()
            text += content
    elif file_ext=='.docx':
        text = docx2txt.process(file)
    elif file_ext=='.txt':
        text = str(file.read())
        
    if print_text:
        print(text)

    file.close()
    return text

In [8]:
def show_pos(text):    
    """Extracts POS from a spacy document and prints the results"""
    d=[]
    for token in doc:
        d.append({
            "Token":token.text,
            "POS":token.pos_,
            "POS-Desc":spacy.explain(token.pos_),
            "Lemma":token.lemma_,
            "Stop-Word":token.is_stop,
            "POS-Detail":token.tag_,
            "POS-Detail-Desc":spacy.explain(token.tag_)
            }
        )
    print(tabulate(d,headers="keys",tablefmt="github"))

In [9]:
def show_entities(doc,display=True):
    """Print a list of named entities and their descriptions"""
    d=[]
    if doc.ents:
        for entity in doc.ents:
            d.append({
                "Word":entity,
                "Entity":entity.label_,
                "Start":entity.start,
                "Stop":entity.end,
                "Entity-Desc":spacy.explain(entity.label_),
                }
            )
        if display:
            print(tabulate(d,headers="keys",tablefmt="github"))
            print("\n")
            displacy.render(doc,style='ent',jupyter=True)
            print("\n")
    else:
        print("no entities found")
    return d   

In [10]:
def attr_counts(doc,att):
    """Return a table of counts by attributes"""
    if att.lower() == 'pos':
        cnts = doc.count_by(spacy.attrs.POS)
    elif att.lower() =='tag':
        cnts = doc.count_by(spacy.attrs.TAG)
    elif att.lower() =='lemma':
        cnts = doc.count_by(spacy.attrs.LEMMA)
    elif att.lower() =='entity':
        cnts = doc.count_by(spacy.attrs.ENT_TYPE)
    elif att.lower() =='email':
        cnts = doc.count_by(spacy.attrs.LIKE_EMAIL)
    elif att.lower() =='url':
        cnts = doc.count_by(spacy.attrs.LIKE_URL)
    elif att.lower() =='currency':
        cnts = doc.count_by(spacy.attrs.IS_CURRENCY)
    elif att.lower() =='numbers':
        cnts = doc.count_by(spacy.attrs.IS_DIGIT)
        
    # lookup attribute name
    d = {doc.vocab[k].text:v for k,v in cnts.items()}
    return d

### Generate Text Summary

In [502]:
def get_text_summary(text,text_source,summarizer,num_sentences,print_summary=True):
    """
    Generate a text summary from a document (extractive)
    Params:
        text: plain text or path to a document
        text_source: text or file
        summarizer: text-summarization method. lex-rank, text-rank,lsa
        num_sentences: Number of summary sentences to generate
        print_summary: T/F
    Returns:
        A list of sentences.
    """
    # Create parser from text-source
    if text_source=='text':
        parser = PlaintextParser.from_string(text,Tokenizer('english'))
    elif text_source=='file':
        parser = PlaintextParser.from_file(text,Tokenizer('english'))
    else:
        raise Exception(f"text source must be 'text' or 'file'!")
        
    # Select summarizer method
    if summarizer=='lex-rank':
        # Lex Rank:  A graph-based summarization method that uses keyword extractions
        s = LexRankSummarizer()
    elif summarizer=='text-rank':
        # Text rank: A graph-based summarization method that uses keyword extractions
        s = TextRankSummarizer()
    elif summarizer=='lsa':
        # Latent semantic analysis 
        # Combines term frequency with singular value decomposition
        s = LsaSummarizer(Stemmer("english"))
        s.stop_words = get_stop_words("english")
    else:
        raise Exception(f"Summarizer must be 'lex-rank','luhn','text-rank' or 'lsa'!")
    
    # create summary
    summary = s(parser.document,num_sentences)
    
    # print summary
    text = ""
    if print_summary:
        print(f'Text Summary:Top {num_sentences} sentences using a {summarizer} summarizer')
        print('='*100)
        for sentence in summary:
            text += " "+ str(sentence)
            print(sentence)
        print('='*100,'\n')
    return text

### Keyword Analysis

In [12]:
def get_terms(path):
    """
    Read a csv of data science terms & return themn as a list
    Params: path -string
    Returns: A list of terms
    """
    with open(path,'r') as f:
        lst = [line.lower().strip() for line in f.readlines()]
    return set(lst)

In [None]:
def extract_keywords(text,num, print_words=True):
    """
    Extracts keywords from a text object.  Ignores common job positng phrases
    Params:
        text: string text object
        num: integer.  The number of keywords to extract
        print_words: boolean. Prints the output
    Returns: A list of the top keywords in the text object
    """
    words = []
    pos = ['NOUN','PROPN']
    common_job_keys = ['experience','job','ability','posting','process','date','stakeholders','skills','solutions','project']
    tokens = NLP(text.lower())
    
    for token in tokens:
        if (token.text not in NLP.Defaults.stop_words and token.text.lower() not in common_job_keys and token.pos_ in pos):
            words.append(token.text)
            
    # get the top keywords by frequency
    keyword_frequency = [word for word in Counter(words).most_common(num)]
    keywords = [word[0] for word in keyword_frequency]
    
    if print_words:
        print(keyword_frequency)
    return keywords            

In [467]:
# extract keywords from the job descriptions
keys = []
for job in job_details:
    keys.extend(extract_keywords(job['desc'],50,False))
keys = set(keys)

In [466]:
# extract keywords from the job keywords
keys = []
for job in job_details:
    keys.extend(job['keywords'])
keys = set(keys)

In [77]:
def get_phrases_from_keywords(keywords):
    lst = []
    for keyword in keywords:
        if " " in keyword:
            lst.append(keyword.lower().replace(" ","-"))
        lst.append(keyword.lower())
    return set(lst)

In [206]:
def get_phrase_matches(keywords,text):
    """
    spacy 3.5 has a built in fuzzy matching option
    """
    # when spacy 3.5 is available on anaconda
#     matcher = Matcher(NLP.vocab)
#     patterns = [{"TEXT": {"FUZZY": "modelling"}},{"TEXT": {"FUZZY": "machine learning"}}]

    # get the term list
    terms = get_phrases_from_keywords(keywords)
    
    # convert text to tokens
    doc = NLP(text.lower())

    # phrase matching
    patterns = [NLP(text) for text in terms]

    # add to matcher
    matcher = PhraseMatcher(NLP.vocab)
    matcher.add('KEYWORDS',patterns)

    # search for phrase matches
    found_matches = matcher(doc)

    matched_phrases = []
    for match_id, start, end in found_matches:
        matched_phrases.append(doc[start:end].text)

    return dict(Counter(matched_phrases)) 

In [388]:
def compare_keywords(resume,job,keyword_path):
    """
    """
    # path to csv of terms
    keys = get_phrases_from_keywords(get_terms(keyword_path))
    
    # match phrases
    matches_resume = get_phrase_matches(keys,resume)
    matches_job = get_phrase_matches(keys,job)

    # convert to dataframes
    df_resume = pd.DataFrame.from_dict(matches_resume,orient='index',columns=['resume'])
    df_job = pd.DataFrame.from_dict(matches_job,orient='index',columns=['job'])
    
    # combine results & sort by job keywords
    df = df_job.join(df_resume)
    df.sort_values(by=['job'], ascending=False,inplace=True)
    
    # calculate the weighted difference in keyword frequencies
    df['wt'] = df['job']/df['job'].sum()
    df['wt-score'] = df['wt']*(df['resume']-df['job'])
    score = df['wt-score'].sum()
    
    # calculate keyword coverage ratio
    matches = df.query('~resume.isnull()').shape[0]
    total = df.shape[0]
    coverage = matches/total
    
    # get missing
    missing = set(df.query('resume.isnull()').index)
    
    
    return df, score, coverage, missing

In [245]:
def compare_job_specific_keywords(resume, job_keywords):
    """
    """
    # match phrases
    matches = get_phrase_matches(job_keywords,resume)
    
    # add missing job keywords to the dict
    for key in job_keywords:
        if key.lower() in matches.keys():
            continue
        else:
            matches[key.lower()]=0
    
    # convert to dateframe
    df = pd.DataFrame.from_dict(matches,orient='index',columns=['resume'])
    
    return df

In [736]:
def plot_keyword_comparison(resume, job_details, keyword_path):
    """ 
    Plot Job keywords vs Resume keywords
    Params: df- dataframe output from compare_keywords
    Returns: None
    """
    
    # compare common keywords, split into matches/no-matches
    df, score, coverage, missing = compare_keywords(resume,job_details['desc'],keyword_path)
    df_missing  = df.query('resume.isnull()')
    df_matching = df.query('~resume.isnull()')
    
    title = f"{job_details['title']}({job_details['company']}) | Keyword Coverage={round(coverage*100,2)}% | Weighted Average={round(score,2)}"
    
    # compare to job specific keywords
    df_spec = compare_job_specific_keywords(resume, job_details['keywords'])
    
    col_scale = [
        [0,'rgba(252,76,1,0.9)'],
        [0.25,'rgba(252,76,1,0.5)'],
        [0.5,'rgba(83,128,141,0.25)'],
        [0.75,'rgba(83,128,141,0.5)'],
        [1.0,'rgba(83,128,141,0.9)']
    ]
    
    # create plots
    fig = make_subplots(
        rows=1,
        cols=3,
        subplot_titles = ('Matching Keywords','Missing Keywords','Job Specific Keywords'),
        column_widths = [0.5,0.25,0.25]
    )
    # matching keywords
    fig.add_trace(
        go.Bar(
            name='Weighted Frequency',
            y = df_matching.index,
            x = df_matching['wt-score'],
            orientation='h',
            marker = dict(color=df_matching['wt-score'],colorscale=col_scale)
        ),row=1,col=1
    )
    # missing keywords
    fig.add_trace(
        go.Bar(
            name='Missing Keywords',
            y = df_missing.job,
            x = df_missing.index,
            marker = dict(color='rgba(252,76,1,0.8)')
        ),row=1,col=2
    )
    # job specific keywords
    fig.add_trace(
        go.Bar(
            name='Job Specific Keywords',
            y = df_spec.resume,
            x = df_spec.index,
            marker = dict(color='rgba(252,76,1,0.8)')
        ),row=1,col=3
    )
    
    # adjusments
    fig.update_xaxes(tickangle=90,row=1,col=2)
    fig.update_xaxes(tickangle=90,row=1,col=3)
    fig.update_xaxes(range=[-2,2],row=1,col=1)
    fig.update_xaxes(showgrid=False)
    fig.update_layout(
        template='plotly_white',
        title=title,
        width = 900,
        height=500,
        showlegend=False
    )
    fig.show()

## Calculate Similarities

In [26]:
def preprocess_text(text):
    """
    Preprocess text documents in preparation for analysis
    Params:
        text: a text object
    Returns: 
        A text object after converting to lowercase lemmas, removing stop-words,punctuation, numbers, and emails
    """
    
    # replace carriage returns and convert to lowercase
    doc = text.replace("\n","").replace("  ","").lower()
    
    # tokenize
    tokens = NLP(doc)

    text_list = []
    for token in tokens:
        if token.text in NLP.Defaults.stop_words:
            continue
        if token.is_punct:
            continue
        if token.is_digit:
            continue
        if token.like_email:
            continue
        if token.lemma_=='-PRON-':
            continue
            
        # lemmetize
        text_list.append(token.lemma_)
    return " ".join(text_list)

In [27]:
def calc_similarity(doc1,doc2):
    """ 
    Calculate the cosine similarity between two documents
    Params:
        doc1 - text document 
        doc2 - text document
    Returns: The cosine similairty score between the documents
    """
    d1 = preprocess_text(doc1)
    d2 = preprocess_text(doc2)
    
    X = TfidfVectorizer().fit_transform([d1,d2])
    score = cosine_similarity(X[0],X[1])
    return score[0][0]

In [390]:
def calc_similarities(resume, jobs,keyword_path):
    """ 
    Calculate the cosine similarity between a resume and a list of job descriptions
    Params:
        resume: a resume (text)
        jobs: a dictionary of jobs listings (output from get_job_details)
    Returns: A dataframe of job title, company, location and similarity score
    """
    scores = []
    titles = []
    companies =[]
    locs = []
    key_cov = []
    key_wt = []
    key_missing = []
    for job in jobs:
        titles.append(job['title'])
        companies.append(job['company'])
        locs.append(job['location'])
        scores.append(calc_similarity(resume, job['desc']))
        _,wt,cov,missing = compare_keywords(resume,job['desc'],keyword_path)
        key_cov.append(cov)
        key_wt.append(wt)
        key_missing.append(missing)
    
    data = list(zip(titles,companies,locs, scores, key_cov,key_wt,key_missing ))
    df = pd.DataFrame(data,columns = ['title','company','location','similarity-score','keyword-coverage','keyword-wt','keywords-missing'])
    
    return df.sort_values(by=['similarity-score'], ascending=False)

In [575]:
def plot_similarity_scores(df_similarities):
    """
    """
    import numpy as np
    
    # min-max scale the keyword weights
    df = df_similarities.copy()
    df['keyword-weight'] = (df['keyword-wt']-min(df['keyword-wt']))/(max(df['keyword-wt'])-min(df['keyword-wt']))
    
    # custom colorscale
    col_scale = [
        [0,'rgba(252,76,1,1.0)'],
        [0.25,'rgba(252,76,1,0.5)'],
        [0.5,'rgba(83,128,141,0.25)'],
        [0.75,'rgba(83,128,141,0.5)'],
        [1.0,'rgba(83,128,141,1.0)']
    ]
    
    # custom hover data
    hover_data = np.stack((df['title'],df['company'],df['location'],df['keyword-weight']),axis=-1)
        
    fig = go.Figure()
    
    fig.add_trace(
        go.Scatter(
            x = df['keyword-coverage'],
            y = df['similarity-score'],
            mode='markers',
            customdata = hover_data,
            hovertemplate = '<b>Title:</b> %{customdata[0]}<br>' +
                            '<b>Company:</b> %{customdata[1]}<br>'+
                            '<b>Location:</b> %{customdata[2]}</br>'+
                            '<b>Similarity:</b> %{y:,.2f}<br>' +
                            '<b>Keyword Coverage:</b> %{x:,.2f}<br>' +
                            '<b>Weighted Keyword Frequency:</b> %{customdata[3]:,.2f}<br>' +
                            '<extra></extra>',
            marker = dict(
                size=df['keyword-weight']*50,
                color = df['similarity-score'],
                colorscale=col_scale
            )
        )
    )
    fig.update_xaxes(title='Keyword Coverge (%)',range=[0,1.0])
    fig.update_yaxes(title='Similarity Score',range=[0,1.0])
    fig.update_layout(
        template='plotly_white',
        title='Resume Similarity to Job Listings',
        width=800,
        height=500
    )
    fig.show()    

## Resume and Job Comparison

In [738]:
url_base = 'https://www.workopolis.com/'
url_search = 'https://www.workopolis.com/jobsearch/find-jobs?ak=data%20science%20machine%20learning%20-intern%20-co-op&l=Toronto%2C%20ON&sr=10&t=-1&mip=%24110%2C000&job=A_BGLPbeQyACv85eEPgbvwWhgdEXSYd20NZdqbx2xZF6J9Wrhn4Ta2T_K_Da2QWp'

links = get_job_links(url_search, url_base)
jobs = get_job_details(links)


Scrapping RSS Job Search Feed
Sr. Account Execut| Anaconda       | Toronto | ['Procurement', 'SaaS', 'Python']...
Machine Learning E| TD Bank        | Toronto | ["Bachelor's", 'Performance tuning', 'Computer science']...
Senior Data Engine| PC Bank        | Toronto | ["Master's degree", "Bachelor's", 'TensorFlow']...
Canada Research Ch| Sunnybrook Heal| Toronto | ['Dental care', 'Life insurance', 'English']...
Forward-Deployed D| Boston Consulti| Toronto | ["Master's degree", 'CI/CD', 'Computer science']...
Sr. Machine Learni| TD Bank        | Toronto | ["Bachelor's", 'Elasticsearch', 'Azure']...
Project Manager, A| EXL Services   | Toronto | ["Master's degree", 'R', 'SQL']...
Staff Data Enginee| BEGiN          | Toronto | ["Master's degree", "Bachelor's", 'Spark']...
Senior Manager - B| Tiger Analytics| Toronto | ["Master's degree", 'QlikView', 'Tableau']...
Product Manager   | Paytm          | Toronto | ["Bachelor's", 'Agile', 'Communication skills']...
Senior Data Engine| Zenni Opti

In [866]:
# get resume + cover letter
r = get_resume_text('data/resume2023.docx',False) 
cl = get_resume_text('data/cover_letter.docx',False)
resume = cl + r
print(resume)

April 18, 2023





Dear Hiring Manager,



I am writing to express my interest in the Analyst/Associate role in the Systematic Strategies Group - CMF. I feel that this career opportunity offered with your organization is a strong match for my experience in data science, programming, and quantitative investment analysis.  I have a strong interest in applying my analytics skills in investment research and my skillet and experience makes me an ideal candidate to perform research in systemic strategies.



As a Chartered Financial Analyst with a Master’s degree in Data Science I have over 15+ years of experience working on data-driven projects, primary in Capital Markets.



In my current role at Firmex, I am the principal data scientist building machine-learning models to predict customer behavior,revenue and expense forecasting, and predicting Merges and Acquisition activity using my knowledge of market data and econometrics.  I am adept at building and automating scalable data pipeline

In [748]:
# calc similarities with each job
df = calc_similarities(resume,jobs,'data/terms.csv')
df

Unnamed: 0,title,company,location,similarity-score,keyword-coverage,keyword-wt,keywords-missing
76,"Director, Data Engineering Data Products (9-mo...",CPP Investments,"Toronto, ON",0.414657,0.644444,2.689189,"{snowflake, ai, computer science, exploration,..."
47,"Director, Enterprise Data Architecture (Client...",Sun Life,"Toronto, ON",0.394452,0.604651,1.481481,"{ai, mentoring, mentor, financial services, ml..."
7,Staff Data Engineer,BEGiN,"Toronto, ON",0.391256,0.666667,4.905660,"{computer science, infrastructure, scala, look..."
66,"Director, Enterprise Data Architecture (Health...",Sun Life,"Toronto, ON",0.390216,0.609756,1.636364,"{ai, mentoring, mentor, financial services, ml..."
40,Senior Manager - Data Science - Toronto,EY,"Toronto, ON",0.371085,0.551020,0.897638,"{ml, scala, mlops, markets, devops, capital ma..."
...,...,...,...,...,...,...,...
45,Education Campus Manager,AlignTech,"Toronto, ON",0.123616,0.800000,3.666667,"{ai, options, presentation}"
59,Senior Engineering Manager: Core ML Systems - ...,Abnormal Security,"Toronto, ON",0.114802,0.692308,2.571429,"{fraud, ml, ai, algorithms}"
65,Senior Android Developer,Intact,"Toronto, ON",0.114774,0.625000,1.090909,"{ai, insurance, software development, collabor..."
3,"Canada Research Chair, Tier 1 - Physical Scien...",Sunnybrook Health Sciences Centre,"Toronto, ON",0.103442,0.700000,-25.390805,"{ai, infrastructure, insurance, collaboration,..."


In [750]:
# plot similarities
plot_similarity_scores(df)

In [760]:
# keyword comparision
plot_keyword_comparison(resume,jobs[15],'data/terms.csv')

In [761]:
# compare job summary and resume summary
summary_job = get_text_summary(jobs[15]['desc'],'text','lsa',3)
summary_resume = get_text_summary(resume,'text','lsa',3)

calc_similarity(summary_resume,summary_job)


Text Summary:Top 3 sentences using a lsa summarizer
At Figment, our mission is to create an Internet that is truly decentralized where users can freely interact, share, collaborate, and exchange goods and services in a trustless environment.
We offer staking, middleware, and application layer solutions for token holders and developers investing in and building on Web 3 technologies.
100% remote-first environment, with co-working spaces in our employee "hubs" across the globe for those who enjoy a hybrid model 4 weeks of PTO that kick in day one, with an additional 1 week of flex days Extended company-paid health benefits that kick in day one Best in class parental leave and flexible arrangements A home office stipend to create a space that you enjoy working in Monthly Wifi reimbursement A yearly Learning & Development budget 401K (US) or RRSP match (Canada) Stock Options in the company Competitive bonus (based on company performance) that is distributed quarterly - we believe that the 

0.039714015864959054

In [860]:
urls = [
    'https://www.linkedin.com/jobs/view/3549932260/?refId=dbfa93b5-d784-445b-97f8-29d8f9ad0646&trackingId=jftaQgEdSpiSvntR2k17cQ%3D%3D',
    'https://www.linkedin.com/jobs/view/3540697616/?refId=59a513ba-522a-4aea-af91-759ff6f76cda&trackingId=2hyQIrX%2BRkOf%2B98YR1qBJA%3D%3D',
    'https://www.linkedin.com/jobs/view/3562298970/?refId=54f09146-9c3e-40e8-ac3c-595f85731463&trackingId=k0bOqRvwSRCieTxu5mwfhg%3D%3D',
    'https://www.linkedin.com/jobs/view/3564381193/?refId=b670d576-6f5e-4a0e-96f7-3903c26a7d54&trackingId=nR%2FGTzkWR6uQzB6FcnQVWg%3D%3D',
    'https://www.linkedin.com/jobs/view/3511070628/?refId=52f1cd7e-492f-46d4-894e-ab9ec08714f3&trackingId=SdhJHD3hRf61c5CnOOCOuw%3D%3D',
    'https://www.linkedin.com/jobs/view/3552670722/?refId=9c54e362-c54a-49f4-8d25-5639afc7d65e&trackingId=GUoqINtfSuWsAtaXC293SA%3D%3D',
    'https://www.linkedin.com/jobs/view/3492994174/?refId=0436884b-6af1-412d-8711-21574b7e80da&trackingId=GYxBqHwDTHqqRAZN3vhSOA%3D%3D',
    'https://www.linkedin.com/jobs/view/3523929176/?refId=f06fd3a4-e203-418a-beae-9762646a74db&trackingId=HHBPl3eGRZ%2Bym6at%2Fjopig%3D%3D',
    'https://www.linkedin.com/jobs/view/3503331250/?refId=9b080d13-deec-416e-b1c8-552165f513c0&trackingId=NGekzdgoQKei%2Fb6zSKO2%2BQ%3D%3D'
]

jobs_ln = get_linkedin_jobs(urls)


Scrapping RSS Job Search Feed
Analyst/Associate, Systematic | Company DescriptionMake an impact at a global and dynamic in...
Quantitative Analyst, Derivati| The Role: Quantitative Analyst, DerivativeThe Team: The Quan...
Quantitative Analyst          | About Upfeat MediaUpfeat is on a mission to be the leader in...
Manager, Machine Learning and | Company DescriptionAt Maple Leaf Sports & Entertainment Part...
Managing Director, Business In| Company DescriptionMake an impact at a global and dynamic in...
Data Scientist II - Machine Le| Credit Sesame is a financial app for consumers who want to b...
Senior Analyst, Business Intel| Company DescriptionMake an impact at a global and dynamic in...
Data Scientist, Algorithms    | At Lyft, our mission is to improve people’s lives with the w...
Lead Data Scientist, Finance  | About this role:We are seeking a Lead Data Scientist with ex...


In [867]:
df_ln = calc_similarities(resume,jobs_ln,'data/terms.csv')
df_ln

Unnamed: 0,title,company,location,similarity-score,keyword-coverage,keyword-wt,keywords-missing
4,"Managing Director, Business Intelligence",unknown,unknown,0.341902,0.764706,2.041667,"{collaboration, infrastructure, insights, gove..."
3,"Manager, Machine Learning and Advanced Analytics",unknown,unknown,0.314836,0.756757,3.84375,"{mathematics, computer science, insights, big ..."
6,"Senior Analyst, Business Intelligence",unknown,unknown,0.250273,0.818182,4.861111,"{computer science, collaboration, business req..."
5,Data Scientist II - Machine Learning,unknown,unknown,0.219274,0.731707,2.714286,"{gradient boosting, boosting, ai, ml, mlops, s..."
0,"Analyst/Associate, Systematic Strategies Group...",unknown,unknown,0.212961,0.764706,2.390244,"{collaboration, phd, economics, infrastructure}"
7,"Data Scientist, Algorithms",unknown,unknown,0.201406,0.73913,4.16129,"{mathematics, exploratory data analysis, compu..."
1,"Quantitative Analyst, Derivatives",unknown,unknown,0.196382,0.666667,2.458333,"{mathematics, workflow, c++, derivatives, java}"
8,"Lead Data Scientist, Finance",unknown,unknown,0.188036,0.772727,2.448276,"{eda, snowflake, ml, collaboration, options}"
2,Quantitative Analyst,unknown,unknown,0.168629,0.833333,6.235294,"{computer science, mathematics}"


In [870]:
# plot similarities
plot_similarity_scores(df_ln)

In [872]:
plot_keyword_comparison(resume,jobs_ln[0],'data/terms.csv')