## Libraries

In [185]:
import openai
import requests
import pandas as pd
from bs4 import BeautifulSoup
import docx2txt
import PyPDF2 
import os
import json
from time import sleep

## Authentication

In [64]:
def get_openai_auth(path='data/api-key.txt'):
    """
    Load the OpenAI API key from a text file
    Params:
        path: string- path to the file whose first line holds the key
    Returns: The first line of the file with surrounding whitespace removed
    """
    with open(path, 'r') as key_file:
        # only the first line is used; an empty file yields an empty string
        first_line = next(key_file, '')
    return first_line.strip()

# Read the stored key once and register it with the openai module
key = openai.api_key = get_openai_auth()

## Feed Scrape

In [53]:
def get_linkedin_jobs(urls, timeout=30):
    """
    LinkedIn
    Fetch each linkedin job link and extract the description and job title
    Params:
        urls: iterable of strings- links to LinkedIn job postings
        timeout: number- seconds to wait for each HTTP response
    Returns: A list of dictionaries, one per posting that could be parsed, with
        the keys title, company, location, desc and keywords.  Postings that
        cannot be downloaded, or whose description markers are missing, are
        reported on stdout and skipped
    """

    # text found immediately before / after the description in the page text
    start_text = 'Submit\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n \n\n\n\n\n\n\n\n\n\n\n\n\n'
    stop_text = 'Show more\n\n'

    print("Scrapping RSS Job Search Feed")
    print("="*100)
    jobs = []
    for url in urls:
        # get content; a timeout keeps one stalled connection from hanging the
        # whole run and raise_for_status stops error pages from being parsed
        try:
            r = requests.get(url, timeout=timeout)
            r.raise_for_status()
        except requests.RequestException as e:
            print(f"Error scrapping LinkedIn:{e}")
            continue

        # NOTE(review): no parser is named, so BeautifulSoup uses whichever one
        # is installed.  The markers above depend on the exact page text, so pin
        # a parser only after confirming they still match.
        soup = BeautifulSoup(r.content)
        jd = soup.text

        # parse the text: find() returns -1 when a marker is absent, which must
        # not be used as a slice index; the stop marker only counts when it
        # comes after the start marker
        start = jd.find(start_text)
        stop = jd.find(stop_text, start + len(start_text)) if start != -1 else -1
        if start == -1 or stop == -1:
            print(f"Error scrapping LinkedIn:description markers not found in {url}")
            continue
        desc = jd[start + len(start_text):stop].strip()

        # get the job title; fall back to the same placeholder used for the
        # other unavailable fields when the page has no <h1>
        heading = soup.find('h1')
        title = heading.text if heading is not None else 'unknown'

        jobs.append({'title':title,'company':'unknown','location':'unknown','desc':desc,'keywords':[]})

        # print progress
        print(f"{title[:30]:{30}}| {desc[:60]}...")
    return jobs

In [141]:
# LinkedIn job postings to analyse; each link is passed to the scraper below
urls = [
    'https://www.linkedin.com/jobs/view/3583556352/?refId=b4ebbe79-1d67-46b8-823e-9f52d9c01f9d&trackingId=CfAmnpZbRdWPl42LWX5njg%3D%3D',
    'https://www.linkedin.com/jobs/view/3630234909/?refId=6a2daaf4-b320-413f-a3b3-f31784a2a518&trackingId=hLCaAMJWSASdksQ8PtFXPw%3D%3D',
    'https://www.linkedin.com/jobs/view/3603450410/?refId=eaa552e4-2d43-4a3c-b850-7702181a6efa&trackingId=qsn%2BFCU7SAKVp%2BYrsTwHCw%3D%3D',
    'https://www.linkedin.com/jobs/view/3620683464/?refId=e595ca40-ec69-42fb-a32a-7474c8912c47&trackingId=ytCdrKOERZuV8ODYiYkm9g%3D%3D',
    'https://www.linkedin.com/jobs/view/3608332204/?refId=9038cc56-5f01-4996-99c2-7e441c5fc2c2&trackingId=NJj6mZR%2FRxKTngPt1P44rw%3D%3D',
    'https://www.linkedin.com/jobs/view/3586705675/?refId=52ffa819-3597-4d3d-ab72-f3dfe5f35be1&trackingId=sqiP59pMQky61uMQc%2FU0ng%3D%3D'
]

# scrape every posting in the list; the result is a list of job dictionaries
jobs_ln = get_linkedin_jobs(urls)

Scrapping RSS Job Search Feed
Data Scientist                | Who are we? We are a leading global asset manager with US $7...
Quantitative Analyst, Quant St| Why Join Us?We are currently seeking a Quantitative Analyst ...
Principal Research Data Scient| Leads and executes deep dive diagnostic, predictive, and pre...
Product Data Scientist        | Launched in 2019, BAM Management US Holdings Inc. d/b/a Bina...
Associate/Senior Associate, Da| Company DescriptionMake an impact at a global and dynamic in...
Senior Data Scientist, Experim| Grammarly is excited to offer a remote-first hybrid working ...


## Resume Scrape

In [55]:
def get_resume_text(path,print_text=True):
    """ 
    Read resume text content from file (pdf, docx or txt)
    Params:
        path: string- path to the resume file
        print_text: boolean.  Prints the text
    Returns: A text object with the resume file content
    Raises: ValueError when the file extension is not .pdf, .docx or .txt
    """

    # the extension selects the extraction method; compare case-insensitively
    # so that e.g. "Resume.PDF" is handled as well
    file_ext = os.path.splitext(path)[1].lower()

    # the with-block closes the file even when extraction raises
    with open(path,'rb') as file:
        if file_ext=='.pdf':
            # PdfReader / .pages are the current names of the reader API that
            # also provides extract_text(); the text of all pages is joined
            pdf = PyPDF2.PdfReader(file)
            text = ''.join(page.extract_text() or '' for page in pdf.pages)
        elif file_ext=='.docx':
            text = docx2txt.process(file)
            # NOTE(review): removing newlines outright joins the last word of a
            # line to the first word of the next - confirm this is intended
            text = text.strip().replace('\n','')
        elif file_ext=='.txt':
            # the file is opened in binary mode, so decode the bytes; str() on
            # bytes would return the "b'...'" representation instead
            text = file.read().decode('utf-8', errors='replace')
        else:
            raise ValueError(f"Unsupported resume file type: {file_ext!r} (expected .pdf, .docx or .txt)")

    if print_text:
        print(text)

    return text

In [56]:
# load the resume and the cover letter without echoing them, then join the
# two texts with the cover letter first
r = get_resume_text('data/resume2023.docx', print_text=False)
cl = get_resume_text('data/cover_letter.docx', print_text=False)
resume = ''.join([cl, r])


In [88]:
resume = """ 

April 27, 2023
Dear Hiring Manager,I am writing to express my interest in the Data Scientist position at Polar Asset Management. 
I feel that this career opportunity offered with your organization is a strong match for my experience in data science and investment research and aligns with my interest in developing alternative data products for investment research.
As a Chartered Financial Analyst with a Master’s degree in Data Science I have over 15 years of experience working on data-driven projects. 

Over the last 13 years, I have worked independently as a technology consultant, primarily in capital markets. I have held various quantitative roles, including data science & analytics, dashboard development, performance attribution, 
financial modeling & simulations, and I have substantial experience  building data pipelines and data visualizations.

In my current role at Firmex, I am the principal data scientist building machine-learning models to forecast customer behavior, predict mergers and acquisition activity, optimize product features, and forecast revenue and expenses.  
I am adept at building and automating scalable data pipelines using large structured and unstructured datasets. What sets me apart from others are strong technical skills, coupled with excellent communication skills.  
I can gather requirements from stakeholders, design solutions, write the code and communicate the results to executives.

I recently completed a master's degree in Data Science through Regis University where I maintained a 4.0-grade point average specializing in machine learning, deep learning, natural language processing, and reinforcement learning. 
I am passionate about technology and conduct and publish my own research in my spare time.  I also enjoy building data visualizations and feel I have a knack for communicating complex data concepts. Check out my github repository 
to see some of my recent projects, including my capstone project where I used deep learning to create a successful Bitcoin trading strategy. Should I have piqued your interest I would welcome the chance to discuss your needs and outline my strengths in person. 

Thank you for your time and consideration of my resume.Ken Constable
 
 
Ken Constable BSc. MSc. CFAken.constable@gmail.com | (647) 349 - 6500
Github: https://github.com/kconstableLinkedIn: https://www.linkedin.com/in/kenconstableconsulting
 
Overview
CFA Charter holder with over 15 years of experience as a data scientist and quantitative analyst/developer with a focus on analyzing investment market data. 
Expertise in applying machine learning in investment settings and extensive experience in data warehousing and automating data-intensive processes. 
A strong communicator with a knack for communicating complex data concepts in elegant data visualizations. 
 
Education
Master’s Degree in Data Science, MSc.  Regis University, Denver Colorado
Chartered Financial Analyst, CFA CFA Institute
Bachelor’s Degree in Chemistry, BSc. University of Waterloo, Waterloo, Ontario
 
Technical Skills
Financial Models : Monte Carlo Simulations, Back-Testing, Scenario-Testing, Portfolio Optimization, Portfolio Analytics,  Bloomberg, Factset, BarraOne, Eagle, Alphavantage
Business Intelligence and Data Engineering : SQL, SQL SERVER, ORACLE, HIVE, POSTGRES, SSIS, SSRS, ETL, ELT, AWS Redshift, AWS S3, Airflow, AWS Glue
Machine Learning, Data Analysis, and Data Mining : NLP, Natural Language Processing, Deep Learning, Reinforcement Learning, Time-series Analysis, Supervised Learning, 
Unsupervised Learning, Regression, Classification, Decision Trees, Random Forest, XGBoost, Statistics, Optimization, Pandas, Scikit-learn, TensorFlow, NumPy, SpaCy, Keras, 
Beautiful Soup, VBA, Microsoft Excel, Object Oriented Programming, Git, Github
Data Visualization : Tableau, Plotly, Dash, R-Shiny, Matplotlib, Communication skills, Visual Storytelling
 
Relevant Career Highlights
A selection of experience demonstrating machine learning, data engineering, financial modeling, and quantitative analysis
 
Firmex : Principal Data Scientist who is responsible for building & deploying machine learning models and data pipelines.  
Design and implementation of predictive models with applications in sales forecasting, customer behavior, helpdesk support, and Merger & Acquisition activity 
( Python, SQL, Airflow, AWS S3, AWS Redshift, AWS Glue, Machine Learning, Data Mining, Data Visualization, Tableau, XGBoost, Random Forest, ETL, ELT, Data Pipeline Automation, Econometrics, Economics, Capital Markets )
 
 
AGF Investments : Developed statistical factor-based investment models for AGF Management.  
Implemented the Portfolio Managers' strategy for over 40 investment models, resulting in a robust system that automatically rolls the models forward, providing automated daily investment recommendations. 
Implemented the key reporting and analytics data warehouse, which hosts accounting, transaction, and market data. Responsibilities included data analysis, data profiling, coding ETL processes, and developing data visualizations and investment performance reports. 
( Python, SQL, VBA, R,  XL-wings, Automation, Trade Automation, Solution Architecture, Economics, Econometrics, Investment Research, Capital Markets )
  
Northwater Capital : Worked as a portfolio analyst at the hedge fund manager Northwater Capital for 6 years. 
Responsibilities included performance measurement, creating data visualizations, financial modeling, back-testing strategies, coding Monte Carlo simulations, and recommending baskets of futures contracts to replicate index performance using supervised learning techniques 
( SQL, SSIS, SSAS, SSRS, VBA, Machine Learning, Time-Series-Analysis, Data Pipeline Automation Monte Carlo Simulations, Economics, Econometrics, Capital Markets )
  
Satori Consulting : Built data models using census data to analyze locations for new credit union branches.  
Identified customers likely to purchase new products using statistical data mining techniques. Built data pipelines and ETL processes
( SQL, SSIS, Machine Learning, Data Mining, Unsupervised Learning, Clustering, Data Pipeline Automation )

Research Projects A collection of academic research and personal interest projects related to data science, machine learning, and data engineering
Bitcoin Price Prediction & Stock Price Prediction   This was the cumulation of my Master’s thesis where I used machine learning to predict the future price of Bitcoin and designed a successful trading strategy using the predictions. 
( LSTM, Deep Learning, Python, Keras, TensorFlow, Pandas, Plotly, Natural Language Processing, NLP, scikit-learn, Economics, Econometrics, Capital Markets, Investment Research )

Portfolio Optimization using Monte Carlo Simulations  This research shows how modern portfolio theory can be used to select the optimal risk-adjusted portfolio.
( Python, Monte Carlo, Plotly, Economics, Econometrics, Investment Research )Resume Optimization:  In this project, I built a process that scrapes an RSS job feed and ranks the positions in order of fit to my resume and cover letter using natural language processing.
And yes, if you’re reading this, my resume has been optimized to match your role :) ( Natural language processing, SpaCY, Python )  
 
Twitter Sentiment Analysis  A demonstration of how natural language processing can be used to understand the sentiment of Twitter posts.  
( R, Plotly, Python, Natural Language Processing, Statistics ) 
 
Pneumonia-Multi class Classification, Alzheimer's Multi-class Classification  Two projects where computer vision and deep learning are utilized to classify disease from images.  
( Computer Vision, Classification, Transfer Learning, Deep Learning, Keras, python, Tensorflow )
 
Mortgage Amortization /Rent vs Buy  An Interactive Dashboard built using Plotly Dash that allows users to visualize mortgage amortization scenarios and determine whether they should rent or buy  
( Python, Plotly, Dash, Data Visualization )
 
MyBand - This is Content Management System for Bands that I designed to help them manage their unstructured data and communications. 
(Product Design, Data Visualization )
 
"""

In [89]:

job_desc = """
Who are we? 
We are a leading global asset manager with US $7.5B assets under management. With teams based in Canada, the US, and the UK, 
we are committed to achieving great risk-adjusted returns.We are looking for a dynamic and passionate Data Scientist to join our growing Data Team. 
You will support the day-to-day data processes and have the opportunity to work with Global Investment Teams across asset classes and strategies.

What will you do? 
Aggregate data sets and enhance analytics to optimize Polar’s current trading and investment strategies
Your primary responsibility is to work with Portfolio Managers and Investment teams to optimize investment strategies using data sets and analytics. 
You will build frameworks to curate and validate datasets and investment models. You will work on business intelligence reporting and dashboards for internal and external circulation.
Learn and develop a deeper appreciation of global financial markets.
You will work closely with your peers and industry experts to understanding Polar’s global investment strategies such as convertible bond.
and risk arbitrage, equities, credit long/short, structured credit, event driven, commodities, fixed income RV, etc.Assist the Data and Investment teams with various projects.
You will support the Data and Investment Teams by leveraging alternative and non-standard datasets and work on various ad hoc projects.

Who are you? You have a strong technical and quantitative background.
You have a Master’s degree in Math, Statistics, Computer Science, Physics, or another quantitative field. 
You have at least 2 years’ experience working within a data science or quantitative finance role. 
You are proficient in SQL, Python, Excel, and R. It would be an asset to have previous Python dashboard experience (Dash, Streamlit, etc).You are an expert in data visualization - You have worked directly with end users on BI, reporting and dashboarding. 
You have been able to effectively communicate complex data and patterns in a simple format.You have a keen eye for detail,
You pay attention to details and are highly meticulous and observant in your work.You enjoy working in a team environment.
You work well in a collaborative and team-oriented environment and have excellent communication skills.You have a keen interest in the financial industry.
You are eager to learn about the financial industry from your peers and industry experts. You are interested in Global financial markets, investments, and keen to learn more and build sector expertise. 
 
 
 Why work with us? We believe in development, mentorship, and training our people.
 We are a passionate, dynamic, and high-performing team who believe in developing our own people, our leaders and team will support your development, and help you get experiences, learn, and challenge yourself for professional and personal growth. 
 Our priority is providing a positive work culture that fosters growth, inclusion, and learning.We provide a competitive compensation and comprehensive benefits package – Our benefits package includes a 
 combination of competitive base salary, bonus, and opportunity for profit sharing, 20 days’ vacation, comprehensive health/vision/dental benefit package, and fitness allowance. 
 We provide a charitable matching program, numerous team-building events, and hybrid work environment that allows you to do your best work.At Polar, we strive to attract, develop, and retain an inclusive workforce that embraces different backgrounds, experience, and ideas. 
 We are an equal opportunity employer and value diversity at our company. We do not discriminate on the basis of race, religion, color, national origin, sex, gender, gender expression, sexual orientation, age, marital status, veteran status, or disability status. 
 We will ensure that individuals with disabilities are provided reasonable accommodation to participate in the job application or interview process, to perform essential job functions, and to receive other benefits and privileges of employment. 
 Please contact us to request accommodation.
"""



## Chat Functions

In [57]:
def get_completion(prompt, model="gpt-3.5-turbo", temp = 0):
    """
    Send one user message to the chat completion endpoint
    Params:
        prompt: string- text sent as the content of the user message
        model: string- name of the chat model to query
        temp: number- passed through as the temperature setting
    Returns: The content of the first choice in the response
    """
    reply = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temp,
    )
    first_choice = reply.choices[0]
    return first_choice.message["content"]

In [161]:
# Prompt comparing the resume with a single job description.
# The reply is parsed with json.loads elsewhere in this notebook, so the model
# is asked for a JSON object rather than a python dictionary, and the delimiter
# named in the instructions (||) is the one actually placed around the text.
prompt = f"""
The task is to compare a job description and a resume.  
The resume is delimited by ```
The job description is delimited by ||

Complete the following tasks and return the results as a single valid JSON object with the following keys.
< similarity: calculate the similarity between the resume text and job description text and return the value as a decimal >
< job_title: extract the job title from the job description. Return the word unknown if it can't be found >
< skills_hard_resume:Extract a list of technical skills and include their frequency of occurance in the resume in JSON format >
< skills_hard_job:Extract a list of technical skills and include their frequency of occurance in the job description in JSON format >
< skills_hard_missing: Return a list of the technical skills that are listed in the skills_hard_job list but are not in the skills_hard_resume list >
< skills_soft_resume:Extract a list of soft skills and include their frequency of occurance in the resume in JSON format >
< skills_soft_job:Extract a list of soft skills and include their frequency of occurance in the job description in JSON format >
< skills_soft_missing: Return a list of the soft skills that are listed in the skills_soft_job list but are not in the skills_soft_resume list >
< industry: infer the industy based on the text in the job description >
< salary: extract the salary amount or range from the job description. If none is found, return the word unknown >
< job_summary: summarize the job description in 4 sentences or less >
< resume_summary: summarize the resume in 4 sentences or less >
< suggestions: suggest ways to improve the resume to better match the job description. Return the results in 5 sentences or less >

```{resume}```

||{job_desc}||

"""

In [173]:

# Prompt template, built once because only the job description changes between
# requests.  The reply is parsed with json.loads below, so the model is asked
# for a JSON object, and the delimiter named in the instructions (||) is the
# one actually placed around the job description.
prompt_template = """
The task is to compare a job description and a resume.  
The resume is delimited by ```
The job description is delimited by ||

Complete the following tasks and return the results as a single valid JSON object with the following keys.
< similarity: calculate the similarity between the resume text and job description text and return the value as a decimal >
< job_title: extract the job title from the job description. Return the word unknown if it can't be found >
< skills_hard_resume:Extract a list of technical skills and include their frequency of occurance in the resume in JSON format >
< skills_hard_job:Extract a list of technical skills and include their frequency of occurance in the job description in JSON format >
< skills_hard_missing: Return a list of the technical skills that are listed in the skills_hard_job list but are not in the skills_hard_resume list >
< skills_soft_resume:Extract a list of soft skills and include their frequency of occurance in the resume in JSON format >
< skills_soft_job:Extract a list of soft skills and include their frequency of occurance in the job description in JSON format >
< skills_soft_missing: Return a list of the soft skills that are listed in the skills_soft_job list but are not in the skills_soft_resume list >
< industry: infer the industy based on the text in the job description >
< salary: extract the salary amount or range from the job description. If none is found, return the word unknown >
< job_summary: summarize the job description in 4 sentences or less >
< resume_summary: summarize the resume in 4 sentences or less >
< suggestions: suggest ways to improve the resume to better match the job description. Return the results in 5 sentences or less >

```{resume}```

||{job_desc}||

"""

# one parsed reply per job that produced valid JSON; failed jobs are skipped,
# so positions in this list do not necessarily line up with jobs_ln
results = []
for i, job in enumerate(jobs_ln):

    # get the job description
    d = job['desc']
    t = job['title']

    print(f'Making Request:{t}')

    # prepare the prompt
    prompt = prompt_template.format(resume=resume, job_desc=d)

    # ask Chat GPT
    try:
        result = get_completion(prompt)
        # show the raw reply before parsing so a malformed reply can be inspected
        print('\n',result)
        result_json = json.loads(result)
        results.append(result_json)
    except json.JSONDecodeError as e:
        print(f"Error Occured: reply was not valid JSON ({e})\n")
    except Exception as e:
        # best-effort batch run: report the failure and move on to the next job
        print(f"Error Occured: {e}\n")

    # pause between requests (presumably to stay under the API rate limit);
    # this also runs after a failure, and is skipped only after the final job
    if i < len(jobs_ln) - 1:
        sleep(10)


Making Request:Data Scientist

 {
    "similarity": 0.123,
    "job_title": "Data Scientist",
    "skills_hard_resume": {
        "Python": 4,
        "SQL": 3,
        "Machine Learning": 3,
        "Data Visualization": 2,
        "Deep Learning": 2,
        "Natural Language Processing": 2,
        "AWS": 2,
        "ETL": 2,
        "Excel": 1,
        "VBA": 1,
        "R": 1,
        "Git": 1,
        "GitHub": 1,
        "Tableau": 1,
        "Plotly": 1,
        "Dash": 1,
        "NumPy": 1,
        "Scikit-learn": 1,
        "TensorFlow": 1,
        "SpaCy": 1,
        "Keras": 1,
        "Beautiful Soup": 1,
        "Object Oriented Programming": 1,
        "Microsoft Excel": 1,
        "Visual Storytelling": 1
    },
    "skills_hard_job": {
        "SQL": 1,
        "Python": 1,
        "Excel": 1,
        "R": 1,
        "Dash": 1,
        "Streamlit": 1
    },
    "skills_hard_missing": [
        "Streamlit"
    ],
    "skills_soft_resume": {
        "Communication skill

In [184]:
results[3]

{'similarity': 0.034,
 'job_title': 'Data Scientist',
 'skills_hard_resume': {'Python': 7,
  'SQL': 4,
  'Machine Learning': 4,
  'Deep Learning': 2,
  'Natural Language Processing': 2,
  'Data Mining': 2,
  'Monte Carlo Simulations': 2,
  'Regression': 1,
  'Classification': 1,
  'Decision Trees': 1,
  'Random Forest': 1,
  'XGBoost': 1,
  'Pandas': 1,
  'Scikit-learn': 1,
  'TensorFlow': 1,
  'NumPy': 1,
  'SpaCy': 1,
  'Git': 1,
  'Github': 1,
  'Tableau': 1,
  'Plotly': 1,
  'Dash': 1,
  'Matplotlib': 1},
 'skills_hard_job': {'SQL': 1,
  'R': 1,
  'Python': 1,
  'Machine Learning': 1,
  'Regression': 1,
  'Decision Tree': 1,
  'Random Forest': 1,
  'XGBoost': 1,
  'Statistics': 1,
  'Optimization': 1,
  'Pandas': 1,
  'Scikit-learn': 1,
  'TensorFlow': 1,
  'NumPy': 1,
  'SpaCy': 1,
  'Git': 1,
  'Github': 1,
  'Tableau': 1,
  'Plotly': 1,
  'Dash': 1,
  'Communication skills': 1,
  'Visual Storytelling': 1},
 'skills_hard_missing': ['R', 'Decision Tree', 'Statistics', 'Optimizatio