In [7]:
import os

import docx2txt
import openai
import pandas as pd
import PyPDF2
import requests
from bs4 import BeautifulSoup

In [2]:
def get_openai_auth(path='data/api-key.txt'):
    """
    Read the API key stored on the first line of a text file.
    Params:
        path: string - location of the key file
    Returns: the first line of the file with surrounding whitespace removed
    """
    with open(path, 'r') as key_file:
        first_line = key_file.readline()
    return str(first_line.strip())

# Read the key once, keep it in `key`, and hand it to the openai module.
key = openai.api_key = get_openai_auth()

In [3]:
def get_completion(prompt, model="gpt-3.5-turbo", temp = 0):
    """
    Send a single user message to the chat completion endpoint.
    Params:
        prompt: string - text sent as the content of the one "user" message
        model: string - model name passed to the API
        temp: number - passed to the API as `temperature`
    Returns: the "content" of the message in the first returned choice
    """
    chat = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temp,
    )
    first_choice = chat.choices[0]
    return first_choice.message["content"]

In [5]:
# Plain string: the prompt has no placeholders, so no f-prefix is needed.
# The backslash must be the very last character on its line to act as a line
# continuation. Followed by a space it is an invalid escape sequence, and a
# literal backslash ends up in the text sent to the model.
prompt = """
Generate a list of three made-up book titles along \
with their authors and genres.
Provide them in JSON format with the following keys:
book_id, title, author, genre.
"""
response = get_completion(prompt)
print(response)

[
  {
    "book_id": 1,
    "title": "The Lost City of Zorath",
    "author": "Aria Blackwood",
    "genre": "Fantasy"
  },
  {
    "book_id": 2,
    "title": "The Last Survivors",
    "author": "Ethan Stone",
    "genre": "Science Fiction"
  },
  {
    "book_id": 3,
    "title": "The Secret of the Haunted Mansion",
    "author": "Lila Rose",
    "genre": "Mystery"
  }
]


## Feed Scrape

In [4]:
def _text_between(text, start_marker, stop_marker):
    """Return the stripped part of `text` after `start_marker` and before the next `stop_marker`."""
    start = text.find(start_marker)
    if start == -1:
        # str.find signals "not found" with -1; using that as a slice bound
        # would silently select the wrong part of the page.
        return ''
    # Look for the stop marker only after the start marker, so an earlier
    # occurrence elsewhere on the page cannot produce an empty slice.
    stop = text.find(stop_marker, start + len(start_marker))
    if stop == -1:
        # No stop marker: keep everything up to the end of the text.
        stop = len(text)
    return text[start:stop].replace(start_marker, '').strip()


def get_linkedin_jobs(urls, timeout=30):
    """
    LinkedIn
    Parse linkedin job links and extract the description and job title
    Params:
        urls: iterable of strings - job posting URLs to download
        timeout: number - seconds passed to requests.get as `timeout`
    Returns: a list with one dict per successfully parsed URL, with keys
        'title', 'company', 'location', 'desc' and 'keywords'.
        'company' and 'location' are always 'unknown' and 'keywords' is
        always an empty list. 'desc' is '' when the start marker is not
        found in the page text.
    Any exception raised while processing a URL is printed and that URL is
    skipped; it is not re-raised.
    """

    # Text markers that surround the job description in the page text
    start_text = 'Submit\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n \n\n\n\n\n\n\n\n\n\n\n\n\n'
    stop_text = 'Show more\n\n'

    print("Scrapping RSS Job Search Feed")
    print("="*100)
    jobs = []
    for url in urls:
        try:
            # get content; without a timeout a stalled connection blocks forever
            r = requests.get(url, timeout=timeout)
            # NOTE(review): no parser is named, so bs4 picks one itself; pinning
            # one may change the whitespace that start_text depends on - confirm
            # before changing.
            soup = BeautifulSoup(r.content)
            jd = soup.text

            # parse the text
            desc = _text_between(jd, start_text, stop_text)

            # get the job title (raises AttributeError when the page has no <h1>,
            # which is reported by the handler below)
            title = soup.find('h1').text

            jobs.append({'title':title,'company':'unknown','location':'unknown','desc':desc,'keywords':[]})

            # print progress
            print(f"{title[:30]:{30}}| {desc[:60]}...")
        except Exception as e:
            print(f"Error scrapping LinkedIn:{e}")
    return jobs

In [5]:
# LinkedIn job-posting URLs handed to get_linkedin_jobs below. Each one is a
# /jobs/view/<id>/ link carrying query parameters such as refId and trackingId.
urls = [
    'https://www.linkedin.com/jobs/view/3549932260/?refId=dbfa93b5-d784-445b-97f8-29d8f9ad0646&trackingId=jftaQgEdSpiSvntR2k17cQ%3D%3D',
    'https://www.linkedin.com/jobs/view/3540697616/?refId=59a513ba-522a-4aea-af91-759ff6f76cda&trackingId=2hyQIrX%2BRkOf%2B98YR1qBJA%3D%3D',
    'https://www.linkedin.com/jobs/view/3562298970/?refId=54f09146-9c3e-40e8-ac3c-595f85731463&trackingId=k0bOqRvwSRCieTxu5mwfhg%3D%3D',
    'https://www.linkedin.com/jobs/view/3564381193/?refId=b670d576-6f5e-4a0e-96f7-3903c26a7d54&trackingId=nR%2FGTzkWR6uQzB6FcnQVWg%3D%3D',
    'https://www.linkedin.com/jobs/view/3511070628/?refId=52f1cd7e-492f-46d4-894e-ab9ec08714f3&trackingId=SdhJHD3hRf61c5CnOOCOuw%3D%3D',
    'https://www.linkedin.com/jobs/view/3552670722/?refId=9c54e362-c54a-49f4-8d25-5639afc7d65e&trackingId=GUoqINtfSuWsAtaXC293SA%3D%3D',
    'https://www.linkedin.com/jobs/view/3492994174/?refId=0436884b-6af1-412d-8711-21574b7e80da&trackingId=GYxBqHwDTHqqRAZN3vhSOA%3D%3D',
    'https://www.linkedin.com/jobs/view/3523929176/?refId=f06fd3a4-e203-418a-beae-9762646a74db&trackingId=HHBPl3eGRZ%2Bym6at%2Fjopig%3D%3D',
    'https://www.linkedin.com/jobs/view/3503331250/?refId=9b080d13-deec-416e-b1c8-552165f513c0&trackingId=NGekzdgoQKei%2Fb6zSKO2%2BQ%3D%3D',
    'https://www.linkedin.com/jobs/view/3408328555/?alternateChannel=search&refId=oX5cyhsmI7zOQyRPBo0f7Q%3D%3D&trackingId=BajQQkTagVRK2lZohfr5Yg%3D%3D',
    'https://www.linkedin.com/jobs/view/3568963865/?refId=bc1c7e62-b103-4cf0-a4e9-66f20b87bd31&trackingId=3NJ%2BVJGpSlCYPvIldHBErw%3D%3D',
    'https://www.linkedin.com/jobs/view/3603450410/?refId=82be66b8-b28e-4cf0-b0e4-8b41711cbeec&trackingId=nqsYPLMQQpCraxTKReeBbA%3D%3D',
    'https://www.linkedin.com/jobs/view/3602324894/?refId=3063ff17-7a41-48aa-befa-0dde58b0221a&trackingId=CGVdLUP5RmmEdctLP%2FuW1A%3D%3D',
    'https://www.linkedin.com/jobs/view/3586705675/?refId=004b1cd6-ce00-47ad-a6df-143e20e7f7f6&trackingId=vXlOqlYkSyKekz98Nw0Mdw%3D%3D',
    'https://www.linkedin.com/jobs/view/3587185876/?refId=01bb2a72-770c-4faf-bbc7-8e5bad1764c1&trackingId=DjXELnTKRzyA1R5C2gmCxA%3D%3D',
    'https://www.linkedin.com/jobs/view/3598846530/?refId=c3f374fd-11d7-431c-ab51-cbe36fcdde66&trackingId=RmnRmZ98QfiyKN3IumH6%2Bg%3D%3D',
    'https://www.linkedin.com/jobs/view/3571402682/?refId=86a4d9e7-ba0a-481b-8c40-21a6680550be&trackingId=fDr%2BWOFWSZGWq4fLuNp0MA%3D%3D',
    'https://www.linkedin.com/jobs/view/3562148114/?refId=b8a69f54-26ba-4356-bc96-f79f8552c077&trackingId=%2FhlnaWK5SFalqAq8C%2BkCFg%3D%3D'
]

# Download and parse every URL above. jobs_ln is a list of job dicts; URLs that
# fail are reported on stdout by get_linkedin_jobs and left out of the list.
jobs_ln = get_linkedin_jobs(urls)

Scrapping RSS Job Search Feed

2,000+ Strategy Analyst Jobs | ...

353 Quantitative Analyst Jobs| ...
Quantitative Analyst          | About Upfeat MediaUpfeat is on a mission to be the leader in...

761 Manager Of Machine Learni| ...
Managing Director, Business In| Company DescriptionMake an impact at a global and dynamic in...
Data Scientist II - Machine Le| Credit Sesame is a financial app for consumers who want to b...
Senior Analyst, Business Intel| Company DescriptionMake an impact at a global and dynamic in...

11,000+ Data Scientist Jobs i| ...
Lead Data Scientist, Finance  | About this role:We are seeking a Lead Data Scientist with ex...
Associate/Senior Associate, Da| Company DescriptionMake an impact at a global and dynamic in...

191 Senior Data Scientist Job| ...
Principal Research Data Scient| Leads and executes deep dive diagnostic, predictive, and pre...
Senior Machine Learning Engine| Our client, a startup based in Toronto, is seeking a Machine...
Senior Data Scientist,

## Resume Scrape

In [8]:
def get_resume_text(path,print_text=True):
    """ 
    Read resume text content from file (pdf, docx or txt)
    Params:
        path: string- path to the resume file
        print_text: boolean.  Prints the text
    Returns: A text object with the resume file content
    Raises:
        ValueError: the file extension is not .pdf, .docx or .txt
    """

    # Compare the extension case-insensitively so 'CV.PDF' is handled like 'cv.pdf'
    file_ext = os.path.splitext(path)[1].lower()

    # The context manager closes the file even when one of the parsers raises
    with open(path,'rb') as file:
        # extract text content
        if file_ext=='.pdf':
            # PdfReader / .pages replace PdfFileReader / numPages / getPage,
            # which PyPDF2 3.x no longer provides
            pdf = PyPDF2.PdfReader(file)
            # `or ''` guards against a page that yields no text
            text = ''.join(page.extract_text() or '' for page in pdf.pages)
        elif file_ext=='.docx':
            text = docx2txt.process(file)
        elif file_ext=='.txt':
            # The file is opened in binary mode, so decode the bytes; str(bytes)
            # would return the "b'...'" repr instead of the text itself.
            # utf-8-sig also drops a leading byte-order mark if present.
            text = file.read().decode('utf-8-sig', errors='replace')
        else:
            raise ValueError(f"Unsupported resume file type: {file_ext!r}")

    if print_text:
        print(text)

    return text

In [9]:
# Load the resume and the cover letter without echoing them, then show the
# combined text with the cover letter first.
r = get_resume_text('data/resume2023.docx', print_text=False)
cl = get_resume_text('data/cover_letter.docx', print_text=False)
resume = ''.join((cl, r))
print(resume)

April 27, 2023





Dear Hiring Manager,



I am writing to express my interest in the Data Scientist position at Polar Asset Management. I feel that this career opportunity offered with your organization is a strong match for my experience in data science and investment research and aligns with my interest in developing alternative data products for investment research.



As a Chartered Financial Analyst with a Master’s degree in Data Science I have over 15 years of experience working on data-driven projects. Over the last 13 years, I have worked independently as a technology consultant, primarily in capital markets. I have held various quantitative roles, including data science & analytics, dashboard development, performance attribution, financial modeling & simulations, and I have substantial experience  building data pipelines and data visualizations.



In my current role at Firmex, I am the principal data scientist building machine-learning models to forecast customer behavior, 