# I. IMPORT & SET UP

In [1]:
import pandas as pd
import numpy as np
import time
from datetime import datetime
import urllib.parse
import os
import re

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

pd.set_option('max_colwidth', 800)

In [2]:
# Directory layout: every path below is resolved relative to the directory
# the notebook kernel was started in.
INFO_DIR_NAME = 'job_info_dir'  # sub-folder receiving the scraped job details (CSV)
ID_DIR_NAME = 'job_id_dir'      # sub-folder receiving the scraped job ID lists (txt)
PROJECT_DIR = os.getcwd()

### 1. DRIVER FUNCTIONS

In [3]:
# Shared handle to the single Selenium WebDriver session; None means no driver is running.
DRIVER = None

In [4]:
def start_driver():
    """Start the shared headless Chrome session and store it in ``DRIVER``.

    Does nothing (apart from printing a notice) when a driver is already
    running, so the cell can be re-run safely.
    """
    global DRIVER
    if DRIVER is not None:
        print('A driver is already running. Please close current driver before starting another one.')
        return

    options = Options()
    for flag in ('--headless=new', '--no-sandbox', '--incognito', '--disable-dev-shm-usage'):
        options.add_argument(flag)

    # os.getcwd() has no trailing separator, so plain string concatenation
    # would glue the folder name onto the project directory name.
    chromedriver_path = os.path.join(
        PROJECT_DIR, 'Chromedriver', 'chromedriver_mac_arm64', 'chromedriver'
    )
    service = Service(chromedriver_path)
    DRIVER = webdriver.Chrome(service=service, options=options)

def close_driver():
    """Shut down the shared WebDriver session, if any, and reset ``DRIVER``.

    Uses ``quit()`` rather than ``close()``: ``close()`` only closes the
    current window, whereas ``quit()`` ends the whole session and the
    chromedriver process behind it.
    """
    global DRIVER
    if DRIVER is None:
        return
    try:
        DRIVER.quit()
    finally:
        # Always drop the handle so start_driver() can create a fresh session,
        # even if the browser had already died and quit() raised.
        DRIVER = None

In [5]:
# Shut down any driver left over from a previous run (no-op when DRIVER is None).
close_driver()

In [6]:
# Launch the shared headless Chrome session used by the scraping cells below.
start_driver()

### 2. URL & LOGIN SETUPS

In [7]:
from linkedin_scraper import actions

# SECURITY: credentials must never be stored in the notebook. They are read
# from the environment instead; any password that was previously committed in
# this cell should be treated as compromised and rotated.
EMAIL = os.environ.get("LINKEDIN_EMAIL")
PASSWORD = os.environ.get("LINKEDIN_PASSWORD")
if not EMAIL or not PASSWORD:
    raise RuntimeError(
        "Set the LINKEDIN_EMAIL and LINKEDIN_PASSWORD environment variables before logging in."
    )
actions.login(DRIVER, EMAIL, PASSWORD)

BASE_URL = "https://www.linkedin.com/jobs/search/"
SEARCH_TERM = "Data Scientist"
LOCATION_TERM = "United States"

# Percent-encode the free-text terms so spaces and symbols survive in the query string.
SEARCH_URL = (
    BASE_URL
    + f"?keywords={urllib.parse.quote(SEARCH_TERM)}"
    + f"&location={urllib.parse.quote(LOCATION_TERM)}&refresh=true"
)
SEARCH_URL

'https://www.linkedin.com/jobs/search/?keywords=Data%20Scientist&location=United%20States&refresh=true'

# II. FUNCTIONS

### 1. FUNCTIONS TO SCRAPE ALL JOB IDS ON SEARCH PAGES

In [25]:
# Function to scrape all job IDs from the search result pages
def get_all_job_ids_from_page(url, num_page=-1):
    """Collect the unique job IDs listed on the paginated search results.

    Parameters
    ----------
    url : str
        Search URL without a ``start`` offset; the offset of each result page
        is appended to it as ``&start=<n>``.
    num_page : int, default -1
        Maximum number of result pages to scrape. Zero or a negative value
        means "every page reported by the pagination control".

    Returns
    -------
    tuple(list, datetime)
        The unique job IDs in first-seen order, and the time the scrape started.
    """
    job_id_scrape_time = datetime.now()
    print(f'DATE & TIME: {job_id_scrape_time.strftime(("%Y/%m/%d %H:%M:%S"))}')
    job_id_list = []
    DRIVER.get(url)
    time.sleep(5)

    # Count total number of pages in the search result: the last <li> of the
    # pagination control carries the highest page number.
    try:
        last_page_item = (
            DRIVER.find_element(By.CLASS_NAME, 'jobs-search-results-list__pagination')
            .find_element(By.TAG_NAME, 'ul')
            .find_elements(By.TAG_NAME, 'li')[-1]
        )
        max_page = int(last_page_item.text)
    except (NoSuchElementException, IndexError, ValueError):
        # NOTE(review): presumably a result set that fits on one page has no
        # usable pagination control; fall back to a single page instead of crashing.
        print('Could not read the pagination control; assuming a single page of results.')
        max_page = 1

    num_page = min(max_page, num_page) if num_page > 0 else max_page
    print(f'Scraping {num_page} page(s) out of {max_page} total pages for {SEARCH_TERM} jobs search in {LOCATION_TERM}...')
    total_err_count = 0

    # Loop through num_page pages to scrape all job IDs listed on each page
    for p in range(1, num_page + 1):
        # Build each page URL from the untouched base URL. Re-assigning `url`
        # here would pile the offsets up (&start=0&start=25&start=50...).
        page_url = f'{url}&start={(p - 1) * 25}'  # p=1: start=0, p=2: start=25 ...
        DRIVER.get(page_url)
        time.sleep(3)

        # Find all li tags that contain the job information
        job_items = (
            DRIVER.find_element(By.CLASS_NAME, "jobs-search-results-list")
            .find_element(By.CLASS_NAME, "scaffold-layout__list-container")
            .find_elements(By.CLASS_NAME, "scaffold-layout__list-item")
        )
        print(f'Found {len(job_items)} job IDs on page {p}.', end=' ')

        err_count = 0
        for job_item in job_items:
            # get_attribute() returns None for a missing attribute instead of
            # raising, so a missing ID has to be detected explicitly.
            job_id = job_item.get_attribute("data-occludable-job-id")
            if job_id:
                job_id_list.append(job_id)
            else:
                err_count += 1

        print(f'Could not scrape {err_count} jobs.')
        total_err_count += err_count

    # Remove dupes while keeping first-seen order (a plain set() would shuffle the IDs)
    job_id_list = list(dict.fromkeys(job_id_list))
    print(f'---> Found total {len(job_id_list)} unique jobs. Could not scrape total of {total_err_count} jobs.')

    return job_id_list, job_id_scrape_time


### 2. FUNCTION TO SCRAPE INFORMATION OF EACH SINGLE JOB

In [22]:
def _scrape_attribute(attribute, *locators):
    """Follow a chain of (by, value) locators from the page root and return the
    stripped value of `attribute`, or None when an element or the attribute is missing."""
    try:
        element = DRIVER
        for by, value in locators:
            element = element.find_element(by, value)
        raw = element.get_attribute(attribute)
    except NoSuchElementException:
        return None
    return raw.strip() if raw is not None else None


def _scrape_span_text(element):
    """Return the visible text of the first <span> under `element` with HTML
    comments removed and whitespace stripped, or None when there is no <span>."""
    try:
        text = element.find_element(By.TAG_NAME, 'span').text
    except NoSuchElementException:
        return None
    text = re.sub(r'<!--(?=.*?-->).*?-->', '', text, flags=re.DOTALL)  # Remove all HTML comments
    return text.strip()


def get_single_job_info(job_id):
    """Open one LinkedIn job posting and scrape its details.

    Parameters
    ----------
    job_id : int or str
        LinkedIn job ID; the posting is loaded from
        ``https://www.linkedin.com/jobs/view/<job_id>``.

    Returns
    -------
    dict
        One record with a fixed set of keys. A field that cannot be found on
        the page is left as ``None``; 'Job Overview' is ``'Unknown'`` when the
        first insight row contains none of the known employment-type or
        seniority keywords.
    """
    info = {
        'Job ID': job_id,
        'Job URL': None,
        'Name': None,
        'Company': None,
        'Company Logo URL': None,
        'Location': None,
        'Workplace Type': None,
        'Time Posted': None,
        'Applicants Count': None,
        'Job Overview': None,
        'Company Overview': None,
        'HR URL': None,
        # Declared up front so every record has the same keys even when the
        # description cannot be scraped.
        'Job Details': None,
    }

    job_url = f"https://www.linkedin.com/jobs/view/{job_id}"
    DRIVER.get(job_url)
    time.sleep(3)  # give the page time to render before querying it

    # Job URL
    info['Job URL'] = job_url

    # Job Name
    info['Name'] = _scrape_attribute(
        'innerHTML',
        (By.CLASS_NAME, "jobs-unified-top-card__job-title"),
    )

    # Company
    info['Company'] = _scrape_attribute(
        'innerHTML',
        (By.CLASS_NAME, "jobs-unified-top-card__company-name"),
        (By.TAG_NAME, 'a'),
    )

    # Company Logo
    info['Company Logo URL'] = _scrape_attribute(
        'src',
        (By.CLASS_NAME, 'p5'),
        (By.TAG_NAME, 'img'),
    )

    # Location
    info['Location'] = _scrape_attribute(
        'innerHTML',
        (By.CLASS_NAME, "jobs-unified-top-card__subtitle-primary-grouping"),
        (By.CLASS_NAME, "jobs-unified-top-card__bullet"),
    )

    # Workplace Type
    info['Workplace Type'] = _scrape_attribute(
        'innerHTML',
        (By.CLASS_NAME, "jobs-unified-top-card__workplace-type"),
    )

    # Time Posted
    info['Time Posted'] = _scrape_attribute(
        'innerHTML',
        (By.CLASS_NAME, "jobs-unified-top-card__posted-date"),
    )

    # Applicants Count: dedicated element first, generic bullet as a fallback
    secondary_grouping = (By.CLASS_NAME, "jobs-unified-top-card__subtitle-secondary-grouping")
    applicants = _scrape_attribute(
        'innerHTML',
        secondary_grouping,
        (By.CLASS_NAME, "jobs-unified-top-card__applicant-count"),
    )
    if applicants is None:
        applicants = _scrape_attribute(
            'innerHTML',
            secondary_grouping,
            (By.CLASS_NAME, "jobs-unified-top-card__bullet"),
        )
    info['Applicants Count'] = applicants

    # Job & Company Insight
    job_overview_list = ['Temporary', 'Part-time', 'Full-time', 'Internship', 'Contract', 'Entry level', 'Junior', 'Mid-Senior level', 'Associate', 'Executive', 'Director']
    job_overview_pattern = '|'.join(job_overview_list)

    # find_elements() returns a (possibly short or empty) list and never raises,
    # so each insight row is handled on its own. Unpacking the slice into two
    # names would raise ValueError for postings with fewer than two rows.
    insights = DRIVER.find_elements(By.CLASS_NAME, "jobs-unified-top-card__job-insight")[:2]
    if len(insights) >= 1:
        job_overview = _scrape_span_text(insights[0])
        if job_overview is not None:
            # Only keep the text when it mentions a known job-type / level keyword
            if re.search(job_overview_pattern, job_overview) is not None:
                info['Job Overview'] = job_overview
            else:
                info['Job Overview'] = 'Unknown'
    if len(insights) >= 2:
        info['Company Overview'] = _scrape_span_text(insights[1])

    # HR URL
    info['HR URL'] = _scrape_attribute(
        'href',
        (By.CSS_SELECTOR, "div[class*='hirer-card__hirer-information'] a"),
    )

    # Job details
    # NOTE(review): for some postings the first <span> only holds a short
    # banner rather than the full description; confirm the selector.
    try:
        details_root = DRIVER.find_element(By.ID, 'job-details')
    except NoSuchElementException:
        details_root = None
    if details_root is not None:
        info['Job Details'] = _scrape_span_text(details_root)

    return info


### 3. TEST JOB IDS

In [19]:
# Smoke test: scrape a single known posting and inspect the resulting record.
test_info = get_single_job_info(3525179725) 
test_info

{'Job ID': 3525179725,
 'Job URL': 'https://www.linkedin.com/jobs/view/3525179725',
 'Name': 'AI Research Engineer- Remote',
 'Company': 'Neo Cybernetica',
 'Company Logo URL': 'https://media.licdn.com/dms/image/C560BAQFdt_fuOIJ5TQ/company-logo_100_100/0/1644369586489?e=1688601600&v=beta&t=QyxXmGwyZyxqfDP7SiKcGuUJGrnmgnsnpdG5FbV-ESw',
 'Location': 'Bedford, NH',
 'Workplace Type': 'Remote',
 'Time Posted': '3 weeks ago',
 'Applicants Count': '81 applicants',
 'Job Overview': 'Unknown',
 'Company Overview': 'See how you compare to 81 applicants. Try Premium for free',
 'HR URL': None,
 'Job Details': 'About Us\n\nWe are a next-gen cybernetics start-up backed by a few top-tier investors (led by NEA).\n\nOur R&D blends robotics, machine learning, and high-fidelity simulation. We aim to push the boundaries of what intelligent systems are capable of achieving both autonomously and in collaboration with humans.\n\nBefore starting Neo Cybernetica, our CEO founded the unicorn AI company DataRo

In [20]:
# Smoke test on a second posting.
test_info = get_single_job_info(3527055540)
test_info

{'Job ID': 3527055540,
 'Job URL': 'https://www.linkedin.com/jobs/view/3527055540',
 'Name': 'Data Analyst',
 'Company': 'Fenway Health',
 'Company Logo URL': 'https://media.licdn.com/dms/image/C4D0BAQFA0rlVxLg0HA/company-logo_100_100/0/1519856202235?e=1688601600&v=beta&t=RbwZT1Nr218rM1CX3vxqV7L3_VY2xfsUCZy2ieT0rMM',
 'Location': 'Boston, MA',
 'Workplace Type': 'On-site',
 'Time Posted': '2 weeks ago',
 'Applicants Count': '163 applicants',
 'Job Overview': 'Part-time · Entry level',
 'Company Overview': '201-500 employees · Hospitals and Health Care',
 'HR URL': None,
 'Job Details': 'Reporting to the Director of Data Analytics, the Data Analyst I accurately provides data management support, data analyses, and data visualizations to internal and external stakeholders.\n\nRepresentative Duties\n\nQueries, analyzes, visualizes, and interprets data and serves as a content expert on existing data sources and data reporting tools'}

In [23]:
# Same posting as the following test cell; the two cells compare the two variants of the 'Job Details' lookup.
test_info = get_single_job_info(3551940981) # without .find_element(By.TAG_NAME, 'span') --> scrape partial job detail (text in ul is not scraped)
test_info

{'Job ID': 3551940981,
 'Job URL': 'https://www.linkedin.com/jobs/view/3551940981',
 'Name': 'Data Analyst',
 'Company': 'Harvard University',
 'Company Logo URL': 'https://media.licdn.com/dms/image/C4E0BAQF5t62bcL0e9g/company-logo_100_100/0/1519855919126?e=1688601600&v=beta&t=pAF1AvNhhJet9rY9oA43thyAPwGffcV8N7kQ644qj1k',
 'Location': 'Harvard, MA',
 'Workplace Type': 'On-site',
 'Time Posted': '1 day ago',
 'Applicants Count': '34 applicants',
 'Job Overview': 'Temporary · Entry level',
 'Company Overview': '10,001+ employees · Higher Education',
 'HR URL': None,
 'Job Details': 'About the job\nThis job is sourced from a job board. Learn more\n61864BRAuto req ID:61864BRJob Code:I0857P IT Business Analysis Prfss III Department Office Location:USA - MA - Cambridge Business Title:Data AnalystSub-Unit:Division of Continuing Education Salary Grade (https://hr.harvard.edu/salary-ranges#ranges) :057Time Status:Full-time Union:00 - Non Union, Exempt or Temporary Additional Qualifications and 

In [21]:
# Same posting as the previous test cell, scraped with the <span> lookup in place.
test_info = get_single_job_info(3551940981)  # with .find_element(By.TAG_NAME, 'span')
test_info

{'Job ID': 3551940981,
 'Job URL': 'https://www.linkedin.com/jobs/view/3551940981',
 'Name': 'Data Analyst',
 'Company': 'Harvard University',
 'Company Logo URL': 'https://media.licdn.com/dms/image/C4E0BAQF5t62bcL0e9g/company-logo_100_100/0/1519855919126?e=1688601600&v=beta&t=pAF1AvNhhJet9rY9oA43thyAPwGffcV8N7kQ644qj1k',
 'Location': 'Harvard, MA',
 'Workplace Type': 'On-site',
 'Time Posted': '1 day ago',
 'Applicants Count': '34 applicants',
 'Job Overview': 'Temporary · Entry level',
 'Company Overview': '10,001+ employees · Higher Education',
 'HR URL': None,
 'Job Details': 'This job is sourced from a job board. Learn more'}

# III. START SCRAPING

### 1. GET ALL JOB IDS INTO A LIST

In [26]:
# Collect the job IDs from the search results; also keeps the scrape start time,
# which is used to name the output file.
num_page = 0 # to scrape all pages: set to 0 or -1
job_id_list, job_id_scrape_time = get_all_job_ids_from_page(SEARCH_URL, num_page=num_page)
# job_id_list

DATE & TIME: 2023/04/03 22:11:49
Scraping 40 page(s) out of 40 total pages for Data Scientist jobs search in United States...
Found 25 job IDs on page 1. Could not scrape 0 jobs.
Found 25 job IDs on page 2. Could not scrape 0 jobs.
Found 25 job IDs on page 3. Could not scrape 0 jobs.
Found 25 job IDs on page 4. Could not scrape 0 jobs.
Found 25 job IDs on page 5. Could not scrape 0 jobs.
Found 25 job IDs on page 6. Could not scrape 0 jobs.
Found 25 job IDs on page 7. Could not scrape 0 jobs.
Found 25 job IDs on page 8. Could not scrape 0 jobs.
Found 25 job IDs on page 9. Could not scrape 0 jobs.
Found 25 job IDs on page 10. Could not scrape 0 jobs.
Found 25 job IDs on page 11. Could not scrape 0 jobs.
Found 25 job IDs on page 12. Could not scrape 0 jobs.
Found 25 job IDs on page 13. Could not scrape 0 jobs.
Found 25 job IDs on page 14. Could not scrape 0 jobs.
Found 25 job IDs on page 15. Could not scrape 0 jobs.
Found 25 job IDs on page 16. Could not scrape 0 jobs.
Found 25 job IDs on

### 2. WRITE LIST OF JOB IDS TO FILE

In [28]:
# The file name embeds the scrape time, so sorting the names sorts the files chronologically.
file_name = f'jobs_{job_id_scrape_time.strftime("%y%m%d_%H%M%S")}.txt'
job_id_dir = os.path.join(PROJECT_DIR, ID_DIR_NAME)
JOB_FILE_PATH = os.path.join(job_id_dir, file_name)

# Write file to folder (create the folder on first run), one job ID per line
os.makedirs(job_id_dir, exist_ok=True)
with open(JOB_FILE_PATH, 'w') as f:
    f.writelines(f'{job_id}\n' for job_id in job_id_list)

# Report only after the file has actually been written
print(f'[{job_id_scrape_time.strftime(r"%Y/%m/%d %H:%M:%S")}] Updated job list at {JOB_FILE_PATH}')

[2023/04/03 22:11:49] Updated job list at /Users/thule/Desktop/DSProjects/linkedin-analysis/job_id_dir/jobs_230403_221149.txt


### 3. LOAD MOST RECENT JOB IDS FILE

In [None]:
# Get most recent id file
JOB_FILE_PATH = os.path.join(PROJECT_DIR, ID_DIR_NAME, sorted(os.listdir(os.path.join(PROJECT_DIR, ID_DIR_NAME)))[-1])

with open(JOB_FILE_PATH, 'r') as f:
    job_id_list = [l.strip() for l in f.readlines()]

### 4. GET INFORMATION FROM ALL JOB PAGES

In [None]:
from tqdm.notebook import tqdm

job_info_list = []
failed_job_ids = []  # IDs whose page could not be scraped, kept for a later retry

for job_id in tqdm(job_id_list):
    try:
        job_info_list.append(get_single_job_info(job_id))
    except Exception as e:
        # Best effort: one broken page must not abort the whole run, but record
        # which job failed and why instead of printing a bare message.
        failed_job_ids.append(job_id)
        print(f'[{job_id}] {type(e).__name__}: {e}')

print(f'Scraped {len(job_info_list)} job(s); {len(failed_job_ids)} failed.')
job_info_list[:3]

In [34]:
# Build the table in one go from the list of per-job records (one row per job).
job_df = pd.DataFrame(job_info_list)
job_df.head(5)

Unnamed: 0,Job ID,Job URL,Name,Company,Company Logo URL,Location,Workplace Type,Time Posted,Applicants Count,Job Overview,Company Overview,HR URL,Job Details
0,3520044004,https://www.linkedin.com/jobs/view/3520044004,"Data Scientist, Research",TikTok,https://media.licdn.com/dms/image/C510BAQGCdThXIss7UQ/company-logo_100_100/0/1539940587971?e=1688601600&v=beta&t=VDnIrYH4vqNPMEEKoufCQND_3pgPD7hjF5uaafOJJ0A,"San Jose, CA",Hybrid,2 weeks ago,Over 200 applicants,Full-time,"10,001+ employees · Entertainment Providers",,"About the job\nThe base salary range for this position in the selected city is $144,000 - $312,000 annually.\nCompensation may vary outside of this range depending on a number of factors, including a candidate’s qualifications, skills, competencies and experience, and location. Base pay is one part of the Total Package that is provided to compensate and recognize employees for their work, and this role may be eligible for additional discretionary bonuses/incentives, and restricted stock units."
1,3523743282,https://www.linkedin.com/jobs/view/3523743282,Global Data Scientist,Kimberly-Clark,https://media.licdn.com/dms/image/C560BAQFahtjOdf_ETQ/company-logo_100_100/0/1542208571146?e=1688601600&v=beta&t=rlQc0Ii5WlMa8xcJ2w0YEEsfNoCgzxcGbQbRCdyoHRA,United States,Remote,2 weeks ago,Over 200 applicants,Full-time · Mid-Senior level,"10,001+ employees · Manufacturing",,"About the job\nYou’re not the person who will settle for just any role. Neither are we. Because we’re out to create Better Care for a Better World, and that takes a certain kind of person and teams who care about making a difference. Here, you’ll bring your professional expertise, talent, and drive to building and managing our portfolio of iconic, ground-breaking brands. In your Global Data Scientist role, you’ll help us deliver better care for billions of people around the world."
2,3520459813,https://www.linkedin.com/jobs/view/3520459813,Data Analytics Intern (Summer 2023),Industry Dive,https://media.licdn.com/dms/image/C4E0BAQEAkpLAgFUtpA/company-logo_100_100/0/1520994058492?e=1688601600&v=beta&t=WeD5R9M5oQN332HPtTYdnSGcA8_yOivDYu13ypB1uhM,"Washington, DC",Remote,,,$16/hr - $21/hr (from job description) · Internship · Internship,201-500 employees · Online Audio and Video Media,,"About the job\nCompany Description\n\nIndustry Dive is a leading business journalism company. Nearly 13 million decision-makers across 20+ competitive industries rely on its exclusive insight and analysis delivered through 26 publications. Industry Dive was founded in 2012 to provide business leaders with the information they need to move industries forward. Our team of reporters, editors, designers, and marketers delivers insights and programs that spark innovation, fuel growth and shape agendas in every industry we cover.\n\n\n\nAre you passionate about analyzing big data to tell stories and spot trends? Do you enjoy synthesizing digital metrics such as pageviews, clicks, and user behavior into a narrative that drives decision-making and powers editorial strategy, audience growth, and product development?\n\nThe Data Analytics Intern will be given significant responsibilities that directly impact business strategy by digging into various datasets to highlight insights and themes that help the company better reach, understand, and speak to their readers. You’ll work alongside an experienced analyst to create dashboards that dig into web traffic, email newsletter performance, and subscriber data across 26+ industries. 
This is a great opportunity to get hands-on experience in data analytics, visualization, and strategy for a fast-growing digital media company.\n\nWe are looking for a smart, creative, and data-savy undergraduate student to complete a summer internship within the marketing data team specifically focused on web traffic and subscriber analytics.\n\n\n\n\n\n\n\nThe salary for this internship is $16-21/hour based on experience."
3,3525723554,https://www.linkedin.com/jobs/view/3525723554,Data Scientist Solution Specialist- IT Internship,Waters Corporation,https://media.licdn.com/dms/image/C560BAQHFDhBFVWfhzg/company-logo_100_100/0/1656651227123?e=1688601600&v=beta&t=EZyQTqSg2K6YVQI4NzPP73QdGTMXzZEtaWzGK7cjLvI,"Milford, MA",On-site,1 week ago,Over 200 applicants,Internship · Internship,"5,001-10,000 employees · Biotechnology Research",,"About the job\nOverview\n\nAn internship within the Information Technology (IT) department of Waters Technology Corporation provides an opportunity for current undergraduate and graduate students to gain training, real-world experience and mentorship by working with industry professionals on high priority initiatives. In this paid and full-time internship, you will help to shape the future of the company and its technology transformation. You will learn about our company’s market, products and culture while driving and participating within IT projects/programs that support business objectives and our overall mission. Throughout the internship, you will receive performance feedback and mentoring, build business/technical acumen and develop skills that will serve as the foundation for your professional career.\n\nYou will also be part of our 2023 Global Intern Program and will be invited to attend virtual Power Hour sessions, roundtables hosted by members of our Executive Committee as well as joining local events on the site you are based. 
This is a great opportunity to learn more about Waters and the variety of roles we offer as well as learning more about the Life Sciences industry.\n\nThis is a paid internship for a period of 3 months over the summer period and we will have 4 intake dates which will be communicated to you during the course of your application.\n\n\n\nDuring your internship, you will be working in the Information Technology business area based in the Milford, MA facility and completing a project that will involve:\n\n\n\n\n\nWe'd love to hear from you if you have:\n\n\n\n\n\n\n\nWaters Corporation (NYSE: WAT), the world's leading specialty measurement company, has pioneered chromatography, mass spectrometry and thermal analysis innovations serving the life, materials, and food sciences for more than 60 years. With more than 7,000 employees worldwide, Waters operates directly in 35 countries, including 15 manufacturing facilities, with products available in more than 100 countries. Our team focuses on creating business advantages for laboratory-dependent organizations to enable significant advancement in healthcare delivery, environmental management, food safety, and water quality.\n\nWorking at Waters enables our employees to unlock the potential of their careers. Our global team is driven by purpose. We strive to be better, learn and improve every day in everything we do. We’re the problem solvers and innovators that aren’t afraid to take risks to transform the world of human health and well-being. We’re all in it together delivering benefit as one to provide the insights needed today in order to solve the challenges of tomorrow.\n\nDiversity and inclusion are fundamental to our core values at Waters Corporation. It is our responsibility to actively implement programs and practices to drive inclusive behavior and increase diversity across the organization. We are united by diversity and thrive on it for the benefit of our employees, our products, our customers and our community. 
Waters is proud to be an equal opportunity workplace and is an affirmative action employer. We are committed to equal employment opportunity regardless of race, color, religion, sex, national origin, sexual orientation, age, marital status, disability, gender identity or protected Veteran status.\n\n\n\nintern, summer intern"
4,3527821880,https://www.linkedin.com/jobs/view/3527821880,Data Engineer,Chatham Financial,https://media.licdn.com/dms/image/C4D0BAQFPJJtAqKZSKA/company-logo_100_100/0/1566565993951?e=1688601600&v=beta&t=ZspCbwfzbemZIQ3Si2Hh0UDFic959gFVNnlZ8r1wHRs,"Kennett Square, PA",On-site,2 weeks ago,116 applicants,Full-time · Entry level,"501-1,000 employees · Financial Services",,"About the job\nOverview\n\nWe don’t simply hire employees. We invest in them. When you work at Chatham, we empower you — offering professional development opportunities to help you grow in your career, no matter if you've been here for five months or 15 years. Chatham has worked hard to create a distinct work environment that values people, teamwork, integrity, and client service. You will have immediate opportunities to partner with talented subject matter experts, work on complex projects, and contribute to the value Chatham delivers every day.\n\nData Insight Solutions team is at the forefront of Chatham’s innovation initiatives. Sitting at the nexus of product, technology, and advisory, Data Insight Solutions aims to enhance Chatham’s capacity to deliver data-driven insight to our clients. The team’s multi-disciplinary skillset, that encompasses both finance and technology with additional focus on data-driven storytelling, enables Data Insight Solutions to synthesize murky vision into crystal-clear insight on which clients can make strategic capital market decisions.\n\n\n\n\n\n\n\n\n\nChatham Financial delivers financial risk management advisory and technology solutions to organizations across industries and around the world by helping companies maximize value in the capital markets. At Chatham, we help businesses improve their bottom lines. But that’s not the only place we’re committed to making an impact. We look beyond our clients, too — enhancing the markets where we work, supporting the clients we partner with, helping our employees thrive, and giving back to the world at large. 
And when you join Chatham, you’re an integral part of this equation, enabling us to live this purpose-driven philosophy every day."


In [35]:
# Column overview: the non-null counts show which fields were often missing on the job pages.
job_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162 entries, 0 to 161
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Job ID            162 non-null    object
 1   Job URL           162 non-null    object
 2   Name              162 non-null    object
 3   Company           162 non-null    object
 4   Company Logo URL  162 non-null    object
 5   Location          162 non-null    object
 6   Workplace Type    141 non-null    object
 7   Time Posted       125 non-null    object
 8   Applicants Count  125 non-null    object
 9   Job Overview      162 non-null    object
 10  Company Overview  162 non-null    object
 11  HR URL            47 non-null     object
 12  Job Details       162 non-null    object
dtypes: object(13)
memory usage: 16.6+ KB


### 5. SAVE DATAFRAME TO CSV FILE

In [32]:
# The CSV reuses the stem of the job ID file it was built from, so the two
# files can be matched by name.
job_info_dir = os.path.join(PROJECT_DIR, INFO_DIR_NAME)
csv_name = os.path.splitext(os.path.basename(JOB_FILE_PATH))[0] + '.csv'
JOB_INFO_PATH = os.path.join(job_info_dir, csv_name)

os.makedirs(job_info_dir, exist_ok=True)  # create the folder on first run
job_df.to_csv(JOB_INFO_PATH, index=False)

# Log the time of the save. job_id_scrape_time only exists when the IDs were
# scraped in this session, not when they were loaded from an existing file.
print(f'[{datetime.now().strftime(r"%Y/%m/%d %H:%M:%S")}] Updated job dir at {JOB_INFO_PATH}')