In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service


# Browser session shared by every scraping cell in this notebook.
options = Options()
options.add_argument('--headless')

driver = webdriver.Chrome(
    options=options,
    service=Service(executable_path="/usr/bin/chromedriver"),
)

# NOTE(review): an explicit size is requested and then the window is maximized;
# in headless mode the second call may override or ignore the first - confirm
# which of the two is actually intended.
driver.set_window_size(width=1920, height=1080)
driver.maximize_window()

In [2]:
import random
import time
from urllib.parse import urlsplit

import pandas as pd
from PIL import Image
from selenium.webdriver.common.by import By
from tqdm.auto import tqdm


def extract_info(element):
    """Collect the summary fields shown on one job card of the listing page.

    Args:
        element: Selenium element (anything exposing ``find_element``) for a
            single job card that contains ``job-title``, ``job-company`` and
            ``job-tags`` children.

    Returns:
        dict with the keys ``title``, ``url`` (the ``href`` of the title
        element), ``company`` and ``tags`` (list of non-empty tag strings).
    """
    title_link = element.find_element(By.CLASS_NAME, 'job-title')
    company = element.find_element(By.CLASS_NAME, 'job-company').text
    tags_text = element.find_element(By.CLASS_NAME, 'job-tags').text
    # One tag per rendered line. Blank lines are dropped so a card without
    # tags yields [] instead of [''].
    tags = [line.strip() for line in tags_text.split('\n') if line.strip()]
    return {
        'title': title_link.text,
        'url': title_link.get_attribute('href'),
        'company': company,
        'tags': tags,
    }

def _strip_utm_params(url):
    """Return ``url`` without any ``utm_*`` query parameters, keeping the rest."""
    try:
        parts = urlsplit(url)
    except ValueError:
        # Unparseable URL: leave it exactly as scraped.
        return url
    if not parts.query:
        return url
    pairs = parts.query.split('&')
    kept = [pair for pair in pairs if not pair.lower().startswith('utm_')]
    if len(kept) == len(pairs):
        # Nothing was removed, so hand back the original string untouched.
        return url
    return parts._replace(query='&'.join(kept)).geturl()


def extract_description(element):
    """Read the detail fields of a single job page.

    Args:
        element: object exposing ``find_element`` (the caller in this notebook
            passes the driver itself after loading the job page).

    Returns:
        dict with the keys:
          * ``location``    - text of the ``job__company`` element (in the sample
            output this reads like "<company> in <place>")
          * ``salary``      - text of the ``job__salary`` element
          * ``description`` - text of the ``job__description`` element
          * ``orig_url``    - ``href`` of the ``link-whole-page`` element with
            ``utm_*`` tracking parameters removed; left as-is (possibly ``None``)
            when the element has no ``href``.
    """
    location = element.find_element(By.CLASS_NAME, 'job__company').text
    salary = element.find_element(By.CLASS_NAME, 'job__salary').text
    description = element.find_element(By.CLASS_NAME, 'job__description').text
    orig_url = element.find_element(By.CLASS_NAME, 'link-whole-page').get_attribute('href')
    # get_attribute returns None when the attribute is absent; only clean real URLs.
    if orig_url:
        orig_url = _strip_utm_params(orig_url)
    return {'location': location, 'salary': salary, 'description': description, 'orig_url': orig_url}

def extract_one_page(url):
    """Scrape one listing page and the detail page of every job on it.

    Loads ``url`` in the module-level ``driver``, builds one summary dict per
    job card with ``extract_info``, then visits each job's own ``url`` and
    merges the fields from ``extract_description`` into that dict.

    Args:
        url: address of the listing page.

    Returns:
        list of dicts, one per job card, in page order.
    """
    driver.get(url)
    time.sleep(1)  # fixed pause after opening the listing page

    # Every card is read before the loop below points the driver at other
    # pages; the card elements belong to the listing page.
    jobs = []
    for card in driver.find_elements(By.CLASS_NAME, 'job-title-company-tags'):
        jobs.append(extract_info(card))

    for job in tqdm(jobs):
        driver.get(job['url'])
        time.sleep(random.random())  # random pause of under one second per job page
        job.update(extract_description(driver))
    return jobs

In [3]:
# Listing page to scrape.
website_url = 'https://bestaijobs.com/'

# Scrape every job card on that page plus each job's own detail page.
# This loads one page per job in the shared driver, so it is the slow cell.
info = extract_one_page(website_url)

  0%|          | 0/30 [00:00<?, ?it/s]

In [4]:
# One row per scraped job; the columns come from the keys of each job dict.
df = pd.DataFrame(info)
# Preview the first rows as a sanity check of the scrape.
df.head()

Unnamed: 0,title,url,company,tags,location,salary,description,orig_url
0,Sr. Software Engineer,https://bestaijobs.com/sr-software-engineer-am...,Ambientai,"[San Jose, California, United States, Featured]","Ambientai in San Jose, California, United States",,Ambient.ai is an AI company headquartered in P...,https://boards.greenhouse.io/ambientai/jobs/42...
1,[ Contract ] Instagram and TikTok Marketing Head,https://bestaijobs.com/contract-instagram-and-...,Interior Ai,"[Remote, Featured]",Interior Ai in Remote,,Email interioraijob@interiorai.com with a list...,mailto:interioraijob@interiorai.com
2,Software Support Representative,https://bestaijobs.com/software-support-repres...,Runwayml,"[New York, New York, United States, Remote, $5...","Runwayml in New York, New York, United States","$50,000 - $75,000","At Runway, we believe everyone has a story to ...",https://boards.greenhouse.io/runwayml/jobs/411...
3,"Manager, Customer Support",https://bestaijobs.com/manager-customer-suppor...,Runwayml,"[New York, New York, United States, $90,000 - ...","Runwayml in New York, New York, United States","$90,000 - $120,000","At Runway, we believe everyone has a story to ...",https://boards.greenhouse.io/runwayml/jobs/428...
4,"Senior Manager, Partner Development",https://bestaijobs.com/senior-manager-partner-...,Whatnot,"[Remote - North America, Remote, $20,000]",Whatnot in Remote - North America,"$20,000",🚀 Whatnot\nWhatnot is a livestream shopping pl...,https://boards.greenhouse.io/whatnot/jobs/4908...


In [5]:
# Save the scraped table as JSON, relative to the current working directory.
# Only the path is passed, so pandas' default JSON layout is used.
df.to_json('ai-jobs-homepage0618.json')