In [8]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Start a Chrome browser session and create one shared explicit wait
# that gives up after 20 seconds.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, timeout=20)

# Listing URL shared by every role; the job-category id appears both in the
# path and in the `selected` query parameter.
_LISTING_URL = (
    'https://www.wanted.co.kr/wdlist/518/{job_id}'
    '?country=kr&job_sort=job.recommend_order&years=-1'
    '&selected={job_id}&locations=all'
)

# (role name, job-category id) pairs, in the order they are scraped.
_ROLE_IDS = (
    ('머신러닝 엔지니어', 1634),
    ('데이터 엔지니어', 655),
    ('데이터 사이언티스트', 1024),
    ('빅데이터 엔지니어', 1025),
)

# Mapping of each role name to its listing URL.
categories = [
    {'role': role_name, 'url': _LISTING_URL.format(job_id=job_id)}
    for role_name, job_id in _ROLE_IDS
]

# Accumulates one record (dict) per job posting across every category.
all_job_data = []

# Seconds to sleep after each scroll before re-measuring the page height.
scroll_pause_time = 2

# Upper bound on scrolls per listing page, so a page whose height never stops
# growing cannot hang the run.
MAX_SCROLLS = 500

# XPath of the job-posting list items on a listing page.
JOB_LIST_XPATH = '//*[@id="__next"]/div[3]/div[2]/ul/li'


def _scroll_to_bottom(browser, pause, limit):
    """Scroll the page down until its height stops growing.

    Args:
        browser: Object exposing ``execute_script`` (the Selenium driver).
        pause: Seconds to sleep after each scroll before re-measuring.
        limit: Maximum number of scrolls to perform.

    Returns:
        True if the page height stabilised, False if ``limit`` scrolls were
        performed and the page was still growing.
    """
    height = browser.execute_script("return document.body.scrollHeight")
    for _ in range(limit):
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        grown = browser.execute_script("return document.body.scrollHeight")
        if grown == height:
            return True
        height = grown
    return False


def _extract_job(element, role):
    """Build one posting record from a job list item.

    Args:
        element: The list-item element of a single posting.
        role: Role name to store alongside the posting.

    Returns:
        Dict with the keys ``role``, ``title``, ``company``, ``location``
        and ``url``.

    Raises:
        Whatever ``find_element`` raises when one of the expected child
        nodes is missing; no partial record is returned in that case.
    """
    job_title = element.find_element(By.XPATH, './div/a/div[2]/span[1]').text
    company_name = element.find_element(By.XPATH, './div/a/div[2]/span[2]').text
    location = element.find_element(By.XPATH, './div/a/div[2]/span[3]').text
    job_url = element.find_element(By.XPATH, './div/a').get_attribute('href')
    return {
        'role': role,
        'title': job_title,
        'company': company_name,
        'location': location,
        'url': job_url,
    }


for category in categories:
    role = category['role']
    print(f"Fetching data for role: {role}")
    driver.get(category['url'])
    time.sleep(5)  # wait for the page to load

    # Scroll to the end of the page before collecting the list items.
    if not _scroll_to_bottom(driver, scroll_pause_time, MAX_SCROLLS):
        print(f"Stopped scrolling for {role} after {MAX_SCROLLS} scrolls")

    # Fetch the posting list. The handler is deliberately broad: a failure on
    # one category is reported and skipped so the other categories still run.
    try:
        job_elements = wait.until(
            EC.presence_of_all_elements_located((By.XPATH, JOB_LIST_XPATH))
        )
    except Exception as e:
        print(f"Error locating job elements for {role}: {e}")
        job_elements = []

    # Extract each posting; a posting that fails is reported and skipped.
    for job_element in job_elements:
        try:
            all_job_data.append(_extract_job(job_element, role))
        except Exception as e:
            print(f"Error extracting job data for {role}: {e}")

# Save every collected record to a CSV file, header row first.
csv_file = 'wanted_jobs.csv'
columns = ['role', 'title', 'company', 'location', 'url']
with open(csv_file, 'w', newline='', encoding='utf-8') as handle:
    csv_writer = csv.DictWriter(handle, fieldnames=columns)
    csv_writer.writeheader()
    for record in all_job_data:
        csv_writer.writerow(record)

print(f"Data saved to {csv_file}. Total records: {len(all_job_data)}")

# The browser session is not closed here; call driver.quit() once it is no
# longer needed.


Fetching data for role: 머신러닝 엔지니어
Fetching data for role: 데이터 엔지니어
Fetching data for role: 데이터 사이언티스트
Fetching data for role: 빅데이터 엔지니어
Data saved to wanted_jobs.csv. Total records: 806
