In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Common URL prefix of a company's job-listing page; the company ID is appended to it.
base_url = "https://doda.jp/DodaFront/View/CompanyJobs/j_id__"

# Accumulates one record (dict) per company that was scraped successfully.
data = []

# Read the list of company IDs from the CSV file.
csv_file = 'company_ids.csv'
company_table = pd.read_csv(csv_file)
company_IDs = company_table['Company_ID'].tolist()

# Fetch the job-posting summary of every company and compare it with the
# previous run.

# Load the previous run's posting counts once, before the loop, instead of
# re-reading the CSV for every company.  Everything is read as text so that
# the company code keeps the exact string form that `id_info` has below
# (a numeric dtype would never compare equal to that string).
previous_counts = {}
try:
    previous_data = pd.read_csv('previous_data.csv', dtype=str)
    previous_numeric = pd.to_numeric(previous_data['求人出稿数'], errors='coerce')
    previous_counts = {
        code: int(count)
        for code, count in zip(previous_data['機関コード'], previous_numeric)
        if pd.notna(code) and pd.notna(count)
    }
except (FileNotFoundError, pd.errors.EmptyDataError, KeyError) as e:
    # Without usable previous data the scrape itself is still worthwhile;
    # only the increase comparison is skipped.
    print(f"前回データを読み込めないため、増加分の比較をスキップします: {e}")

for company_id in company_IDs:
    url = f"{base_url}{company_id}/"

    try:
        # Send the HTTP request.  The timeout keeps one unresponsive page from
        # hanging the whole run; an HTTP error status is turned into an
        # exception so that error pages are not parsed as company pages.
        res = requests.get(url, timeout=10)
        res.raise_for_status()

        # Parse the HTML with BeautifulSoup.
        soup = BeautifulSoup(res.text, 'html.parser')

        # Number of open job postings; None when the element is absent.
        # Thousands separators are stripped, as for the employee count.
        recruit_count = soup.find(class_="all_job_count search__listCount")
        n_recruit_count = (
            int(recruit_count.text.replace(",", ""))
            if recruit_count is not None
            else None
        )

        company_name = soup.find(class_="name").find('p').text.strip()

        # The employee count is taken from the second "dateSet" element.
        employee_count = soup.find_all(class_="dateSet")
        n_employee_count = employee_count[1].find('dd').find('span').text
        n_employee_count = n_employee_count.replace(",", "").strip()
        u_employee_count_fix = int(n_employee_count) if n_employee_count else None

        industry = soup.find(class_="measuringEffect_industry_textLink").text.strip()

        job_title = soup.find(class_="seoLinkListOc").text.replace("\n", "").replace(' ', '')

        # The company code is the part of the URL after "id__", without slashes.
        id_info = url.split('id__')[-1].replace('/', '')

        # Compare with the previous run.  Explicit None checks are used so
        # that a previous count of 0 still counts as a valid baseline and a
        # missing current count never reaches the comparison.
        previous_recruit_count = previous_counts.get(id_info)
        if (
            previous_recruit_count is not None
            and n_recruit_count is not None
            and n_recruit_count > previous_recruit_count
        ):
            increase = n_recruit_count - previous_recruit_count
            print(f"{company_name} の求人出稿数が {increase} 増加しました！")
        else:
            increase = None

        # Store the record for this company.
        data.append({
            "会社名": company_name,
            "業種": industry,
            "従業員数": u_employee_count_fix,
            "募集職種": job_title,
            "求人出稿数": n_recruit_count,
            "URL": url,
            "機関コード": id_info,
            "増加分": increase,
        })

    except Exception as e:
        # Deliberately broad: this is the per-company boundary of a
        # best-effort scrape, so one bad page is reported and skipped rather
        # than aborting the run.
        print(f"エラーが発生しました: {e}")

    finally:
        # Keep roughly one second between requests, even after a failure.
        time.sleep(1)

# Convert the collected records to a DataFrame and write them to a CSV file
# (without the index column).
output_csv = 'new_data.csv'
df = pd.DataFrame(data)
df.to_csv(output_csv, index=False)

