In [25]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm

# データを保存するリスト
job_title = []
unit_price = []
required_experience = []
details_page_link = []
representative_name = []
headquarters_locations = []
industry = []
contact_number = []
company_info_link = []
all_data = []

# ページ番号を1から96までループ
for page_number in tqdm(range(1, 3), desc="Processing pages"):
    try:
        url = f'https://next.rikunabi.com/rnc/docs/cp_s00700.jsp?page={page_number}&cur=ACgAAQAoAAAAAAAAAAAAAAACMT4N4gECAQkUBgU6vVJcMq7CpmpPjbzo3HJe4dF7UXdwgM%2BXoqqxn4oQb50yRo6ag3iAKyyeqyFDcEzcwkGZDsn8prbqAeX62tWArTu1z3hoh%2FE%3D&cur_p=2&occupation_cd=EHPW9&wrk_plc_long_cd=0313113103&wrk_plc_long_cd=0313113105&wrk_plc_long_cd=0313113106&wrk_plc_long_cd=0313113107&wrk_plc_long_cd=0313113108&wrk_plc_long_cd=0313113109&wrk_plc_long_cd=0313113110&wrk_plc_long_cd=0313113112&wrk_plc_long_cd=0313113113&wrk_plc_long_cd=0313113114&wrk_plc_long_cd=0313113115&wrk_plc_long_cd=0313113116&wrk_plc_long_cd=0313113117&wrk_plc_long_cd=0313113118&wrk_plc_long_cd=0313113119&wrk_plc_long_cd=0313113120&wrk_plc_long_cd=0313113121&wrk_plc_long_cd=0313113122&wrk_plc_long_cd=0313113123&employ_frm_cd=01'
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # 各求人情報を取得
        job_elements = soup.select("li.rnn-jobOfferList__item")
        total_jobs = len(job_elements)

        # 各求人情報をループ
        for job_element in tqdm(job_elements, total=total_jobs, desc=f"Processing page {page_number} jobs", leave=False):
            # 各情報を取得
            title = job_element.select_one("h2.rnn-textLl.js-abScreen__title").text.strip()

            try:
                income_details = job_element.select_one("tr.js-abScreen__income td.rnn-offerDetail__text").text.strip()
                income_details_cleaned = ' '.join(income_details.split())
            except:
                income_details_cleaned = ''

            try:
                requirements = job_element.select_one("tr.js-abScreen__prefer td.rnn-offerDetail__text").text.strip()
            except:
                requirements = ''

            # 詳細ページのリンクを取得
            link = job_element.select_one("a.rnn-linkText.rnn-linkText--black")['href']
            details_url = f'https://next.rikunabi.com{link}'
            details_response = requests.get(details_url)
            details_soup = BeautifulSoup(details_response.content, 'html.parser')

            # 「リクルートエージェントからの求人」が含まれているか確認
            if 'リクルートエージェントからの求人' in details_soup.get_text():
                company_name = details_soup.select_one('p.rnn-offerCompanyName').text.strip()

                table_rows = details_soup.select('table.rnn-detailTable tr.rnn-tableGrid')

                def get_value_from_table(row_title):
                    for row in table_rows:
                        th_element = row.select_one('th')
                        if th_element and th_element.text.strip() == row_title:
                            td_element = row.select_one('td')
                            return td_element.text.strip() if td_element else ''
                    return ''

                ceo_name = get_value_from_table('代表者')
                headquarters_location = get_value_from_table('事業所')
                industry_heading = get_value_from_table('業種')

            else:
                syosai_elements = details_soup.find_all('span', class_='rn3-companyOfferTabMenu__navItemText')

                if len(syosai_elements) == 2:
                    # URLの/nx1の部分を/nx2に変更
                    modified_url = details_url.replace('/nx1', '/nx2')
                    # 変更後のURLにアクセス
                    response = requests.get(modified_url)
                    details_soup = BeautifulSoup(response.content, 'html.parser')  # ここを修正
                    details_url = modified_url
                    
                company_info = details_soup.select_one('div.rn3-companyOfferCompany')

                try:
                    company_name = company_info.find('h3', string='社名').find_next_sibling('p', class_='rn3-companyOfferCompany__text').get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                except:
                    company_name = ''

                try:
                    ceo_name = company_info.find('h3', string='代表者').find_next_sibling('p', class_='rn3-companyOfferCompany__text').get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                except:
                    ceo_name = ''

                try:
                    headquarters_location = company_info.find('h3', string='本社所在地').find_next_sibling('p', class_='rn3-companyOfferCompany__text').get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                except:
                    headquarters_location = ''

                try:
                    industry_headings = company_info.find('h3', string='業種').find_next_sibling('p', class_='rn3-companyOfferCompany__text').get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                    parts = industry_headings.split('/')
                    industry_heading = parts[0].replace('\xa0', ' ').replace('\n', ' ')
                except:
                    industry_heading = ''

                try:
                    contact_heading = details_soup.find('h3', class_='rn3-companyOfferEntry__heading', string='連絡先')
                    contact_info_div = contact_heading.find_next_sibling('div')
                    contact_info_text = contact_info_div.get_text(separator='\n', strip=True)
                    contact_info_text = contact_info_text.replace('\xa0', ' ')
                    contact_info_text = ' '.join(contact_info_text.split())
                    page = contact_info_div.find('a')
                    url = 'https://next.rikunabi.com/' + page['href']
                except:
                    url = ''
                    contact_info_text = ''

            # リストに情報を追加
            job_title.append(title)
            unit_price.append(income_details_cleaned)
            required_experience.append(requirements)
            details_page_link.append(details_url)
            representative_name.append(ceo_name)
            headquarters_locations.append(headquarters_location)
            industry.append(industry_heading)
            contact_number.append(contact_info_text)
            company_info_link.append(url)

            data = {
                "job_title": title,
                "company_name": company_name,
                "contact_number": contact_info_text,
                "industry": industry_heading,
                "representative_name": ceo_name,
                "company_info_link": url,
                "unit_price": income_details_cleaned,
                "headquarters_location": headquarters_location,
                "qualification": '',  # qualification の情報がどこにあるかによって変更
                "required_experience": requirements,
                "details_page_link": details_url,
                "contact_url": url
            }

            all_data.append(data)
    except Exception as e:
        print(f"Error processing page {page_number}: {e}")
        continue

# データフレームに変換
df1 = pd.DataFrame(all_data)

Processing pages:   0%|          | 0/2 [00:00<?, ?it/s]
Processing page 1 jobs:   0%|          | 0/50 [00:00<?, ?it/s][A
Processing page 1 jobs:   2%|▏         | 1/50 [00:00<00:37,  1.31it/s][A
Processing page 1 jobs:   4%|▍         | 2/50 [00:01<00:38,  1.23it/s][A
Processing page 1 jobs:   6%|▌         | 3/50 [00:02<00:36,  1.29it/s][A
Processing page 1 jobs:   8%|▊         | 4/50 [00:03<00:35,  1.29it/s][A
Processing page 1 jobs:  10%|█         | 5/50 [00:03<00:33,  1.35it/s][A
Processing page 1 jobs:  12%|█▏        | 6/50 [00:04<00:32,  1.37it/s][A
Processing page 1 jobs:  14%|█▍        | 7/50 [00:05<00:30,  1.39it/s][A
Processing page 1 jobs:  16%|█▌        | 8/50 [00:05<00:30,  1.39it/s][A
Processing page 1 jobs:  18%|█▊        | 9/50 [00:06<00:30,  1.36it/s][A
Processing page 1 jobs:  20%|██        | 10/50 [00:07<00:28,  1.38it/s][A
Processing page 1 jobs:  22%|██▏       | 11/50 [00:08<00:28,  1.35it/s][A
Processing page 1 jobs:  24%|██▍       | 12/50 [00:08<00:28,  

In [26]:
df1.to_csv('test.csv', index=False, encoding='utf-8')

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm

# データを保存するリスト
job_title = []
unit_price = []
required_experience = []
details_page_link = []
representative_name = []
headquarters_locations = []
industry = []
contact_number = []
company_info_link = []
all_data = []

# ページ番号を1から96までループ
for page_number in tqdm(range(1, 3), desc="Processing pages"):
    try:
        url = f'https://next.rikunabi.com/rnc/docs/cp_s00700.jsp?page={page_number}&cur=ACgAAQAoAAAAAAAAAAAAAAACMT4N4gECAQkUBgU6vVJcMq7CpmpPjbzo3HJe4dF7UXdwgM%2BXoqqxn4oQb50yRo6ag3iAKyyeqyFDcEzcwkGZDsn8prbqAeX62tWArTu1z3hoh%2FE%3D&cur_p=2&occupation_cd=EHPW9&wrk_plc_long_cd=0313113103&wrk_plc_long_cd=0313113105&wrk_plc_long_cd=0313113106&wrk_plc_long_cd=0313113107&wrk_plc_long_cd=0313113108&wrk_plc_long_cd=0313113109&wrk_plc_long_cd=0313113110&wrk_plc_long_cd=0313113112&wrk_plc_long_cd=0313113113&wrk_plc_long_cd=0313113114&wrk_plc_long_cd=0313113115&wrk_plc_long_cd=0313113116&wrk_plc_long_cd=0313113117&wrk_plc_long_cd=0313113118&wrk_plc_long_cd=0313113119&wrk_plc_long_cd=0313113120&wrk_plc_long_cd=0313113121&wrk_plc_long_cd=0313113122&wrk_plc_long_cd=0313113123&employ_frm_cd=01'
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # 各求人情報を取得
        job_elements = soup.select("li.rnn-jobOfferList__item")
        total_jobs = len(job_elements)

        # 各求人情報をループ
        for job_element in tqdm(job_elements, total=total_jobs, desc=f"Processing page {page_number} jobs", leave=False):
            # 各情報を取得
            title = job_element.select_one("h2.rnn-textLl.js-abScreen__title").text.strip()

            try:
                income_details = job_element.select_one("tr.js-abScreen__income td.rnn-offerDetail__text").text.strip()
                income_details_cleaned = ' '.join(income_details.split())
            except:
                income_details_cleaned = ''

            try:
                requirements = job_element.select_one("tr.js-abScreen__prefer td.rnn-offerDetail__text").text.strip()
            except:
                requirements = ''

            # 詳細ページのリンクを取得
            link = job_element.select_one("a.rnn-linkText.rnn-linkText--black")['href']
            details_url = f'https://next.rikunabi.com{link}'
            details_response = requests.get(details_url)
            
            
            details_soup = BeautifulSoup(details_response.content, 'html.parser')
            
            if 

            # 「リクルートエージェントからの求人」が含まれているか確認
            if 'リクルートエージェントからの求人' in details_soup.get_text():
                company_name = details_soup.select_one('p.rnn-offerCompanyName').text.strip()

                table_rows = details_soup.select('table.rnn-detailTable tr.rnn-tableGrid')

                def get_value_from_table(row_title):
                    for row in table_rows:
                        th_element = row.select_one('th')
                        if th_element.text.strip() == row_title:
                            td_element = row.select_one('td')
                            return td_element.text.strip()
                    return ''

                ceo_name = get_value_from_table('代表者')
                headquarters_location = get_value_from_table('事業所')
                industry_heading = get_value_from_table('業種')

            else:
                
                syosai_elements = details_soup.find_all('span', class_='rn3-companyOfferTabMenu__navItemText')

                if len(syosai_elements) == 2:
                # URLの/nx1の部分を/nx2に変更
                    original_url = "https://next.rikunabi.com/company/cmi4259035001/nx1_rq0027219998/?list_disp_no=9&jrtk=5-nrt1-0-1i62insqgh9f3800-PPP0027219998%252D0627027213&refnum=0027219998-0627027213&leadtc=n_ichiran_cst_n5_ttl"
                    modified_url = original_url.replace('/nx1', '/nx2')
                    # 変更後のURLにアクセス
                    response = requests.get(modified_url)
                    details_soup = BeautifulSoup(details_response.content, 'html.parser')

                
                company_info = details_soup.select_one('div.rn3-companyOfferCompany')

                try:
                    company_name = company_info.find('h3', string='社名').find_next_sibling('p', class_='rn3-companyOfferCompany__text').get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                except:
                    company_name = ''

                try:
                    ceo_name = company_info.find('h3', string='代表者').find_next_sibling('p', class_='rn3-companyOfferCompany__text').get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                except:
                    ceo_name = ''

                try:
                    headquarters_location = company_info.find('h3', string='本社所在地').find_next_sibling('p', class_='rn3-companyOfferCompany__text').get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                except:
                    headquarters_location = ''

                try:
                    industry_headings = company_info.find('h3', string='業種').find_next_sibling('p', class_='rn3-companyOfferCompany__text').get_text(strip=True).replace('\xa0', ' ').replace('\n', ' ')
                    parts = industry_headings.split('/')
                    industry_heading = parts[0].replace('\xa0', ' ').replace('\n', ' ')
                except:
                    industry_heading = ''

                try:
                    contact_heading = details_soup.find('h3', class_='rn3-companyOfferEntry__heading', string='連絡先')
                    contact_info_div = contact_heading.find_next_sibling('div')
                    contact_info_text = contact_info_div.get_text(separator='\n', strip=True)
                    contact_info_text = contact_info_text.replace('\xa0', ' ')
                    contact_info_text = ' '.join(contact_info_text.split())
                    page = contact_info_div.find('a')
                    url = 'https://next.rikunabi.com/' + page['href']
                except:
                    url = ''
                    contact_info_text = ''

            # リストに情報を追加
            job_title.append(title)
            unit_price.append(income_details_cleaned)
            required_experience.append(requirements)
            details_page_link.append(details_url)
            representative_name.append(ceo_name)
            headquarters_locations.append(headquarters_location)
            industry.append(industry_heading)
            contact_number.append(contact_info_text)
            company_info_link.append(url)

            data = {
                "job_title": title,
                "company_name": company_name,
                "contact_number": contact_info_text,
                "industry": industry_heading,
                "representative_name": ceo_name,
                "company_info_link": url,
                "unit_price": income_details_cleaned,
                "headquarters_location": headquarters_location,
                "qualification": '',  # qualification の情報がどこにあるかによって変更
                "required_experience": requirements,
                "details_page_link": details_url,
                "contact_url": url
            }

            all_data.append(data)
    except Exception as e:
        print(f"Error processing page {page_number}: {e}")
        continue

# データフレームに変換
df1 = pd.DataFrame(all_data)
print(df1)

Processing pages:   0%|          | 0/2 [00:00<?, ?it/s]
Processing page 1 jobs:   0%|          | 0/50 [00:00<?, ?it/s][A
Processing page 1 jobs:   2%|▏         | 1/50 [00:00<00:20,  2.44it/s][A
Processing page 1 jobs:   4%|▍         | 2/50 [00:00<00:20,  2.32it/s][A
Processing page 1 jobs:   6%|▌         | 3/50 [00:01<00:21,  2.17it/s][A
Processing page 1 jobs:   8%|▊         | 4/50 [00:02<00:39,  1.15it/s][A
Processing page 1 jobs:  10%|█         | 5/50 [00:03<00:31,  1.44it/s][A
Processing page 1 jobs:  12%|█▏        | 6/50 [00:03<00:26,  1.64it/s][A
Processing page 1 jobs:  14%|█▍        | 7/50 [00:04<00:23,  1.83it/s][A
Processing page 1 jobs:  16%|█▌        | 8/50 [00:04<00:20,  2.04it/s][A
Processing page 1 jobs:  18%|█▊        | 9/50 [00:04<00:19,  2.07it/s][A
Processing page 1 jobs:  20%|██        | 10/50 [00:05<00:19,  2.09it/s][A
Processing page 1 jobs:  22%|██▏       | 11/50 [00:05<00:18,  2.14it/s][A
Processing page 1 jobs:  24%|██▍       | 12/50 [00:06<00:17,  

                                job_title company_name contact_number  \
0          【還元率80％超】上場G・案件選択可・年休130｜SE・PG                               
1           未経験歓迎＊新しい一歩をサポート◎IT関連事務・PG/s3                               
2          上場グループ・前給保証・案件選択可・年休130日｜SE・PG                               
3          未経験から安心！充実研修あり【ITエンジニア】残業12.8h                               
4          創りたい！が自社で作れるゲームエンジニアへ（IT・Web等）                               
..                                    ...          ...            ...   
95                          ゲームエンジニア・テスター   株式会社ZOSTEC                  
96               【ホワイトハッカー】未経験育成・正社員・専門研修     株式会社ジェーン                  
97   \\ 未経験育成//【ITエンジニア】　正社員/研修あり　完全在宅も叶う      株式会社ミクス                  
98                                ITエンジニア     株式会社ソリット                  
99  【北千住】最先端AIエンジニア//Python//スクール研修//未経験可     (株)AIベース                  

   industry representative_name company_info_link  \
0                                                   
1                




In [17]:
url = f'https://next.rikunabi.com/company/cmi3109485001/nx1_rq0027227217/?list_disp_no=33&jrtk=5-nrt1-0-1i62insqgh9f3800-PPP0027227217%252D0313113103&refnum=0027227217-0313113103&leadtc=n_ichiran_cst_n4_ttl'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

In [18]:
syosai_elements = soup.find_all('span', class_='rn3-companyOfferTabMenu__navItemText')

if len(syosai_elements) == 2:
    # 要素が2つある場合の処理 (A)
    syosai_texts = [element.text for element in syosai_elements]
    # 例: 取得したテキストを表示
    print("要素が2つあります:", syosai_texts)
else:
    # 要素が2つでない場合の処理 (B)
    # 例: エラーメッセージを表示
    print("要素が2つではありません。要素の数:", len(syosai_elements))

要素が2つではありません。要素の数: 1


In [15]:
syosai_texts

['企業からのメッセージ', '求人情報']

In [None]:
if len(syosai_elements) == 2:
    # URLの/nx1の部分を/nx2に変更
    original_url = "https://next.rikunabi.com/company/cmi4259035001/nx1_rq0027219998/?list_disp_no=9&jrtk=5-nrt1-0-1i62insqgh9f3800-PPP0027219998%252D0627027213&refnum=0027219998-0627027213&leadtc=n_ichiran_cst_n5_ttl"
    modified_url = original_url.replace('/nx1', '/nx2')
    print("Modified URL:", modified_url)
    
    # 変更後のURLにアクセス
    response = requests.get(modified_url)