In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

from webdriver_manager.chrome import ChromeDriverManager

# Configure Chrome. A visible (non-headless) browser window is the default, so
# no option has to be set for it. The old `options.headless = ...` property was
# deprecated in Selenium 4.8 and removed in later 4.x releases; to run headless
# use `options.add_argument("--headless=new")` instead.
options = Options()

# Resolve the chromedriver binary through webdriver-manager and hand its path
# to the Service object
service = Service(ChromeDriverManager().install())

# Launch the browser
driver = webdriver.Chrome(service=service, options=options)

# Open the target page
url = "https://doge.gov/savings"
driver.get(url)



In [7]:
# Click the "see more" button, wait for the additional rows, then parse the
# table out of the rendered HTML.
from bs4 import BeautifulSoup
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

WAIT_SECONDS = 5  # same time budget the previous implicit wait used
ROW_SELECTOR = "table.min-w-full tr"

# Explicit waits are used instead of driver.implicitly_wait(): an implicit wait
# only sets the timeout for later find_element calls, it does not pause until
# the page has finished updating after the click.
wait = WebDriverWait(driver, WAIT_SECONDS)

# Wait until the "see more" cell is actually present and clickable
see_more_button = wait.until(
    EC.element_to_be_clickable((By.XPATH, "//td[text()='see more']"))
)

# Remember how many rows are rendered so the expansion can be detected
# NOTE(review): assumes clicking "see more" adds <tr> rows to a
# table.min-w-full element - confirm against the live page.
rows_before_click = len(driver.find_elements(By.CSS_SELECTOR, ROW_SELECTOR))
see_more_button.click()

try:
    wait.until(
        lambda d: len(d.find_elements(By.CSS_SELECTOR, ROW_SELECTOR)) > rows_before_click
    )
except TimeoutException:
    # Best effort: keep going with whatever is rendered, but say so
    print(f"Row count did not grow within {WAIT_SECONDS}s; parsing the page as currently rendered.")

# Grab the rendered HTML of the page
html = driver.page_source

# Parse it with BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

# Locate the table
table = soup.find('table', {'class': 'min-w-full'})
if table is None:
    # Fail with a clear message rather than an AttributeError on None below
    raise RuntimeError("No <table class='min-w-full'> found in the page source")

# Collect every table row (header row first)
rows = table.find_all('tr')

In [13]:
import pandas as pd

# Column names: stripped text of every <th> in the first (header) row
headers = []
for th in rows[0].find_all('th'):
    headers.append(th.get_text(strip=True))

# One record per remaining row: stripped text of each of its <td> cells
# (rows[0] is the header, so it is excluded)
data = [
    [td.get_text(strip=True) for td in tr.find_all('td')]
    for tr in rows[1:]
]

# Assemble the records into a DataFrame
df = pd.DataFrame(data, columns=headers)

In [14]:
# Preview the first rows of the scraped table (the Link column is still empty
# at this point because only cell text was extracted)
df.head()

Unnamed: 0,Agency,Description,Uploaded on,Link,Value
0,BUREAU OF LAND MANAGEMENT,EEO DEIA SUPPORT,2/13/2025,,"$440,000"
1,COMMITTEE FOR PURCHASE FROM PEOPLE WHO ARE BLI...,Legal Database that allows ...,2/20/2025,,"$199,305"
2,COMMODITY FUTURES TRADING COMMISSION,RENEW WEST PRINT SUBSCRIPTI...,2/11/2025,,"$21,382.14"
3,COMMODITY FUTURES TRADING COMMISSION,Annual subscription to Poli...,2/13/2025,,"$255,863"
4,CONSUMER FINANCIAL PROTECTION BUREAU,OMWI Training Support - DE...,1/29/2025,,"$9,999,999"


In [16]:
# Rebuild the records, this time storing each row's link URL (the href of the
# <a> inside the Link cell) instead of that cell's text, which is empty because
# the cell only holds an icon.
LINK_COLUMN = 'Link'
link_idx = headers.index(LINK_COLUMN)  # locate the column by name, not by a hard-coded position

data = []
for row in rows[1:]:  # rows[0] is the header row, so process everything after it
    cells = row.find_all('td')[:len(headers)]
    if len(cells) < len(headers):
        # Not a complete data row (fewer cells than columns); indexing into it
        # would raise IndexError, so skip it
        continue

    record = [cell.get_text(strip=True) for cell in cells]

    anchor = cells[link_idx].find('a')  # looked up once and reused
    # .get() returns None for an <a> without an href instead of raising KeyError
    record[link_idx] = anchor.get('href') if anchor is not None else None

    data.append(record)

# Convert to a DataFrame
df = pd.DataFrame(data, columns=headers)
df.head()

Unnamed: 0,Agency,Description,Uploaded on,Link,Value
0,BUREAU OF LAND MANAGEMENT,EEO DEIA SUPPORT,2/13/2025,https://www.fpds.gov/ezsearch/jsp/viewLinkCont...,"$440,000"
1,COMMITTEE FOR PURCHASE FROM PEOPLE WHO ARE BLI...,Legal Database that allows ...,2/20/2025,https://www.fpds.gov/ezsearch/jsp/viewLinkCont...,"$199,305"
2,COMMODITY FUTURES TRADING COMMISSION,RENEW WEST PRINT SUBSCRIPTI...,2/11/2025,https://www.fpds.gov/ezsearch/jsp/viewLinkCont...,"$21,382.14"
3,COMMODITY FUTURES TRADING COMMISSION,Annual subscription to Poli...,2/13/2025,https://www.fpds.gov/ezsearch/jsp/viewLinkCont...,"$255,863"
4,CONSUMER FINANCIAL PROTECTION BUREAU,OMWI Training Support - DE...,1/29/2025,https://www.fpds.gov/ezsearch/jsp/viewLinkCont...,"$9,999,999"


In [17]:
# Show the full, untruncated value of the second-to-last column (Link) for the first row
df.iloc[0,-2]

'https://www.fpds.gov/ezsearch/jsp/viewLinkController.jsp?agencyID=9300&PIID=93310023P0021&modNumber=A00002&idvAgencyID=&idvPIID=&contractType=AWARD'

In [18]:
# Show the full, untruncated value of the second-to-last column (Link) for the second row
df.iloc[1,-2]

'https://www.fpds.gov/ezsearch/jsp/viewLinkController.jsp?agencyID=9518&PIID=95044124P0006&modNumber=P00001&idvAgencyID=&idvPIID=&contractType=AWARD'

In [11]:
# Inspect the raw HTML of the first <tr>, i.e. the header row with the <th> cells
rows[0]

<tr><th class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider cursor-pointer">Agency</th><th class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider cursor-pointer">Description</th><th class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider cursor-pointer">Uploaded on</th><th class="px-6 py-3 text-left text-xs font-medium text-gray-300 uppercase tracking-wider cursor-pointer">Link</th><th class="px-6 py-3 text-right text-xs font-medium text-gray-300 uppercase tracking-wider cursor-pointer">Value</th></tr>

In [12]:
# Inspect the raw HTML of the last <tr> to see how a data row is marked up
rows[-1]

<tr class="hover:bg-gray-900 cursor-pointer"><td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300 truncate max-w-xs" title="USAID">USAID</td><td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300 truncate max-w-xs">New PPL/LER Evaluation IDIQs</td><td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300 truncate max-w-xs">2/12/2025</td><td class="px-6 py-4 whitespace-nowrap text-sm text-gray-300 truncate max-w-xs" title="https://www.fpds.gov/ezsearch/jsp/viewLinkController.jsp?agencyID=7200&amp;PIID=7200AA20D00023&amp;modNumber=P00004&amp;idvAgencyID=&amp;idvPIID=&amp;contractType=IDV"><a href="https://www.fpds.gov/ezsearch/jsp/viewLinkController.jsp?agencyID=7200&amp;PIID=7200AA20D00023&amp;modNumber=P00004&amp;idvAgencyID=&amp;idvPIID=&amp;contractType=IDV" rel="noopener noreferrer" target="_blank"><svg class="lucide lucide-external-link w-5 h-5" fill="none" height="24" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" viewbox="0 