In [None]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time
import re


DB_NAME = "google_repos.db"
URL = "https://github.com/google?tab=repositories"


# Multipliers for abbreviated counts such as "1.2k" or "1.5m".
_STAR_SUFFIXES = {"k": 1000, "m": 1000000, "b": 1000000000}

# A number (thousands separators and a decimal part allowed) immediately
# followed by a magnitude suffix, e.g. "1.2k", "12,3 k", "1.5m".
_ABBREVIATED_COUNT = re.compile(r"(\d[\d,]*(?:\.\d+)?)\s*([kmb])\b")


def parse_stars(star_text):
    """Convert a star-count label into an integer.

    Args:
        star_text: Text of the star counter, e.g. "3,843", "1.2k" or
            "1.5m". May be None or empty.

    Returns:
        The star count as an int. Returns 0 when the text is empty or
        contains no digits.
    """
    if not star_text:
        return 0
    text = star_text.strip().lower()

    abbreviated = _ABBREVIATED_COUNT.search(text)
    if abbreviated:
        number, suffix = abbreviated.groups()
        # round() rather than int(): float products such as 12.3 * 1000 can
        # land a hair above or below the intended integer.
        return round(float(number.replace(",", "")) * _STAR_SUFFIXES[suffix])

    # Plain count: keep only the digits so separators like "," are ignored.
    digits = re.sub(r"[^\d]", "", text)
    return int(digits) if digits else 0


# Create the database and connect to it

conn = sqlite3.connect(DB_NAME)
cur = conn.cursor()

# Drop any table left over from a previous run, then recreate it empty.
for statement in (
    "DROP TABLE IF EXISTS repos",
    """
CREATE TABLE repos (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT,
    language TEXT,
    stars INTEGER
)
""",
):
    cur.execute(statement)
conn.commit()


# Scrape the repository list and save it to the DB

try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(URL, timeout=10)
    res.raise_for_status()
except requests.exceptions.RequestException as err:
    conn.close()
    # Stop with a visible reason and a non-zero exit status instead of
    # ending silently.
    raise SystemExit(f"Failed to fetch {URL}: {err}")

soup = BeautifulSoup(res.text, "html.parser")
repos = soup.select("li.Box-row")

records = []
for repo in repos:
    # Star count. Rows without any muted link are skipped entirely.
    # NOTE(review): assumes the first a.Link--muted anchor in a row is the
    # star counter — confirm against the current GitHub markup.
    star_tag = repo.select_one("a.Link--muted")
    if star_tag is None:
        continue
    stars = parse_stars(star_tag.text.strip())

    # Repository name
    name_tag = repo.select_one("a[data-hovercard-type='repository']")
    name = name_tag.text.strip() if name_tag else "N/A"

    # Primary language (None when the row does not list one)
    lang_tag = repo.select_one("span[itemprop='programmingLanguage']")
    language = lang_tag.text.strip() if lang_tag else None

    records.append((name, language, stars))

# Only one HTTP request is made above, so no per-row delay is needed; all
# rows are written in a single batch and committed once.
cur.executemany(
    "INSERT INTO repos (name, language, stars) VALUES (?, ?, ?)",
    records,
)
conn.commit()
# Read the saved rows back with SELECT and print them

# Rows come back ordered from the most-starred repository to the least.
rows = cur.execute(
    "SELECT name, language, stars FROM repos ORDER BY stars DESC"
).fetchall()

print("\n=== DBに保存されたデータ ===")
for record in rows:
    print(record)

conn.close()


=== DBに保存されたデータ ===
('angle', 'C++', 3843)
('zerocopy', 'Rust', 2080)
('tunix', 'Python', 1916)
('meridian', 'Python', 1186)
('nearby', 'C++', 888)
('orbax', 'Python', 455)
('go-attestation', 'Go', 406)
('cel-java', 'Java', 227)
('skia-buildbot', 'Go', 158)
('device-infra', 'Java', 58)
