In [1]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time

DB_NAME = 'google_repos.db'

def init_db():
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS repositories (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT,
            language TEXT,
            stars TEXT
        )
    ''')
    conn.commit()
    conn.close()

def save_to_db(data_list):
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    
    for data in data_list:
        cursor.execute('''
            INSERT INTO repositories (name, language, stars)
            VALUES (?, ?, ?)
        ''', (data['name'], data['language'], data['stars']))
    
    conn.commit()
    conn.close()
    print(f"{len(data_list)} 件")

def show_data():
    print("\nデータ")
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    
    cursor.execute('SELECT * FROM repositories')
    rows = cursor.fetchall()
    
    for row in rows:
        print(f"ID: {row[0]} | Name: {row[1]} | Lang: {row[2]} | Stars: {row[3]}")
    
    conn.close()

def scrape_google_repos():
    url = "https://github.com/google?tab=repositories"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    
    time.sleep(1)

    if response.status_code != 200:
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    repos_data = []

    repo_list = soup.find_all('li', class_='col-12')

    for repo in repo_list:
        name_tag = repo.find('a', itemprop='name codeRepository')
        name = name_tag.get_text(strip=True) if name_tag else "Unknown"

        lang_tag = repo.find('span', itemprop='programmingLanguage')
        language = lang_tag.get_text(strip=True) if lang_tag else "None"

        star_tag = repo.find('a', href=lambda x: x and x.endswith('/stargazers'))
        stars = star_tag.get_text(strip=True) if star_tag else "0"

        repos_data.append({
            "name": name,
            "language": language,
            "stars": stars
        })
        
        print(f"{name}")

    return repos_data

if __name__ == "__main__":
    init_db()
    
    data = scrape_google_repos()
    
    if data:
        save_to_db(data)
    
    show_data()

Unknown
Unknown
Unknown
Unknown
Unknown
Unknown
6 件

データ
ID: 1 | Name: Unknown | Lang: None | Stars: 52.6k
ID: 2 | Name: Unknown | Lang: Java | Stars: 51.3k
ID: 3 | Name: Unknown | Lang: JavaScript | Stars: 44.9k
ID: 4 | Name: Unknown | Lang: HTML | Stars: 38.7k
ID: 5 | Name: Unknown | Lang: C++ | Stars: 38.4k
ID: 6 | Name: Unknown | Lang: C++ | Stars: 37.5k
ID: 7 | Name: Unknown | Lang: None | Stars: 52.6k
ID: 8 | Name: Unknown | Lang: Java | Stars: 51.3k
ID: 9 | Name: Unknown | Lang: JavaScript | Stars: 44.9k
ID: 10 | Name: Unknown | Lang: HTML | Stars: 38.7k
ID: 11 | Name: Unknown | Lang: C++ | Stars: 38.4k
ID: 12 | Name: Unknown | Lang: C++ | Stars: 37.5k
ID: 13 | Name: Unknown | Lang: None | Stars: 52.6k
ID: 14 | Name: Unknown | Lang: Java | Stars: 51.3k
ID: 15 | Name: Unknown | Lang: JavaScript | Stars: 44.9k
ID: 16 | Name: Unknown | Lang: HTML | Stars: 38.7k
ID: 17 | Name: Unknown | Lang: C++ | Stars: 38.4k
ID: 18 | Name: Unknown | Lang: C++ | Stars: 37.5k
ID: 19 | Name: Unknown