In [56]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time

In [57]:
# Create the SQLite database file (if needed) and the table that stores the
# scraped repository rows. The table is named `cars`; the insert cell later in
# this notebook targets the same name, so it is kept as-is.
path = ''
db_name = 'github_google.db'

# Bind `conn` before entering `try`: if connect() itself raises, `finally`
# must not hit an undefined name (fresh kernel) or close a stale connection
# left behind by an earlier cell run.
conn = None
try:
    # Open a connection to the database (the file is created when missing).
    conn = sqlite3.connect(path + db_name)

    # Cursor object used to execute SQL (the language for operating an RDB).
    cur = conn.cursor()

    # Table definition. IF NOT EXISTS keeps the cell idempotent, so re-running
    # it (or Restart & Run All with an existing DB file) is a no-op instead of
    # failing with "table cars already exists".
    sql = "CREATE TABLE IF NOT EXISTS cars (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT, pl TEXT, star INTEGER);"

    # Execute the statement.
    cur.execute(sql)

    conn.commit()  # persist the change

except sqlite3.Error as e:
    print('SQLite error:', e)

finally:
    # Close the connection only if it was actually opened.
    if conn is not None:
        conn.close()

In [58]:
# First page number to scrape. The scraping loop starts its range at this value
# and, when a request fails, overwrites it with the failed page number so the
# next run of that cell resumes from there. Re-running this cell resets it to 1.
page_now = 1

In [59]:
# https://github.com/orgs/google/repositories?page=1  ---  URL of the repository list page
def parse_star_count(text):
    """Convert a star-count label to an integer.

    Accepts plain integers ("197"), comma-grouped integers ("1,234") and
    abbreviated values with a "k" or "m" suffix in either case ("2.3k").
    Empty or whitespace-only text is treated as 0.

    Raises:
        ValueError: if the text is not a recognizable number.
    """
    cleaned = text.strip().replace(",", "")
    if not cleaned:
        return 0
    multipliers = {"k": 1000, "m": 1000000}
    suffix = cleaned[-1].lower()
    if suffix in multipliers:
        # round() rather than int(): int() truncates, so a product that lands
        # just below the intended value due to float error would lose one.
        return round(float(cleaned[:-1]) * multipliers[suffix])
    return int(cleaned)


# Rows collected by this run, as (name, language, stars) tuples.
# NOTE: this list is reset every time the cell runs, so rows gathered before a
# failure must be saved (insert cell) before re-running this cell to resume.
repositories = []

# The upper bound 95 is hard-coded: pages page_now .. 94 are requested.
for page in range(page_now, 95):
    time.sleep(1)  # throttle: at most one request per second
    url = f"https://github.com/orgs/google/repositories?page={page}"
    print(f"ページ{page}: {url}")
    try:
        # Without a timeout a stalled connection would block the cell forever;
        # a timeout raises requests.Timeout, which the handler below catches.
        res = requests.get(url, timeout=30)

        # Reject any non-2xx response.
        res.raise_for_status()

        soup = BeautifulSoup(res.text, 'html.parser')

        # One element per repository row. These generated class names are
        # tied to GitHub's current markup and may change without notice.
        selsectors = soup.find_all(class_ = "ListItem-module__listItem--k4eMk")
        print(f"ページ{page}のセクター数: {len(selsectors)}")
        for selsector in selsectors:

            # Extract repository name, programming language and star label,
            # stripping surrounding whitespace; fall back when an element is absent.
            name = selsector.find(class_ = "Title-module__anchor--GmXUE Title-module__inline--oM0P7")
            name = name.text.strip() if name else "N/A"
            pl = selsector.find(class_ = "ReposListItem-module__Text_4--mkG7R")
            pl = pl.text.strip() if pl else "N/A"
            star = selsector.find(class_ = "ReposListItem-module__Link_1--v5NDF prc-Link-Link-85e08")
            star = star.text.strip() if star else "0"

            # Convert the star label (e.g. "2.3k") to an integer.
            star = parse_star_count(star)

            # Append the extracted row to the result list.
            print(f"name:{name} pl:{pl} star:{star}")
            repositories.append((name, pl, star))

    except requests.RequestException as e:
        print(f"Error fetching page {page}: {e}")
        print("次回はこのページから再開します")
        # Remember the failed page so the next run of this cell resumes here.
        page_now = page
        break

ページ1: https://github.com/orgs/google/repositories?page=1
ページ1のセクター数: 30
name:adk-java pl:Java star:197
name:perfetto pl:C++ star:619
name:clusterfuzz pl:Python star:592
name:conscrypt pl:Java star:310
name:or-tools pl:C++ star:2300
name:workflow-graph pl:TypeScript star:27
name:dagger pl:Java star:3100
name:nearby pl:C++ star:185
name:crubit pl:C++ star:54
name:osv-scalibr pl:Go star:90
name:heir pl:C++ star:106
name:site-kit-wp pl:JavaScript star:326
name:oss-fuzz pl:Shell star:2500
name:horologist pl:Kotlin star:109
name:dawn pl:C++ star:136
name:xls pl:C++ star:217
name:adk-samples pl:Python star:1900
name:nomulus pl:Java star:295
name:orbax pl:Python star:71
name:earthengine-catalog pl:Jsonnet star:92
name:open-dice pl:C++ star:9
name:yggdrasil-decision-forests pl:C++ star:70
name:device-infra pl:Java star:23
name:gemma.cpp pl:C++ star:573
name:ground-android pl:Kotlin star:138
name:koladata pl:C++ star:14
name:j2cl pl:Java star:153
name:garf pl:Python star:3
name:highway pl:C++ st

In [60]:
# Insert the scraped (name, pl, star) tuples from `repositories` into the
# `cars` table of the SQLite database.
path = ''
db_name = 'github_google.db'

# Bind `conn` before entering `try`: if connect() itself raises, `finally`
# must not hit an undefined name (fresh kernel) or close a stale connection
# left behind by an earlier cell run.
conn = None
try:
    # Open a connection to the database.
    conn = sqlite3.connect(path + db_name)

    # Cursor object used to execute SQL (the language for operating an RDB).
    cur = conn.cursor()

    # Parameterized multi-row insert; values are bound via `?` placeholders.
    sql = "INSERT INTO cars (name, pl, star) VALUES (?, ?, ?);"

    # Execute the statement once per tuple in `repositories`.
    cur.executemany(sql, repositories)

    conn.commit()  # persist the change


except sqlite3.Error as e:
    print('SQLite error:', e)

finally:
    # Close the connection only if it was actually opened.
    if conn is not None:
        conn.close()