In [3]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time

In [7]:
path = ''
db_name = 'github_google.db'

# Bind the name before the try block so the `finally` clause never raises a
# NameError (or closes a stale connection left over from another cell) when
# sqlite3.connect() itself fails.
conn = None

try:
    # Open (or create) the SQLite database file.
    conn = sqlite3.connect(path + db_name)

    # Cursor object used to execute SQL (the language for operating an RDB).
    cur = conn.cursor()

    # Create the table that stores one row per scraped repository.
    # IF NOT EXISTS keeps this cell idempotent: re-running it against an
    # existing database is a no-op instead of an "already exists" error.
    sql = "CREATE TABLE IF NOT EXISTS repositories (id INTEGER PRIMARY KEY, name TEXT, pl TEXT, star INTEGER);"

    # Execute the statement.
    cur.execute(sql)

    conn.commit()  # persist the change

except sqlite3.Error as e:
    print('SQLite error:', e)

finally:
    # Close the connection only if it was actually opened.
    if conn is not None:
        conn.close()

In [8]:
# First listing page the scraping loop will request. The scraping cell
# overwrites this with the page that failed so a re-run can resume from there.
page_now = 1

In [14]:
# https://github.com/orgs/google/repositories?page=1  ---  URL of the repository listing page

LAST_PAGE = 94        # last listing page to request (inclusive)
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get() can block forever


def _parse_star_count(text):
    """Convert a star-count label such as "19", "1,234" or "1.7k" to an int.

    Empty text yields 0. Text that cannot be parsed is reported and also
    yields 0, so a single odd label does not abort the whole scrape.
    """
    cleaned = text.strip().replace(",", "")
    if not cleaned:
        return 0

    multipliers = {"k": 1_000, "m": 1_000_000}
    suffix = cleaned[-1].lower()
    try:
        if suffix in multipliers:
            # round() instead of plain int() so float representation error
            # cannot truncate the value to one below the intended count.
            return int(round(float(cleaned[:-1]) * multipliers[suffix]))
        return int(cleaned)
    except ValueError:
        print(f"Could not parse star count {text!r}; storing 0")
        return 0


# Start with an empty result list only for a fresh run. When resuming after a
# failed request (page_now > 1), keep the rows collected by the earlier run;
# no rows of the failed page were appended because the request fails before
# any parsing happens.
if page_now == 1 or "repositories" not in globals():
    repositories = []

for page in range(page_now, LAST_PAGE + 1):
    time.sleep(1)  # throttle: at most one request per second
    url = f"https://github.com/orgs/google/repositories?page={page}"
    print(f"ページ{page}: {url}")
    try:
        res = requests.get(url, timeout=REQUEST_TIMEOUT)

        # Reject any non-success HTTP status.
        res.raise_for_status()

    except requests.RequestException as e:
        print(f"Error fetching page {page}: {e}")
        print("次回はこのページから再開します")
        # Remember the failing page so the next run of this cell resumes here.
        page_now = page
        break

    soup = BeautifulSoup(res.text, 'html.parser')

    # One element per repository entry on the listing page.
    list_items = soup.find_all(class_ = "ListItem-module__listItem--k4eMk")
    print(f"ページ{page}のセクター数: {len(list_items)}")
    for list_item in list_items:

        # Extract repository name, programming language and star label,
        # stripping surrounding whitespace; fall back to defaults when the
        # element is missing.
        name = list_item.find(class_ = "Title-module__anchor--GmXUE Title-module__inline--oM0P7")
        name = name.text.strip() if name else "N/A"
        pl = list_item.find(class_ = "ReposListItem-module__Text_4--mkG7R")
        pl = pl.text.strip() if pl else "N/A"
        star = list_item.find(class_ = "ReposListItem-module__Link_1--v5NDF prc-Link-Link-85e08")
        star = _parse_star_count(star.text) if star else 0

        # Append the record to the result list.
        print(f"name:{name} pl:{pl} star:{star}")
        repositories.append((name, pl, star))

ページ53: https://github.com/orgs/google/repositories?page=53
ページ53のセクター数: 30
name:dart-json_diff pl:Dart star:19
name:mystyle pl:Python star:18
name:web-bsd-hunt pl:Go star:8
name:vk_callback_swapchain pl:C++ star:14
name:butteraugli pl:C++ star:141
name:spirv-tutor pl:Shell star:8
name:stm32-bootloader-client-rs pl:Rust star:3
name:gfw-deployments pl:Python star:39
name:coop-analytics pl:Python star:3
name:fledge-key-value-server pl:N/A star:0
name:ms-tpm-20-ref pl:C star:142
name:libprotobuf-mutator-asn1 pl:C++ star:9
name:open-source-pdks pl:N/A star:9
name:magic-github-proxy pl:Python star:8
name:codeu-starter-project pl:Java star:20
name:codeu_project_2018 pl:Java star:5
name:u2f-ref-code pl:JavaScript star:179
name:google-drive-proxy pl:C# star:39
name:google-drive-shell-extension pl:C++ star:69
name:chive-prosody pl:HTML star:4
name:cross-device-sdk pl:Kotlin star:14
name:schemaorg-java pl:Java star:41
name:crumsort-rs pl:Rust star:5
name:plusfish pl:C++ star:10
name:sqlcommenter-

In [15]:
path = ''
db_name = 'github_google.db'

# Bind the name before the try block so the `finally` clause never raises a
# NameError (or closes a stale connection left over from another cell) when
# sqlite3.connect() itself fails.
conn = None

try:
    # Open the SQLite database file.
    conn = sqlite3.connect(path + db_name)

    # Cursor object used to execute SQL (the language for operating an RDB).
    cur = conn.cursor()

    # Parameterized multi-row insert; `id` is omitted so SQLite assigns it.
    sql = "INSERT INTO repositories (name, pl, star) VALUES (?, ?, ?);"

    # Insert every (name, pl, star) tuple collected by the scraping cell.
    cur.executemany(sql, repositories)

    conn.commit()  # persist the change


except sqlite3.Error as e:
    # Nothing was committed, so closing the connection discards the
    # partial insert.
    print('SQLite error:', e)

finally:
    # Close the connection only if it was actually opened.
    if conn is not None:
        conn.close()

In [4]:
path = ''
db_name = 'github_google.db'

# Bind the name before the try block so the `finally` clause never raises a
# NameError (or closes a stale connection left over from another cell) when
# sqlite3.connect() itself fails.
conn = None

try:
    # Open the SQLite database file.
    conn = sqlite3.connect(path + db_name)

    # Cursor object used to execute SQL (the language for operating an RDB).
    cur = conn.cursor()

    # Read back every stored repository row.
    sql = "SELECT * FROM repositories;"

    # Execute the query.
    cur.execute(sql)

except sqlite3.Error as e:
    print('SQLite error:', e)

else:
    # Print the fetched rows. The first column is unpacked into `repo_id`
    # rather than `id` so the builtin id() is not shadowed in the notebook's
    # global namespace.
    for row in cur:
        repo_id, name, pl, star = row
        print(f"id: {repo_id}, name: {name}, pl: {pl}, star: {star}")

finally:
    # Close the connection only if it was actually opened.
    if conn is not None:
        conn.close()

id: 1, name: go-containerregistry, pl: Go, star: 601
id: 2, name: quiche, pl: C++, star: 161
id: 3, name: benchmark, pl: C++, star: 1700
id: 4, name: jetpack-camera-app, pl: Kotlin, star: 49
id: 5, name: gvisor, pl: Go, star: 1500
id: 6, name: adk-python, pl: Python, star: 2400
id: 7, name: android-cuttlefish, pl: C++, star: 175
id: 8, name: XNNPACK, pl: C, star: 450
id: 9, name: tunix, pl: Python, star: 172
id: 10, name: device-infra, pl: Java, star: 23
id: 11, name: dive, pl: C++, star: 10
id: 12, name: nomulus, pl: Java, star: 295
id: 13, name: cel-java, pl: Java, star: 28
id: 14, name: dawn, pl: C++, star: 137
id: 15, name: j2cl, pl: Java, star: 153
id: 16, name: xls, pl: C++, star: 217
id: 17, name: orbax, pl: Python, star: 71
id: 18, name: ml-metrics, pl: Python, star: 4
id: 19, name: perfetto, pl: C++, star: 619
id: 20, name: automotive-design-compose, pl: Rust, star: 32
id: 21, name: TestParameterInjector, pl: Java, star: 37
id: 22, name: grain, pl: Python, star: 56
id: 23, nam