In [1]:
import os
import warnings

import dlt
import duckdb
from dotenv import load_dotenv

from oss_analytics.source.github_source import paginated_getter

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Token passed to paginated_getter by the ingestion cells; None when the
# variable is not set.
github_token = os.getenv("GITHUB_TOKEN")
if not github_token:
    # Warn rather than raise so a deliberate token-less run keeps working.
    # NOTE(review): presumably paginated_getter then calls the GitHub API
    # unauthenticated, which has a much lower rate limit — confirm in github_source.
    warnings.warn(
        "GITHUB_TOKEN is not set; GitHub API requests may be rate-limited or rejected.",
        stacklevel=2,
    )

### ingestion

In [2]:
# Pipeline that loads into DuckDB under the `events_dataset` dataset.
pipeline = dlt.pipeline(
    pipeline_name="github_pipeline",
    destination="duckdb",
    dataset_name="events_dataset",
)

# Records come from the `events` endpoint of the supabase/supabase repository.
events_source = paginated_getter(
    repo="supabase/supabase",
    endpoint="events",
    token=github_token,
)

# write_disposition="replace": every run replaces the contents of the `events` table.
run_info = pipeline.run(
    events_source,
    table_name="events",
    write_disposition="replace",
)


@dlt.resource(columns={'payload__issue__assignee': {'data_type': 'text'}})



@dlt.resource(columns={'payload__issue__milestone': {'data_type': 'text'}})



@dlt.resource(columns={'payload__issue__type': {'data_type': 'text'}})



@dlt.resource(columns={'payload__issue__active_lock_reason': {'data_type': 'text'}})



@dlt.resource(columns={'payload__issue__pull_request__merged_at': {'data_type': 'text'}})



@dlt.resource(columns={'payload__issue__performed_via_github_app': {'data_type': 'text'}})



@dlt.resource(columns={'payload__pull_request__assignee': {'data_type': 'text'}})



@dlt.resource(columns={'payload__pull_request__milestone': {'data_type': 'text'}})



@dlt.resource(columns={'payload__pull_request__head__repo__mirror_url': {'data_type': 'text'}})



@dlt.resource(columns={'payload__pull_request__base__repo__mirror_url': {'data_type': 'text'}})



@dlt.resource(columns={'payload__pull_request__auto_merge': {'data_type': 'text'}})



@dlt.resource(columns={'payload__pull

### Database inspections
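- Table names in the dataset schema
- A table as a DataFrame
- Raw SQL through `pipeline.sql_client()`
- dlt pipeline tables
  - `_dlt_pipeline_state`
  - `_dlt_loads`
  - `_dlt_version`
- DuckDB (direct connection)
  - List of tables: `FROM information_schema.tables`
  - Table as a DataFrame: `FROM events_dataset.events`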

In [5]:
# Print the names of the data tables recorded in the dataset's schema.
dataset_schema = pipeline.dataset(dataset_type="default").schema
print(dataset_schema.data_table_names())

['events', 'events__payload__commits', 'events__payload__pull_request__head__repo__topics', 'events__payload__pull_request__base__repo__topics', 'events__payload__comment__performed_via_github_app__events', 'events__payload__issue__labels', 'events__payload__issue__assignees', 'events__payload__pull_request__assignees', 'events__payload__pull_request__requested_reviewers', 'events__payload__pull_request__labels', 'events__payload__release__mentions', 'events__payload__pull_request__requested_teams']


In [7]:
# Preview the first rows of the `events` table.
events_df = pipeline.dataset(dataset_type="default").events.df()
events_df.head()

Unnamed: 0,id,type,actor__id,actor__login,actor__display_login,actor__gravatar_id,actor__url,actor__avatar_url,repo__id,repo__name,...,payload__comment__performed_via_github_app__permissions__attestations,payload__comment__performed_via_github_app__permissions__discussions,payload__comment__performed_via_github_app__permissions__merge_queues,payload__comment__performed_via_github_app__permissions__models,payload__comment__performed_via_github_app__permissions__packages,payload__comment__performed_via_github_app__permissions__pages,payload__comment__performed_via_github_app__permissions__repository_projects,payload__comment__performed_via_github_app__permissions__security_events,payload__comment__performed_via_github_app__permissions__vulnerability_alerts,payload__issue__pull_request__merged_at
0,51094393247,PushEvent,19742402,joshenlim,joshenlim,,https://api.github.com/users/joshenlim,https://avatars.githubusercontent.com/u/19742402?,214587193,supabase/supabase,...,,,,,,,,,,NaT
1,51094317768,PushEvent,37541088,jordienr,jordienr,,https://api.github.com/users/jordienr,https://avatars.githubusercontent.com/u/37541088?,214587193,supabase/supabase,...,,,,,,,,,,NaT
2,51094089084,WatchEvent,22812353,hktklxz,hktklxz,,https://api.github.com/users/hktklxz,https://avatars.githubusercontent.com/u/22812353?,214587193,supabase/supabase,...,,,,,,,,,,NaT
3,51093887562,IssueCommentEvent,35613825,vercel[bot],vercel,,https://api.github.com/users/vercel[bot],https://avatars.githubusercontent.com/u/35613825?,214587193,supabase/supabase,...,,,,,,,,,,NaT
4,51093881002,PullRequestEvent,19742402,joshenlim,joshenlim,,https://api.github.com/users/joshenlim,https://avatars.githubusercontent.com/u/19742402?,214587193,supabase/supabase,...,,,,,,,,,,NaT


In [8]:
# (rows, columns) of the `events` table once loaded into a DataFrame.
events_frame = pipeline.dataset(dataset_type="default").events.df()
events_frame.shape

(278, 757)

In [None]:
# Join `events` to `events__payload__pull_request__base__repo__topics`,
# matching each event's `_dlt_id` against the topic row's `_dlt_parent_id`.
sql = """
SELECT *
FROM events e
JOIN events__payload__pull_request__base__repo__topics c
ON e._dlt_id = c._dlt_parent_id
"""
# Enter both context managers in one statement; the cursor is read into a
# DataFrame before either of them is closed.
with pipeline.sql_client() as client, client.execute_query(sql) as cursor:
    data = cursor.df()
data

Unnamed: 0,name
0,_dlt_loads
1,_dlt_pipeline_state
2,_dlt_version
3,events
4,events__payload__comment__performed_via_github...
5,events__payload__commits
6,events__payload__issue__assignees
7,events__payload__issue__labels
8,events__payload__pull_request__assignees
9,events__payload__pull_request__base__repo__topics


In [10]:
# Contents of the `_dlt_pipeline_state` table as a DataFrame.
state_relation = pipeline.dataset(dataset_type="default")._dlt_pipeline_state
state_relation.df()

Unnamed: 0,version,engine_version,pipeline_name,state,created_at,version_hash,_dlt_load_id,_dlt_id
0,1,4,github_pipeline,eNpdj1FrwkAQhP/LvhrEghoM+KDUUrDqa2spxyXZmmsvl5...,2025-06-19 09:12:48.066185+00:00,6MOXBOD7z7mLw9Y5Ke+Mljo7vEEymZrFNwHx50AcK6A=,1750324361.488451,N/87pgUi4lLHIg


In [11]:
# Contents of the `_dlt_loads` table as a DataFrame.
loads_relation = pipeline.dataset(dataset_type="default")._dlt_loads
loads_relation.df()

Unnamed: 0,load_id,schema_name,status,inserted_at,schema_version_hash
0,1750324361.488451,github,0,2025-06-19 09:12:49.604073+00:00,GmYMyAjUwhFJbnbfHxlD5mCToZNSZDQrL7vN3aqkAv0=
1,1750324555.113997,github,0,2025-06-19 09:16:03.746041+00:00,K4L2LSS1hk8vJA3SjtQQzA84Etv2zyy597DDjG1viaM=


In [12]:
# Contents of the `_dlt_version` table as a DataFrame.
version_relation = pipeline.dataset(dataset_type="default")._dlt_version
version_relation.df()

Unnamed: 0,version,engine_version,inserted_at,schema_name,version_hash,schema
0,2,11,2025-06-19 09:12:48.378930+00:00,github,GmYMyAjUwhFJbnbfHxlD5mCToZNSZDQrL7vN3aqkAv0=,"{""version"":2,""version_hash"":""GmYMyAjUwhFJbnbfH..."
1,3,11,2025-06-19 09:16:02.581367+00:00,github,K4L2LSS1hk8vJA3SjtQQzA84Etv2zyy597DDjG1viaM=,"{""version"":3,""version_hash"":""K4L2LSS1hk8vJA3Sj..."


In [2]:
# Open the DuckDB database file directly, bypassing the dlt client.
conn = duckdb.connect("github_pipeline.duckdb")

# List every table the database catalog knows about.
sql = """
SELECT *
FROM information_schema.tables
"""
tables_df = conn.execute(sql).df()
tables_df

Unnamed: 0,table_catalog,table_schema,table_name,table_type,self_referencing_column_name,reference_generation,user_defined_type_catalog,user_defined_type_schema,user_defined_type_name,is_insertable_into,is_typed,commit_action,TABLE_COMMENT
0,github_pipeline,events_dataset,events,BASE TABLE,,,,,,YES,NO,,
1,github_pipeline,events_dataset,events__payload__comment__performed_via_github...,BASE TABLE,,,,,,YES,NO,,
2,github_pipeline,events_dataset,events__payload__commits,BASE TABLE,,,,,,YES,NO,,
3,github_pipeline,events_dataset,events__payload__issue__labels,BASE TABLE,,,,,,YES,NO,,
4,github_pipeline,events_dataset,events__payload__pull_request__base__repo__topics,BASE TABLE,,,,,,YES,NO,,
5,github_pipeline,events_dataset,events__payload__pull_request__head__repo__topics,BASE TABLE,,,,,,YES,NO,,
6,github_pipeline,events_dataset,events__payload__pull_request__labels,BASE TABLE,,,,,,YES,NO,,
7,github_pipeline,events_dataset,events__payload__pull_request__requested_revie...,BASE TABLE,,,,,,YES,NO,,
8,github_pipeline,events_dataset,events__payload__pull_request__requested_teams,BASE TABLE,,,,,,YES,NO,,
9,github_pipeline,events_dataset,_dlt_loads,BASE TABLE,,,,,,YES,NO,,


In [4]:
# Read the whole `events` table of the `events_dataset` schema into a DataFrame.
sql = """
SELECT *
FROM events_dataset.events
"""
events_all = conn.execute(sql).df()
events_all

Unnamed: 0,id,type,actor__id,actor__login,actor__display_login,actor__gravatar_id,actor__url,actor__avatar_url,repo__id,repo__name,...,payload__pull_request__auto_merge__enabled_by__type,payload__pull_request__auto_merge__enabled_by__user_view_type,payload__pull_request__auto_merge__enabled_by__site_admin,payload__pull_request__auto_merge__merge_method,payload__pull_request__auto_merge__commit_title,payload__pull_request__auto_merge__commit_message,payload__review__body,payload__comment__in_reply_to_id,payload__pull_request__mergeable,payload__pull_request__rebaseable
0,51128078967,IssueCommentEvent,132864931,supabase[bot],supabase,,https://api.github.com/users/supabase[bot],https://avatars.githubusercontent.com/u/132864...,214587193,supabase/supabase,...,,,,,,,,,,
1,51128078733,IssueCommentEvent,35613825,vercel[bot],vercel,,https://api.github.com/users/vercel[bot],https://avatars.githubusercontent.com/u/35613825?,214587193,supabase/supabase,...,,,,,,,,,,
2,51128077376,PullRequestEvent,19742402,joshenlim,joshenlim,,https://api.github.com/users/joshenlim,https://avatars.githubusercontent.com/u/19742402?,214587193,supabase/supabase,...,,,,,,,,,,
3,51128053273,CreateEvent,19742402,joshenlim,joshenlim,,https://api.github.com/users/joshenlim,https://avatars.githubusercontent.com/u/19742402?,214587193,supabase/supabase,...,,,,,,,,,,
4,51127943160,WatchEvent,412511,Siliconrob,Siliconrob,,https://api.github.com/users/Siliconrob,https://avatars.githubusercontent.com/u/412511?,214587193,supabase/supabase,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
276,51028020694,IssueCommentEvent,35613825,vercel[bot],vercel,,https://api.github.com/users/vercel[bot],https://avatars.githubusercontent.com/u/35613825?,214587193,supabase/supabase,...,,,,,,,,,,
277,51028018489,PullRequestEvent,26616127,charislam,charislam,,https://api.github.com/users/charislam,https://avatars.githubusercontent.com/u/26616127?,214587193,supabase/supabase,...,,,,,,,,,True,True
278,51028006353,PushEvent,26616127,charislam,charislam,,https://api.github.com/users/charislam,https://avatars.githubusercontent.com/u/26616127?,214587193,supabase/supabase,...,,,,,,,,,,
279,51027983583,CreateEvent,26616127,charislam,charislam,,https://api.github.com/users/charislam,https://avatars.githubusercontent.com/u/26616127?,214587193,supabase/supabase,...,,,,,,,,,,


In [3]:
# Same pipeline name as the events load, but targeting the `commits_dataset` dataset.
# NOTE(review): this rebinds `pipeline`; cells that call `pipeline.dataset()` after
# this one has run presumably read `commits_dataset` instead of `events_dataset`.
pipeline = dlt.pipeline(
    pipeline_name="github_pipeline",
    destination="duckdb",
    dataset_name="commits_dataset",
)

# Records come from the `commits` endpoint of the supabase/supabase repository.
commits_source = paginated_getter(
    repo="supabase/supabase",
    endpoint="commits",
    token=github_token,
)

# write_disposition="replace": every run replaces the contents of the `commits` table.
run_info = pipeline.run(
    commits_source,
    table_name="commits",
    write_disposition="replace",
)


@dlt.resource(columns={'author': {'data_type': 'text'}})



@dlt.resource(columns={'committer': {'data_type': 'text'}})




In [4]:
# (Re)bind `conn` to the DuckDB database file.
conn = duckdb.connect("github_pipeline.duckdb")

# List every table in the catalog again, now that the commits load has run.
sql = """
SELECT *
FROM information_schema.tables
"""
tables_df = conn.execute(sql).df()
tables_df

Unnamed: 0,table_catalog,table_schema,table_name,table_type,self_referencing_column_name,reference_generation,user_defined_type_catalog,user_defined_type_schema,user_defined_type_name,is_insertable_into,is_typed,commit_action,TABLE_COMMENT
0,github_pipeline,commits_dataset,commits,BASE TABLE,,,,,,YES,NO,,
1,github_pipeline,commits_dataset,commits__parents,BASE TABLE,,,,,,YES,NO,,
2,github_pipeline,commits_dataset,_dlt_loads,BASE TABLE,,,,,,YES,NO,,
3,github_pipeline,commits_dataset,_dlt_pipeline_state,BASE TABLE,,,,,,YES,NO,,
4,github_pipeline,commits_dataset,_dlt_version,BASE TABLE,,,,,,YES,NO,,
5,github_pipeline,events_dataset,events,BASE TABLE,,,,,,YES,NO,,
6,github_pipeline,events_dataset,events__payload__comment__performed_via_github...,BASE TABLE,,,,,,YES,NO,,
7,github_pipeline,events_dataset,events__payload__commits,BASE TABLE,,,,,,YES,NO,,
8,github_pipeline,events_dataset,events__payload__issue__labels,BASE TABLE,,,,,,YES,NO,,
9,github_pipeline,events_dataset,events__payload__pull_request__base__repo__topics,BASE TABLE,,,,,,YES,NO,,


In [6]:
# Pull the full `commits` table into `df`; the cells that follow work on this frame.
sql = """
SELECT *
FROM commits_dataset.commits
"""
commits_result = conn.execute(sql)
df = commits_result.df()
df

Unnamed: 0,sha,node_id,commit__author__name,commit__author__email,commit__author__date,commit__committer__name,commit__committer__email,commit__committer__date,commit__message,commit__tree__sha,...,committer__subscriptions_url,committer__organizations_url,committer__repos_url,committer__events_url,committer__received_events_url,committer__type,committer__user_view_type,committer__site_admin,_dlt_load_id,_dlt_id
0,7cc64d3bd174c00ca61cf1f746a48bf22bcaff54,C_kwDODMpXOdoAKDdjYzY0ZDNiZDE3NGMwMGNhNjFjZjFm...,Charis,26616127+charislam@users.noreply.github.com,2025-06-19 18:14:29+00:00,GitHub,noreply@github.com,2025-06-19 18:14:29+00:00,docs(edge functions): document that sentry nee...,6dc16061a3641ac99db060107f23e09fa8d118f3,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,xMYALiukkZ4NPQ
1,5cc2617b5f8eeb361e9e5b7f7084109188298714,C_kwDODMpXOdoAKDVjYzI2MTdiNWY4ZWViMzYxZTllNWI3...,Charis,26616127+charislam@users.noreply.github.com,2025-06-19 14:54:43+00:00,GitHub,noreply@github.com,2025-06-19 14:54:43+00:00,fix(docs): add more descriptive error message ...,5904a3f4257c677b0f327ccd7265eb43c1c5b648,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,+ReRupwwV0r3pA
2,380a55e9bf8b6e49157f28f5dc4466d505cf1658,C_kwDODMpXOdoAKDM4MGE1NWU5YmY4YjZlNDkxNTdmMjhm...,Tristan Smith,67556218+TJLSmith0831@users.noreply.github.com,2025-06-19 13:53:53+00:00,GitHub,noreply@github.com,2025-06-19 13:53:53+00:00,docs: clarify that migrations must be applied ...,bd058e1c5fc6a35f320e0138510be16c768b6690,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,0hkd+G0yp8gDdg
3,b1ad7649f3dd5c8cb571e51e33e14266e3f4b1f0,C_kwDODMpXOdoAKGIxYWQ3NjQ5ZjNkZDVjOGNiNTcxZTUx...,Francesco Sansalvadore,f.sansalvadore@gmail.com,2025-06-19 11:01:41+00:00,GitHub,noreply@github.com,2025-06-19 11:01:41+00:00,db report (#36516)\n\n* release new charts to ...,0a3861a1fb57e355e0acb8907fd9ff5dcda5dadb,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,Psyg40XK3sfqXA
4,058b25d7c30a33787e5ea3e5ea7f071c036b4362,C_kwDODMpXOdoAKDA1OGIyNWQ3YzMwYTMzNzg3ZTVlYTNl...,Kevin Grüneberg,k.grueneberg1994@gmail.com,2025-06-19 03:34:52+00:00,GitHub,noreply@github.com,2025-06-19 03:34:52+00:00,chore: show org error message for longer (#36526),01bfe4a2e97efc6e89c62802abbbb35fe771f329,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,HQYfluh6zK9xLA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31656,3951357072668f330c1acaa1ad11aea2d89b81ee,MDY6Q29tbWl0MjE0NTg3MTkzOjM5NTEzNTcwNzI2NjhmMz...,Paul Copplestone,pcopplestone@gmail.com,2019-10-12 08:55:38+00:00,Paul Copplestone,pcopplestone@gmail.com,2019-10-12 08:55:38+00:00,fat fingered typo for build:docs,680248454dcdbe5b0f73495d7ca0bafd37ce6f4a,...,https://api.github.com/users/kiwicopple/subscr...,https://api.github.com/users/kiwicopple/orgs,https://api.github.com/users/kiwicopple/repos,https://api.github.com/users/kiwicopple/events...,https://api.github.com/users/kiwicopple/receiv...,User,public,False,1750393271.2874556,0VH1zFjLK3t4Cg
31657,cb24db899d07fbe83c734b0e18f7d9d618904b3b,MDY6Q29tbWl0MjE0NTg3MTkzOmNiMjRkYjg5OWQwN2ZiZT...,Paul Copplestone,pcopplestone@gmail.com,2019-10-12 07:05:05+00:00,Paul Copplestone,pcopplestone@gmail.com,2019-10-12 07:05:05+00:00,Adding netlify deploy config,2f303fc61187989bb1ea96cc0bf819dc7ac18605,...,https://api.github.com/users/kiwicopple/subscr...,https://api.github.com/users/kiwicopple/orgs,https://api.github.com/users/kiwicopple/repos,https://api.github.com/users/kiwicopple/events...,https://api.github.com/users/kiwicopple/receiv...,User,public,False,1750393271.2874556,6y0UnTmfmyVClQ
31658,bec67900fa9e0a4b2892d69f8f4968d3c344121b,MDY6Q29tbWl0MjE0NTg3MTkzOmJlYzY3OTAwZmE5ZTBhNG...,Paul Copplestone,pcopplestone@gmail.com,2019-10-12 06:35:30+00:00,Paul Copplestone,pcopplestone@gmail.com,2019-10-12 06:35:30+00:00,Hiding signup,0c436efcbec30afbcd81e3b820dd0ed5691e2d9d,...,https://api.github.com/users/kiwicopple/subscr...,https://api.github.com/users/kiwicopple/orgs,https://api.github.com/users/kiwicopple/repos,https://api.github.com/users/kiwicopple/events...,https://api.github.com/users/kiwicopple/receiv...,User,public,False,1750393271.2874556,uM7dy9h8To6Fiw
31659,ff6a1d151783d0e07aec2b40c6950817ef03f09f,MDY6Q29tbWl0MjE0NTg3MTkzOmZmNmExZDE1MTc4M2QwZT...,Paul Copplestone,pcopplestone@gmail.com,2019-10-12 06:33:00+00:00,Paul Copplestone,pcopplestone@gmail.com,2019-10-12 06:33:00+00:00,Adding sign up form,de0891add3037438686b57961564c30ee04dba8c,...,https://api.github.com/users/kiwicopple/subscr...,https://api.github.com/users/kiwicopple/orgs,https://api.github.com/users/kiwicopple/repos,https://api.github.com/users/kiwicopple/events...,https://api.github.com/users/kiwicopple/receiv...,User,public,False,1750393271.2874556,eyUb1+T4y7Oh+g


### commits dataset

In [7]:
# Show the first five rows of the commits frame.
df.head(n=5)

Unnamed: 0,sha,node_id,commit__author__name,commit__author__email,commit__author__date,commit__committer__name,commit__committer__email,commit__committer__date,commit__message,commit__tree__sha,...,committer__subscriptions_url,committer__organizations_url,committer__repos_url,committer__events_url,committer__received_events_url,committer__type,committer__user_view_type,committer__site_admin,_dlt_load_id,_dlt_id
0,7cc64d3bd174c00ca61cf1f746a48bf22bcaff54,C_kwDODMpXOdoAKDdjYzY0ZDNiZDE3NGMwMGNhNjFjZjFm...,Charis,26616127+charislam@users.noreply.github.com,2025-06-19 18:14:29+00:00,GitHub,noreply@github.com,2025-06-19 18:14:29+00:00,docs(edge functions): document that sentry nee...,6dc16061a3641ac99db060107f23e09fa8d118f3,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,xMYALiukkZ4NPQ
1,5cc2617b5f8eeb361e9e5b7f7084109188298714,C_kwDODMpXOdoAKDVjYzI2MTdiNWY4ZWViMzYxZTllNWI3...,Charis,26616127+charislam@users.noreply.github.com,2025-06-19 14:54:43+00:00,GitHub,noreply@github.com,2025-06-19 14:54:43+00:00,fix(docs): add more descriptive error message ...,5904a3f4257c677b0f327ccd7265eb43c1c5b648,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,+ReRupwwV0r3pA
2,380a55e9bf8b6e49157f28f5dc4466d505cf1658,C_kwDODMpXOdoAKDM4MGE1NWU5YmY4YjZlNDkxNTdmMjhm...,Tristan Smith,67556218+TJLSmith0831@users.noreply.github.com,2025-06-19 13:53:53+00:00,GitHub,noreply@github.com,2025-06-19 13:53:53+00:00,docs: clarify that migrations must be applied ...,bd058e1c5fc6a35f320e0138510be16c768b6690,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,0hkd+G0yp8gDdg
3,b1ad7649f3dd5c8cb571e51e33e14266e3f4b1f0,C_kwDODMpXOdoAKGIxYWQ3NjQ5ZjNkZDVjOGNiNTcxZTUx...,Francesco Sansalvadore,f.sansalvadore@gmail.com,2025-06-19 11:01:41+00:00,GitHub,noreply@github.com,2025-06-19 11:01:41+00:00,db report (#36516)\n\n* release new charts to ...,0a3861a1fb57e355e0acb8907fd9ff5dcda5dadb,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,Psyg40XK3sfqXA
4,058b25d7c30a33787e5ea3e5ea7f071c036b4362,C_kwDODMpXOdoAKDA1OGIyNWQ3YzMwYTMzNzg3ZTVlYTNl...,Kevin Grüneberg,k.grueneberg1994@gmail.com,2025-06-19 03:34:52+00:00,GitHub,noreply@github.com,2025-06-19 03:34:52+00:00,chore: show org error message for longer (#36526),01bfe4a2e97efc6e89c62802abbbb35fe771f329,...,https://api.github.com/users/web-flow/subscrip...,https://api.github.com/users/web-flow/orgs,https://api.github.com/users/web-flow/repos,https://api.github.com/users/web-flow/events{/...,https://api.github.com/users/web-flow/received...,User,public,False,1750393271.2874556,HQYfluh6zK9xLA


In [25]:
# Columns kept when viewing the commits frame: commit hash, author timestamp,
# commit message and the author's login. The remaining commit__*, author__*,
# committer__* and _dlt_* columns are left out.
cols = [
    "sha",
    "commit__author__date",
    "commit__message",
    "author__login",
]


In [26]:
# Restrict the commits frame to the columns listed in `cols`.
df.loc[:, cols]

Unnamed: 0,sha,commit__author__date,commit__message,author__login
0,7cc64d3bd174c00ca61cf1f746a48bf22bcaff54,2025-06-19 18:14:29+00:00,docs(edge functions): document that sentry nee...,charislam
1,5cc2617b5f8eeb361e9e5b7f7084109188298714,2025-06-19 14:54:43+00:00,fix(docs): add more descriptive error message ...,charislam
2,380a55e9bf8b6e49157f28f5dc4466d505cf1658,2025-06-19 13:53:53+00:00,docs: clarify that migrations must be applied ...,TJLSmith0831
3,b1ad7649f3dd5c8cb571e51e33e14266e3f4b1f0,2025-06-19 11:01:41+00:00,db report (#36516)\n\n* release new charts to ...,fsansalvadore
4,058b25d7c30a33787e5ea3e5ea7f071c036b4362,2025-06-19 03:34:52+00:00,chore: show org error message for longer (#36526),kevcodez
...,...,...,...,...
31656,3951357072668f330c1acaa1ad11aea2d89b81ee,2019-10-12 08:55:38+00:00,fat fingered typo for build:docs,kiwicopple
31657,cb24db899d07fbe83c734b0e18f7d9d618904b3b,2019-10-12 07:05:05+00:00,Adding netlify deploy config,kiwicopple
31658,bec67900fa9e0a4b2892d69f8f4968d3c344121b,2019-10-12 06:35:30+00:00,Hiding signup,kiwicopple
31659,ff6a1d151783d0e07aec2b40c6950817ef03f09f,2019-10-12 06:33:00+00:00,Adding sign up form,kiwicopple


In [17]:
# Look at one raw `author__starred_url` value (row label 0) in a single lookup.
df.loc[0, 'author__starred_url']

'https://api.github.com/users/charislam/starred{/owner}{/repo}'

## normalization, data contract

### 