hubcap

A dol (i.e. dict-like) interface to github

To install: pip install hubcap

Examples

Basics

The simplest facade to github data.

Interact with github like you'd interact with a dict object.

Warning: You'll need to have a github api token (google it if you don't have one; it's easy to get). You'll have to specify this token when making hubcap objects, or put it in an environmental variable under the name GITHUB_TOKEN or HUBCAP_GITHUB_TOKEN (useful since github actions doesn't allow you to have env variables starting with GITHUB).

>>> s = GitHubReader('thorwhalen')  # connnecting to a particular user/organization
>>> list(s)  # doctest: +SKIP
['agen',
 'aix',
 ...
 'viral',
 'wealth',
 'wrapt']
>>> 'a_non_existing_repository_name' in s
False
>>> 'hubcap' in s  # of course, this will be true, it's what you're using now!
True
>>> repo = s['hubcap']
>>> list(repo)
['master']
>>> branch = repo['master']
>>> list(branch)  # doctest: +NORMALIZE_WHITESPACE
['/.gitattributes',
 '/.github/',
 '/.gitignore',
 '/LICENSE',
 '/README.md',
 '/docsrc/',
 '/hubcap/',
 '/setup.cfg',
 '/setup.py']
>>> content = branch['/setup.cfg']
>>> print(content[:32].decode())
[metadata]
name = hubcap
version

Listing repositories and information about them

List repositories for a given user or organization, along with 78 fields of info.

>>> from hubcap.examples import repos_info, actions_info
>>>
>>> repos = repos_info('i2mint')  # doctest: +SKIP
>>> print(repos.shape)   # doctest: +SKIP
(60, 78)
>>> repos.head()  # doctest: +SKIP
                         id                           node_id       name  ... watchers  default_branch                                        permissions
full_name                                                                 ...
i2mint/py2mqtt    425694616                      R_kgDOGV-VmA    py2mqtt  ...        0            main  {'admin': True, 'maintain': True, 'push': True...
i2mint/mongodol   341721959  MDEwOlJlcG9zaXRvcnkzNDE3MjE5NTk=   mongodol  ...        0          master  {'admin': True, 'maintain': True, 'push': True...
i2mint/dol        299438731  MDEwOlJlcG9zaXRvcnkyOTk0Mzg3MzE=        dol  ...        4          master  {'admin': True, 'maintain': True, 'push': True...
i2mint/stream2py  238989487  MDEwOlJlcG9zaXRvcnkyMzg5ODk0ODc=  stream2py  ...        2          master  {'admin': True, 'maintain': True, 'push': True...
i2mint/creek      321448350  MDEwOlJlcG9zaXRvcnkzMjE0NDgzNTA=      creek  ...        0          master  {'admin': True, 'maintain': True, 'push': True...

[5 rows x 78 columns]
>>> list(repos.columns)   # doctest: +SKIP
['id', 'node_id', 'name', 'full_name', 'private', 'owner', 'html_url', 'description',
'fork', 'url', 'forks_url', 'keys_url', 'collaborators_url', 'teams_url', 'hooks_url
', 'issue_events_url', 'events_url', 'assignees_url', 'branches_url', 'tags_url',
'blobs_url', 'git_tags_url', 'git_refs_url', 'trees_url', 'statuses_url', 'languages_url
', 'stargazers_url', 'contributors_url', 'subscribers_url', 'subscription_url',
'commits_url', 'git_commits_url', 'comments_url', 'issue_comment_url', 'contents_url',
'compare_url', 'merges_url', 'archive_url', 'downloads_url', 'issues_url', 'pulls_url',
'milestones_url', 'notifications_url', 'labels_url', 'releases_url', 'deployments_url
', 'created_at', 'updated_at', 'pushed_at', 'git_url', 'ssh_url', 'clone_url', 'svn_url',
'homepage', 'size', 'stargazers_count', 'watchers_count', 'language', 'has_issue
s', 'has_projects', 'has_downloads', 'has_wiki', 'has_pages', 'forks_count',
'mirror_url', 'archived', 'disabled', 'open_issues_count', 'license', 'allow_forking',
'is_template', 'topics', 'visibility', 'forks', 'open_issues', 'watchers',
'default_branch', 'permissions']

Get info about github actions for a given repository.

>>> actions = actions_info('i2mint/mongodol')   # doctest: +SKIP
>>> print(actions.shape)   # doctest: +SKIP
(10, 30)
>>> actions.head()    # doctest: +SKIP
           id                    name  ...                                         repository                                    head_repository
0  1468986198  Continuous Integration  ...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...
1  1445456774  Continuous Integration  ...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...
2  1437461380  Continuous Integration  ...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...
3  1343133456  Continuous Integration  ...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...
4  1262878182  Continuous Integration  ...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...  {'id': 341721959, 'node_id': 'MDEwOlJlcG9zaXRv...

[5 rows x 30 columns]
>>>
>>> list(actions.columns)   # doctest: +SKIP
['id', 'name', 'node_id', 'head_branch', 'head_sha', 'run_number', 'event', 'status',
'conclusion', 'workflow_id', 'check_suite_id', 'check_suite_node_id', 'url', 'html_url',
'pull_requests', 'created_at', 'updated_at', 'run_attempt', 'run_started_at',
'jobs_url', 'logs_url', 'check_suite_url', 'artifacts_url', 'cancel_url', 'rerun_url',
'previous_attempt_url', 'workflow_url', 'head_commit', 'repository', 'head_repository']

Find most recently changed repositories and check if their CI failed or not.

>>> from hubcap.examples import date_selection_lidx
>>> updated_recently = repos.iloc
...     [date_selection_lidx(repos, hours_ago=24)]  # doctest: +SKIP
>>> {repo: get_last_build_status(repo)
...     for repo in updated_recently['full_name']}  # doctest: +SKIP
{'i2mint/py2mqtt': 'failure',
 'i2mint/mongodol': 'success',
 'i2mint/dol': 'success',
 'i2mint/stream2py': 'success',
 'i2mint/creek': 'success'}

Note: You can get this directly using the ci_status function

>>> from hubcap.examples import ci_status
>>> ci_status('i2mint', hours_ago=24)  # doctest: +SKIP
{'i2mint/py2mqtt': 'failure',
 'i2mint/mongodol': 'success',
 'i2mint/dol': 'success',
 'i2mint/stream2py': 'success',
 'i2mint/creek': 'success'}

RepoReader

>>> from hubcap import RepoReader
>>> r = RepoReader('thorwhalen/test_repo')
>>> sorted(r)  # doctest: +SKIP
['artifacts', 'assignees', 'autolinks', 'branches', 'codescan_alerts', 'collaborators', 
'comments', 'commits', 'contributors', 'deployments', 'discussions', 'downloads', 
'environments', 'events', 'forks', 'git_refs', 'hooks', 'issues', 'issues_comments', 
'issues_events', 'labels', 'languages', 'milestones', 'network_events', 
'notifications', 'pending_invitations', 'projects', 'pulls', 'pulls_comments', 
'pulls_review_comments', 'releases', 'repository_advisories', 'secrets', 
'self_hosted_runners', 'stargazers', 'stargazers_with_dates', 'stats_contributors', 
'subscribers', 'tags', 'teams', 'top_paths', 'top_referrers', 'topics', 'variables', 
'watchers', 'workflow_runs', 'workflows']
>>>
>>> 'issues' in r
True
>>> issues = r['issues']
>>> sorted(issues)
[4, 5]
>>> issue_obj = issues[4]
>>> issue_obj
Issue(title="Test Issue A", number=4)
>>> sorted([attr for attr in dir(issue_obj) if not attr.startswith('_')])  # doctest: +SKIP
['CHECK_AFTER_INIT_FLAG', 'active_lock_reason', 'add_to_assignees', 'add_to_labels', 
'as_pull_request', 'assignee', 'assignees', 'body', 'closed_at', 'closed_by', 
'comments', 'comments_url', 'create_comment', 'create_reaction', 'created_at', 
'delete_labels', 'delete_reaction', 'edit', 'etag', 'events_url', 'get__repr__', 
'get_comment', 'get_comments', 'get_events', 'get_labels', 'get_reactions', 
'get_timeline', 'html_url', 'id', 'labels', 'labels_url', 'last_modified', 'lock', 
'locked', 'milestone', 'number', 'pull_request', 'raw_data', 'raw_headers', 
'remove_from_assignees', 'remove_from_labels', 'repository', 'setCheckAfterInitFlag', 
'set_labels', 'state', 'state_reason', 'title', 'unlock', 'update', 'updated_at', 
'url', 'user']
>>> issue_obj.number
4
>>> issue_obj.state
'open'
>>> issue_obj.labels
[Label(name="documentation"), Label(name="enhancement")]
>>> # title of issue
>>> issue_obj.title
'Test Issue A'
>>> # content of issue
>>> issue_obj.body
'Contents of Test Issue A'
>>>
>>> issue_obj.comments
2
>>>
>>> list(issue_obj.get_comments())  # doctest: +NORMALIZE_WHITESPACE
[IssueComment(user=NamedUser(login="thorwhalen"), id=1801792378), 
IssueComment(user=NamedUser(login="thorwhalen"), id=1801792855)]
>>> issue_comment = issue_obj.get_comment(1801792378)
>>> issue_comment.body
'Comment 1 of Test Issue A'
>>>
>>> 'discussions' in r
True
>>> discussions = r['discussions']  # doctest: +SKIP
>>> sorted(discussions)  # doctest: +SKIP
[1, 2, 3]

hub function

The high level function hub is the simplest way to get started. It's a function that takes a path to a github resource and returns a mapping to that resource. The mapping is lazy, so it's only when you access a key that the resource is actually fetched from github.

>>> repositories = hub('thorwhalen')
>>> 'hubcap' in repositories
True
>>> hubcap_repo = repositories['hubcap']
>>>
>>> repo = hub('thorwhalen/hubcap')
>>> 'master' in repo
True
>>> master_files = repo['master']
>>>
>>> files = hub('thorwhalen/hubcap/master')
>>> '/README.md' in files
True
>>> '/hubcap/' in files
True
>>> hubcap_files = files['/hubcap/']
>>> '/hubcap/base.py' in hubcap_files
True

Access issues

>>> issues = hub('thorwhalen/hubcap/issues')
>>> 3 in issues  # there's a "number 3" issue
True
>>> issue = issues[3]
>>> issue.title
'Test Issue'
>>> issue.body
'This is just a test issue to test that hubcap can see it.\r\n'
>>> issue.comments  # meaning "number of comments"
1

Access discussions

>>> discussions = hub('thorwhalen/hubcap/discussions')  # doctest: +SKIP
>>> # get a list of discussion (keys)
>>> list(discussions)  # doctest: +SKIP
[2, ...]
>>> # the discussion 2 should be included in that list
>>> 2 in discussions  # doctest: +SKIP
True
>>> discussion = discussions[2]   # doctest: +SKIP
>>> discussion  # doctest: +SKIP
{'number': 2,
 'title': 'Root interface of hubcap',
 'body': 'Every time I need to do something with `hubcap` ...',
 'author': {'login': 'thorwhalen'},
 'createdAt': '2023-11-07T09:30:59Z',
 'updatedAt': '2023-12-05T08:25:39Z',
 'comments': [{'body': 'Further it would be nice if we ...',
   'author': 'thorwhalen',
   'replies': []}]}
>>> discussion['title']    # doctest: +SKIP
'Root interface of hubcap'

Here's a nice trick for those want to download a discussion in a nice readable format, for you, or some AI, to look through.

>>> from hubcap import create_markdown_from_jdict
>>> markdown_string = create_markdown_from_jdict(discussion)  # doctest: +SKIP
>>> print(markdown_string)  # doctest: +SKIP
# Root interface of hubcap
<BLANKLINE>
Every time I need to do something with `hubcap` I need to look up how it works again. 
<BLANKLINE>
That's negative points

GithubReader

One of the main classes is GithubReader. It's a mapping that connects to a github user or organization, and returns a mapping of repositories. The repositories are also mappings, that return mappings of branches, and so on.

>>> from hubcap import GithubReader
>>> s = GithubReader('thorwhalen')  # connnecting to a particular user/organization
>>> list(s)  # doctest: +SKIP
['agen',
 'aix',
 ...
 'viral',
 'wealth',
 'wrapt']
>>> 'a_non_existing_repository_name' in s
False
>>> 'hubcap' in s  # of course, this will be true, it's what you're using now!
True
>>> repo = s['hubcap']
>>> list(repo)
['master']
>>> branch = repo['master']
>>> list(branch)  # doctest: +NORMALIZE_WHITESPACE
['/.gitattributes', '/.github/', '/.gitignore', '/LICENSE',
'/README.md', '/docsrc/', '/hubcap/', '/misc/', '/setup.cfg', '/setup.py']
>>> content = branch['/setup.cfg']
>>> print(content[:32].decode())
[metadata]
name = hubcap
version

>>> from hubcap import get_repository_info
>>> info = get_repository_info('thorwhalen/hubcap')
>>> list(info)  # doctest: +NORMALIZE_WHITESPACE
['name', 'full_name', 'description', 'stargazers_count',
'forks_count', 'watchers_count', 'html_url', 'last_commit_date']
>>> info['name']
'hubcap'
>>> info['html_url']
'https://github.com/thorwhalen/hubcap'
>>> info['stargazers_count'] >= 1
True

You also have other useful objects, like Issues, IssueComments, Discussions, etc.

github_repo_text_aggregate

>>> owner_repo_files = github_repo_text_aggregate('thorwhalen/hubcap')  # doctest: +SKIP
>>> markdown_output = github_repo_text_aggregate(owner_repo_files)  # doctest: +SKIP

Recipes

Clone to temp folder and get store (mapping) of files

from hubcap import git_clone, git_wiki_clone
from dol import TextFiles, filt_iter, Pipe

repo_py_files = Pipe(
    git_clone, TextFiles, filt_iter(filt=lambda x: x.endswith('.py'))
)
repo_wiki_files = Pipe(git_wiki_clone, TextFiles)

py_files = repo_py_files('i2mint/dol')
len(py_files)
# 37

wiki_files = repo_wiki_files('i2mint/dol')
list(wiki_files)
# ['Recipes.md',
#  'Critiques-and-their-comebacks.md',
#  'Home.md',
#  'Mapping-Views.md']

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

hubcap

Examples

Basics

Listing repositories and information about them

RepoReader

hub function

Access issues

Access discussions

GithubReader

github_repo_text_aggregate

Recipes

Clone to temp folder and get store (mapping) of files

About

Releases

Packages

Languages

Name		Name	Last commit message	Last commit date
Latest commit History 88 Commits
.github/workflows		.github/workflows
docsrc		docsrc
hubcap		hubcap
misc		misc
.gitattributes		.gitattributes
.gitignore		.gitignore
LICENSE		LICENSE
README.md		README.md
setup.cfg		setup.cfg
setup.py		setup.py

License

thorwhalen/hubcap

Folders and files

Latest commit

History

Repository files navigation

hubcap

Examples

Basics

Listing repositories and information about them

RepoReader

hub function

Access issues

Access discussions

GithubReader

github_repo_text_aggregate

Recipes

Clone to temp folder and get store (mapping) of files

About

Resources

License

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages