Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 58 additions & 29 deletions github_backup/github_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@
FILE_URI_PREFIX = "file://"
logger = logging.getLogger(__name__)


class RepositoryUnavailableError(Exception):
    """Signal that a repository cannot be accessed for legal reasons.

    Raised, for example, when a repository is subject to a DMCA takedown.

    Attributes:
        dmca_url: Link to the takedown notice as supplied by the raiser, or
            ``None`` when no link was given.
    """

    def __init__(self, message, dmca_url=None):
        # Keep the notice link on the instance so handlers can log it; the
        # human-readable message itself is stored by the Exception base class.
        self.dmca_url = dmca_url
        super().__init__(message)


# Setup SSL context with fallback chain
https_ctx = ssl.create_default_context()
if https_ctx.get_ca_certs():
Expand Down Expand Up @@ -612,6 +621,19 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):

status_code = int(r.getcode())

# Handle DMCA takedown (HTTP 451) - raise exception to skip entire repository
if status_code == 451:
dmca_url = None
try:
response_data = json.loads(r.read().decode("utf-8"))
dmca_url = response_data.get("block", {}).get("html_url")
except Exception:
pass
raise RepositoryUnavailableError(
"Repository unavailable due to legal reasons (HTTP 451)",
dmca_url=dmca_url
)

# Check if we got correct data
try:
response = json.loads(r.read().decode("utf-8"))
Expand Down Expand Up @@ -1668,40 +1690,47 @@ def backup_repositories(args, output_directory, repositories):

continue # don't try to back anything else for a gist; it doesn't exist

download_wiki = args.include_wiki or args.include_everything
if repository["has_wiki"] and download_wiki:
fetch_repository(
repository["name"],
repo_url.replace(".git", ".wiki.git"),
os.path.join(repo_cwd, "wiki"),
skip_existing=args.skip_existing,
bare_clone=args.bare_clone,
lfs_clone=args.lfs_clone,
no_prune=args.no_prune,
)
if args.include_issues or args.include_everything:
backup_issues(args, repo_cwd, repository, repos_template)
try:
download_wiki = args.include_wiki or args.include_everything
if repository["has_wiki"] and download_wiki:
fetch_repository(
repository["name"],
repo_url.replace(".git", ".wiki.git"),
os.path.join(repo_cwd, "wiki"),
skip_existing=args.skip_existing,
bare_clone=args.bare_clone,
lfs_clone=args.lfs_clone,
no_prune=args.no_prune,
)
if args.include_issues or args.include_everything:
backup_issues(args, repo_cwd, repository, repos_template)

if args.include_pulls or args.include_everything:
backup_pulls(args, repo_cwd, repository, repos_template)
if args.include_pulls or args.include_everything:
backup_pulls(args, repo_cwd, repository, repos_template)

if args.include_milestones or args.include_everything:
backup_milestones(args, repo_cwd, repository, repos_template)
if args.include_milestones or args.include_everything:
backup_milestones(args, repo_cwd, repository, repos_template)

if args.include_labels or args.include_everything:
backup_labels(args, repo_cwd, repository, repos_template)
if args.include_labels or args.include_everything:
backup_labels(args, repo_cwd, repository, repos_template)

if args.include_hooks or args.include_everything:
backup_hooks(args, repo_cwd, repository, repos_template)
if args.include_hooks or args.include_everything:
backup_hooks(args, repo_cwd, repository, repos_template)

if args.include_releases or args.include_everything:
backup_releases(
args,
repo_cwd,
repository,
repos_template,
include_assets=args.include_assets or args.include_everything,
)
if args.include_releases or args.include_everything:
backup_releases(
args,
repo_cwd,
repository,
repos_template,
include_assets=args.include_assets or args.include_everything,
)
except RepositoryUnavailableError as e:
logger.warning(f"Repository {repository['full_name']} is unavailable (HTTP 451)")
if e.dmca_url:
logger.warning(f"DMCA notice: {e.dmca_url}")
logger.info(f"Skipping remaining resources for {repository['full_name']}")
continue

if args.incremental:
if last_update == "0000-00-00T00:00:00Z":
Expand Down
143 changes: 143 additions & 0 deletions tests/test_http_451.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""Tests for HTTP 451 (DMCA takedown) handling."""

import json
from unittest.mock import Mock, patch

import pytest

from github_backup import github_backup


class TestHTTP451Exception:
    """Test suite for HTTP 451 (DMCA takedown) exception handling.

    Every test patches ``github_backup.github_backup._get_response`` so it
    returns a canned mock response, drains ``retrieve_data_gen`` and checks
    which exception is raised. The mock construction shared by all tests lives
    in the private helpers below; pytest does not collect them because their
    names do not start with ``test``.
    """

    ISSUES_URL = "https://api.github.com/repos/test/dmca/issues"
    NOT_FOUND_URL = "https://api.github.com/repos/test/notfound/issues"
    DMCA_NOTICE_URL = (
        "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
    )
    LEGAL_REASON = "Unavailable For Legal Reasons"

    @staticmethod
    def _make_args():
        """Return a mock ``args`` namespace with no credentials and no throttling."""
        args = Mock()
        # Pinned explicitly: a bare Mock would hand back truthy child mocks for
        # these attributes. Presumably these are the options read while the
        # request is built and throttled -- confirm against retrieve_data_gen.
        args.as_app = False
        args.token_fine = None
        args.token_classic = None
        args.username = None
        args.password = None
        args.osx_keychain_item_name = None
        args.osx_keychain_item_account = None
        args.throttle_limit = None
        args.throttle_pause = 0
        return args

    @staticmethod
    def _make_response(status_code, body, reason):
        """Return a mock HTTP response.

        Args:
            status_code: Value returned by ``getcode()``.
            body: Raw bytes returned by ``read()``.
            reason: Value of the ``reason`` attribute.
        """
        response = Mock()
        response.getcode.return_value = status_code
        response.read.return_value = body
        response.headers = {"x-ratelimit-remaining": "5000"}
        response.reason = reason
        return response

    def _fetch(self, response, url):
        """Drain ``retrieve_data_gen`` for *url* with ``_get_response`` patched.

        Args:
            response: Mock response that the patched ``_get_response`` returns.
            url: Template URL passed to ``retrieve_data_gen``.

        Returns:
            The list of items yielded by the generator.

        Raises:
            Whatever ``retrieve_data_gen`` raises for the given response.
        """

        def mock_get_response(request, auth, template):
            return response, []

        with patch("github_backup.github_backup._get_response", side_effect=mock_get_response):
            # list() forces the generator to run so that any error surfaces here.
            return list(github_backup.retrieve_data_gen(self._make_args(), url))

    def test_repository_unavailable_error_raised(self):
        """HTTP 451 should raise RepositoryUnavailableError with DMCA URL."""
        dmca_data = {
            "message": "Repository access blocked",
            "block": {
                "reason": "dmca",
                "created_at": "2024-11-12T14:38:04Z",
                "html_url": self.DMCA_NOTICE_URL,
            },
        }
        response = self._make_response(
            451, json.dumps(dmca_data).encode("utf-8"), self.LEGAL_REASON
        )

        with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
            self._fetch(response, self.ISSUES_URL)

        # Check exception has DMCA URL
        assert exc_info.value.dmca_url == self.DMCA_NOTICE_URL
        assert "451" in str(exc_info.value)

    def test_repository_unavailable_error_without_dmca_url(self):
        """HTTP 451 without DMCA details should still raise exception."""
        response = self._make_response(451, b'{"message": "Blocked"}', self.LEGAL_REASON)

        with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
            self._fetch(response, self.ISSUES_URL)

        # Exception raised even without DMCA URL
        assert exc_info.value.dmca_url is None
        assert "451" in str(exc_info.value)

    def test_repository_unavailable_error_with_malformed_json(self):
        """HTTP 451 with malformed JSON should still raise exception."""
        response = self._make_response(451, b"invalid json {", self.LEGAL_REASON)

        with pytest.raises(github_backup.RepositoryUnavailableError):
            self._fetch(response, self.ISSUES_URL)

    def test_other_http_errors_unchanged(self):
        """Other HTTP errors should still raise generic Exception."""
        response = self._make_response(404, b'{"message": "Not Found"}', "Not Found")

        # Should raise generic Exception, not RepositoryUnavailableError
        with pytest.raises(Exception) as exc_info:
            self._fetch(response, self.NOT_FOUND_URL)

        assert not isinstance(exc_info.value, github_backup.RepositoryUnavailableError)
        assert "404" in str(exc_info.value)


if __name__ == "__main__":
    # Allow running this file directly as a script. pytest.main() returns the
    # session's exit code; raising SystemExit with it makes the process exit
    # non-zero when a test fails instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))