Skip to content

Commit

Permalink
🐛 Source GitHub: Continue Sync on Stream failure (airbytehq#34700)
Browse files Browse the repository at this point in the history
  • Loading branch information
artem1205 authored and jatinyadav-cc committed Feb 26, 2024
1 parent 2922bd3 commit 20906df
Show file tree
Hide file tree
Showing 12 changed files with 202 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerImageTag: 1.5.7
dockerImageTag: 1.6.0
dockerRepository: airbyte/source-github
documentationUrl: https://docs.airbyte.com/integrations/sources/github
githubIssueLabel: source-github
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,19 @@
"permissions": {
"type": ["null", "object"],
"properties": {
"pull": {
"admin": {
"type": ["null", "boolean"]
},
"maintain": {
"type": ["null", "boolean"]
},
"push": {
"type": ["null", "boolean"]
},
"admin": {
"pull": {
"type": ["null", "boolean"]
},
"triage": {
"type": ["null", "boolean"]
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,99 @@
"issue": {
"type": ["null", "object"],
"properties": {
"active_lock_reason": {
"type": ["null", "string"]
},
"assignee": {
"$ref": "user.json"
},
"assignees": {
"type": ["null", "array"],
"items": {
"$ref": "user.json"
}
},
"author_association": {
"type": ["null", "string"]
},
"closed_at": {
"type": ["null", "string"],
"format": "date-time"
},
"updated_at": {
"type": ["null", "string"],
"format": "date-time"
},
"comments": {
"type": ["null", "integer"]
},
"draft": {
"type": ["null", "boolean"]
},
"created_at": {
"type": ["null", "string"],
"format": "date-time"
},
"labels": {
"type": ["null", "array"],
"items": {
"type": ["null", "object"],
"properties": {
"id": {
"type": ["null", "integer"]
},
"node_id": {
"type": ["null", "string"]
},
"url": {
"type": ["null", "string"]
},
"name": {
"type": ["null", "string"]
},
"description": {
"type": ["null", "string"]
},
"color": {
"type": ["null", "string"]
},
"default": {
"type": ["null", "boolean"]
}
}
}
},
"locked": {
"type": ["null", "boolean"]
},
"milestone": {
"type": ["null", "object"]
},
"performed_via_github_app": {
"type": ["null", "object"]
},
"state_reason": {
"type": ["null", "string"]
},
"pull_request": {
"type": ["null", "object"],
"properties": {
"merged_at": {
"type": ["string", "null"],
"format": "date-time"
},
"diff_url": { "type": ["string", "null"] },
"html_url": { "type": ["string", "null"] },
"patch_url": { "type": ["string", "null"] },
"url": { "type": ["string", "null"] }
}
},
"timeline_url": {
"type": ["null", "string"]
},
"reactions": {
"$ref": "reactions.json"
},
"id": {
"type": ["null", "integer"]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,7 @@
"$ref": "events/reviewed.json"
},
"commented": {
"$ref": "events/commented.json"
"$ref": "events/comment.json"
},
"commit_commented": {
"$ref": "events/commented.json"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@
},
"repo_id": {
"type": ["null", "integer"]
},
"user": {
"$ref": "user.json"
}
}
},
Expand All @@ -264,6 +267,12 @@
},
"repo_id": {
"type": ["null", "integer"]
},
"repo": {
"type": ["null", "object"]
},
"user": {
"$ref": "user.json"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,22 @@
"type": ["null", "string"]
}
}
},
"secret_scanning_validity_checks": {
"type": ["null", "object"],
"properties": {
"status": {
"type": ["null", "string"]
}
}
},
"dependabot_security_updates": {
"type": ["null", "object"],
"properties": {
"status": {
"type": ["null", "string"]
}
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
"description": "Issues are a great way to keep track of tasks, enhancements, and bugs for your projects.",
"type": "object",
"properties": {
"author_association": { "type": ["string", "null"] },
"performed_via_github_app": { "type": ["object", "null"] },
"id": { "type": "integer" },
"node_id": { "type": "string" },
"url": { "type": "string" },
Expand Down Expand Up @@ -247,6 +249,7 @@
"locked": { "type": "boolean" },
"active_lock_reason": { "type": ["string", "null"] },
"comments": { "type": "integer" },
"draft": { "type": ["boolean", "null"] },
"pull_request": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -775,6 +778,41 @@
}
}
}
},
"reactions": {
"type": "object",
"properties": {
"url": {
"type": "string"
},
"total_count": {
"type": "integer"
},
"+1": {
"type": "integer"
},
"-1": {
"type": "integer"
},
"laugh": {
"type": "integer"
},
"confused": {
"type": "integer"
},
"heart": {
"type": "integer"
},
"hooray": {
"type": "integer"
},
"eyes": {
"type": "integer"
},
"rocket": {
"type": "integer"
}
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@


class SourceGithub(AbstractSource):

continue_sync_on_stream_failure = True

@staticmethod
def _get_org_repositories(config: Mapping[str, Any], authenticator: MultipleTokenAuthenticator) -> Tuple[List[str], List[str]]:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
from airbyte_cdk.sources.streams.http import HttpStream
from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException
from airbyte_cdk.utils import AirbyteTracedException
from requests.exceptions import HTTPError

from . import constants
Expand Down Expand Up @@ -214,10 +215,9 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter
raise e

self.logger.warning(error_msg)
except GitHubAPILimitException:
self.logger.warning(
f"Stream: `{self.name}`, slice: `{stream_slice}`. Limits for all provided tokens are reached, please try again later"
)
except GitHubAPILimitException as e:
message = f"Stream: `{self.name}`, slice: `{stream_slice}`. Limits for all provided tokens are reached, please try again later"
raise AirbyteTracedException(message) from e


class GithubStream(GithubStreamABC):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
from unittest.mock import patch

import pendulum
import pytest
import responses
from airbyte_cdk.utils import AirbyteTracedException
from freezegun import freeze_time
from source_github import SourceGithub
from source_github.streams import Organizations
Expand Down Expand Up @@ -39,7 +41,7 @@ def test_authenticator_counter(rate_limit_mock_response):


@responses.activate
def test_multiple_token_authenticator_with_rate_limiter(caplog):
def test_multiple_token_authenticator_with_rate_limiter():
"""
This test ensures that:
1. The rate limiter iterates over all tokens one-by-one after the previous is fully drained.
Expand Down Expand Up @@ -91,10 +93,11 @@ def request_callback_orgs(request):
callback=request_callback_orgs,
content_type="application/json",
)

list(read_full_refresh(stream))
with pytest.raises(AirbyteTracedException) as e:
list(read_full_refresh(stream))
assert [(x.count_rest, x.count_graphql) for x in authenticator._tokens.values()] == [(0, 500), (0, 500), (0, 500)]
assert "Stream: `organizations`, slice: `{'organization': 'org1'}`. Limits for all provided tokens are reached, please try again later" in caplog.messages
message = "Stream: `organizations`, slice: `{'organization': 'org1'}`. Limits for all provided tokens are reached, please try again later"
assert e.value.internal_message == message


@freeze_time("2021-01-01 12:00:00")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ def check_source(repo_line: str) -> AirbyteConnectionStatus:
return source.check(logger_mock, config)


def test_source_will_continue_sync_on_stream_failure():
    """Check that a `SourceGithub` instance exposes a truthy `continue_sync_on_stream_failure`."""
    github_source = SourceGithub()
    assert github_source.continue_sync_on_stream_failure


@responses.activate
@pytest.mark.parametrize(
"config, expected",
Expand Down
19 changes: 17 additions & 2 deletions docs/integrations/sources/github.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,25 @@ Expand to see details about GitHub connector limitations and troubleshooting.
### Connector limitations

#### Rate limiting
The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api).

You can use a personal access token to make API requests. Additionally, you can authorize a GitHub App or OAuth app, which can then make API requests on your behalf.
All of these requests count towards your personal rate limit of 5,000 requests per hour (15,000 requests per hour if the app is owned by a GitHub Enterprise Cloud organization).

:::info
`REST API` and `GraphQL API` rate limits are counted separately.
:::

:::tip
In the event that limits are reached before all streams have been read, it is recommended to take the following actions:
1. Utilize Incremental sync mode.
2. Set a higher sync interval.
3. Divide the sync into separate connections with a smaller number of streams.
:::

For more details, refer to the GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api).

#### Permissions and scopes

If you use OAuth authentication method, the OAuth2.0 application requests the next list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For [personal access token](https://github.com/settings/tokens) you need to manually select needed scopes.
If you use the OAuth authentication method, the OAuth 2.0 application requests the following [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **read:project**, **workflow**. For a [personal access token](https://github.com/settings/tokens), you need to manually select the required scopes.

Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions:

Expand All @@ -193,6 +207,7 @@ Your token should have at least the `repo` scope. Depending on which streams you

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 1.6.0 | 2024-02-02 | [34700](https://github.com/airbytehq/airbyte/pull/34700) | Continue Sync on Stream failure |
| 1.5.7 | 2024-01-29 | [34598](https://github.com/airbytehq/airbyte/pull/34598) | Fix MultipleToken sleep time |
| 1.5.6 | 2024-01-26 | [34503](https://github.com/airbytehq/airbyte/pull/34503) | Fix MultipleToken rotation logic |
| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams |
Expand Down

0 comments on commit 20906df

Please sign in to comment.