Skip to content

Commit

Permalink
airbyte-lib: Validation helper command (airbytehq#34002)
Browse files Browse the repository at this point in the history
  • Loading branch information
Joe Reuter authored and jatinyadav-cc committed Feb 26, 2024
1 parent 3d4eea1 commit db05c25
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 3 deletions.
12 changes: 11 additions & 1 deletion airbyte-lib/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,14 @@ airbyte-lib is a library that allows to run Airbyte syncs embedded into any Pyth
* Make sure [Poetry is installed](https://python-poetry.org/docs/#).
* Run `poetry install`
* For examples, check out the `examples` folder. They can be run via `poetry run python examples/<example file>`
* Unit tests and type checks can be run via `poetry run pytest`
* Unit tests and type checks can be run via `poetry run pytest`

## Validating source connectors

To validate a source connector for compliance with airbyte-lib, use the `airbyte-lib-validate-source` script:

```
airbyte-lib-validate-source --connector-dir . --sample-config secrets/config.json
```

The script will install the python package in the provided directory, and run the connector against the provided config. The config should be a valid JSON file, with the same structure as the one that would be provided to the connector in Airbyte. The script will exit with a non-zero exit code if the connector fails to run.
113 changes: 113 additions & 0 deletions airbyte-lib/airbyte_lib/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
"""Defines the `airbyte-lib-validate-source` CLI, which checks if connectors are compatible with airbyte-lib."""

import argparse
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import List

import airbyte_lib as ab
import yaml


def _parse_args():
    """Parse the command-line options of the validation CLI.

    Two string options are registered, both mandatory:

    * ``--connector-dir``: path to the connector directory
    * ``--sample-config``: path to the sample config.json file

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``, exposing
        ``connector_dir`` and ``sample_config``.
    """
    required_options = (
        ("--connector-dir", "Path to the connector directory"),
        ("--sample-config", "Path to the sample config.json file"),
    )

    cli = argparse.ArgumentParser(description="Validate a connector")
    for flag, help_text in required_options:
        cli.add_argument(flag, type=str, required=True, help=help_text)
    return cli.parse_args()


def _run_subprocess_and_raise_on_failure(args: List[str]):
    """Run a command and raise unless it exits with status 0.

    Args:
        args: The command and its arguments, passed to ``subprocess.run``
            as a list (no shell is involved).

    Raises:
        Exception: If the process finishes with a non-zero return code; the
            message contains the command and the code.
    """
    exit_code = subprocess.run(args).returncode
    if exit_code == 0:
        return
    raise Exception(f"{args} exited with code {exit_code}")


def tests(connector_name, sample_config):
    """Run a basic smoke test against a connector through airbyte-lib.

    The function creates the source via ``ab.get_connector`` using the JSON
    config at ``sample_config``, calls ``source.check()``, lists the available
    streams and then tries the streams one by one until a single record can
    be read from any of them.

    Args:
        connector_name: Name of the connector handed to ``ab.get_connector``.
        sample_config: Path to a JSON file holding the connector config.

    Raises:
        Exception: If no stream yields a record. Errors raised while creating
            the source, running the check or listing streams propagate
            unchanged.
    """
    print("Creating source and validating spec and version...")
    # Use a context manager so the config file handle is closed deterministically.
    with open(sample_config, encoding="utf-8") as config_file:
        config = json.load(config_file)
    source = ab.get_connector(connector_name, config=config)

    print("Running check...")
    source.check()

    print("Fetching streams...")
    streams = source.get_available_streams()

    # try to peek all streams - if one works, stop, if none works, throw exception
    for stream in streams:
        try:
            print(f"Trying to read from stream {stream}...")
            # A default of None turns an exhausted stream into the same
            # "No record returned" failure instead of a message-less StopIteration.
            record = next(source.read_stream(stream), None)
            # Explicit raise instead of `assert`, which is stripped under `python -O`.
            if not record:
                raise Exception("No record returned")
            break
        except Exception as e:
            # Deliberately broad: a failing stream must not abort the search,
            # the next stream is simply tried.
            print(f"Could not read from stream {stream}: {e}")
    else:
        # Only reached when the loop never hit `break`, i.e. no stream worked.
        raise Exception(f"Could not read from any stream from {streams}")


def run():
    """
    CLI entrypoint for the `airbyte-lib-validate-source` command.

    Usage: airbyte-lib-validate-source --connector-dir . --sample-config secrets/config.json

    Parses the command-line arguments and hands them to `validate`, which performs a basic
    smoke test to make sure the connector in question is airbyte-lib compliant:
    * Can be installed into a venv
    * Can be called via cli entrypoint
    * Answers according to the Airbyte protocol when called with spec, check, discover and read
    """
    cli_args = _parse_args()
    validate(cli_args.connector_dir, cli_args.sample_config)


def validate(connector_dir, sample_config):
    """Install a connector into a local venv and smoke-test it with airbyte-lib.

    Steps performed:

    1. Read ``metadata.yaml`` from ``connector_dir`` and derive the connector
       name from ``data.dockerRepository`` (with ``airbyte/`` removed).
    2. Create the venv ``.venv-<connector name>`` in the current working
       directory if it does not exist yet, and ``pip install -e`` the
       connector into it.
    3. Write a minimal registry listing only this connector to a temporary
       file, point ``AIRBYTE_LOCAL_REGISTRY`` at it and run ``tests``.

    Args:
        connector_dir: Path to the connector directory containing
            ``metadata.yaml`` and the installable python package.
        sample_config: Path to the JSON config file passed on to ``tests``.

    Raises:
        Exception: If creating the venv or installing the connector fails, or
            if ``tests`` fails.
    """
    # read metadata.yaml
    metadata_path = Path(connector_dir) / "metadata.yaml"
    with open(metadata_path, "r") as stream:
        metadata = yaml.safe_load(stream)["data"]

    # TODO: Use remoteRegistries.pypi.packageName once set for connectors
    connector_name = metadata["dockerRepository"].replace("airbyte/", "")

    # create a venv and install the connector
    venv_name = f".venv-{connector_name}"
    venv_path = Path(venv_name)
    if not venv_path.exists():
        _run_subprocess_and_raise_on_failure([sys.executable, "-m", "venv", venv_name])

    # NOTE(review): "bin" is the POSIX venv layout; Windows venvs use "Scripts".
    pip_path = os.path.join(venv_name, "bin", "pip")

    _run_subprocess_and_raise_on_failure([pip_path, "install", "-e", connector_dir])

    # write basic registry to temp json file
    registry = {
        "sources": [
            {
                "dockerRepository": f"airbyte/{connector_name}",
                "dockerImageTag": "0.0.1",
            }
        ]
    }

    registry_env_var = "AIRBYTE_LOCAL_REGISTRY"
    previous_registry = os.environ.get(registry_env_var)

    with tempfile.NamedTemporaryFile(mode="w+t", delete=True) as temp_file:
        temp_file.write(json.dumps(registry))
        # Flush so the content is on disk before the file is re-opened by name.
        temp_file.flush()
        os.environ[registry_env_var] = str(temp_file.name)
        try:
            tests(connector_name, sample_config)
        finally:
            # The temp file is deleted when the `with` block exits, so never
            # leave the environment pointing at it; restore the prior state.
            if previous_registry is None:
                os.environ.pop(registry_env_var, None)
            else:
                os.environ[registry_env_var] = previous_registry
13 changes: 12 additions & 1 deletion airbyte-lib/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion airbyte-lib/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ python = "^3.10"
jsonschema = "3.2.0"
requests = "^2.31.0"
airbyte-protocol-models = "^1.0.1"
types-pyyaml = "^6.0.12.12"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"
Expand All @@ -26,4 +27,7 @@ build-backend = "poetry.core.masonry.api"
ignore_missing_imports = true

[tool.pytest.ini_options]
addopts = "--mypy"
addopts = "--mypy"

[tool.poetry.scripts]
airbyte-lib-validate-source = "airbyte_lib.validate:run"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{ "apiKey": "wrong" }
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
data:
connectorSubtype: api
connectorType: source
definitionId: 47f17145-fe20-4ef5-a548-e29b048adf84
dockerImageTag: 0.0.0
dockerRepository: airbyte/source-test
githubIssueLabel: source-test
name: Test
releaseDate: 2023-08-25
releaseStage: alpha
supportLevel: community
documentationUrl: https://docs.airbyte.com/integrations/sources/apify-dataset
metadataSpecVersion: "1.0"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{ "apiKey": "test" }
15 changes: 15 additions & 0 deletions airbyte-lib/tests/integration_tests/test_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

import os
import shutil

import pytest
from airbyte_lib.validate import validate


def test_validate_success():
    """validate() must complete without raising for the fixture connector and its valid config."""
    fixtures = "./tests/integration_tests/fixtures"
    connector_dir = f"{fixtures}/source-test"
    config_path = f"{fixtures}/valid_config.json"
    validate(connector_dir, config_path)

def test_validate_failure():
    """validate() must raise when the fixture connector is given the invalid config."""
    fixtures = "./tests/integration_tests/fixtures"
    connector_dir = f"{fixtures}/source-test"
    config_path = f"{fixtures}/invalid_config.json"
    with pytest.raises(Exception):
        validate(connector_dir, config_path)

0 comments on commit db05c25

Please sign in to comment.