Skip to content

Commit

Permalink
Merge pull request #617 from splitgraph/feature/CU-1z4bhdb-reintrospe…
Browse files Browse the repository at this point in the history
…ction-mode

Allow reintrospecting datasets when running `sgr cloud load`
  • Loading branch information
mildbyte committed Jan 21, 2022
2 parents bfb1a23 + d8c741a commit 7277439
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 12 deletions.
5 changes: 3 additions & 2 deletions .ci/prepare_doc_bundle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ python generate_reference.py sgr "$TARGET_DIR"/sgr
echo "Generating configuration reference"
python generate_reference.py config "$TARGET_DIR"/0100_config-flag-reference.mdx

echo "Building Asciinema casts"
TARGET_DIR=$TARGET_DIR "$CI_DIR"/rebuild_asciicasts.sh
# Temporarily disabled: these take way too much time and aren't used by the website.
# echo "Building Asciinema casts"
# TARGET_DIR=$TARGET_DIR "$CI_DIR"/rebuild_asciicasts.sh

echo "Archiving the bundle $OUTPUT.tar.gz"
cd "$TARGET_DIR"/..
Expand Down
31 changes: 28 additions & 3 deletions splitgraph/cloud/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@
from splitgraph.cloud.models import (
AddExternalCredentialRequest,
AddExternalRepositoriesRequest,
AddExternalRepositoriesResponse,
AddExternalRepositoryRequest,
ExportJobStatus,
ExternalResponse,
IngestionJobStatus,
IntrospectionMode,
ListExternalCredentialsResponse,
MetadataResponse,
Plugin,
Expand Down Expand Up @@ -74,6 +76,7 @@
from splitgraph.config.management import patch_and_save_config
from splitgraph.exceptions import (
AuthAPIError,
DataSourceError,
GQLAPIError,
GQLRepoDoesntExistError,
GQLUnauthenticatedError,
Expand Down Expand Up @@ -530,15 +533,37 @@ def ensure_external_credential(
raise JSONSchemaValidationError(message="[MASKED]")
raise

def bulk_upsert_external(self, repositories: List[AddExternalRepositoryRequest]):
request = AddExternalRepositoriesRequest(repositories=repositories)
self._perform_request(
def bulk_upsert_external(
    self,
    repositories: List[AddExternalRepositoryRequest],
    introspection_mode: IntrospectionMode = IntrospectionMode.EMPTY,
    raise_errors: bool = False,
) -> None:
    """
    Send a batch of external repositories to the `/bulk-add` endpoint.

    Every per-table error reported in the response is logged as a warning.

    :param repositories: External repository definitions to add.
    :param introspection_mode: Forwarded in the request body; defaults to
        `IntrospectionMode.EMPTY`.
    :param raise_errors: If True, raise once all reported errors have been
        logged; if False, the errors are only logged.
    :raises DataSourceError: If the response carries errors and
        `raise_errors` is set.
    """
    payload = AddExternalRepositoriesRequest(
        repositories=repositories, introspection_mode=introspection_mode
    )
    result = self._perform_request(
        "/bulk-add",
        self.access_token,
        payload,
        endpoint=self.externals_endpoint,
        jsonschema_endpoint=True,
        response_class=AddExternalRepositoriesResponse,
    )
    assert result

    # Nothing was reported (missing or empty error list): we're done.
    if not result.errors:
        return

    for failed_repository in result.errors:
        for table_error in failed_repository.errors:
            logging.warning(
                "Error adding table %s/%s/%s: %s (%s)",
                failed_repository.namespace,
                failed_repository.repository,
                table_error.table_name,
                table_error.error,
                table_error.error_text,
            )

    # Raise only after every error has been logged, so the caller sees them all.
    if raise_errors:
        raise DataSourceError("Error introspecting some tables!")


def AuthAPIClient(*args, **kwargs):
Expand Down
24 changes: 23 additions & 1 deletion splitgraph/cloud/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Definitions for responses from the cloud GQL/REST APIs
"""
import enum
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional
Expand All @@ -15,7 +16,7 @@
Source,
Table,
)
from splitgraph.core.types import Params, TableSchema
from splitgraph.core.types import MountError, Params, TableSchema


class Plugin(BaseModel):
Expand All @@ -30,6 +31,16 @@ class Plugin(BaseModel):
supports_sync: bool


class IntrospectionMode(str, enum.Enum):
    """
    Selects which tables get (re)introspected when an external is added.

    Members, in declaration order:

      * NONE ("none"): don't reintrospect any tables
      * EMPTY ("empty"): introspect tables with an empty schema
      * ALL ("all"): reintrospect all tables

    Members are also `str` instances, so they compare equal to their values.
    """

    NONE = "none"
    EMPTY = "empty"
    ALL = "all"


# GQL response for the catalog metadata


Expand Down Expand Up @@ -289,3 +300,14 @@ def from_external(

# Request model carrying a batch of external repositories plus the
# introspection mode to apply to them.
class AddExternalRepositoriesRequest(BaseModel):
# The external repositories to add, one request object per repository.
repositories: List[AddExternalRepositoryRequest]
# Which tables to (re)introspect; defaults to IntrospectionMode.EMPTY when
# the caller does not specify a mode.
introspection_mode: IntrospectionMode = IntrospectionMode.EMPTY


# Response model for a bulk add of external repositories.
class AddExternalRepositoriesResponse(BaseModel):
# Nested model: the errors reported for a single repository, identified
# by its namespace and repository name (the next three fields belong to it).
class RepositoryMountError(BaseModel):
namespace: str
repository: str
# Individual MountError entries for this repository.
errors: List[MountError]

# Fields of AddExternalRepositoriesResponse itself start here.
# NOTE(review): presumably one hash per submitted repository, with None
# where there is no live image -- confirm against the API.
live_image_hashes: List[Optional[str]]
# Per-repository error groups; None when the field is absent from the response.
errors: Optional[List[RepositoryMountError]] = None
29 changes: 26 additions & 3 deletions splitgraph/commandline/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import click
from click import wrap_text

from splitgraph.cloud.models import AddExternalRepositoryRequest
from splitgraph.cloud.models import AddExternalRepositoryRequest, IntrospectionMode
from splitgraph.cloud.project.models import Metadata, SplitgraphYAML
from splitgraph.commandline.common import (
ImageType,
Expand Down Expand Up @@ -613,9 +613,28 @@ def dump_c(remote, readme_dir, repositories_file, limit_repositories):
is_flag=True,
help="Only set up the metadata, not the external data source settings",
)
@click.option(
"--introspection-mode",
type=click.Choice(IntrospectionMode),
default=IntrospectionMode.EMPTY,
help="Whether to reintrospect tables. none: never reintrospect. all: reintrospect all tables. "
"empty: only reintrospect tables with an empty schema.",
)
@click.option(
"--ignore-introspection-errors",
is_flag=True,
help="If set, will ignore errors when introspecting tables.",
)
@click.argument("limit_repositories", type=str, nargs=-1)
def load_c(
remote, readme_dir, skip_external, initial_private, repositories_file, limit_repositories
remote,
readme_dir,
skip_external,
initial_private,
repositories_file,
limit_repositories,
introspection_mode,
ignore_introspection_errors,
):
"""
Load a Splitgraph catalog from a YAML file.
Expand Down Expand Up @@ -666,7 +685,11 @@ def load_c(
initial_private=initial_private,
)
external_repositories.append(external_repository)
rest_client.bulk_upsert_external(repositories=external_repositories)
rest_client.bulk_upsert_external(
repositories=external_repositories,
introspection_mode=introspection_mode,
raise_errors=not ignore_introspection_errors,
)
logging.info(f"Uploaded images for {pluralise('repository', len(external_repositories))}")

logging.info("Updating metadata...")
Expand Down
24 changes: 22 additions & 2 deletions test/splitgraph/commandline/http_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,11 +399,12 @@ def add_external_credential(request, uri, response_headers):
]


def add_external_repo(initial_private=False):
def add_external_repo(initial_private=False, error=False):
def cb(request, uri, response_headers):
data = json.loads(request.body)

assert data["repositories"] is not None
assert data["introspection_mode"] == "empty"
assert data["repositories"] == [
{
"credential_id": "98765432-aaaa-bbbb-a456-000000000000",
Expand Down Expand Up @@ -450,7 +451,26 @@ def cb(request, uri, response_headers):
return [
200,
response_headers,
json.dumps({"live_image_hashes": ["abcdef12" * 8, "ghijkl34" * 8, "mnoprs56" * 8]}),
json.dumps(
{
"live_image_hashes": ["abcdef12" * 8, "ghijkl34" * 8, "mnoprs56" * 8],
"errors": [
{
"namespace": "otheruser",
"repository": "somerepo_2",
"errors": [
{
"table_name": "table_1",
"error": "SomeError",
"error_text": "Something bad happened",
}
],
}
]
if not error
else [],
}
),
]

return cb
Expand Down
10 changes: 9 additions & 1 deletion test/splitgraph/commandline/test_cloud_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,9 @@ def test_commandline_dump(snapshot):


@pytest.mark.parametrize("initial_private", [True, False])
@pytest.mark.parametrize("errors", [True, False])
@httpretty.activate(allow_net_connect=False)
def test_commandline_load(initial_private):
def test_commandline_load(initial_private, errors):
runner = CliRunner()

httpretty.register_uri(
Expand Down Expand Up @@ -326,6 +327,7 @@ def get_remote_param(remote, param):
os.path.join(RESOURCES, "splitgraph_yml", "readmes"),
"-f",
os.path.join(RESOURCES, "splitgraph_yml", "splitgraph.yml"),
"--ignore-introspection-errors",
],
catch_exceptions=False,
)
Expand All @@ -339,6 +341,12 @@ def get_remote_param(remote, param):
reqs.pop() # discard duplicate request
assert_repository_profiles(reqs.pop())

if errors:
assert (
"Error adding table otheruser/somerepo_2/table_1: "
"SomeError (Something bad happened)" in result.output
)


def test_project_validate(snapshot):
# Use the same file as the merging test
Expand Down

0 comments on commit 7277439

Please sign in to comment.