# _data/meltano/extractors/tap-s3-csv/s7clarke10.yml

# Plugin definition for the `tap-s3-csv` extractor, variant `s7clarke10`.
# Top-level keys are kept in alphabetical order.
capabilities:
- discover
- properties
- state
description: Extract CSV files from S3
domain_url: https://aws.amazon.com/s3/
keywords:
- api
label: AWS S3 CSV
logo_url: /assets/logos/extractors/s3-csv.png
maintenance_status: active
name: tap-s3-csv
namespace: tap_s3_csv
# Installed straight from the Git repository listed under `repo` below.
pip_url: git+https://github.com/s7clarke10/pipelinewise-tap-s3-csv.git
quality: silver
repo: https://github.com/s7clarke10/pipelinewise-tap-s3-csv
# One list entry per configuration option; entries are sorted by `name`.
settings:
# Access key ID credential, declared as a password-kind, sensitive setting.
- description: >-
    S3 Access Key Id. If not provided, aws_profile or AWS_ACCESS_KEY_ID
    environment variable will be used.
  kind: password
  label: AWS S3 Access Key ID
  name: aws_access_key_id
  sensitive: true
# The endpoint URL is connection configuration rather than a credential, so it
# is a plain string setting and is not flagged as sensitive.
- description: The AWS endpoint URL.
  kind: string
  label: AWS Endpoint URL
  name: aws_endpoint_url
# Profile name for profile-based authentication. `kind` is spelled out so the
# entry matches the other string settings in this file (e.g. table_suffix).
- description: Optional - AWS profile name for profile based authentication. If not
    provided, AWS_PROFILE environment variable will be used.
  kind: string
  label: AWS profile name
  name: aws_profile
# Secret access key credential. Its environment-variable fallback is
# AWS_SECRET_ACCESS_KEY, not the access key ID variable.
- description: S3 Secret Access Key. If not provided, aws_profile or AWS_SECRET_ACCESS_KEY
    environment variable will be used.
  kind: password
  label: AWS S3 Secret Access Key
  name: aws_secret_access_key
  sensitive: true
# Temporary-credential (STS) token, declared as a password-kind, sensitive
# setting.
- description: >-
    Optional - S3 AWS STS token for temporary credentials. If not provided,
    AWS_SESSION_TOKEN environment variable will be used.
  kind: password
  label: AWS S3 Session Token
  name: aws_session_token
  sensitive: true
# Name of the bucket to read from; listed in settings_group_validation.
# `kind` is spelled out to match the other string settings in this file.
- description: AWS S3 bucket name
  kind: string
  label: Bucket
  name: bucket
# Object-valued proxy configuration for S3 traffic.
- description: >-
    Optional - A dict of proxies settings for use of a proxy server. Set to {}
    to avoid using a proxy server for s3 traffic.
  kind: object
  label: S3 Proxies
  name: s3_proxies
# Boolean switch controlling how empty values are emitted.
- description: >-
    Optional - When set true will emit `null` (the JSON equivalent of None)
    instead of an empty string.
  kind: boolean
  label: Set Empty Values Null
  name: set_empty_values_null
# ISO 8601 date bounding how far back extraction goes; listed in
# settings_group_validation.
- description: >-
    Determines how much historical data will be extracted. Please be aware
    that the larger the time period and amount of data, the longer the initial
    extraction can be expected to take.
  kind: date_iso8601
  label: Start Date
  name: start_date
# Optional string appended to each table name.
- description: >-
    Optional - If set will append a suffix on each of the tables to provide
    some uniqueness e.g. a date or supplier identifier.
  kind: string
  label: Table Suffix
  name: table_suffix
# Table specifications; listed in settings_group_validation.
# The description is Markdown in a folded block scalar: wrapped lines fold into
# a single space and each blank line becomes one newline, so every bullet
# starts on its own line. Block scalars do no escape processing, so `\t` and
# `"\""` below are the literal characters shown.
- description: >-
    An array that consists of one or more objects that describe how to find
    files and emit records. Required - `table_name` and `search_pattern`.
    Optional - `key_properties`, `search_prefix`, `date_overrides`,
    `delimiter`, `remove_character`, `string_overrides`, `guess_types`,
    `encoding`.

    - `search_prefix` - This is a prefix to apply after the bucket, but before
    the file search pattern, to allow you to find files in "directories" below
    the bucket.

    - `search_pattern` - This is an escaped regular expression that the tap
    will use to find files in the bucket + prefix. It's a bit strange, since
    this is an escaped string inside of an escaped string, any backslashes in
    the RegEx will need to be double-escaped.

    - `table_name` - This value is a string of your choosing, and will be used
    to name the stream that records are emitted under for files matching
    content.

    - `key_properties` - These are the "primary keys" of the CSV files, to be
    used by the target for deduplication and primary key definitions
    downstream in the destination.

    - `date_overrides` - Specifies field names in the files that are supposed
    to be parsed as a datetime. The tap doesn't attempt to automatically
    determine if a field is a datetime, so this will make it explicit in the
    discovered schema.

    - `delimiter` - This allows you to specify a custom delimiter, such as
    `\t` or `|`, if that applies to your files.

    - `string_overrides` - Specifies field names in the files that should be
    parsed as a string regardless of what was discovered.

    - `guess_types` - (default `True`) By default, column data types will be
    determined via scanning the first file in a table_spec. Set this to
    `False` to disable this and set all columns to `string`.

    - `remove_character` - Specifies a character which can be removed from
    each line in the file e.g. `"\""` will remove all double-quotes.

    - `encoding` - The encoding to use to read these files from
    [codecs -> Standard Encodings](https://docs.python.org/3/library/codecs.html#standard-encodings)
  kind: array
  label: Tables
  name: tables
# Boolean switch for how a search that matches no files is reported.
- description: >-
    Optional - Will attempt to log a warning rather than error if there are no
    files found for the search criteria if the setting is set to `true`.
  kind: boolean
  label: Warning if no file
  name: warning_if_no_file
# A single group naming three settings defined above.
# NOTE(review): presumably the set of settings required for a valid
# configuration; confirm against the Meltano plugin definition docs.
settings_group_validation:
- [bucket, start_date, tables]
variant: s7clarke10