From 5b11aec784da14efb8d50ec1fed3285531d1e481 Mon Sep 17 00:00:00 2001 From: andreyaksenov Date: Tue, 26 Sep 2023 17:53:59 +0300 Subject: [PATCH] Document tt export/import --- conf.py | 3 +- doc/reference/tooling/tt_cli/commands.rst | 9 + doc/reference/tooling/tt_cli/crud.rst | 19 ++ doc/reference/tooling/tt_cli/export.rst | 148 +++++++++ doc/reference/tooling/tt_cli/import.rst | 283 ++++++++++++++++++ doc/reference/tooling/tt_cli/index.rst | 6 + doc/reference/tooling/tt_cli/installation.rst | 5 + 7 files changed, 472 insertions(+), 1 deletion(-) create mode 100644 doc/reference/tooling/tt_cli/crud.rst create mode 100644 doc/reference/tooling/tt_cli/export.rst create mode 100644 doc/reference/tooling/tt_cli/import.rst diff --git a/conf.py b/conf.py index 93a2134f56..88b6c74521 100644 --- a/conf.py +++ b/conf.py @@ -41,6 +41,7 @@ extlinks = { 'tarantool-issue': ('https://github.com/tarantool/tarantool/issues/%s', 'gh-'), 'tarantool-release': ('https://github.com/tarantool/tarantool/releases/%s', 'v. '), + 'tt-release': ('https://github.com/tarantool/tt/releases/v%s', 'v. '), 'doc-issue': ('https://github.com/tarantool/doc/issues/%s', 'doc-'), 'tarantool-sec-issue': ('https://github.com/tarantool/security/issues/%s', 'ghs-'), } @@ -59,7 +60,7 @@ project = u'Tarantool' # |release| The full version, including alpha/beta/rc tags. -release = "2.11.0" +release = "2.11.1" # |version| The short X.Y version. version = '.'.join(release.split('.')[0:2]) diff --git a/doc/reference/tooling/tt_cli/commands.rst b/doc/reference/tooling/tt_cli/commands.rst index 1910d867a6..5c68092f1b 100644 --- a/doc/reference/tooling/tt_cli/commands.rst +++ b/doc/reference/tooling/tt_cli/commands.rst @@ -34,8 +34,14 @@ help for the given command. 
- Manipulate Tarantool core dumps * - :doc:`create ` - Create an application from a template + * - :doc:`crud ` + - Interact with the CRUD module (`Enterprise only `_) + * - :doc:`export ` + - Export data to a file (`Enterprise only `_) * - :doc:`help ` - Display help for ``tt`` or a specific command + * - :doc:`import ` + - Import data from a file (`Enterprise only `_) * - :doc:`init ` - Create a new ``tt`` environment in the current directory * - :doc:`install ` @@ -81,7 +87,10 @@ help for the given command. connect coredump create + crud + export help + import init install instances diff --git a/doc/reference/tooling/tt_cli/crud.rst b/doc/reference/tooling/tt_cli/crud.rst new file mode 100644 index 0000000000..1c79c97565 --- /dev/null +++ b/doc/reference/tooling/tt_cli/crud.rst @@ -0,0 +1,19 @@ +.. _tt-crud: + +Interacting with the CRUD module +================================ + +.. admonition:: Enterprise Edition + :class: fact + + This command is supported by the `Enterprise Edition `_ only. + +.. code-block:: console + + $ tt crud COMMAND [COMMAND_OPTION ...] + +``tt crud`` enables the interaction with a cluster using the `CRUD `_ module. +``COMMAND`` is one of the following: + +* ``export``: export a cluster's data to a file. Learn more at :ref:`Exporting data `. +* ``import``: import data from a file. Learn more at :ref:`Importing data `. diff --git a/doc/reference/tooling/tt_cli/export.rst b/doc/reference/tooling/tt_cli/export.rst new file mode 100644 index 0000000000..a4fc09a5ba --- /dev/null +++ b/doc/reference/tooling/tt_cli/export.rst @@ -0,0 +1,148 @@ +.. _tt-export: + +Exporting data +============== + +.. admonition:: Enterprise Edition + :class: fact + + This command is supported by the `Enterprise Edition `_ only. + + +.. code-block:: console + + $ tt [crud] export URI FILE SPACE [EXPORT_OPTION ...] + +``tt [crud] export`` exports a space's data to a file. 
+The ``crud`` command is optional and can be used to export a cluster's data by using the `CRUD `_ module. Without ``crud``, data is exported using the :ref:`box.space ` API. + +``tt [crud] export`` takes the following arguments: + +* ``URI``: The URI of a router instance if ``crud`` is used. Otherwise, it should specify the URI of a storage. +* ``FILE``: The name of a file for storing exported data. +* ``SPACE``: The name of a space from which data is exported. + +.. NOTE:: + + :ref:`Read access ` to the space is required to export its data. + +.. _tt-export-limitations: + +Limitations +----------- + +Exporting isn't supported for the :ref:`interval ` field type. + + +.. _tt-export-default: + +Exporting with default settings +------------------------------- + +The command below exports data of the ``customers`` space to the ``customers.csv`` file: + +.. code-block:: console + + $ tt crud export localhost:3301 customers.csv customers + +If the ``customers`` space has five fields (``id``, ``bucket_id``, ``firstname``, ``lastname``, and ``age``), the file with exported data might look like this: + +.. code-block:: text + + 1,477,Andrew,Fuller,38 + 2,401,Michael,Suyama,46 + 3,2804,Robert,King,33 + # ... + +If a tuple contains a ``null`` value, for example, ``[1, 477, 'Andrew', null, 38]``, it is exported as an empty value: + +.. code-block:: text + + 1,477,Andrew,,38 + + +.. _tt-export-header: + +Exporting headers +----------------- + +To export data with a space's field names in the first row, use the ``--header`` option: + +.. code-block:: console + + $ tt crud export localhost:3301 customers.csv customers \ + --header + +In this case, field values start from the second row, for example: + +.. code-block:: text + + id,bucket_id,firstname,lastname,age + 1,477,Andrew,Fuller,38 + 2,401,Michael,Suyama,46 + 3,2804,Robert,King,33 + # ... + + +.. 
_tt-export-compound-data: + +Exporting compound data +----------------------- + +By default, ``tt`` exports empty values for fields containing compound data such as arrays or maps. +To export compound values in a specific format, use the ``--compound-value-format`` option. +For example, the command below exports compound values serialized in JSON: + +.. code-block:: console + + $ tt crud export localhost:3301 customers.csv customers \ + --compound-value-format json + + +.. _tt-export-options: + +Options +------- + +.. option:: --batch-queue-size INT + + The maximum number of tuple batches in the queue between the fetch and write threads (the default is ``32``). + + ``tt`` exports data using two threads: + + * A *fetch* thread makes requests and receives data from a Tarantool instance. + * A *write* thread encodes received data and writes it to the output. + + The fetch thread uses a queue to pass received tuple batches to the write thread. + If the queue is full, the fetch thread waits until the write thread takes a batch from the queue. + +.. option:: --batch-size INT + + The number of tuples to transfer per request (the default is ``10000``). + +.. option:: --compound-value-format STRING + + A format used to export compound values like arrays or maps. + By default, ``tt`` exports empty values for fields containing such values. + + Supported formats: ``json``. + + See also: :ref:`Exporting compound data `. + +.. option:: --header + + Add field names in the first row. + + See also: :ref:`Exporting headers `. + +.. option:: --password STRING + + A password used to connect to the instance. + +.. option:: --readview + + Export data using a `read view `_. + +.. option:: --username STRING + + A username for connecting to the instance. diff --git a/doc/reference/tooling/tt_cli/import.rst b/doc/reference/tooling/tt_cli/import.rst new file mode 100644 index 0000000000..5dbc9bc858 --- /dev/null +++ b/doc/reference/tooling/tt_cli/import.rst @@ -0,0 +1,283 @@ +.. 
_tt-import: + +Importing data +============== + +.. admonition:: Enterprise Edition + :class: fact + + This command is supported by the `Enterprise Edition `_ only. + + +.. code-block:: console + + $ tt [crud] import URI FILE SPACE [IMPORT_OPTION ...] + # or + $ tt [crud] import URI - SPACE < FILE [IMPORT_OPTION ...] + +``tt [crud] import`` imports data from a file to a space. +The ``crud`` command is optional and can be used to import data to a cluster by using the `CRUD `_ module. Without ``crud``, data is imported using the :ref:`box.space ` API. + +This command takes the following arguments: + +* ``URI``: The URI of a router instance if ``crud`` is used. Otherwise, it should specify the URI of a storage. +* ``FILE``: The name of a file containing data to be imported. +* ``SPACE``: The name of a space to which data is imported. + +.. NOTE:: + + :ref:`Write access ` to the space and ``execute`` access to ``universe`` are required to import data. + + +.. _tt-import-limitations: + +Limitations +----------- + +Importing isn't supported for the :ref:`interval ` field type. + + +.. _tt-import-match-fields: + +Matching of input and space fields +---------------------------------- + + +.. _tt-import-match-fields-auto: + +Automatic matching +~~~~~~~~~~~~~~~~~~ + +Suppose that you have the ``customers.csv`` file with a header containing field names in the first row: + +.. code-block:: text + + id,firstname,lastname,age + 1,Andrew,Fuller,38 + 2,Michael,Suyama,46 + 3,Robert,King,33 + # ... + +If the target ``customers`` space has fields with the same names, you can import data using the ``--header`` and ``--match`` options specified as follows: + +.. code-block:: console + + $ tt crud import localhost:3301 customers.csv customers \ + --header \ + --match=header + +In this case, fields in the input file and the target space are matched automatically. +You can also match fields :ref:`manually ` if field names in the input file and the target space differ. 
+Note that if you're importing data into a cluster, you don't need to specify the ``bucket_id`` field. +The CRUD module generates ``bucket_id`` values automatically. + +.. _tt-import-match-fields-manual: + +Manual matching +~~~~~~~~~~~~~~~ + +The ``--match`` option enables importing data by matching field names in the input file and the target space manually. +Suppose that you have the following ``customers.csv`` file with four fields: + +.. code-block:: text + + customer_id,name,surname,customer_age + 1,Andrew,Fuller,38 + 2,Michael,Suyama,46 + 3,Robert,King,33 + # ... + +If the target ``customers`` space has the ``id``, ``firstname``, ``lastname``, and ``age`` fields, +you can configure mapping as follows: + +.. code-block:: console + + $ tt crud import localhost:3301 customers.csv customers \ + --header \ + --match "id=customer_id;firstname=name;lastname=surname;age=customer_age" + +Similarly, you can configure mapping using numeric field positions in the input file: + +.. code-block:: console + + $ tt crud import localhost:3301 customers.csv customers \ + --header \ + --match "id=1;firstname=2;lastname=3;age=4" + +Below are the rules if some fields are missing in input data or space: + +* If a space has fields that are not specified in input data, ``tt [crud] import`` tries to insert ``null`` values. +* If input data contains fields missing in a target space, these fields are ignored. + +.. _tt-import-duplicate-error: + +Handling duplicate primary key errors +------------------------------------- + +The ``--on-exist`` option enables you to control data import when a duplicate primary key error occurs. +In the example below, values already existing in the space are replaced with new ones: + +.. code-block:: console + + $ tt crud import localhost:3301 customers.csv customers \ + --on-exist replace + +.. 
_tt-import-parsing-error: + +Handling parsing errors +----------------------- + +To skip rows whose data cannot be parsed correctly, use the ``--on-error`` option as follows: + +.. code-block:: console + + $ tt crud import localhost:3301 customers.csv customers \ + --on-error skip + + +.. _tt-import-options: + +Options +------- + +.. option:: --dec-sep STRING + + The string of symbols that defines decimal separators for numeric data (the default is ``.,``). + + .. NOTE:: + + Symbols specified in this option cannot intersect with ``--th-sep``. + +.. option:: --delimiter STRING + + A symbol that defines a field value delimiter. + For CSV, the default delimiter is a comma (``,``). + To use a tab character as a delimiter, set this value as ``tab``: + + .. code-block:: console + + $ tt crud import localhost:3301 customers.csv customers \ + --delimiter tab + + .. NOTE:: + + A delimiter cannot be ``\r``, ``\n``, or the Unicode replacement character (``U+FFFD``). + +.. option:: --error STRING + + The name of a file containing rows that are not imported (the default is ``error``). + + See also: :ref:`Handling parsing errors `. + +.. option:: --format STRING + + A format of input data. + + Supported formats: ``csv``. + +.. option:: --header + + Process the first line as a header containing field names. + In this case, field values start from the second line. + + See also: :ref:`Matching of input and space fields `. + +.. option:: --log STRING + + The name of a log file containing information about import errors (the default is ``import``). + If the log file already exists, new data is written to this file. + +.. option:: --match STRING + + Configure matching between field names in the input file and the target space. + + See also: :ref:`Matching of input and space fields `. + +.. option:: --null STRING + + A value to be interpreted as ``null`` when importing data. + By default, an empty value is interpreted as ``null``. 
+ For example, a tuple imported from the following row ... + + .. code-block:: text + + 1,477,Andrew,,38 + + ... should look as follows: ``[1, 477, 'Andrew', null, 38]``. + +.. option:: --on-error STRING + + An action performed if a row to be imported cannot be parsed correctly. + Possible values: + + * ``stop``: stop importing data. + * ``skip``: skip rows whose data cannot be parsed correctly. + + Duplicate primary key errors are handled using the ``--on-exist`` option. + + See also: :ref:`Handling parsing errors `. + +.. option:: --on-exist STRING + + An action performed if a duplicate primary key error occurs. + Possible values: + + * ``stop``: stop importing data. + * ``skip``: skip existing values when importing. + * ``replace``: replace existing values when importing. + + Other errors are handled using the ``--on-error`` option. + + See also: :ref:`Handling duplicate primary key errors `. + +.. option:: --password STRING + + A password used to connect to the instance. + +.. option:: --progress STRING + + The name of a progress file that stores the following information: + + * The positions of lines that were not imported at the last launch. + * The last position that was processed at the last launch. + + If a file with the specified name exists, it is taken into account when importing data. + ``tt import`` tries to insert lines that were not imported and then continues importing from the last position. + + At each launch, the content of a progress file with the specified name is overwritten. + If the file with the specified name does not exist, a progress file is created with the results of this run. + + .. NOTE:: + + If the option is not set, then this mechanism is not used. + +.. option:: --quote STRING + + A symbol that defines a quote. + For CSV, double quotes are used by default (``"``). + The double symbol of this option acts as the escaping symbol within input data. + +.. 
option:: --success STRING + + The name of a file with rows that were imported (the default is ``success``). + Overwrites the file if it already exists. + +.. option:: --th-sep STRING + + The string of symbols that defines thousand separators for numeric data. + The default value includes a space and a backtick `````. + This means that ``1 000 000`` and ``1`000`000`` are both imported as ``1000000``. + + .. NOTE:: + + Symbols specified in this option cannot intersect with ``--dec-sep``. + +.. option:: --username STRING + + A username for connecting to the instance. + +.. option:: --rollback-on-error + + Applicable only when ``crud`` is used. + + Specify whether any operation that fails on a router leads to a rollback on the storage where the operation failed. diff --git a/doc/reference/tooling/tt_cli/index.rst b/doc/reference/tooling/tt_cli/index.rst index e79e38a9b8..a155a3096d 100644 --- a/doc/reference/tooling/tt_cli/index.rst +++ b/doc/reference/tooling/tt_cli/index.rst @@ -3,6 +3,8 @@ tt CLI utility ============== +Latest release on GitHub: :tt-release:`1.3.0` + ``tt`` is a utility that provides a unified command-line interface for managing Tarantool-based applications. It covers a wide range of tasks -- from installing a specific Tarantool version to managing remote instances and developing applications. @@ -10,6 +12,10 @@ a specific Tarantool version to managing remote instances and developing applica ``tt`` is developed in its own `GitHub repository `_. Here you can find its source code, changelog, and releases information. +There is also the Enterprise version of ``tt`` available in a +Tarantool Enterprise's `release package `_. +The Enterprise version provides additional features, for example, :ref:`importing ` and :ref:`exporting ` data. + This section provides instructions on ``tt`` installation and configuration, concept explanation, and the ``tt`` command reference. 
diff --git a/doc/reference/tooling/tt_cli/installation.rst b/doc/reference/tooling/tt_cli/installation.rst index 3e062df24a..8e62731998 100644 --- a/doc/reference/tooling/tt_cli/installation.rst +++ b/doc/reference/tooling/tt_cli/installation.rst @@ -5,6 +5,11 @@ To install the ``tt`` command-line utility, use a package manager -- Yum or APT on Linux, or Homebrew on macOS. If you need a specific build, you can build ``tt`` from sources. +.. NOTE:: + + A Tarantool Enterprise's `release package `_ includes the ``tt`` utility extended with additional features like :ref:`importing ` and :ref:`exporting ` data. + + Using Linux package managers ----------------------------