##### **** These pip installs need to be adapted to use the appropriate release level. Alternatively, the venv running JupyterLab could be pre-configured with a requirements file that includes the right release. Example for transform developers working from a git clone:
```
make venv 
source venv/bin/activate 
pip install jupyterlab
```

In [None]:
%%capture
## This cell is provided as a reference only; the %%capture magic above hides the pip output.
# Users and application developers must install the release tag that matches
# the latest version published on PyPI.
%pip install data-prep-toolkit
%pip install data-prep-toolkit-transforms==0.2.2.dev3

##### **** Configure the transform parameters. The dictionary keys that hold the DocIDTransform configuration values are as follows:
* doc_column - specifies the name of the column containing the document (required for ID generation)
* hash_column - specifies the name of the column created to hold the string document id; if None, the id is not generated
* int_id_column - specifies the name of the column created to hold the integer document id; if None, the id is not generated
* start_id - the id from which the ID generator starts

##### ***** Import required classes and modules

In [None]:
import os
import sys

from data_processing.runtime.pure_python import PythonTransformLauncher
from data_processing.utils import ParamsUtils
from doc_id_transform_python import DocIDPythonTransformRuntimeConfiguration
from doc_id_transform_base import (
    doc_column_name_cli_param,
    hash_column_name_cli_param,
    int_column_name_cli_param,
    start_id_cli_param,
)

##### ***** Setup runtime parameters for this transform

In [None]:

# Build the parameter dictionary that is handed to the transform launcher.
# Input and output locations are relative to the current working directory.
input_folder = os.path.join("python", "test-data", "input")
output_folder = os.path.join("python", "output")
local_conf = dict(input_folder=input_folder, output_folder=output_folder)
# NOTE(review): these values look like placeholders rather than a real code location.
code_location = dict(github="github", commit_hash="12345", path="path")

# Data access. Only required parameters are specified
params = {"data_local_config": ParamsUtils.convert_to_ast(local_conf)}
# execution info
params.update(
    runtime_pipeline_id="pipeline_id",
    runtime_job_id="job_id",
    runtime_code_location=ParamsUtils.convert_to_ast(code_location),
)
# doc id params (keys come from the imported CLI parameter names)
params.update(
    {
        doc_column_name_cli_param: "contents",
        hash_column_name_cli_param: "hash_column",
        int_column_name_cli_param: "int_id_column",
        start_id_cli_param: 5,
    }
)

##### ***** Use python runtime to invoke the transform

In [None]:
%%capture
sys.argv = ParamsUtils.dict_to_req(d=params)
launcher = PythonTransformLauncher(runtime_config=DocIDPythonTransformRuntimeConfiguration())
launcher.launch()

##### **** The output folder specified above will contain the transformed parquet files.

In [None]:
import glob

# List every entry directly under the transform's output folder; as the last
# expression of the cell, the resulting list is displayed as the cell output.
glob.glob(pathname="python/output/*")