In [1]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

### Custom verbs

#### Define the `str_append` verb function
By convention, we typically use "column" as the input column name and "to" as the output column name, but you can use whatever names you want,
as long as the "args" in the workflow reference match the function signature.

In [2]:
# Tiny in-memory example table used as the pipeline input:
# two rows with the integer columns "col1" and "col2".
import pandas as pd

dataset = pd.DataFrame({"col1": [2, 5], "col2": [4, 10]})


In [3]:

from datashaper import TableContainer, VerbInput


def str_append(
    input: VerbInput,
    source_column: str,
    target_column: str,
    string_to_append: str,
    **_kwargs: dict,
):
    """Custom verb: suffix every value of one column and store the result in another.

    The parameter names are free to choose. By convention "column" is typically
    used for the input column and "to" for the output column, but any names work
    as long as the "args" in the workflow reference match this signature.

    Args:
        input: Verb input; its ``get_input()`` result is copied and extended
            (presumably a pandas DataFrame - confirm against datashaper).
        source_column: Name of the column whose values are read.
        target_column: Name of the column that receives the suffixed strings.
        string_to_append: Text appended to the string form of each value.
        **_kwargs: Extra keyword arguments; accepted and ignored.

    Returns:
        A ``TableContainer`` wrapping the copied table with ``target_column`` set.
    """

    def _with_suffix(value):
        # Format through an f-string so non-string values are stringified first.
        return f"{value}{string_to_append}"

    # Work on a copy so the table held by `input` is left untouched.
    result_table = input.get_input().copy()
    result_table[target_column] = result_table[source_column].apply(_with_suffix)
    return TableContainer(table=result_table)


# Registry of custom verbs: each key is the verb name that a workflow step
# references through "verb"; each value is the callable that implements it.
custom_verbs = {"str_append": str_append}


#### Create a pipeline file that uses the `str_append` verb

In [4]:
import pandas as pd

from pathlib import Path

# Workflow definition with a single step that runs the custom "str_append" verb.
# The keys under "args" use the same names as the keyword parameters of `str_append`.
PIPELINE_YAML = """
workflows:
 - steps:
    - verb: "str_append"  # should be the key that you pass to the custom_verbs dict below
      args: 
        source_column: "col1"
        target_column: "col_1_custom"
        string_to_append: " - custom verb"
"""

# `cwd` is a classmethod, so call it on the class instead of on a throwaway instance.
pipeline_file = Path.cwd() / "pipeline.yaml"

# Write the definition next to the notebook. The encoding is explicit so the file
# content does not depend on the platform's default locale encoding.
with pipeline_file.open("w", encoding="utf-8") as file:
    file.write(PIPELINE_YAML)
    

In [5]:
# Path of the pipeline config as a plain string. The YAML written above has no
# input section; the dataset is passed directly when the pipeline is run.
config_path = str(pipeline_file)
# Bare expression as the last line of the cell so the notebook displays the path.
config_path

'/media/gpt4-pdf-chatbot-langchain/graphrag/examples_notebooks/3_custom_set_of_available_verbs/pipeline.yaml'



### Pass the custom verbs to the pipeline via `additional_verbs`

In [6]:

from graphrag.index import run_pipeline_with_config

outputs = []
async for output in run_pipeline_with_config(
    config_or_path=config_path, dataset=dataset, 
    additional_verbs=custom_verbs,
):
    outputs.append(output)
pipeline_result = outputs[-1]

if pipeline_result.result is not None:
    # Should look something like this, which should be identical to the python example:
    #    col1  col2  col_1_custom
    # 0     2     4  2 - custom verb
    # 1     5    10  5 - custom verb
    print(pipeline_result.result)
else:
    print("No results!")

   col1  col2     col_1_custom
0     2     4  2 - custom verb
1     5    10  5 - custom verb


#### Use the Python API to build a pipeline with the `str_append` custom verb

In [7]:
from graphrag.index.config import PipelineWorkflowReference
# One workflow step that invokes the custom verb.
# No explicit input is specified because the step acts on the default input.
str_append_step = {
    # Must be the key under which the verb is registered in the custom_verbs dict.
    "verb": "str_append",
    "args": {
        "source_column": "col1",  # existing column of the dataset
        "target_column": "col_1_custom",  # name of the new column
        "string_to_append": " - custom verb",  # text appended to each value
    },
}

# The same pipeline as the YAML version, expressed through the Python API.
workflows: list[PipelineWorkflowReference] = [
    PipelineWorkflowReference(name="my_workflow", steps=[str_append_step]),
]


##### Run the pipeline

In [8]:
from graphrag.index import run_pipeline 

# Run the pipeline
outputs = []
async for output in run_pipeline(
    dataset=dataset,
    workflows=workflows,
    additional_verbs=custom_verbs,
):
    outputs.append(output)

# Find the result from the workflow we care about
pipeline_result = next(
    (output for output in outputs if output.workflow == "my_workflow"), None
)

if pipeline_result is not None and pipeline_result.result is not None:
    # Should look something like this:
    #    col1  col2     col_1_custom
    # 0     2     4  2 - custom verb
    # 1     5    10  5 - custom verb
    print(pipeline_result.result)
else:
    print("No results!")

   col1  col2     col_1_custom
0     2     4  2 - custom verb
1     5    10  5 - custom verb
