In [1]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

### Running a single verb
#### Create the input CSV file

In [2]:
# our fake dataset

import pandas as pd
import os

# Two-row toy frame; the pipelines below multiply col1 by col2.
dataset = pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}])

# exist_ok=True makes the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs('input', exist_ok=True)
# Persist the frame without the index so the CSV holds only col1 and col2.
dataset.to_csv('input/dataset.csv', index=False)

#### Write the pipeline YAML file for the verb

In [3]:
import pandas as pd

from pathlib import Path

# Pipeline definition: one workflow with a single `derive` step that combines
# col1 and col2 with the "*" operator into a new column, col_multiplied.
# Raw string on purpose: the file_pattern regex contains `\.`, which is an
# invalid escape sequence in a regular string literal and triggers a warning
# on current Python versions. The text written to disk is unchanged.
PIPELINE_YAML = r"""
input:
  file_type: csv
  base_dir: ./input
  file_pattern: .*\.csv$
workflows:
  - steps:
      - verb: derive # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/derive.py
        args:
          column1: "col1"
          column2: "col2"
          to: "col_multiplied"
          operator: "*"

"""
# Write the definition into the current working directory; cwd() is a
# classmethod, so no throwaway Path() instance is needed.
pipeline_file = Path.cwd() / "pipeline.yaml"
# Explicit encoding so the file does not depend on the platform default.
with pipeline_file.open("w", encoding="utf-8") as file:
    file.write(PIPELINE_YAML)
    

In [4]:
# Path of the pipeline YAML written above, as a plain string. It is handed to
# run_pipeline_with_config as `config_or_path` further down.
# NOTE(review): the YAML does declare an `input` section, but the in-memory
# `dataset` is also passed explicitly to the run call; confirm which one
# takes precedence.
config_path = str(pipeline_file)
config_path

'/media/gpt4-pdf-chatbot-langchain/graphrag/examples_notebooks/2_single_verb/pipeline.yaml'

#### Run the pipeline workflow from the config file

In [5]:
from graphrag.index import run_pipeline_with_config
tables = []
async for table in run_pipeline_with_config(
    config_or_path=config_path, dataset=dataset
):
    tables.append(table)
pipeline_result = tables[-1]

if pipeline_result.result is not None:
    # Should look something like this, which should be identical to the python example:
    #    col1  col2  col_multiplied
    # 0     2     4               8
    # 1     5    10              50
    print(pipeline_result.result)
else:
    print("No results!")

   col1  col2  col_multiplied
0     2     4               8
1     5    10              50


#### Define the same pipeline with the Python API

In [6]:
from graphrag.index.config import PipelineWorkflowReference
# Run a pipeline using the python API.
# The single step uses the built-in `derive` verb. It names no input, so it
# acts on the default input.
derive_step = {
    "verb": "derive",  # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/derive.py
    "args": {
        "column1": "col1",  # first source column of the dataset
        "column2": "col2",  # second source column of the dataset
        "to": "col_multiplied",  # name of the new column
        "operator": "*",  # multiply the two columns
    },
}

# One workflow reference wrapping that single step.
workflows: list[PipelineWorkflowReference] = [
    PipelineWorkflowReference(steps=[derive_step]),
]


##### Run the pipeline

In [7]:
from graphrag.index import run_pipeline
# Grab the last result from the pipeline, should be our entity extraction
tables = []
async for table in run_pipeline(dataset=dataset, workflows=workflows):
    tables.append(table)
pipeline_result = tables[-1]

if pipeline_result.result is not None:
    # Should look something like this:
    #    col1  col2  col_multiplied
    # 0     2     4               8
    # 1     5    10              50
    print(pipeline_result.result)
else:
    print("No results!")

   col1  col2  col_multiplied
0     2     4               8
1     5    10              50
