# Processing a File

In this example, we define a recipe on the fly as a Python dictionary, without using a recipe file.
We also introduce a new input type: a file.

In [4]:

# Recipe expressed directly as a Python dict (no recipe file involved).
# Top-level layout: "definitions" holds reusable values, "recipe" holds the
# workflow itself. Nodes point at definitions with an "@<name>" reference.
recipe_template = {
    "definitions": {
        # Prompt used by the summarization node. The {{extract_content.content}}
        # placeholder presumably pulls the "content" field from the output of
        # the "extract_content" node -- confirm against the library docs.
        "document_summary_prompt": (
            "Please summarize the following document content:\n\n"
            "---\n{{extract_content.content}}\n---\n\n"
            "Summary:"
        ),
        # Model settings shared by any node that references "@summarizer_model".
        # NOTE(review): the parsed ModelConfig repr recorded in this notebook
        # lists provider/model/temperature only -- confirm max_tokens is honored.
        "summarizer_model": {
            "provider": "openai",
            "model": "gpt-4o-mini",
            "temperature": 0.7,
            "max_tokens": 1500,
        },
    },
    "recipe": {
        "name": "File Content Summarizer",
        # A single required input of type "file".
        "user_inputs": [
            {
                "id": "document_to_summarize",
                "label": "Document to summarize",
                "type": "file",
                "required": True,
            },
        ],
        "nodes": [
            # Step 1: call the registered function "extract_file_content",
            # mapping its "file_path" argument to the user input id.
            {
                "id": "extract_content",
                "type": "function_task",
                "function_identifier": "extract_file_content",
                "input": {"file_path": "document_to_summarize"},
            },
            # Step 2: language-model call using the shared model and prompt.
            {
                "id": "summarize_document_content",
                "type": "language_task",
                "model": "@summarizer_model",
                "prompt": "@document_summary_prompt",
            },
        ],
        # Connects the extraction node to the summarization node.
        "edges": ["extract_content to summarize_document_content"],
        # Node ids whose results are reported as the workflow output.
        "final_outputs": ["summarize_document_content"],
    },
}


In [None]:
# Let's use the content-core library to extract content from the input file.
# This imports the extraction function so that we can register it below.
from file_processing import extract_file_content

from content_composer.registry import RegistryScope, get_registry

# Obtain the registry object exposed by content_composer.
registry = get_registry()

# Register the imported function under the same identifier that the recipe's
# "extract_content" node names in its "function_identifier" field.
registry.register(
    function=extract_file_content,
    identifier="extract_file_content",
    tags=["file", "extraction", "content"],
    description="Extracts content from files using content_core library",
)
    




False

In [6]:
from content_composer import parse_recipe, execute_workflow

# Workflows can be defined in multiple formats; this example passes the
# in-memory `recipe_template` dict defined earlier instead of a recipe file.
recipe = parse_recipe(recipe_template)
# Bare expression as the last line so the notebook displays the parsed Recipe.
recipe

Recipe(name='File Content Summarizer', user_inputs=[UserInput(id='document_to_summarize', label='Document to summarize', type='file', description=None, default=None, required=True, literal_values=None)], nodes=[Node(id='extract_content', type=<NodeType.FUNCTION_TASK: 'function_task'>, description=None, prompt=None, model=None, function_identifier='extract_file_content', input={'file_path': 'document_to_summarize'}, output='extract_content', recipe_path=None, input_mapping=None, output_mapping=None, map_task_definition=None, map_over_key=None, map_on_error='halt', map_execution_mode='parallel'), Node(id='summarize_document_content', type=<NodeType.LANGUAGE_TASK: 'language_task'>, description=None, prompt='Please summarize the following document content:\n\n---\n{{extract_content.content}}\n---\n\nSummary:', model=ModelConfig(provider='openai', model='gpt-4o-mini', temperature=0.7), function_identifier=None, input=None, output='summarize_document_content', recipe_path=None, input_mapping

In [7]:

inputs = {"document_to_summarize": "2503.18238v1.pdf",}
outputs = await execute_workflow(
    recipe,
    inputs
)

[32m2025-06-20 17:41:23.159[0m | [1mINFO    [0m | [36mcontent_composer.core_functions.file_processing[0m:[36mextract_file_content[0m:[36m22[0m - [1m[Core Function] extract_file_content called with file: 2503.18238v1.pdf[0m
[32m2025-06-20 17:44:34.470[0m | [1mINFO    [0m | [36mcontent_composer.core_functions.file_processing[0m:[36mextract_file_content[0m:[36m55[0m - [1mSuccessfully extracted content from 2503.18238v1.pdf[0m


In [8]:
# Display the workflow result; in this run it is a dict holding the
# "summarize_document_content" entry listed under the recipe's "final_outputs".
outputs

{'summarize_document_content': 'The document titled "Collaborating with AI Agents: Field Experiments on Teamwork, Productivity, and Performance" by Harang Ju and Sinan Aral from MIT presents research on how AI agents influence productivity and collaboration in team settings. The study introduces a platform named MindMeld, designed for real-time collaboration between humans and AI agents, allowing for the analysis of communication, workflow, and productivity.\n\nIn a large-scale marketing experiment with 2,310 participants, teams were randomly assigned to either human-human or human-AI collaborations. The teams exchanged vast amounts of messages and created numerous advertisements, revealing that collaboration with AI agents led to a 137% increase in communication and allowed participants to focus more on content generation rather than editing. Specifically, Human-AI teams produced higher quality ad text while Human-Human teams excelled in image quality. \n\nThe findings indicated that 