# Tracing 101

Step through this notebook to understand how tracing works in the Generative AI Toolkit.

The Generative AI Toolkit comes with these tracers out-of-the-box:


In [1]:
from generative_ai_toolkit.tracer import (
    Tracer,
    NoopTracer,
    HumanReadableTracer,
    InMemoryTracer,
    StructuredLogsTracer,
    TeeTracer,
)
from generative_ai_toolkit.tracer.otlp import OtlpTracer
from generative_ai_toolkit.tracer.dynamodb import DynamoDbTracer

import time
import random

### `InMemoryTracer`

Use the in-memory tracer for testing and development:


In [2]:
# Bounded in-memory store: holds at most 1000 traces, older ones are discarded
in_memory_tracer = InMemoryTracer(memory_size=1000)

# Context set on the tracer is added to all traces:
in_memory_tracer.set_context(resource_attributes={"service.name": "MyAgent"})

with in_memory_tracer.trace("parent") as parent_span:
    parent_span.add_attribute("foo", "bar")
    # inheritable=True makes the attribute propagate to child spans
    parent_span.add_attribute("inherited.foo", "bar", inheritable=True)
    time.sleep(0.1)

    # A span opened inside another span becomes its child and points
    # back to the parent through parent_span_id:
    with in_memory_tracer.trace("child") as child_span:
        child_span.add_attribute("bar", "foo")
        time.sleep(0.1)

# Print every stored trace, each followed by a blank line
for stored_trace in in_memory_tracer.get_traces():
    print(stored_trace, end="\n\n")

Trace(span_name='parent', span_kind='INTERNAL', trace_id='bb6aa96134562192852b302a0442836e', span_id='733e7bf26c063afc', parent_span_id=None, started_at=datetime.datetime(2025, 4, 15, 20, 24, 45, 208275, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 45, 416516, tzinfo=datetime.timezone.utc), attributes={'foo': 'bar', 'inherited.foo': 'bar'}, span_status='UNSET', resource_attributes={'service.name': 'MyAgent'}, scope=generative-ai-toolkit@current)

Trace(span_name='child', span_kind='INTERNAL', trace_id='bb6aa96134562192852b302a0442836e', span_id='221e9ff65fd55515', parent_span_id='733e7bf26c063afc', started_at=datetime.datetime(2025, 4, 15, 20, 24, 45, 311403, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 45, 416474, tzinfo=datetime.timezone.utc), attributes={'bar': 'foo', 'inherited.foo': 'bar'}, span_status='UNSET', resource_attributes={'service.name': 'MyAgent'}, scope=generative-ai-toolkit@current)



### Printing a human-readable version of traces during development

In the following example we add attributes that Generative AI Toolkit understands. It will use these to present traces in a way that is nicer to the human eye:


In [3]:
conversation_id = random.randint(0, 1_000_000)

with in_memory_tracer.trace("parent", span_kind="SERVER") as parent_span:
    # Both attributes are inheritable, so the child span carries them as well
    for attr_name, attr_value in (
        ("ai.conversation.id", conversation_id),
        ("ai.auth.context", "user123"),
    ):
        parent_span.add_attribute(attr_name, attr_value, inheritable=True)
    time.sleep(0.1)

    with in_memory_tracer.trace("child") as child_span:
        # Attributes the toolkit understands and renders in human readable output
        for attr_name, attr_value in (
            ("ai.trace.type", "tool-invocation"),
            ("ai.tool.input", "Hello, world!"),
            ("ai.tool.output", "World, hello!"),
        ):
            child_span.add_attribute(attr_name, attr_value)
        time.sleep(0.1)


# Filter traces by conversation id
conversation_filter = {"ai.conversation.id": conversation_id}

for stored_trace in in_memory_tracer.get_traces(attribute_filter=conversation_filter):
    print(stored_trace.as_human_readable(), end="\n\n")

[94m[44c831615e08cf51a102c23610c863e6/root/7a666bc31ee3ec08][0m [96mMyAgent[0m [92mSERVER[0m 2025-04-15T20:24:45.426Z - parent ([93mai.conversation.id='488044' ai.auth.context='user123'[0m)


[94m[44c831615e08cf51a102c23610c863e6/7a666bc31ee3ec08/d2f2e57d53e96f4c][0m [96mMyAgent[0m [94mINTERNAL[0m 2025-04-15T20:24:45.531Z - child ([93mai.trace.type='tool-invocation' ai.conversation.id='488044' ai.auth.context='user123'[0m)
[90m       Input: Hello, world![0m
[90m      Output: World, hello![0m




### `HumanReadableTracer`

You can also use the `HumanReadableTracer` that will log traces in human readable form to stdout, which is useful during development.

Note that traces are logged when the span ends, so parent spans are logged after child spans (this is true for all tracers):


In [4]:
import sys

# Traces are written to stdout in human readable form
human_readable_tracer = HumanReadableTracer(stream=sys.stdout)
human_readable_tracer.set_context(resource_attributes={"service.name": "MyAgent"})

# Attributes describing the (simulated) tool invocation of the child span
tool_attributes = {
    "ai.trace.type": "tool-invocation",
    "ai.tool.input": "Hello, world!",
    "ai.tool.output": "World, hello!",
}

with human_readable_tracer.trace("parent", span_kind="SERVER") as parent_span:
    parent_span.add_attribute("ai.conversation.id", conversation_id, inheritable=True)
    parent_span.add_attribute("ai.auth.context", "user123", inheritable=True)
    time.sleep(0.1)

    # The child span ends first, so it is logged before the parent
    with human_readable_tracer.trace("child") as child_span:
        for attr_name, attr_value in tool_attributes.items():
            child_span.add_attribute(attr_name, attr_value)
        time.sleep(0.1)

[94m[a8eafbdecb37764e97ba8377d1a5a70e/6653aa0e28e11ced/5896169d04b6ec25][0m [96mMyAgent[0m [94mINTERNAL[0m 2025-04-15T20:24:45.747Z - child ([93mai.trace.type='tool-invocation' ai.conversation.id='488044' ai.auth.context='user123'[0m)
[90m       Input: Hello, world![0m
[90m      Output: World, hello![0m

[94m[a8eafbdecb37764e97ba8377d1a5a70e/root/6653aa0e28e11ced][0m [96mMyAgent[0m [92mSERVER[0m 2025-04-15T20:24:45.641Z - parent ([93mai.conversation.id='488044' ai.auth.context='user123'[0m)



### `StructuredLogsTracer`

Use the `StructuredLogsTracer` to log traces to stdout as JSON:


In [5]:
# Each trace is written to stdout as one JSON log line
structured_logs_tracer = StructuredLogsTracer(stream=sys.stdout)
structured_logs_tracer.set_context(resource_attributes={"service.name": "MyAgent"})

with structured_logs_tracer.trace("parent", span_kind="SERVER") as parent_span:
    # Inheritable attributes also show up on the child span's log line
    inherited_attributes = {
        "ai.conversation.id": conversation_id,
        "ai.auth.context": "user123",
    }
    for attr_name, attr_value in inherited_attributes.items():
        parent_span.add_attribute(attr_name, attr_value, inheritable=True)
    time.sleep(0.1)

    with structured_logs_tracer.trace("child") as child_span:
        child_span.add_attribute("ai.trace.type", "tool-invocation")
        child_span.add_attribute("ai.tool.input", "Hello, world!")
        child_span.add_attribute("ai.tool.output", "World, hello!")
        time.sleep(0.1)

{"logger":"TraceLogger","level":"INFO","message":"Trace","trace":{"span_name":"child","span_kind":"INTERNAL","trace_id":"92d604b5ae48e49b0917fa15fc54d124","span_id":"7c284f55ae4ac7ce","parent_span_id":"c2b1a56889b6687e","started_at":"2025-04-15 20:24:45.972976+00:00","ended_at":"2025-04-15 20:24:46.077698+00:00","attributes":{"ai.trace.type":"tool-invocation","ai.tool.input":"Hello, world!","ai.tool.output":"World, hello!","ai.conversation.id":488044,"ai.auth.context":"user123"},"span_status":"UNSET","resource_attributes":{"service.name":"MyAgent"},"scope":{"name":"generative-ai-toolkit","version":"current"}}}
{"logger":"TraceLogger","level":"INFO","message":"Trace","trace":{"span_name":"parent","span_kind":"SERVER","trace_id":"92d604b5ae48e49b0917fa15fc54d124","span_id":"c2b1a56889b6687e","parent_span_id":null,"started_at":"2025-04-15 20:24:45.867530+00:00","ended_at":"2025-04-15 20:24:46.079694+00:00","attributes":{"ai.conversation.id":488044,"ai.auth.context":"user123"},"span_status

### `DynamoDbTracer`

Use the `DynamoDbTracer` to store traces to DynamoDB.

To use this tracer, you should have created a table with partition key `pk` (string) and sort key `sk` (string).

If you want to support getting traces by conversation ID, the table must have a GSI with partition key `conversation_id` (string) and sort key `sk` (string).

For example, here's how to create such a table:


In [6]:
# Create the table MyTracesTable with:
#   - partition key pk (string) and sort key sk (string)
#   - on-demand billing (PAY_PER_REQUEST)
#   - a GSI named conversation_index, keyed on conversation_id (HASH) and
#     sk (RANGE), projecting all attributes
# Re-running this cell once the table exists fails with ResourceInUseException.
!aws dynamodb create-table \
  --table-name MyTracesTable \
  --attribute-definitions \
    AttributeName=pk,AttributeType=S \
    AttributeName=sk,AttributeType=S \
    AttributeName=conversation_id,AttributeType=S \
  --key-schema \
    AttributeName=pk,KeyType=HASH \
    AttributeName=sk,KeyType=RANGE \
  --billing-mode PAY_PER_REQUEST \
  --global-secondary-indexes '[{"IndexName":"conversation_index","KeySchema":[{"AttributeName":"conversation_id","KeyType":"HASH"},{"AttributeName":"sk","KeyType":"RANGE"}],"Projection":{"ProjectionType":"ALL"}}]'


An error occurred (ResourceInUseException) when calling the CreateTable operation: Table already exists: MyTracesTable


Then, use that table in the `DynamoDbTracer`:


In [7]:
conversation_id = random.randint(0, 1_000_000)
auth_context = "user123"

# Point the tracer at the table and at the GSI used for conversation lookups
ddb_tracer = DynamoDbTracer(
    table_name="MyTracesTable",
    identifier="MyAgent",
    conversation_id_gsi_name="conversation_index",
)
ddb_tracer.set_context(resource_attributes={"service.name": "MyAgent"})

with ddb_tracer.trace("parent", span_kind="SERVER") as parent_span:
    for attr_name, attr_value in (
        ("ai.conversation.id", conversation_id),
        ("ai.auth.context", auth_context),
    ):
        parent_span.add_attribute(attr_name, attr_value, inheritable=True)
    time.sleep(0.1)

    with ddb_tracer.trace("child") as child_span:
        for attr_name, attr_value in (
            ("ai.trace.type", "tool-invocation"),
            ("ai.tool.input", "Hello, world!"),
            ("ai.tool.output", "World, hello!"),
        ):
            child_span.add_attribute(attr_name, attr_value)
        time.sleep(0.1)


# Read the traces back, filtered on conversation id and auth context
lookup_filter = {
    "ai.conversation.id": conversation_id,
    "ai.auth.context": auth_context,
}

for stored_trace in ddb_tracer.get_traces(attribute_filter=lookup_filter):
    print(stored_trace.as_human_readable(), end="\n\n")

[94m[932aebd8017bc6c3ae15180bef936b7f/root/b982643bda353e6d][0m [96mMyAgent[0m [92mSERVER[0m 2025-04-15T20:24:47.538Z - parent ([93mai.conversation.id='975556' ai.auth.context='user123'[0m)


[94m[932aebd8017bc6c3ae15180bef936b7f/b982643bda353e6d/b4d55d8612549668][0m [96mMyAgent[0m [94mINTERNAL[0m 2025-04-15T20:24:47.643Z - child ([93mai.trace.type='tool-invocation' ai.conversation.id='975556' ai.auth.context='user123'[0m)
[90m       Input: Hello, world![0m
[90m      Output: World, hello![0m




### `OtlpTracer`

The `OtlpTracer` logs traces in OpenTelemetry protobuf format. It expects you to run an OpenTelemetry collector that it can send the traces to. By default, it expects the collector to be running on localhost port 4318.

You can use the `OtlpTracer` to send traces to AWS X-Ray. To make that work, you can run the [ADOT collector](https://github.com/aws-observability/aws-otel-collector) locally:


In [8]:
# Create the ADOT config file:
#   - receives OTLP over HTTP on port 4318
#   - batches traces (10s timeout, batch size 50)
#   - exports them to AWS X-Ray, with ai.conversation.id as indexed attribute
# NOTE(review): the exporter region is hard-coded to eu-central-1 below, while
# the collector container is also passed AWS_REGION from the environment --
# confirm that this is the region you want your traces to end up in.

yaml_content = """\
receivers:
  otlp:
    protocols:
      http:
        endpoint: 0.0.0.0:4318

processors:
  batch/traces:
    timeout: 10s
    send_batch_size: 50

exporters:
  awsxray:
    region: eu-central-1
    indexed_attributes:
      - ai.conversation.id

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch/traces]
      exporters: [awsxray]
"""

# Explicit encoding, so the written file does not depend on the platform's
# default locale encoding
with open("adot-config.yaml", "w", encoding="utf-8") as f:
    f.write(yaml_content)

Run the ADOT collector in the background. Note that the following example assumes `AWS_REGION`, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN` are available as environment variables:


In [9]:
# Start the ADOT collector as a detached container named adot-collector,
# publishing port 4318 and mounting the config file written above.
# The AWS_* variables are expected to be set in the current environment.
# The volume argument is quoted so that a working directory containing
# spaces is still passed to docker as a single argument.
!docker run --rm -d --name adot-collector \
  -p 4318:4318 \
  -e AWS_REGION \
  -e AWS_ACCESS_KEY_ID \
  -e AWS_SECRET_ACCESS_KEY \
  -e AWS_SESSION_TOKEN \
  -v "$(pwd)/adot-config.yaml:/etc/collector-config.yaml" \
  public.ecr.aws/aws-observability/aws-otel-collector:latest \
  --config=/etc/collector-config.yaml
!sleep 2 # wait for the collector to start
!docker logs adot-collector

7f9f2871f2e6c8776f46726f1088618bf8eaffa9cecdd8f24adb578d7f4dc83d
2025/04/15 20:24:48 ADOT Collector version: v0.43.1
2025/04/15 20:24:48 found no extra config, skip it, err: open /opt/aws/aws-otel-collector/etc/extracfg.txt: no such file or directory
2025/04/15 20:24:48 attn: users of the `datadog`, `logzio`, `sapm`, `signalfx` exporter components. please refer to https://github.com/aws-observability/aws-otel-collector/issues/2734 in regards to an upcoming ADOT Collector breaking change
2025-04-15T20:24:48.296Z	info	service@v0.117.0/service.go:164	Setting up own telemetry...
2025-04-15T20:24:48.296Z	info	telemetry/metrics.go:70	Serving metrics	{"address": "localhost:8888", "metrics level": "Normal"}
2025-04-15T20:24:48.297Z	info	service@v0.117.0/service.go:230	Starting aws-otel-collector...	{"Version": "v0.43.1", "NumCPU": 2}
2025-04-15T20:24:48.297Z	info	extensions/extensions.go:39	Starting extensions...
2025-04-15T20:24:48.297Z	info	otlpreceiver@v0.117.0/otlp.go:169	Starting HTTP ser

Then, send traces to AWS X-Ray by using the `OtlpTracer`:


In [10]:
# No arguments: the tracer uses its default collector endpoint
otlp_tracer = OtlpTracer()
otlp_tracer.set_context(resource_attributes={"service.name": "MyAgent"})

# Attributes describing the (simulated) tool invocation of the child span
tool_attributes = {
    "ai.trace.type": "tool-invocation",
    "ai.tool.input": "Hello, world!",
    "ai.tool.output": "World, hello!",
}

with otlp_tracer.trace("parent", span_kind="SERVER") as parent_span:
    parent_span.add_attribute("ai.conversation.id", "123456", inheritable=True)
    parent_span.add_attribute("ai.auth.context", "user123", inheritable=True)
    time.sleep(0.1)

    with otlp_tracer.trace("child") as child_span:
        for attr_name, attr_value in tool_attributes.items():
            child_span.add_attribute(attr_name, attr_value)
        time.sleep(0.1)

If that seems to work, but you don't see traces appear in AWS X-Ray, check the ADOT container logs. E.g. there may be a permission issue if your AWS credentials have expired:


In [None]:
# Show the logs of the adot-collector container, to troubleshoot traces that
# do not show up in AWS X-Ray
!docker logs adot-collector

### `NoopTracer`

Use the no-operation tracer when you don't want traces:


In [12]:
noop_tracer = NoopTracer()

# The span API can be used as usual, but nothing is logged
with noop_tracer.trace("noop") as noop_span:
    noop_span.add_attribute("foo", "bar")

### `TeeTracer`

Use the `TeeTracer` to send traces to multiple tracers at once.

Note that the first tracer you add is the one that `get_traces()` is delegated to. So if you want to use that method, add a tracer that supports it first.

Add tracers like this:


In [13]:
tee_tracer = TeeTracer()

# Order matters:
#   1. ddb_tracer goes first, because the DynamoDbTracer supports get_traces()
#   2. human_readable_tracer
#   3. noop_tracer -- of course useless, but added for the sake of the example
for delegate_tracer in (ddb_tracer, human_readable_tracer, noop_tracer):
    tee_tracer.add_tracer(delegate_tracer)

Then, use the `TeeTracer` as any other tracer:


In [14]:
conversation_id = random.randint(0, 1_000_000)
auth_context = "user456"

tee_tracer.set_context(resource_attributes={"service.name": "MyAgent"})

with tee_tracer.trace("parent", span_kind="SERVER") as parent_span:
    for attr_name, attr_value in (
        ("ai.conversation.id", conversation_id),
        ("ai.auth.context", auth_context),
    ):
        parent_span.add_attribute(attr_name, attr_value, inheritable=True)
    time.sleep(0.1)

    with tee_tracer.trace("child") as child_span:
        for attr_name, attr_value in (
            ("ai.trace.type", "tool-invocation"),
            ("ai.tool.input", "Hello, world!"),
            ("ai.tool.output", "World, hello!"),
        ):
            child_span.add_attribute(attr_name, attr_value)
        time.sleep(0.1)


# get_traces() is delegated to the first tracer that was added
lookup_filter = {
    "ai.conversation.id": conversation_id,
    "ai.auth.context": auth_context,
}

print("==== from DynamoDB: ====")
for stored_trace in tee_tracer.get_traces(attribute_filter=lookup_filter):
    print(stored_trace, end="\n\n")

[94m[935a6722bac8d30cdf2720d9ce61e997/0e777bca39188c70/910512fd98ecd67c][0m [96mMyAgent[0m [94mINTERNAL[0m 2025-04-15T20:24:51.652Z - child ([93mai.trace.type='tool-invocation' ai.conversation.id='726828' ai.auth.context='user456'[0m)
[90m       Input: Hello, world![0m
[90m      Output: World, hello![0m

[94m[935a6722bac8d30cdf2720d9ce61e997/root/0e777bca39188c70][0m [96mMyAgent[0m [92mSERVER[0m 2025-04-15T20:24:51.546Z - parent ([93mai.conversation.id='726828' ai.auth.context='user456'[0m)

==== from DynamoDB: ====
Trace(span_name='parent', span_kind='SERVER', trace_id='935a6722bac8d30cdf2720d9ce61e997', span_id='0e777bca39188c70', parent_span_id=None, started_at=datetime.datetime(2025, 4, 15, 20, 24, 51, 546951, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 51, 798469, tzinfo=datetime.timezone.utc), attributes={'ai.conversation.id': 726828, 'ai.auth.context': 'user456'}, span_status='UNSET', resource_attributes={'service.name': 'My

### `@traced` decorator

Rather than wrapping your code inside `with` statements to add tracing, you can also use the `@traced` decorator on your functions to trace their execution:


In [15]:
from generative_ai_toolkit.tracer import traced

# Fresh tracer, so only the traces of the decorated functions are stored
in_memory_tracer = InMemoryTracer()


@traced("parent", tracer=in_memory_tracer)
def parent_fn():
    """Call `child_fn`, then sleep briefly; traced as span "parent"."""
    child_fn()
    time.sleep(0.1)


@traced("child", tracer=in_memory_tracer)
def child_fn():
    """Sleep briefly; traced as span "child"."""
    time.sleep(0.1)

Now, when you execute these functions, they will be traced:


In [16]:
# Running the decorated function records the "parent" and "child" spans
parent_fn()

for stored_trace in in_memory_tracer.get_traces():
    print(stored_trace)

Trace(span_name='parent', span_kind='INTERNAL', trace_id='cee7cac3f84344bb7963208f1971774e', span_id='fba61a8cc2eb0b85', parent_span_id=None, started_at=datetime.datetime(2025, 4, 15, 20, 24, 51, 878300, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 88690, tzinfo=datetime.timezone.utc), attributes={}, span_status='UNSET', resource_attributes={}, scope=generative-ai-toolkit@current)
Trace(span_name='child', span_kind='INTERNAL', trace_id='cee7cac3f84344bb7963208f1971774e', span_id='d3e4a3a5bc127921', parent_span_id='fba61a8cc2eb0b85', started_at=datetime.datetime(2025, 4, 15, 20, 24, 51, 878477, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 51, 983571, tzinfo=datetime.timezone.utc), attributes={}, span_status='UNSET', resource_attributes={}, scope=generative-ai-toolkit@current)


In order to add attributes to the trace, you can access the `current_trace` attribute for the tracer. Accessing that attribute only works within the context of a trace:


In [17]:
in_memory_tracer = InMemoryTracer()


@traced("parent", tracer=in_memory_tracer)
def parent_fn2():
    """Add an inheritable attribute to the current trace, then call `child_fn2`."""
    active_trace = in_memory_tracer.current_trace
    active_trace.add_attribute("foo", "bar", inheritable=True)
    child_fn2()
    time.sleep(0.1)


@traced("child", tracer=in_memory_tracer)
def child_fn2():
    """Add an attribute to the current (child) trace."""
    active_trace = in_memory_tracer.current_trace
    active_trace.add_attribute("bar", "foo")
    time.sleep(0.1)


parent_fn2()

for stored_trace in in_memory_tracer.get_traces():
    print(stored_trace)

Trace(span_name='parent', span_kind='INTERNAL', trace_id='442f3cdb25ab4f2428f3c65db43fbe66', span_id='89e2e8404826d2fb', parent_span_id=None, started_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 98054, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 307644, tzinfo=datetime.timezone.utc), attributes={'foo': 'bar'}, span_status='UNSET', resource_attributes={}, scope=generative-ai-toolkit@current)
Trace(span_name='child', span_kind='INTERNAL', trace_id='442f3cdb25ab4f2428f3c65db43fbe66', span_id='815da32ef95de724', parent_span_id='89e2e8404826d2fb', started_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 98154, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 203235, tzinfo=datetime.timezone.utc), attributes={'bar': 'foo', 'foo': 'bar'}, span_status='UNSET', resource_attributes={}, scope=generative-ai-toolkit@current)


If the first argument to your function has a `tracer` attribute, you don't need to specify a `tracer` explicitly. E.g. within a class with a `tracer` attribute, you can decorate methods with `@traced`, i.e. without explicitly passing the tracer, as below:


In [18]:
in_memory_tracer = InMemoryTracer()


class MyAgent:
    """Example class whose methods are traced through its `tracer` attribute."""

    def __init__(self, tracer: Tracer) -> None:
        self._tracer = tracer

    @property
    def tracer(self):
        """The tracer that `@traced` picks up from the instance."""
        return self._tracer

    @traced
    def parent_method(self):
        """Add an inheritable attribute, then call `child_method`."""
        active_trace = self.tracer.current_trace
        active_trace.add_attribute("foo", "bar", inheritable=True)
        self.child_method()
        time.sleep(0.1)

    @traced
    def child_method(self):
        """Add an attribute to the current (child) trace."""
        active_trace = self.tracer.current_trace
        active_trace.add_attribute("bar", "foo")
        time.sleep(0.1)


agent = MyAgent(in_memory_tracer)
agent.parent_method()

for stored_trace in in_memory_tracer.get_traces():
    print(stored_trace)

Trace(span_name='parent_method', span_kind='INTERNAL', trace_id='7add7807c1842b3a0457dca77f542b90', span_id='e8f7ebb7ad975fc2', parent_span_id=None, started_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 326268, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 534291, tzinfo=datetime.timezone.utc), attributes={'foo': 'bar'}, span_status='UNSET', resource_attributes={}, scope=generative-ai-toolkit@current)
Trace(span_name='child_method', span_kind='INTERNAL', trace_id='7add7807c1842b3a0457dca77f542b90', span_id='3ab7762eaf8d0657', parent_span_id='e8f7ebb7ad975fc2', started_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 326342, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 4, 15, 20, 24, 52, 431435, tzinfo=datetime.timezone.utc), attributes={'bar': 'foo', 'foo': 'bar'}, span_status='UNSET', resource_attributes={}, scope=generative-ai-toolkit@current)


### Developing your own tracer

It's easy to develop your own tracers that can be used with the Generative AI Toolkit.

In the simplest case, you inherit from `BaseTracer` and only have to implement the `persist` method:


In [None]:
from generative_ai_toolkit.tracer import BaseTracer, Trace


class MyTracer(BaseTracer):
    """Minimal custom tracer that prints each trace in human readable form."""

    def persist(self, trace: Trace) -> None:
        """Print the given trace to stdout."""
        # This is what the `HumanReadableTracer` does
        readable = trace.as_human_readable()
        print(readable)


my_tracer = MyTracer()

with my_tracer.trace("span") as span:
    span.add_attribute("foo", "bar")

[94m[0ac7331734bb764db64f2d74cf7a1e60/root/ccd06d55d6c92c4b][0m [96m<missing service.name>[0m [94mINTERNAL[0m 2025-04-15T20:24:52.550Z - span

