Profound API

This notebook demonstrates how to replicate Profound's frontend functionality using the `profound` Python library.

**API Documentation**: [https://docs.tryprofound.com/rest-api](https://docs.tryprofound.com/rest-api)

## Table of Contents

- [Setup](#setup)
- [Configuration](#configuration)
- [Common Filters](#common-filters)
- [Helper Functions](#helper-functions)
- [Visibility Reports](#visibility-reports)
- [Citations Reports](#citations-reports)

## Setup

Install dependencies using `uv`:

```bash
uv sync
```

In [None]:
import os
import datetime
from typing import Any, Dict, List, Optional

import pytz
import pandas as pd

import profound

# The API key and host are read from the environment variables PROFOUND_API_KEY
# and PROFOUND_API_HOST; edit the fallback values below if you do not export them.
profound_api_key = os.environ.get("PROFOUND_API_KEY", "change-me!")
profound_api_host = os.environ.get("PROFOUND_API_HOST", "https://api.tryprofound.com")

# Module-level client that query_report() uses for every request.
client = profound.Client(base_url=profound_api_host, api_key=profound_api_key)

## Configuration

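Set the query parameters used by the reports below: the timezone, the category ID, the domain, and the date range. (API credentials are read in the Setup cell above.)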


In [None]:
# Timezone that decides which calendar day counts as "today" for the window.
DEFAULT_TIMEZONE = pytz.timezone("America/New_York")

# Placeholders: replace with your own category ID and the domain to analyze.
category_id = "00000000-0000-0000-0000-000000000000"
domain_url = "example.com"

# Trailing 7-day window ending today. The report helpers in this notebook
# declare start_date/end_date as ISO strings (YYYY-MM-DD), so the dates are
# converted here instead of being passed around as datetime.date objects.
today = datetime.datetime.now(DEFAULT_TIMEZONE).date()
start_date = (today - datetime.timedelta(days=7)).isoformat()
end_date = today.isoformat()

## Common Filters

Define reusable filter configurations:


In [None]:
# Reusable filter clauses keyed by a short name. Each clause is a dict with
# 'field', 'operator' and 'value' keys, ready to be placed in a `filters` list.
common_filters = dict(
    prompts_visibility=dict(field="prompt_type", operator="is", value="visibility"),
    region_us=dict(field="region_id", operator="in", value=["3c37529b-e592-43a9-839a-14bee2673a6b"]),
)

## Helper Functions

### Core API Functions


In [None]:
def clean_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a copy of the payload with every None-valued entry dropped.

    The input dictionary is left untouched; only the top-level values are
    inspected, and falsy values other than None (0, "", []) are kept.

    Args:
        payload: Dictionary containing API request parameters

    Returns:
        New dictionary holding the entries whose value is not None,
        in their original order

    Example:
        >>> payload = {"filters": None, "pagination": None, "category_id": "123"}
        >>> clean_payload(payload)
        {'category_id': '123'}
    """
    cleaned: Dict[str, Any] = {}
    for key, value in payload.items():
        if value is None:
            continue
        cleaned[key] = value
    return cleaned


def format_response_data(response) -> List[Dict[str, Any]]:
    """
    Turn an API response into a list of flat records.

    The dimension and metric names are read from ``response.info.query`` and
    paired by position with each row's ``dimensions`` and ``metrics`` values.

    Args:
        response: Response object from Profound API (has .info and .data attributes)

    Returns:
        One dictionary per row in ``response.data``, keyed first by the
        dimension names and then by the metric names

    Raises:
        KeyError: If the query info has no 'dimensions' or 'metrics' entry

    Example:
        >>> response = client.reports.citations(...)
        >>> data = format_response_data(response)
        >>> df = pd.DataFrame(data)
    """
    query = response.info.query
    dimension_names = query["dimensions"]
    metric_names = query["metrics"]

    def to_record(row) -> Dict[str, Any]:
        # Values are looked up by index so a row with fewer values than names
        # fails loudly instead of being silently truncated.
        record = {name: row.dimensions[pos] for pos, name in enumerate(dimension_names)}
        record.update((name, row.metrics[pos]) for pos, name in enumerate(metric_names))
        return record

    return [to_record(row) for row in response.data]


def build_payload(
    category_id: str,
    start_date: str,
    end_date: str,
    metrics: List[str],
    dimensions: Optional[List[str]] = None,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> Dict[str, Any]:
    """
    Assemble the request payload for a report query.

    The four required fields are always present; 'dimensions', 'filters' and
    'pagination' are added only when a value other than None is supplied.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        metrics: List of metric names to retrieve
        dimensions: Optional list of dimension names to group by
        filters: Optional list of filter dictionaries
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        Dictionary ready to be sent as JSON payload

    Example:
        >>> payload = build_payload(
        ...     category_id="123",
        ...     start_date="2025-01-01",
        ...     end_date="2025-01-07",
        ...     metrics=["count"],
        ...     dimensions=["root_domain"],
        ... )
    """
    payload: Dict[str, Any] = dict(
        category_id=category_id,
        start_date=start_date,
        end_date=end_date,
        metrics=metrics,
    )
    optional_fields = (
        ("dimensions", dimensions),
        ("filters", filters),
        ("pagination", pagination),
    )
    # Only None means "not supplied"; empty lists/dicts are kept as given.
    payload.update((key, value) for key, value in optional_fields if value is not None)
    return payload


def query_report(report_type: str, payload: Dict[str, Any]) -> pd.DataFrame:
    """
    Query a report endpoint and return results as a pandas DataFrame.

    This function handles cleaning the payload, making the API request
    through the module-level ``client``, and formatting the response into a
    DataFrame with proper column names.

    Args:
        report_type: Type of report - either "citations" or "visibility"
        payload: Dictionary containing request parameters. Every entry whose
                 value is None (for example unused 'filters' or 'pagination')
                 is removed before the request is sent.

    Returns:
        DataFrame with one column per dimension followed by one column per
        metric. A response with no rows yields an empty DataFrame that still
        carries those columns.

    Raises:
        ValueError: If report_type is neither "citations" nor "visibility"
        KeyError: If response structure is unexpected

    Example:
        >>> df = query_report(
        ...     "citations",
        ...     {
        ...         "category_id": "123",
        ...         "start_date": "2025-01-01",
        ...         "end_date": "2025-01-07",
        ...         "metrics": ["count"],
        ...         "filters": None,
        ...         "pagination": None,
        ...     },
        ... )
    """
    clean_payload_dict = clean_payload(payload)

    if report_type == "citations":
        response = client.reports.citations(**clean_payload_dict)
    elif report_type == "visibility":
        response = client.reports.visibility(**clean_payload_dict)
    else:
        raise ValueError(f"Unknown report type: {report_type}. Must be 'citations' or 'visibility'")

    results = format_response_data(response)

    # Pass the column names explicitly: pd.DataFrame([]) has no columns at all,
    # which would make any later column lookup or sort on an empty result raise
    # KeyError. dict.fromkeys drops repeated names while keeping the same
    # order the row dictionaries use (dimensions first, then metrics).
    query_info = response.info.query
    columns = list(dict.fromkeys([*query_info["dimensions"], *query_info["metrics"]]))
    return pd.DataFrame(results, columns=columns)

### Visibility Report Functions

Functions for querying visibility reports. See [API Documentation](https://docs.tryprofound.com/api-reference/reports/query-visibility) for details.


In [None]:
def get_visibility_share_by_date(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Fetch the visibility score of each asset for each date.

    Queries the visibility report with the 'visibility_score' metric grouped
    by the 'date' and 'asset_name' dimensions.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries. Each filter should have
                 'field', 'operator', and 'value' keys.
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with columns ['date', 'asset_name', 'visibility_score'],
        sorted by date (ascending) and visibility_score (descending)

    Example:
        >>> df = get_visibility_share_by_date(
        ...     category_id="123", start_date="2025-01-01", end_date="2025-01-07", filters=[common_filters["region_us"]]
        ... )
    """
    report = query_report(
        "visibility",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["visibility_score"],
            dimensions=["date", "asset_name"],
            filters=filters,
            pagination=pagination,
        ),
    )
    # Oldest date first; within a date, highest score first.
    return report.sort_values(by=["date", "visibility_score"], ascending=[True, False])


def get_visibility_score_rank(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Fetch the visibility score of each asset over the whole date range.

    Queries the visibility report with the 'visibility_score' metric grouped
    by 'asset_name'. Rows are returned in the order the API provides them;
    no sorting is applied here.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with columns ['asset_name', 'visibility_score']

    Example:
        >>> df = get_visibility_score_rank(category_id="123", start_date="2025-01-01", end_date="2025-01-07")
    """
    return query_report(
        "visibility",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["visibility_score"],
            dimensions=["asset_name"],
            filters=filters,
            pagination=pagination,
        ),
    )


def get_visibility_share_of_voice_rank(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Fetch the share of voice of each asset over the whole date range.

    Queries the visibility report with the 'share_of_voice' metric grouped
    by 'asset_name'. Rows are returned in the order the API provides them;
    no sorting is applied here.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with columns ['asset_name', 'share_of_voice']

    Example:
        >>> df = get_visibility_share_of_voice_rank(category_id="123", start_date="2025-01-01", end_date="2025-01-07")
    """
    return query_report(
        "visibility",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["share_of_voice"],
            dimensions=["asset_name"],
            filters=filters,
            pagination=pagination,
        ),
    )


def get_visibility_rankings_by_topic(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Fetch visibility scores per asset within each topic, ranked per topic.

    Queries the visibility report with the 'visibility_score' metric grouped
    by 'asset_name' and 'topic', then orders the rows so that each topic's
    assets appear together from highest to lowest score.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries. Each filter should have
                 'field', 'operator', and 'value' keys.
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with columns ['topic', 'asset_name', 'visibility_score'],
        sorted by topic (ascending), visibility_score (descending), and
        asset_name (ascending). Topic and asset names are compared in
        lowercase for sorting; the returned values keep their original case.

    Example:
        >>> df = get_visibility_rankings_by_topic(
        ...     category_id="123", start_date="2025-01-01", end_date="2025-01-07", filters=[common_filters["region_us"]]
        ... )
        >>> # View top assets per topic
        >>> df.groupby("topic").head(10)
    """
    report = query_report(
        "visibility",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["visibility_score"],
            dimensions=["asset_name", "topic"],
            filters=filters,
            pagination=pagination,
        ),
    )
    output_columns = ["topic", "asset_name", "visibility_score"]
    sort_helpers = ["_topic", "_asset"]

    # Temporary lowercase copies of the text columns act as sort keys and are
    # removed again before returning.
    keyed = report[output_columns].assign(
        _topic=lambda frame: frame["topic"].str.lower(),
        _asset=lambda frame: frame["asset_name"].str.lower(),
    )
    ordered = keyed.sort_values(
        by=["_topic", "visibility_score", "_asset"],
        ascending=[True, False, True],
    )
    return ordered.drop(columns=sort_helpers)

### Citations Report Functions

Functions for querying citations reports. See [API Documentation](https://docs.tryprofound.com/api-reference/reports/query-citations) for details.


In [None]:
def get_citations_count(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Fetch the citation count for the date range.

    Queries the citations report with the 'count' metric and no dimensions,
    so the count is not broken down by any grouping.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with a single row containing the 'count' column

    Example:
        >>> df = get_citations_count(
        ...     category_id="123",
        ...     start_date="2025-01-01",
        ...     end_date="2025-01-07",
        ...     filters=[common_filters["prompts_visibility"]],
        ... )
    """
    return query_report(
        "citations",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["count"],
            filters=filters,
            pagination=pagination,
        ),
    )


def get_citation_share(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Fetch the citation share of voice of each root domain.

    Queries the citations report with the 'share_of_voice' metric grouped by
    'root_domain'. Rows are returned in the order the API provides them; no
    sorting is applied here.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with columns ['root_domain', 'share_of_voice']

    Example:
        >>> df = get_citation_share(category_id="123", start_date="2025-01-01", end_date="2025-01-07")
    """
    return query_report(
        "citations",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["share_of_voice"],
            dimensions=["root_domain"],
            filters=filters,
            pagination=pagination,
        ),
    )


def get_citation_share_by_date(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Fetch the citation share of voice of each root domain for each date.

    Queries the citations report with the 'share_of_voice' metric grouped by
    the 'date' and 'root_domain' dimensions.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with columns ['date', 'root_domain', 'share_of_voice'],
        sorted by date (ascending) and share_of_voice (descending)

    Example:
        >>> df = get_citation_share_by_date(category_id="123", start_date="2025-01-01", end_date="2025-01-07")
    """
    report = query_report(
        "citations",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["share_of_voice"],
            dimensions=["date", "root_domain"],
            filters=filters,
            pagination=pagination,
        ),
    )
    # Oldest date first; within a date, largest share first.
    return report.sort_values(by=["date", "share_of_voice"], ascending=[True, False])


def get_citation_categories(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Compute each citation category's share of voice, averaged across models.

    Citation counts are fetched per (citation_category, model) pair. Then:
    1. Each row's count is divided by the total count of its model, giving
       the category's share within that model
    2. The per-model shares of a category are summed and divided by the
       number of distinct models in the result

    The shares are derived only from the rows the query returns, so a
    'pagination' value that limits those rows also changes the shares.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with columns ['citation_category', 'share_of_voice', 'count'],
        sorted by share_of_voice (descending). 'count' is the category's
        citation count summed over all models.

    Example:
        >>> df = get_citation_categories(category_id="123", start_date="2025-01-01", end_date="2025-01-07")
    """
    rows = query_report(
        "citations",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["count"],
            dimensions=["citation_category", "model"],
            filters=filters,
            pagination=pagination,
        ),
    )

    model_count = rows["model"].nunique()
    totals_by_model = rows.groupby("model")["count"].sum()

    # Step 1: share of each category inside its own model.
    rows = rows.assign(share_of_voice=rows["count"].div(rows["model"].map(totals_by_model)))

    # Step 2: add up the per-model shares (and raw counts) per category, then
    # turn the summed share into an average over the models seen.
    per_category = rows.groupby(["citation_category"], as_index=False)[["share_of_voice", "count"]].sum()
    per_category["share_of_voice"] = per_category["share_of_voice"].div(model_count)

    return per_category.sort_values(by="share_of_voice", ascending=False)


def get_top_citation_domains(
    category_id: str,
    start_date: str,
    end_date: str,
    filters: Optional[List[Dict[str, Any]]] = None,
    pagination: Optional[Dict[str, int]] = None,
) -> pd.DataFrame:
    """
    Compute each root domain's citation share of voice, averaged across models.

    Citation counts are fetched per (root_domain, model) pair. Then:
    1. Each row's count is divided by the total count of its model, giving
       the domain's share within that model
    2. The per-model shares of a domain are summed and divided by the
       number of distinct models in the result

    The shares are derived only from the rows the query returns, so a
    'pagination' value that limits those rows also changes the shares.

    Args:
        category_id: Category identifier for the query
        start_date: Start date in ISO format (YYYY-MM-DD)
        end_date: End date in ISO format (YYYY-MM-DD)
        filters: Optional list of filter dictionaries
        pagination: Optional pagination dict with 'limit' and 'offset' keys

    Returns:
        DataFrame with columns ['root_domain', 'share_of_voice', 'count'],
        sorted by share_of_voice (descending). 'count' is the domain's
        citation count summed over all models.

    Example:
        >>> df = get_top_citation_domains(category_id="123", start_date="2025-01-01", end_date="2025-01-07")
    """
    rows = query_report(
        "citations",
        build_payload(
            category_id=category_id,
            start_date=start_date,
            end_date=end_date,
            metrics=["count"],
            dimensions=["root_domain", "model"],
            filters=filters,
            pagination=pagination,
        ),
    )

    model_count = rows["model"].nunique()
    totals_by_model = rows.groupby("model")["count"].sum()

    # Step 1: share of each domain inside its own model.
    rows = rows.assign(share_of_voice=rows["count"].div(rows["model"].map(totals_by_model)))

    # Step 2: add up the per-model shares (and raw counts) per domain, then
    # turn the summed share into an average over the models seen.
    per_domain = rows.groupby(["root_domain"], as_index=False)[["share_of_voice", "count"]].sum()
    per_domain["share_of_voice"] = per_domain["share_of_voice"].div(model_count)

    return per_domain.sort_values(by="share_of_voice", ascending=False)

## Visibility Reports


In [None]:
# Visibility score per date and asset, restricted by the shared "region_us" filter.
visibility_share_by_date = get_visibility_share_by_date(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[common_filters["region_us"]],
)
visibility_share_by_date  # bare trailing expression so the notebook displays the frame

In [None]:
# Visibility score per asset, restricted by the shared "region_us" filter.
visibility_score_rank = get_visibility_score_rank(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[common_filters["region_us"]],
)
visibility_score_rank  # bare trailing expression so the notebook displays the frame

In [None]:
# Share of voice per asset, restricted by the shared "region_us" filter.
visibility_share_of_voice_rank = get_visibility_share_of_voice_rank(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[common_filters["region_us"]],
)
visibility_share_of_voice_rank  # bare trailing expression so the notebook displays the frame

In [None]:
# Visibility score per topic and asset, restricted by the shared "region_us" filter.
visibility_rankings_by_topic = get_visibility_rankings_by_topic(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[common_filters["region_us"]],
)
# The frame is already ordered by score within each topic, so head(10) keeps
# the first ten rows of every topic.
visibility_rankings_by_topic.groupby("topic").head(10)

## Citations Reports

In [None]:
# Citation count with the shared "prompts_visibility" and "region_us" filters applied.
citations_count = get_citations_count(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[common_filters["prompts_visibility"], common_filters["region_us"]],
)
citations_count  # bare trailing expression so the notebook displays the frame

In [None]:
# Citation share of voice narrowed to the configured `domain_url`, on top of
# the shared "prompts_visibility" and "region_us" filters.
citation_share = get_citation_share(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[
        {"field": "root_domain", "operator": "is", "value": domain_url},
        common_filters["prompts_visibility"],
        common_filters["region_us"],
    ],
)
citation_share  # bare trailing expression so the notebook displays the frame

In [None]:
# Citation share of voice per date and root domain, with the shared
# "prompts_visibility" and "region_us" filters applied.
citation_share_by_date = get_citation_share_by_date(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[common_filters["prompts_visibility"], common_filters["region_us"]],
)
citation_share_by_date  # bare trailing expression so the notebook displays the frame

In [None]:
# Share of voice per citation category, with the shared "prompts_visibility"
# and "region_us" filters applied.
citation_categories = get_citation_categories(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[common_filters["prompts_visibility"], common_filters["region_us"]],
)
citation_categories  # bare trailing expression so the notebook displays the frame

In [None]:
# Share of voice per root domain, with the shared "prompts_visibility" and
# "region_us" filters applied.
top_citation_domains = get_top_citation_domains(
    category_id=category_id, start_date=start_date, end_date=end_date,
    filters=[common_filters["prompts_visibility"], common_filters["region_us"]],
)
top_citation_domains  # bare trailing expression so the notebook displays the frame