In [None]:
import warnings
warnings.filterwarnings("ignore")
import streamlit as st
import pandas as pd
from snowflake.snowpark.context import get_active_session
from snowflake.snowpark.functions import col
from snowflake.snowpark import types as T
from snowflake.core import Root
from snowflake.cortex import Complete
from service_generation import create_cortex_search_service
# Active Snowpark session of this notebook; every table and service below is created through it.
session = get_active_session()
# Snowflake Python API entry point, built on the same session object instead of fetching it a second time.
# It is used further down to look up Cortex Search services by database/schema/name.
root = Root(session)

# Document Generation
# Project-local helper; presumably it creates the demo files on the DOCUMENTS stage
# that the SQL cells below parse -- confirm in doc_generation.py.
import doc_generation
doc_generation.generate_demo_documents(session)

# Cortex Agents
In this notebook, you will set up multiple Cortex Search and Cortex Analyst services, which Cortex Agents will use to answer user queries on unstructured and structured data.
![text](https://github.com/michaelgorkow/snowflake_cortex_agents_demo/blob/main/resources/cortex_agents_notebook_small.png?raw=true)

# Setup the Cortex Search Service [Unstructured Data]

We have some PDF documents in our stage **DOCUMENTS** that we want business users to be able to ask questions about.  
To achieve this, we need to extract the contents of the PDF files and make them searchable.

## Extracting Content from PDF Files

### [`PARSE_DOCUMENT`](https://docs.snowflake.com/en/sql-reference/functions/parse_document-snowflake-cortex)  
This function returns the extracted content from a document on a Snowflake stage as an **OBJECT** that contains JSON-encoded objects as strings.  

It supports two types of extractions:  
- **Optical Character Recognition (OCR)**  
- **Layout Extraction**  

### [`SPLIT_TEXT_RECURSIVE_CHARACTER`](https://docs.snowflake.com/en/sql-reference/functions/split_text_recursive_character-snowflake-cortex)  
The `SPLIT_TEXT_RECURSIVE_CHARACTER` function splits a string into shorter strings recursively. It is useful for preprocessing text to be used with text embedding or search indexing functions.

In [None]:
-- List documents in stage
-- Reads the directory table of the DOCUMENTS stage, i.e. one row per staged file.
-- The RELATIVE_PATH column shown here is what the following cells filter on and pass to PARSE_DOCUMENT.
SELECT * FROM DIRECTORY('@DOCUMENTS');

In [None]:
-- Run layout extraction on every staged file under marketing_campaigns/ and
-- store the extracted content as plain text, one row per file.
-- IF NOT EXISTS: when the table is already there the statement does nothing,
-- so re-running this cell does not parse the documents again.
CREATE TABLE IF NOT EXISTS _UNSTR_RAW_DOCUMENTS_MARKETING_CAMPAIGNS AS
SELECT
    RELATIVE_PATH,
    TO_VARCHAR(
        SNOWFLAKE.CORTEX.PARSE_DOCUMENT('@DOCUMENTS', RELATIVE_PATH, {'mode': 'LAYOUT'}):content
    ) AS EXTRACTED_LAYOUT
FROM DIRECTORY('@DOCUMENTS')
WHERE STARTSWITH(RELATIVE_PATH, 'marketing_campaigns/');

-- Preview the parsed documents
SELECT * FROM _UNSTR_RAW_DOCUMENTS_MARKETING_CAMPAIGNS;

In [None]:
-- Split every parsed document into text chunks (one output row per chunk)
-- so that the chunks can be indexed by a search service.
CREATE OR REPLACE TABLE _UNSTR_CHUNKED_DOCUMENTS_MARKETING_CAMPAIGNS AS
SELECT
    docs.RELATIVE_PATH,
    -- Pre-signed link to the source file; 604800 seconds = 7 days
    GET_PRESIGNED_URL(@DOCUMENTS, docs.RELATIVE_PATH, 604800) AS URL,
    -- Position of the chunk within the array returned by the splitter
    chunks.INDEX::INTEGER AS CHUNK_INDEX,
    chunks.VALUE::TEXT AS CHUNK_TEXT
FROM
    _UNSTR_RAW_DOCUMENTS_MARKETING_CAMPAIGNS AS docs,
    LATERAL FLATTEN(
        INPUT => SNOWFLAKE.CORTEX.SPLIT_TEXT_RECURSIVE_CHARACTER(
            docs.EXTRACTED_LAYOUT,
            'markdown',               -- format of the input text
            4000,                     -- chunk size
            0,                        -- overlap between consecutive chunks
            ['\n\n', '\n', ' ', '']   -- separators
        )
    ) AS chunks;

-- Preview the chunks
SELECT * FROM _UNSTR_CHUNKED_DOCUMENTS_MARKETING_CAMPAIGNS;

In [None]:
-- Create a Cortex Search Service over the chunked marketing campaign documents.
--   ON          : CHUNK_TEXT is the column that is searched.
--   ATTRIBUTES  : RELATIVE_PATH and CHUNK_INDEX are declared as attribute columns.
--   TARGET_LAG  : the service is kept at most about one hour behind the source table.
-- URL is part of the source query only so that it can be requested as a result column
-- (the RAG test cell below asks for it).
CREATE OR REPLACE CORTEX SEARCH SERVICE SEARCH_MARKETING_CAMPAIGNS
  ON CHUNK_TEXT
  ATTRIBUTES RELATIVE_PATH, CHUNK_INDEX
  WAREHOUSE = COMPUTE_WH
  TARGET_LAG = '1 hour'
  EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
AS (
  SELECT
      CHUNK_TEXT,
      RELATIVE_PATH,
      CHUNK_INDEX,
      URL
  FROM _UNSTR_CHUNKED_DOCUMENTS_MARKETING_CAMPAIGNS
);

In [None]:
# Create the remaining search services through the project helper,
# one call per name and in the same order as before.
for service_topic in (
    'product_specifications',
    'regional_market_reports',
    'financial_operations_reports',
    'customer_contracts',
):
    create_cortex_search_service(session, service_topic)

### [Optional] Test Your Service in a Simple RAG Pipeline  

In this small example, we **combine Cortex Search with Cortex LLMs** to generate a response from context—also known as **Retrieval-Augmented Generation (RAG)**.  
This approach enhances responses by retrieving relevant data before generating an answer, improving accuracy and contextual relevance. 🚀  

In [None]:
question = 'Which marketing campaigns targeted the Chocolate category and what were the sales results?'

# Fetch service
my_service = (root
  .databases["CORTEX_AGENTS_DEMO"]
  .schemas["FINANCE_FOOD_BEVERAGE"]
  .cortex_search_services["SEARCH_MARKETING_CAMPAIGNS"]
)

# Query service: only the single best-matching chunk is retrieved and used as context
search_response = my_service.search(
  query=question,
  columns=["CHUNK_INDEX", "CHUNK_TEXT", "RELATIVE_PATH", "URL"],
  limit=1
)

if not search_response.results:
    # Nothing was retrieved: report it instead of failing with an IndexError on results[0]
    st.warning('The search service returned no results for this question.')
else:
    resp = search_response.results[0]

    # Show where the context came from
    st.info(f'**File:** {resp["RELATIVE_PATH"]}\n\n **Source:**\n\n {resp["URL"]}\n\n {resp["CHUNK_TEXT"]}')

    # Generate Response: the retrieved chunk is appended to the question as context
    model = 'mistral-large2'
    prompt = f"{question} Answer based on the provided context: {resp['CHUNK_TEXT']}"
    response = Complete(model, prompt).strip()

    st.info(f'**LLM Response:**\n\n**{response}**')

# Setup the Cortex Analyst Service [Structured Data]  

We generate a realistic-looking financial dataset for a food and beverage company that users will be able to **query in natural language**.  

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)

def generate_financial_dataset(num_periods=48):
    """Generate a synthetic financial dataset for a food and beverage company.

    All random values come from the module-level ``random`` generator, so seed it
    before calling to get reproducible output. The order of the random draws is
    fixed: products, then customers, then sales.

    Args:
        num_periods: Number of consecutive monthly periods to generate, starting
            with January 2021. Defaults to 48 (four years). The campaign table
            references period IDs up to 18, so a smaller value leaves some
            campaigns pointing at periods that are not generated.

    Returns:
        dict with the keys 'products', 'customers', 'time_periods', 'sales' and
        'campaigns', each mapped to a pandas DataFrame with upper-case column names.
    """

    # 1. PRODUCTS TABLE
    categories = {
        'Coffee': ['NesKafe Classic', 'NesKafe Gold', 'NesKafe Decaf', 'NesKafe Instant',
                  'NesKafe Cappuccino', 'NesKafe Latte', 'NesKafe Mocha'],
        'Water': ['PureLife Natural', 'PureLife Sparkling', 'PureLife Flavored', 
                 'AquaFlow Premium', 'SpringSource Mountain', 'CrystalClear Pure'],
        'Chocolate': ['ChocoBars Dark', 'ChocoBars Milk', 'ChocoBars White', 'ChocoBars Almond',
                     'SweetTreats Original', 'SweetTreats Caramel', 'ChocoWafers Crispy',
                     'CreamyBites Hazelnut', 'DeluxeChoc Premium'],
        'Baby Food': ['BabyFirst Formula', 'BabyFirst Organic', 'BabyFirst Cereal', 
                     'TinyTots Puree', 'LittleOnes Snacks', 'InfantCare Plus'],
        'Dairy': ['CreamyDelight Vanilla', 'CreamyDelight Chocolate', 'CreamyDelight Strawberry',
                 'FrozenJoy Cookies', 'FrozenJoy Mint', 'PremiumScoop Deluxe'],
        'Cereals': ['MorningCrunch Honey', 'MorningCrunch Chocolate', 'HealthyStart Oats',
                   'FiberPlus Original', 'KidsChoice Fruity'],
        'Pet Care': ['PetLove Dry Food', 'PetLove Wet Food', 'PetLove Treats', 'PetCare Premium']
    }

    products_data = []
    for category, products in categories.items():
        for product in products:
            products_data.append({
                # IDs are assigned sequentially across all categories, starting at 1
                'PRODUCT_ID': len(products_data) + 1,
                'PRODUCT_NAME': product,
                'CATEGORY': category,
                # Cost and price are drawn independently, so a product can cost more than it sells for
                'UNIT_COST': round(random.uniform(0.5, 15.0), 2),
                'UNIT_PRICE': round(random.uniform(1.0, 25.0), 2)
            })

    products_df = pd.DataFrame(products_data)

    # 2. CUSTOMERS TABLE (High cardinality - customer names)
    customer_types = ['Supermarket', 'Convenience Store', 'Hypermarket', 'Online Retailer', 'Distributor']
    regions = ['North America', 'Europe', 'Asia Pacific', 'Latin America', 'Middle East & Africa']

    customer_names = [
        # Supermarkets
        'FreshMart Downtown', 'FreshMart Central', 'FreshMart Plaza', 'GroceryWorld Main',
        'GroceryWorld Express', 'SuperShop Premium', 'SuperShop Local', 'MegaStore Alpha',
        'MegaStore Beta', 'MegaStore Gamma', 'QuickBuy Central', 'QuickBuy Corner',

        # Hypermarkets
        'HyperMall North', 'HyperMall South', 'HyperMall East', 'GiantStore Complex',
        'GiantStore Plaza', 'UltraMart Mega', 'UltraMart Super',

        # Online Retailers
        'E-Commerce Hub', 'Digital Grocery Co', 'Online Fresh Ltd', 'WebMart Express',
        'VirtualStore Pro', 'ClickAndBuy Solutions',

        # Distributors
        'Regional Dist. Corp', 'National Supply Chain', 'Metro Distribution', 
        'Premium Wholesale Ltd', 'Global Trade Partners', 'Continental Suppliers',

        # International
        'EuroMart Berlin', 'EuroMart Paris', 'AsiaFresh Tokyo', 'AsiaFresh Seoul',
        'LatinMarket Mexico', 'LatinMarket Brazil', 'AfricaTrade Lagos', 'AfricaTrade Cairo'
    ]

    customers_data = []
    for customer_id, name in enumerate(customer_names, 1):
        customers_data.append({
            'CUSTOMER_ID': customer_id,
            'CUSTOMER_NAME': name,
            # Type and region are random and independent of the name's grouping above
            'CUSTOMER_TYPE': random.choice(customer_types),
            'REGION': random.choice(regions),
            'CREDIT_LIMIT': random.choice([50000, 100000, 250000, 500000, 1000000])
        })

    customers_df = pd.DataFrame(customers_data)

    # 3. TIME PERIODS TABLE
    start_date = datetime(2021, 1, 1)
    time_periods = []

    for i in range(num_periods):
        # Step by calendar month. Stepping by 30 days drifts: it yields two
        # January periods, skips February and never lands on the first of a month again.
        months_elapsed = (start_date.month - 1) + i
        current_date = start_date.replace(
            year=start_date.year + months_elapsed // 12,
            month=months_elapsed % 12 + 1,
        )
        time_periods.append({
            'PERIOD_ID': i + 1,
            'YEAR': current_date.year,
            'MONTH': current_date.month,
            'QUARTER': f"Q{(current_date.month-1)//3 + 1}",
            'MONTH_NAME': current_date.strftime('%B'),
            'DATE': current_date.strftime('%Y-%m-%d')
        })

    time_periods_df = pd.DataFrame(time_periods)

    # 4. SALES TRANSACTIONS TABLE
    # Plain dict lookup instead of filtering the products DataFrame once per transaction
    price_and_cost = {
        product['PRODUCT_ID']: (product['UNIT_PRICE'], product['UNIT_COST'])
        for product in products_data
    }
    customer_count = len(customers_data)
    product_count = len(products_data)

    sales_data = []
    transaction_id = 1

    for period in range(1, num_periods + 1):
        # Generate different number of transactions per month
        num_transactions = random.randint(800, 1200)

        for _ in range(num_transactions):
            customer_id = random.randint(1, customer_count)
            product_id = random.randint(1, product_count)
            quantity = random.randint(10, 1000)

            # Get product info for calculations
            unit_price, unit_cost = price_and_cost[product_id]

            # Add some price variation (+/- 10% around the list price)
            actual_price = unit_price * random.uniform(0.9, 1.1)
            revenue = quantity * actual_price
            cost = quantity * unit_cost

            sales_data.append({
                'TRANSACTION_ID': transaction_id,
                'CUSTOMER_ID': customer_id,
                'PRODUCT_ID': product_id,
                'PERIOD_ID': period,
                'QUANTITY_SOLD': quantity,
                'UNIT_PRICE': round(actual_price, 2),
                'TOTAL_REVENUE': round(revenue, 2),
                'TOTAL_COST': round(cost, 2),
                'GROSS_PROFIT': round(revenue - cost, 2)
            })
            transaction_id += 1

    sales_df = pd.DataFrame(sales_data)

    # 5. MARKETING CAMPAIGNS TABLE
    # start_period / end_period are PERIOD_ID values of the time periods table
    campaigns_data = [
        {'campaign_id': 1, 'campaign_name': 'Coffee Lovers Special', 'category': 'Coffee', 
         'start_period': 3, 'end_period': 5, 'budget': 500000, 'discount_percent': 15},
        {'campaign_id': 2, 'campaign_name': 'Summer Hydration', 'category': 'Water', 
         'start_period': 6, 'end_period': 8, 'budget': 750000, 'discount_percent': 10},
        {'campaign_id': 3, 'campaign_name': 'Back to School', 'category': 'Cereals', 
         'start_period': 8, 'end_period': 9, 'budget': 400000, 'discount_percent': 20},
        {'campaign_id': 4, 'campaign_name': 'Holiday Treats', 'category': 'Chocolate', 
         'start_period': 11, 'end_period': 12, 'budget': 1000000, 'discount_percent': 25},
        {'campaign_id': 5, 'campaign_name': 'New Year Health', 'category': 'Baby Food', 
         'start_period': 13, 'end_period': 14, 'budget': 300000, 'discount_percent': 12},
        {'campaign_id': 6, 'campaign_name': 'Spring Refresh', 'category': 'Dairy', 
         'start_period': 15, 'end_period': 17, 'budget': 600000, 'discount_percent': 18},
        {'campaign_id': 7, 'campaign_name': 'Pet Love Month', 'category': 'Pet Care', 
         'start_period': 18, 'end_period': 18, 'budget': 200000, 'discount_percent': 30}
    ]

    campaigns_df = pd.DataFrame(campaigns_data)
    # Upper-case the column names so they match the other tables
    campaigns_df.columns = [column_name.upper() for column_name in campaigns_df.columns]

    return {
        'products': products_df,
        'customers': customers_df,
        'time_periods': time_periods_df,
        'sales': sales_df,
        'campaigns': campaigns_df
    }

# Build the five pandas tables in memory
dataset = generate_financial_dataset()

# Upload each table to Snowflake; existing tables of the same name are overwritten
products_df = session.write_pandas(
    df=dataset['products'], table_name='PRODUCTS', overwrite=True, auto_create_table=True
)
customers_df = session.write_pandas(
    df=dataset['customers'], table_name='CUSTOMERS', overwrite=True, auto_create_table=True
)
time_periods_df = session.write_pandas(
    df=dataset['time_periods'], table_name='TIME_PERIODS', overwrite=True, auto_create_table=True
)
# The generator emits DATE as a '%Y-%m-%d' string, so cast the uploaded column
# to a real date type and write the table a second time with the corrected column.
time_periods_df = time_periods_df.with_column("DATE", col("DATE").cast(T.DateType()))
time_periods_df.write.mode("overwrite").save_as_table('TIME_PERIODS', mode='overwrite')
campaigns_df = session.write_pandas(
    df=dataset['campaigns'], table_name='CAMPAIGNS', overwrite=True, auto_create_table=True
)
sales_df = session.write_pandas(
    df=dataset['sales'], table_name='SALES', overwrite=True, auto_create_table=True
)

# Show the first three rows of every generated (local pandas) table
for table_name, df in dataset.items():
    st.subheader(f"\n{table_name.upper()}")
    st.dataframe(df.head(3))

# Dynamic Literal Retrieval with Cortex Analyst

Business users may not have detailed knowledge of how data is stored in Snowflake.  
Instead of ingesting all possible values of a column into **Cortex Analyst**, we will use **dynamic literal retrieval** via the [Cortex Search Integration](https://docs.snowflake.com/en/user-guide/snowflake-cortex/cortex-analyst/cortex-analyst-search-integration).

## How It Works  
When a user asks a question about their **sales** that requires the `PRODUCT_NAME`, `CUSTOMER_NAME`, or `CAMPAIGN_NAME` column, **Cortex Analyst** will:  
1. Retrieve the relevant literal dynamically from **Cortex Search**  
2. Use it for **SQL generation**  

This approach ensures efficient and accurate query generation without preloading all possible values into Cortex Analyst.  


In [None]:
-- Search service over the distinct product names in PRODUCTS.
-- The semantic view defined further down references this service for the PRODUCT_NAME dimension.
-- IF NOT EXISTS: an already existing service is left as it is when the cell is re-run.
CREATE CORTEX SEARCH SERVICE IF NOT EXISTS _ANALYST_PRODUCT_NAME_SEARCH
  ON PRODUCT_NAME
  WAREHOUSE = COMPUTE_WH
  TARGET_LAG = '1 hour'
  EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
AS (
  SELECT
      DISTINCT PRODUCT_NAME
  FROM PRODUCTS
);

In [None]:
-- Search service over the distinct customer names in CUSTOMERS.
-- The semantic view defined further down references this service for the CUSTOMER_NAME dimension.
-- IF NOT EXISTS: an already existing service is left as it is when the cell is re-run.
CREATE CORTEX SEARCH SERVICE IF NOT EXISTS _ANALYST_CUSTOMER_NAME_SEARCH
  ON CUSTOMER_NAME
  WAREHOUSE = COMPUTE_WH
  TARGET_LAG = '1 hour'
  EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
AS (
  SELECT
      DISTINCT CUSTOMER_NAME
  FROM CUSTOMERS
);

In [None]:
-- Search service over the distinct campaign names in CAMPAIGNS.
-- The semantic view defined further down references this service for the CAMPAIGN_NAME dimension.
-- IF NOT EXISTS: an already existing service is left as it is when the cell is re-run.
CREATE CORTEX SEARCH SERVICE IF NOT EXISTS _ANALYST_CAMPAIGN_SEARCH
  ON CAMPAIGN_NAME
  WAREHOUSE = COMPUTE_WH
  TARGET_LAG = '1 hour'
  EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
AS (
  SELECT
      DISTINCT CAMPAIGN_NAME
  FROM CAMPAIGNS
);

### [Optional] Test Literal Retrievals

In [None]:
question = 'What was the over impact of the sumer hydration campaign?'

# Fetch service
my_service = (root
  .databases["CORTEX_AGENTS_DEMO"]
  .schemas["FINANCE_FOOD_BEVERAGE"]
  .cortex_search_services["_ANALYST_CAMPAIGN_SEARCH"]
)

# Query service: ask for the single campaign name closest to the free-text question
resp = my_service.search(
  query=question,
  columns=["CAMPAIGN_NAME"],
  limit=1
)

# Iterate over the results (as the customer and product test cells do) instead of
# indexing results[0], so an empty result set does not raise an IndexError.
for r in resp.results:
    st.info(f'**Search Results: {r["CAMPAIGN_NAME"]}**')

In [None]:
question = 'What was the revenue per week for my customer supershop local?'

# Look up the customer-name search service by database / schema / service name
my_service = root.databases["CORTEX_AGENTS_DEMO"].schemas["FINANCE_FOOD_BEVERAGE"].cortex_search_services["_ANALYST_CUSTOMER_NAME_SEARCH"]

# Ask for the single customer name closest to the free-text question
resp = my_service.search(query=question, columns=["CUSTOMER_NAME"], limit=1)

# Display every returned literal
for r in resp.results:
    st.info(r['CUSTOMER_NAME'])

In [None]:
question = 'What was the revenue for pure life products?'

# Look up the product-name search service by database / schema / service name
my_service = root.databases["CORTEX_AGENTS_DEMO"].schemas["FINANCE_FOOD_BEVERAGE"].cortex_search_services["_ANALYST_PRODUCT_NAME_SEARCH"]

# Ask for the three product names closest to the free-text question
resp = my_service.search(query=question, columns=["PRODUCT_NAME"], limit=3)

# Display every returned literal
for r in resp.results:
    st.info(r['PRODUCT_NAME'])

# Create Semantic View

In [None]:
create or replace semantic view CORTEX_AGENTS_DEMO.FINANCE_FOOD_BEVERAGE.FINANCE_SEMANTIC_MODEL 
tables (
    CAMPAIGNS primary key (CATEGORY) with synonyms =(
        'advertisements',
        'promotions',
        'marketing campaigns',
        'ad campaigns',
        'promotional activities',
        'sales promotions',
        'advertising initiatives'
    ) comment = 'This table stores information about marketing campaigns, including the campaign''s unique identifier, name, category, start and end periods, allocated budget, and discount percentage offered during the campaign.',
    CUSTOMERS primary key (CUSTOMER_ID) with synonyms =(
        'clients',
        'patrons',
        'buyers',
        'consumers',
        'customers_list',
        'customer_base',
        'client_base',
        'customer_database'
    ) comment = 'This table stores information about customers, including their unique identifier, name, type (e.g. individual, business, etc.), geographic region, and credit limit.',
    PRODUCTS primary key (CATEGORY, PRODUCT_ID) with synonyms =(
        'items',
        'goods',
        'merchandise',
        'commodities',
        'stock',
        'inventory',
        'products_list',
        'product_catalog'
    ) comment = 'This table stores information about the products offered by a company, including a unique identifier, product name, category, and pricing details.',
    SALES primary key (PRODUCT_ID) with synonyms =(
        'SALES_DATA',
        'SALES_INFO',
        'SALES_RECORDS',
        'TRANSACTION_DATA',
        'SALES_TRANSACTIONS',
        'REVENUE_DATA'
    ) comment = 'This table stores sales transaction data, capturing key information about each sale, including the transaction ID, customer and product details, sales period, quantity sold, pricing, revenue, cost, and profit.',
    TIME_PERIODS primary key (PERIOD_ID) with synonyms =(
        'time_periods',
        'time_frames',
        'periods',
        'time_intervals',
        'date_ranges',
        'calendar_periods',
        'fiscal_periods',
        'reporting_periods'
    ) comment = 'This table stores information about specific time periods, including the period ID, year, month, quarter, month name, and a specific date. It appears to be a date dimension table, which is commonly used in data warehousing and business intelligence applications to provide a centralized repository of date-related data for reporting and analysis purposes.'
) relationships (
    CAMPAIGNS_TO_TIME_PERIOD_END as CAMPAIGNS(END_PERIOD) references TIME_PERIODS(PERIOD_ID),
    CAMPAIGNS_TO_TIME_PERIOD_START as CAMPAIGNS(START_PERIOD) references TIME_PERIODS(PERIOD_ID),
    CAMPAIGNS_TO_PRODUCT as PRODUCTS(CATEGORY) references CAMPAIGNS(CATEGORY),
    SALES_TO_PRODUCT as PRODUCTS(PRODUCT_ID) references SALES(PRODUCT_ID),
    SALES_TO_CUSTOMERS as SALES(CUSTOMER_ID) references CUSTOMERS(CUSTOMER_ID),
    SALES_TO_TIME_PERIODS as SALES(PERIOD_ID) references TIME_PERIODS(PERIOD_ID)
) facts (
    CAMPAIGNS.BUDGET as BUDGET with synonyms =(
        'funds_allocated',
        'allocated_amount',
        'financial_plan',
        'cost_plan',
        'expense_limit',
        'financial_allocation',
        'allocated_funds',
        'cost_estimate'
    ) comment = 'The budget allocated for each marketing campaign.',
    CAMPAIGNS.DISCOUNT_PERCENT as DISCOUNT_PERCENT with synonyms =(
        'discount_rate',
        'percentage_off',
        'promo_percentage',
        'sale_percentage',
        'markdown_percentage',
        'percent_discount',
        'discount_percentage_value'
    ) comment = 'The percentage discount offered to customers as part of a marketing campaign.',
    CUSTOMERS.CREDIT_LIMIT as CREDIT_LIMIT with synonyms =(
        'max_credit',
        'credit_ceiling',
        'credit_maximu',
        'credit_upper_limit',
        'maximum_credit_allowed',
        'credit_limit_amount'
    ) comment = 'The maximum amount of credit that a customer is a allowed to use for purchases.',
    PRODUCTS.UNIT_COST as UNIT_COST with synonyms =(
        'cost_per_unit',
        'unit_price_base',
        'base_cost',
        'cost_per_item',
        'unit_expense',
        'item_cost'
    ) comment = 'The cost of a single unit of each product, representing the amount the company pays to produce or purchase one item.',
    PRODUCTS.UNIT_PRICE as UNIT_PRICE with synonyms =(
        'price_per_unit',
        'selling_price',
        'list_price',
        'retail_price',
        'item_price'
    ) comment = 'The price of each product per unit.',
    SALES.GROSS_PROFIT as GROSS_PROFIT with synonyms =(
        'gross_margin',
        'profit',
        'net_gain',
        'earnings',
        'total_profit',
        'revenue_surplus',
        'income',
        'net_earnings',
        'profit_margin'
    ) comment = 'The total profit earned from sales after deducting the cost of goods sold, representing the amount left over to cover operating expenses and generate net income.',
    SALES.QUANTITY_SOLD as QUANTITY_SOLD with synonyms =(
        'units_sold',
        'items_sold',
        'sales_volume',
        'quantity_purchased',
        'amout_sold',
        'volume_sold',
        'sales_quantity'
    ) comment = 'The total number of units of a product sold during a transaction.',
    SALES.TOTAL_COST as TOTAL_COST with synonyms =(
        'total_expense',
        'total_spend',
        'total_outlay',
        'total_expenditure',
        'overall_cost',
        'total_outgoings'
    ) comment = 'The total cost of a sale, representing the overall amount spent by a customer on a particular transaction.',
    SALES.TOTAL_REVENUE as TOTAL_REVENUE with synonyms =(
        'total_sales',
        'total_income',
        'revenue_total',
        'total_revenue_generated',
        'total_turnover',
        'total_receipts',
        'total_earnings'
    ) comment = 'The total revenue generated from sales, representing the total amount of money earned from the sale of products or services.',
    SALES.UNIT_PRICE as UNIT_PRICE with synonyms =(
        'item_price',
        'price_per_unit',
        'cost_per_item',
        'unit_cost',
        'base_price',
        'list_price',
        'selling_price',
        'price_per_item'
    ) comment = 'The price of a single unit of a product sold.'
) dimensions (
    CAMPAIGNS.CAMPAIGN_ID as CAMPAIGN_ID with synonyms =(
        'campaign_key',
        'campaign_number',
        'campaign_code',
        'marketing_id',
        'promo_id',
        'ad_id'
    ) comment = 'Unique identifier for a marketing campaign.',
    CAMPAIGNS.CAMPAIGN_NAME as CAMPAIGN_NAME with synonyms =(
        'advertising_name',
        'marketing_campaign',
        'promotion_name',
        'campaign_title',
        'ad_name',
        'promo_name'
    ) comment = 'The name of a specific marketing campaign run by the company.',
    CAMPAIGNS.CATEGORY as CATEGORY with synonyms =(
        'type',
        'classification',
        'group',
        'genre',
        'kind',
        'class',
        'sort',
        'label',
        'designation'
    ) comment = 'The category of product or service being promoted in the campaign.',
    CAMPAIGNS.END_PERIOD as END_PERIOD with synonyms =(
        'end_date',
        'end_time',
        'period_end',
        'expiration_date',
        'termination_date',
        'closing_period',
        'final_date',
        'last_date'
    ) comment = 'The end date of the campaign period, represented as a numerical value.',
    CAMPAIGNS.START_PERIOD as START_PERIOD with synonyms =(
        'start_date',
        'initial_period',
        'beginning_period',
        'commencement_date',
        'kickoff_date',
        'launch_date',
        'period_start',
        'start_time'
    ) comment = 'The start period of a campaign, representing the month when the campaign was initiated, with values ranging from 3 to 8, likely corresponding to the third to eighth months of the year.',
    CUSTOMERS.CUSTOMER_ID as CUSTOMER_ID with synonyms =(
        'customer_key',
        'client_id',
        'account_number',
        'customer_number',
        'client_id_number',
        'account_id'
    ) comment = 'Unique identifier for each customer in the database, used to distinguish and reference individual customers across various transactions and interactions.',
    CUSTOMERS.CUSTOMER_NAME as CUSTOMER_NAME with synonyms =(
        'client_name',
        'account_holder',
        'account_name',
        'client_title',
        'customer_title',
        'full_name',
        'account_owner'
    ) comment = 'The name of the customer, which can be a physical store location or an online entity.',
    CUSTOMERS.CUSTOMER_TYPE as CUSTOMER_TYPE with synonyms =(
        'customer_category',
        'client_type',
        'account_type',
        'customer_classification',
        'client_classification',
        'account_classification'
    ) comment = 'The type of business or organization that the customer represents, such as a retail store, supermarket, or wholesale distributor.',
    CUSTOMERS.REGION as REGION with synonyms =(
        'area',
        'territory',
        'zone',
        'district',
        'location',
        'geographic_area',
        'province',
        'state',
        'county',
        'municipality'
    ) comment = 'Geographic region where the customer is located.',
    PRODUCTS.CATEGORY as CATEGORY with synonyms =(
        'type',
        'classification',
        'group',
        'product_type',
        'product_group',
        'class',
        'genre',
        'kind'
    ) comment = 'The category of the product, which can be one of the 3 types: Coffee, Water, or Chocolate, indicating the main classification or grouping of the product.',
    PRODUCTS.PRODUCT_ID as PRODUCT_ID with synonyms =(
        'product_key',
        'item_id',
        'product_number',
        'item_number',
        'product_code',
        'sku',
        'product_identifier'
    ) comment = 'Unique identifier for each product in the catalog.',
    PRODUCTS.PRODUCT_NAME as PRODUCT_NAME with synonyms =(
        'item_name',
        'product_title',
        'item_title',
        'product_description',
        'product_label',
        'item_label'
    ) comment = 'The type of NesKafe coffee product beign sold.',
    SALES.CUSTOMER_ID as CUSTOMER_ID with synonyms =(
        'client_id',
        'customer_number',
        'account_id',
        'client_number',
        'account_holder_id',
        'user_id'
    ) comment = 'Unique identifier for the customer who made the purchase.',
    SALES.PERIOD_ID as PERIOD_ID with synonyms =(
        'time_period',
        'reporting_period',
        'fiscal_period',
        'accounting_period',
        'cycle_id',
        'interval_id',
        'time_interval',
        'period_number'
    ) comment = 'A unique identifier for a specific time period, such as a month, quarter, or year, used to track sales performance over time.',
    SALES.PRODUCT_ID as PRODUCT_ID with synonyms =(
        'item_id',
        'product_code',
        'item_code',
        'product_number',
        'sku',
        'product_key'
    ) comment = 'Unique identifier for the product being sold.',
    SALES.TRANSACTION_ID as TRANSACTION_ID with synonyms =(
        'order_id',
        'transaction_number',
        'sale_id',
        'invoice_number',
        'purchase_id',
        'deal_id',
        'trade_id'
    ) comment = 'Unique identifier for each sales transaction.',
    TIME_PERIODS.DATE as DATE with synonyms =(
        'day',
        'calendar_date',
        'timestamp',
        'datestamp',
        'calendar_day',
        'date_value'
    ) comment = 'Date dimension representing specific points in time, used to track and analyze data over distinct periods.',
    TIME_PERIODS.MONTH as MONTH with synonyms =(
        'month_number',
        'month_value',
        'month_code',
        'calendar_month',
        'month_of_year',
        'month_index'
    ) comment = 'The month of the year in which a specific event or transaction occurred, represented by a numerical value (1-12), with the provided values indicating January, March, and April.',
    TIME_PERIODS.MONTH_NAME as MONTH_NAME with synonyms =(
        'month_description',
        'month_full_name',
        'full_month_name',
        'month_label',
        'month_title',
        'month_text'
    ) comment = 'The month of the year in which a specific event or transaction occurred.',
    TIME_PERIODS.PERIOD_ID as PERIOD_ID with synonyms =(
        'period_key',
        'time_period_identifier',
        'id',
        'time_id',
        'interval_id',
        'cycle_id'
    ) comment = 'Unique identifier for a specific time period, used to distinguish between different intervals of time.',
    TIME_PERIODS.QUARTER as QUARTER with synonyms =(
        'quarterly_period',
        'financial_quarter',
        'qtr',
        'fiscal_quarter',
        'quarterly_term'
    ) comment = 'The quarter of the year in which a specific event or metric occurred, with possible values being Q1 (January to March), Q2 (April to June), and Q3 (July to September).',
    TIME_PERIODS.YEAR as YEAR with synonyms =(
        'year_value',
        'annual_period',
        'fiscal_year',
        'calendar_year',
        'yearly_period',
        'twelve_month_period'
    ) comment = 'The year in which the data was recorded or the event occurred.'
) comment = 'This models represents the global sales and marketing operations of a major food and beverage company, tracking financial performance across key product categories including coffee, water, chocolate, baby food, dairy, cereals, and pet care products. The data captures 24 months of transactional sales data from diverse customer channels (supermarkets, hypermarkets, online retailers, and distributors) across five major geographic regions, enabling comprehensive revenue, profitability, and market performance analysis. Additionally, the dataset includes marketing campaign information to measure promotional effectiveness and seasonal trends, providing insights for strategic decision-making in product portfolio management and customer relationship optimization.' with extension (
    CA = '{"tables":[{"name":"CAMPAIGNS","dimensions":[{"name":"CAMPAIGN_ID","sample_values":["1","2","3"]},{"name":"CAMPAIGN_NAME","cortex_search_service":{"database":"CORTEX_AGENTS_DEMO","schema":"FINANCE_FOOD_BEVERAGE","service":"_ANALYST_CAMPAIGN_SEARCH"}},{"name":"CATEGORY","sample_values":["Coffee","Water","Cereals"]},{"name":"END_PERIOD","sample_values":["5","8","9"]},{"name":"START_PERIOD","sample_values":["3","6","8"]}],"facts":[{"name":"BUDGET","sample_values":["500000","750000","400000"]},{"name":"DISCOUNT_PERCENT","sample_values":["15","10","20"]}]},{"name":"CUSTOMERS","dimensions":[{"name":"CUSTOMER_ID","sample_values":["1","2","3"]},{"name":"CUSTOMER_NAME","cortex_search_service":{"database":"CORTEX_AGENTS_DEMO","schema":"FINANCE_FOOD_BEVERAGE","service":"_ANALYST_CUSTOMER_NAME_SEARCH"}},{"name":"CUSTOMER_TYPE","sample_values":["Convenience Store","Supermarket","Distributor"]},{"name":"REGION","sample_values":["Middle East & Africa","North America","Europe"]}],"facts":[{"name":"CREDIT_LIMIT","sample_values":["500000","50000","1000000"]}]},{"name":"PRODUCTS","dimensions":[{"name":"CATEGORY","sample_values":["Coffee","Water","Chocolate"]},{"name":"PRODUCT_ID","sample_values":["1","2","3"]},{"name":"PRODUCT_NAME","cortex_search_service":{"database":"CORTEX_AGENTS_DEMO","schema":"FINANCE_FOOD_BEVERAGE","service":"_ANALYST_PRODUCT_NAME_SEARCH"}}],"facts":[{"name":"UNIT_COST","sample_values":["9.77","4.49","11.18"]},{"name":"UNIT_PRICE","sample_values":["1.6","6.36","17.24"]}]},{"name":"SALES","dimensions":[{"name":"CUSTOMER_ID","sample_values":["4","26","16"]},{"name":"PERIOD_ID","sample_values":["1","2","3"]},{"name":"PRODUCT_ID","sample_values":["42","22","13"]},{"name":"TRANSACTION_ID","sample_values":["1","2","3"]}],"facts":[{"name":"GROSS_PROFIT","sample_values":["7890.2","3917.07","-1067.76"]},{"name":"QUANTITY_SOLD","sample_values":["671","829","204"]},{"name":"TOTAL_COST","sample_values":["6025.58","3150.2","2933.52"]},{"name":"TOTAL_REVENUE","sampl
e_values":["13915.78","7067.27","1865.76"]},{"name":"UNIT_PRICE","sample_values":["20.74","8.53","9.15"]}]},{"name":"TIME_PERIODS","dimensions":[{"name":"MONTH","sample_values":["1","3","4"]},{"name":"MONTH_NAME","sample_values":["January","March","April"]},{"name":"PERIOD_ID","sample_values":["1","2","3"]},{"name":"QUARTER","sample_values":["Q1","Q2","Q3"]},{"name":"YEAR","sample_values":["2022","2023"]}],"time_dimensions":[{"name":"DATE","sample_values":["2022-01-01","2022-03-02","2022-05-31"]}]}],"relationships":[{"name":"CAMPAIGNS_TO_TIME_PERIOD_END","relationship_type":"many_to_one","join_type":"inner"},{"name":"CAMPAIGNS_TO_TIME_PERIOD_START","relationship_type":"many_to_one","join_type":"inner"},{"name":"CAMPAIGNS_TO_PRODUCT","relationship_type":"many_to_one"},{"name":"SALES_TO_PRODUCT","relationship_type":"one_to_one","join_type":"left_outer"},{"name":"SALES_TO_CUSTOMERS","relationship_type":"many_to_one","join_type":"inner"},{"name":"SALES_TO_TIME_PERIODS","relationship_type":"many_to_one","join_type":"inner"}],"verified_queries":[{"name":"campaign_impact","question":"What''s the impact of marketing campaigns on sales performance?","use_as_onboarding_question":false,"sql":"SELECT camp.campaign_name, SUM(CASE WHEN t.period_id BETWEEN camp.start_period AND camp.end_period THEN s.total_revenue ELSE 0 END) AS campaign_revenue, SUM(CASE WHEN NOT t.period_id BETWEEN camp.start_period AND camp.end_period THEN s.total_revenue ELSE 0 END) AS non_campaign_revenue, camp.budget FROM campaigns AS camp JOIN products AS p ON camp.category = p.category JOIN sales AS s ON p.product_id = s.product_id JOIN time_periods AS t ON s.period_id = t.period_id GROUP BY camp.campaign_id, camp.campaign_name, camp.budget","verified_by":"Michael Gorkow","verified_at":1751456609},{"name":"monthly_rolling_average_per_category","question":"Calculate the 3-month rolling average revenue for each product category.","use_as_onboarding_question":false,"sql":"WITH monthly_category_revenue AS 
(SELECT p.category, t.year, t.month, SUM(s.total_revenue) AS monthly_revenue FROM sales AS s JOIN products AS p ON s.product_id = p.product_id JOIN time_periods AS t ON s.period_id = t.period_id GROUP BY p.category, t.year, t.month) SELECT category, year, month, AVG(monthly_revenue) OVER (PARTITION BY category ORDER BY year, month ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS rolling_3month_avg FROM monthly_category_revenue ORDER BY category, year, month","verified_by":"Michael Gorkow","verified_at":1751456896}]}'
);

# Create Agent

In [None]:
# Create (or replace) the Cortex Agent SNOWFLAKE_INTELLIGENCE.AGENTS.FB_AGENT2.
# The specification wires up two tools:
#   * fb_hub              - cortex_analyst_text_to_sql on the semantic view
#                           CORTEX_AGENTS_DEMO.FINANCE_FOOD_BEVERAGE.FINANCE_SEMANTIC_MODEL
#   * marketing_documents - cortex_search on the service
#                           CORTEX_AGENTS_DEMO.FINANCE_FOOD_BEVERAGE.SEARCH_MARKETING_CAMPAIGNS
#
# NOTE: the statement is a *raw* string on purpose. The tool description below
# contains "\n" escape sequences that must reach the specification parser as
# backslash-n. In a regular Python string they would be converted into literal
# line feeds inside the quoted value, which is not valid JSON and would be
# folded into spaces by a YAML parser, destroying the description's layout.
session.sql(r"""
CREATE OR REPLACE AGENT SNOWFLAKE_INTELLIGENCE.AGENTS.FB_AGENT2 profile='{"display_name":"FB_AGENT"}' FROM SPECIFICATION 
$$
{
  "models": {
    "orchestration": "claude-4-sonnet"
  },
  "instructions": {
    "sample_questions": [
      {
        "question": "Which products were featured in the summer hydration campaign and what was their weekly sales revenue during the campaign and the two months before the campaign started? Visualize weekly sales of the featured products in a line plot."
      }
    ]
  },
  "tools": [
    {
      "tool_spec": {
        "type": "cortex_analyst_text_to_sql",
        "name": "fb_hub",
        "description": "CAMPAIGNS:\n- Database: CORTEX_AGENTS_DEMO, Schema: FINANCE_FOOD_BEVERAGE\n- This table stores marketing campaign information for a food and beverage company, tracking promotional activities across different product categories. Each campaign has specific time periods, budgets, and discount percentages to measure marketing effectiveness.\n- The table enables analysis of campaign performance by linking to sales data through product categories and time periods. It supports evaluation of marketing ROI and promotional impact on revenue generation.\n- LIST OF COLUMNS: CAMPAIGN_ID (unique campaign identifier), CAMPAIGN_NAME (marketing campaign name), CATEGORY (product category being promoted - links to PRODUCTS table), START_PERIOD (campaign start period - links to PERIOD_ID in TIME_PERIODS), END_PERIOD (campaign end period - links to PERIOD_ID in TIME_PERIODS), BUDGET (allocated campaign budget), DISCOUNT_PERCENT (percentage discount offered)\n\nCUSTOMERS:\n- Database: CORTEX_AGENTS_DEMO, Schema: FINANCE_FOOD_BEVERAGE\n- This table contains customer information including various business types such as supermarkets, convenience stores, and distributors across different geographic regions. Each customer has defined credit limits and represents different channels through which the company sells its products.\n- The customer data enables regional sales analysis and customer segmentation based on business type and credit capacity. It supports customer relationship management and market penetration analysis across different territories.\n- LIST OF COLUMNS: CUSTOMER_ID (unique customer identifier - links to CUSTOMER_ID in SALES), CUSTOMER_NAME (customer business name), CUSTOMER_TYPE (type of business entity), REGION (geographic location), CREDIT_LIMIT (maximum credit allowed)\n\nPRODUCTS:\n- Database: CORTEX_AGENTS_DEMO, Schema: FINANCE_FOOD_BEVERAGE\n- This table maintains the product catalog for a food and beverage company, focusing on three main categories: Coffee, Water, and Chocolate. Each product has associated cost and pricing information for profitability analysis.\n- The product data serves as the foundation for sales analysis and campaign targeting, enabling category-based performance evaluation. It supports pricing strategy decisions and product portfolio management through cost and revenue tracking.\n- LIST OF COLUMNS: PRODUCT_ID (unique product identifier - links to PRODUCT_ID in SALES), PRODUCT_NAME (specific product name), CATEGORY (product classification - links to CATEGORY in CAMPAIGNS), UNIT_COST (production/purchase cost per unit), UNIT_PRICE (selling price per unit)\n\nSALES:\n- Database: CORTEX_AGENTS_DEMO, Schema: FINANCE_FOOD_BEVERAGE\n- This table captures all sales transactions with detailed financial metrics including revenue, costs, and profitability for each sale. It serves as the central fact table connecting customers, products, and time periods for comprehensive business analysis.\n- The sales data enables performance tracking across multiple dimensions including customer segments, product categories, and time periods. It supports profitability analysis, trend identification, and campaign effectiveness measurement through integrated financial metrics.\n- LIST OF COLUMNS: TRANSACTION_ID (unique transaction identifier), CUSTOMER_ID (purchasing customer - links to CUSTOMER_ID in CUSTOMERS), PRODUCT_ID (sold product - links to PRODUCT_ID in PRODUCTS), PERIOD_ID (sales period - links to PERIOD_ID in TIME_PERIODS), QUANTITY_SOLD (units sold), UNIT_PRICE (price per unit), TOTAL_REVENUE (total sales amount), TOTAL_COST (total cost of goods), GROSS_PROFIT (profit after costs)\n\nTIME_PERIODS:\n- Database: CORTEX_AGENTS_DEMO, Schema: FINANCE_FOOD_BEVERAGE\n- This table provides a comprehensive time dimension with hierarchical date information including years, quarters, months, and specific dates. It spans 24 months of data covering 2022-2023 for temporal analysis and reporting.\n- The time dimension enables trend analysis, seasonal pattern identification, and period-over-period comparisons across all business metrics. It supports campaign timing analysis and financial performance tracking over different time intervals.\n- LIST OF COLUMNS: PERIOD_ID (unique time period identifier - links to PERIOD_ID in SALES and START_PERIOD/END_PERIOD in CAMPAIGNS), YEAR (calendar year), MONTH (month number), MONTH_NAME (month name), QUARTER (quarterly designation), DATE (specific calendar date)\n\nREASONING:\nThis semantic view represents a comprehensive business intelligence model for a food and beverage company's sales and marketing operations. The model centers around the SALES table as the primary fact table, which connects to dimension tables for CUSTOMERS, PRODUCTS, and TIME_PERIODS, while CAMPAIGNS provides marketing context. The relationships enable multi-dimensional analysis of sales performance, customer behavior, product profitability, and marketing effectiveness across different time periods and geographic regions. The data structure supports complex analytical queries including campaign impact analysis, rolling averages, customer segmentation, and regional performance comparisons across the company's three main product categories.\n\nDESCRIPTION:\nThis semantic view models the global sales and marketing operations of a major food and beverage company, tracking financial performance across coffee, water, and chocolate product categories over 24 months (2022-2023). The model integrates sales transactions with customer information (supermarkets, distributors, convenience stores across five regions), product catalog data, marketing campaigns, and time dimensions to enable comprehensive business analysis. The central SALES table connects all dimensions, allowing for multi-dimensional analysis of revenue, profitability, and market performance across different customer segments, product categories, and time periods. The integrated campaign data enables measurement of promotional effectiveness and ROI analysis, while the hierarchical time structure supports trend analysis and seasonal pattern identification. This model is stored in the CORTEX_AGENTS_DEMO database under the FINANCE_FOOD_BEVERAGE schema and provides the foundation for strategic decision-making in product portfolio management, customer relationship optimization, and marketing campaign planning."
      }
    },
    {
      "tool_spec": {
        "type": "cortex_search",
        "name": "marketing_documents",
        "description": "Tool provides access to marketing documents that contain information about featured products and campaign periods."
      }
    }
  ],
  "tool_resources": {
    "fb_hub": {
      "execution_environment": {
        "query_timeout": 300,
        "type": "warehouse",
        "warehouse": "COMPUTE_WH"
      },
      "semantic_view": "CORTEX_AGENTS_DEMO.FINANCE_FOOD_BEVERAGE.FINANCE_SEMANTIC_MODEL"
    },
    "marketing_documents": {
      "id_column": "URL",
      "max_results": 4,
      "name": "CORTEX_AGENTS_DEMO.FINANCE_FOOD_BEVERAGE.SEARCH_MARKETING_CAMPAIGNS",
      "title_column": "RELATIVE_PATH"
    }
  }
}
$$""").collect()