# Call Center Transcripts - Snowflake Cortex Demo
This is a demonstration using synthetic data resembling call center data from a medical supply company.  The demonstration shows the power of Snowflake Cortex and Snowflake Cortex LLM functions.  All LLMs referenced in this demo are being called from within the Snowflake account.  

In [None]:
#  Copyright (c) 2025 Snowflake Computing Inc. All rights reserved.

# We can also use Snowpark for our analyses!
import streamlit as st
from snowflake.snowpark.context import get_active_session
import snowflake.snowpark.functions as F
import pandas as pd

# Fetch the currently active Snowpark session; later cells use it to run SQL.
session = get_active_session()

# Print the session object so the cell output shows what was obtained.
print(session)

### The prompt used to create this data set:
"Create a data set that collects data from transcripts for a customers calling into a medical supply company to get support for help using diabetes, wound care, breast pumps and urology care. The dataset should include ID, agent name, customer name, customer phone number, start time, end time and complete conversation transcript.  The transcripts should have both positive and negative sentiment.  Include all 4 supply types above in the across the data set.  Allow there to be 5 to10 agents each with a random number of calls.   Give me 100 records in JSON format."

In [None]:
-- Preview ten raw transcript records before any parsing is applied.
SELECT *
FROM cortex.transcripts.transcripts_raw_json
LIMIT 10;

## Working with Semi-Structured Data
https://docs.snowflake.com/en/user-guide/querying-semistructured

In [None]:
-- Project typed columns out of the VARIANT column (ten-row preview).
-- The path keys (ID, Supply_Type, ...) are written exactly as in the source query.
SELECT
    variant_col:ID::FLOAT                             AS id,
    variant_col:Supply_Type::TEXT                     AS supply_type,
    TO_TIMESTAMP_NTZ(variant_col:Start_Time::STRING)  AS starttime,
    TO_TIMESTAMP_NTZ(variant_col:End_Time::STRING)    AS endtime,
    variant_col:Agent::TEXT                           AS agentname,
    variant_col:Customer::TEXT                        AS customername,
    variant_col:Phone::TEXT                           AS customerphone,
    variant_col:Transcript::VARCHAR                   AS transcript
FROM cortex.transcripts.transcripts_raw_json
LIMIT 10;

## Dynamic Tables
https://docs.snowflake.com/en/user-guide/dynamic-tables-intro
\
*NOTE: Every cell that creates a Dynamic Table must specify a virtual warehouse. These cells currently use the warehouse **"CORTEX_DEMO_WH"**; change it to a warehouse available in your account before running them.*

In [None]:
-- Dynamic table that flattens the raw JSON transcripts into typed columns.
-- The name is fully qualified because every downstream cell reads
-- cortex.transcripts.transcripts_structured; an unqualified name would be
-- created in whatever schema the session happens to be using.
CREATE OR REPLACE DYNAMIC TABLE cortex.transcripts.transcripts_structured
  TARGET_LAG = DOWNSTREAM
  WAREHOUSE = CORTEX_DEMO_WH --Update this Warehouse across all cells creating dynamic tables
  REFRESH_MODE = INCREMENTAL
  AS
    SELECT variant_col:ID::float as ID
        ,variant_col:Supply_Type::text as supply_type
        ,TO_TIMESTAMP_NTZ(variant_col:Start_Time::STRING) AS starttime
        ,TO_TIMESTAMP_NTZ(variant_col:End_Time::STRING) AS endtime
        ,variant_col:Agent::text as AGENTNAME
        ,variant_col:Customer::text as CUSTOMERNAME
        ,variant_col:Phone::text as CUSTOMERPHONE
        ,variant_col:Transcript::varchar as TRANSCRIPT
    FROM cortex.transcripts.transcripts_raw_json;

In [None]:
-- Preview the structured dynamic table. The name is fully qualified to match
-- the other cells that read cortex.transcripts.transcripts_structured.
SELECT *
FROM cortex.transcripts.transcripts_structured
LIMIT 10;

### Cortex Summarize LLM function
https://docs.snowflake.com/en/sql-reference/functions/summarize-snowflake-cortex

In [None]:
-- Show each transcript next to its Cortex SUMMARIZE output (ten-row preview).
SELECT
    customername,
    starttime,
    transcript,
    SNOWFLAKE.CORTEX.SUMMARIZE(transcript) AS transcript_summary
FROM cortex.transcripts.transcripts_structured
LIMIT 10;

### Cortex Sentiment & Extract Answer LLM Functions
- https://docs.snowflake.com/en/sql-reference/functions/sentiment-snowflake-cortex
- https://docs.snowflake.com/en/sql-reference/functions/extract_answer-snowflake-cortex

In [None]:
-- Score transcript sentiment and ask EXTRACT_ANSWER for the main topic (ten-row preview).
SELECT
    customername,
    starttime,
    SNOWFLAKE.CORTEX.SENTIMENT(transcript) AS sentiment_transcript,
    SNOWFLAKE.CORTEX.EXTRACT_ANSWER(
        transcript,
        'What is the main topic?(in 8 words or less)'
    ) AS topic
FROM cortex.transcripts.transcripts_structured
LIMIT 10;

### Cortex Complete LLM Function
https://docs.snowflake.com/en/sql-reference/functions/complete-snowflake-cortex

In [None]:
-- Ask the LLM two free-form questions about each transcript (ten-row preview).
-- Each prompt ends with an explicit 'Transcript: ' delimiter so the instruction
-- text does not run straight into the first word of the transcript.
SELECT
    customername,
    starttime,
    SNOWFLAKE.CORTEX.COMPLETE(
        'mistral-large2',
        CONCAT(
            'How well did the agent meet the customer needs? (In 20 words or less) ',
            'Transcript: ',
            transcript
        )
    ) AS interaction_overview,
    SNOWFLAKE.CORTEX.COMPLETE(
        'mistral-large2',
        CONCAT(
            'What is the product category mentioned? ',
            '(The answer should only be diabetes, wound care, breast pumps, urology care or other) ',
            'Transcript: ',
            transcript
        )
    ) AS product_category
FROM cortex.transcripts.transcripts_structured
LIMIT 10;

In [None]:
-- Ask the LLM for a 0-10 service rating plus a reason, returned as JSON (ten-row preview).
-- The prompt is built from single-line pieces so no source indentation or line
-- breaks leak into it, and it names the JSON keys so the output shape is predictable.
SELECT
    customername,
    starttime,
    SNOWFLAKE.CORTEX.COMPLETE(
        'mistral-large2',
        CONCAT(
            'Rate the customer service experience from 0 to 10, with 0 being very poor support without resolution ',
            'and 10 being highly supportive and complete resolution of the issue and a completely happy customer. ',
            'Return the results as JSON with exactly two keys: "rating" (a single integer) and "reason" (the reason for the rating). ',
            'Do not include the json header or any leading quotes. ',
            'Transcript: ',
            transcript
        )
    ) AS interaction_rating
FROM cortex.transcripts.transcripts_structured
LIMIT 10;

In [None]:
-- Preview every Cortex-derived column side by side (ten rows).
-- Each COMPLETE prompt ends with a 'Transcript: ' delimiter, and the rating
-- prompt is built from single-line pieces and names its JSON keys.
SELECT
    id,
    supply_type,
    agentname,
    customername,
    starttime,
    endtime,
    transcript,
    SNOWFLAKE.CORTEX.SUMMARIZE(transcript) AS transcript_summary,
    SNOWFLAKE.CORTEX.SENTIMENT(transcript) AS sentiment_transcript,
    SNOWFLAKE.CORTEX.EXTRACT_ANSWER(transcript, 'What is the main topic?') AS topic,
    SNOWFLAKE.CORTEX.COMPLETE(
        'mistral-large2',
        CONCAT(
            'How well did the agent meet the customer needs? (In 20 words or less) ',
            'Transcript: ',
            transcript
        )
    ) AS interaction_overview,
    SNOWFLAKE.CORTEX.COMPLETE(
        'mistral-large2',
        CONCAT(
            'What is the product category mentioned? (In 5 words or less) ',
            'Transcript: ',
            transcript
        )
    ) AS product_category,
    SNOWFLAKE.CORTEX.COMPLETE(
        'mistral-large2',
        CONCAT(
            'Rate the customer service experience from 0 to 10, with 0 being very poor support without resolution ',
            'and 10 being highly supportive and complete resolution of the issue and a completely happy customer. ',
            'Return the results as JSON with exactly two keys: "rating" (a single integer) and "reason" (the reason for the rating). ',
            'Do not include the json header or any leading quotes. ',
            'Transcript: ',
            transcript
        )
    ) AS interaction_rating
FROM cortex.transcripts.transcripts_structured
LIMIT 10;

In [None]:
-- Dynamic table that enriches every structured transcript with Cortex output:
-- summary, sentiment score, topic answer, and three COMPLETE responses.
-- The rating prompt names the "rating" and "reason" keys because the
-- transcripts_cortex_interaction table extracts exactly those keys.
CREATE OR REPLACE DYNAMIC TABLE cortex.transcripts.transcripts_cortex
  TARGET_LAG = DOWNSTREAM
  WAREHOUSE = CORTEX_DEMO_WH
  REFRESH_MODE = FULL
  AS
    SELECT ID
        ,SUPPLY_TYPE
        ,AGENTNAME
        ,CUSTOMERNAME
        ,STARTTIME
        ,ENDTIME
        ,TRANSCRIPT
        ,snowflake.cortex.summarize(TRANSCRIPT) AS TRANSCRIPT_SUMMARY
        ,snowflake.cortex.sentiment(TRANSCRIPT) AS sentiment_transcript
        ,snowflake.cortex.extract_answer(TRANSCRIPT, 'What is the main topic?') AS topic
        ,SNOWFLAKE.CORTEX.COMPLETE(
            'mistral-large2',
            -- The trailing 'Transcript: ' keeps the instruction from running into the transcript text.
            concat('How well did the agent meet the customer needs? (In 20 words or less) ',
                   'Transcript: ',
                   TRANSCRIPT)) AS interaction_overview
        ,SNOWFLAKE.CORTEX.COMPLETE(
            'mistral-large2',
            concat('What is the product category mentioned? (In 5 words or less) ',
                   'Transcript: ',
                   TRANSCRIPT)) AS product_category
        ,SNOWFLAKE.CORTEX.COMPLETE(
            'mistral-large2',
            -- Built from single-line pieces so source indentation and line breaks stay out of the prompt.
            concat('Rate the customer service experience from 0 to 10, with 0 being very poor support without resolution ',
                   'and 10 being highly supportive and complete resolution of the issue and a completely happy customer. ',
                   'Return the results as JSON with exactly two keys: "rating" (a single integer) and "reason" (the reason for the rating). ',
                   'The JSON should not have a header or a footer. ',
                   'Transcript: ',
                   TRANSCRIPT)) AS interaction_rating
        FROM cortex.transcripts.transcripts_structured;

In [None]:
-- Preview ten rows of the Cortex-enriched dynamic table.
SELECT *
FROM cortex.transcripts.transcripts_cortex
LIMIT 10

### Lateral Flatten
- https://docs.snowflake.com/en/sql-reference/functions/flatten
- https://docs.snowflake.com/en/sql-reference/constructs/join-lateral
- https://community.snowflake.com/s/article/Dynamically-extracting-JSON-using-LATERAL-FLATTEN

In [None]:
-- Dynamic table that expands the TOPIC column into one row per flattened element,
-- exposing each element's "answer" and "score" fields as typed columns.
-- NOTE(review): presumes TOPIC holds an array of {answer, score} objects — confirm
-- against the EXTRACT_ANSWER return shape.
CREATE OR REPLACE DYNAMIC TABLE cortex.transcripts.transcripts_cortex_topic
  TARGET_LAG = DOWNSTREAM
  WAREHOUSE = CORTEX_DEMO_WH
  REFRESH_MODE = FULL
  AS
    SELECT
        id,
        topic,
        topic_item.value:"answer"::STRING AS topic_answer,
        topic_item.value:"score"::FLOAT   AS topic_conf_score
    FROM cortex.transcripts.transcripts_cortex,
        LATERAL FLATTEN(input => topic) AS topic_item

In [None]:
-- Preview ten rows of the flattened topic table.
SELECT *
FROM cortex.transcripts.transcripts_cortex_topic
LIMIT 10

### Parse_JSON
https://docs.snowflake.com/en/sql-reference/functions/parse_json

In [None]:
-- Dynamic table that turns the LLM-generated rating text into typed columns.
-- LLM output is not guaranteed to be valid JSON, so parsing uses TRY_PARSE_JSON:
-- a malformed response yields NULL rating/reason for that row instead of
-- failing the whole refresh.
CREATE OR REPLACE DYNAMIC TABLE cortex.transcripts.transcripts_cortex_interaction
  TARGET_LAG = DOWNSTREAM
  WAREHOUSE = CORTEX_DEMO_WH
  REFRESH_MODE = FULL
  AS
    WITH cleaned AS (
        SELECT ID
            ,INTERACTION_RATING
            -- Make proper JSON data, step 1: remove Markdown code fences.
            -- The '```json' form is removed first so the language tag does not
            -- remain in front of the payload.
            ,REPLACE(REPLACE(INTERACTION_RATING, '```json', ''), '```', '') AS a
        FROM cortex.transcripts.transcripts_cortex
    )
    SELECT ID
        ,INTERACTION_RATING
        ,a
        -- Step 2: parse the cleaned text and pull out the two expected keys.
        ,TRY_PARSE_JSON(a):"rating"::INT AS interaction_rating_value
        ,TRY_PARSE_JSON(a):"reason"::STRING AS interaction_reason
    FROM cleaned

In [None]:
-- Preview ten rows of the parsed interaction-rating table.
SELECT *
FROM cortex.transcripts.transcripts_cortex_interaction
LIMIT 10

In [None]:
-- Final reporting table: the enriched transcript rows joined to their flattened
-- topic answer and parsed interaction rating. This table sets a concrete
-- TARGET_LAG of five minutes; the tables it reads from use DOWNSTREAM.
CREATE OR REPLACE DYNAMIC TABLE cortex.transcripts.transcripts_cortex_final
  TARGET_LAG = '5 minutes'
  WAREHOUSE = CORTEX_DEMO_WH
  REFRESH_MODE = FULL
  AS
    SELECT
        enriched.id,
        enriched.supply_type,
        enriched.product_category,
        topics.topic_answer AS topic_summary,
        topics.topic_conf_score,
        enriched.agentname,
        enriched.customername,
        enriched.starttime,
        enriched.endtime,
        enriched.transcript_summary,
        enriched.sentiment_transcript,
        enriched.interaction_overview,
        ratings.interaction_rating_value,
        ratings.interaction_reason,
        enriched.transcript
    FROM cortex.transcripts.transcripts_cortex AS enriched
    INNER JOIN cortex.transcripts.transcripts_cortex_topic AS topics
        ON enriched.id = topics.id
    INNER JOIN cortex.transcripts.transcripts_cortex_interaction AS ratings
        ON enriched.id = ratings.id

In [None]:
-- Preview ten rows of the final reporting table.
SELECT *
FROM cortex.transcripts.transcripts_cortex_final
LIMIT 10

## Integrating Streamlit into a Notebook
https://docs.snowflake.com/en/developer-guide/streamlit/about-streamlit

In [None]:
# Bar chart of transcript counts per supply type.
st.write("Transcripts by Category")

# The query has no placeholders, so plain (non-f) string pieces are sufficient.
bar_sql = (
    'SELECT "SUPPLY_TYPE", COUNT("ID") AS CNT '
    'FROM TRANSCRIPTS_CORTEX_FINAL '
    'GROUP BY "SUPPLY_TYPE" ORDER BY 2 DESC'
)
bdf = session.sql(bar_sql)

st.bar_chart(
    data=bdf,
    x="SUPPLY_TYPE",
    height=300,
    width=100,
)

In [None]:
-- Average transcript sentiment per agent, highest average first.
SELECT
    "AGENTNAME",
    AVG("SENTIMENT_TRANSCRIPT") AS SENTIMENT
FROM TRANSCRIPTS_CORTEX_FINAL
GROUP BY "AGENTNAME"
ORDER BY SENTIMENT DESC

In [None]:
-- Same per-agent average as the previous cell, but the aggregate column is
-- left without an alias.
SELECT
    "AGENTNAME",
    AVG("SENTIMENT_TRANSCRIPT")
FROM TRANSCRIPTS_CORTEX_FINAL
GROUP BY "AGENTNAME"
ORDER BY AVG("SENTIMENT_TRANSCRIPT") DESC

In [None]:
# Bar chart of average transcript sentiment per agent.
st.write("Transcript Sentiment")

# The query has no placeholders, so plain (non-f) string pieces are sufficient.
bar_sql = (
    'SELECT "AGENTNAME", AVG("SENTIMENT_TRANSCRIPT") AS SENTIMENT '
    'FROM TRANSCRIPTS_CORTEX_FINAL '
    'GROUP BY "AGENTNAME" ORDER BY 2 DESC'
)
bdf = session.sql(bar_sql)

# NOTE(review): width=100 is probably overridden by use_container_width=True —
# confirm against the Streamlit version in use.
chart_options = dict(
    x="AGENTNAME",
    height=300,
    width=100,
    use_container_width=True,
    color="#3fb4d5",
)
st.bar_chart(data=bdf, **chart_options)

In [None]:
import plotly.express as px

# Horizontal Plotly bar chart of average transcript sentiment per agent.
bar_sql = """SELECT "AGENTNAME", AVG("SENTIMENT_TRANSCRIPT") AS SENTIMENT FROM TRANSCRIPTS_CORTEX_FINAL GROUP BY "AGENTNAME" ORDER BY 2 DESC"""

# Plotly Express documents pandas DataFrames, dicts and array-likes as its
# data_frame input, so materialize the Snowpark result first (the binned
# sentiment cell does the same before charting).
bdf = session.sql(bar_sql).to_pandas()

fig = px.bar(bdf,
             x='SENTIMENT',
             y='AGENTNAME',
             orientation='h')  # This creates a horizontal bar chart
st.plotly_chart(fig, use_container_width=True)

In [None]:
st.write("Binned Sentiment Distribution")

# SQL query to bin sentiment into 4 categories.
# Each label carries an ordinal prefix ("1: ", "2: ", ...) because the labels are
# strings: without it '-0.5 to 0' sorts before '-1 to -0.5', so both ORDER BY and
# an alphabetically sorted chart axis would show the bins out of numeric order.
# Rows whose sentiment is NULL or outside [-1, 1] fall through to a NULL bin.
bar_sql = """
SELECT 
    CASE        
        WHEN "SENTIMENT_TRANSCRIPT" >= -1 AND "SENTIMENT_TRANSCRIPT" < -0.5 THEN '1: -1 to -0.5'
        WHEN "SENTIMENT_TRANSCRIPT" >= -0.5 AND "SENTIMENT_TRANSCRIPT" < 0 THEN '2: -0.5 to 0'
        WHEN "SENTIMENT_TRANSCRIPT" >= 0 AND "SENTIMENT_TRANSCRIPT" < 0.5 THEN '3: 0 to 0.5'
        WHEN "SENTIMENT_TRANSCRIPT" >= 0.5 AND "SENTIMENT_TRANSCRIPT" <= 1 THEN '4: 0.5 to 1'
    END AS sentiment_bin,
    COUNT("ID") AS CNT
FROM TRANSCRIPTS_CORTEX_FINAL
GROUP BY sentiment_bin
ORDER BY sentiment_bin
"""

# Execute query and pull the (at most five-row) result into pandas for charting
bdf = session.sql(bar_sql).to_pandas()

# Plot bar chart
st.bar_chart(
    data=bdf,
    x="SENTIMENT_BIN",
    y="CNT",
    height=300,
    use_container_width=True,
    color="#3fb4d5"
)

In [None]:
# Bar chart of how many transcripts received each interaction rating value.
st.write("Interaction Ratings")

# The query has no placeholders, so plain (non-f) string pieces are sufficient.
bar_sql = (
    'SELECT "INTERACTION_RATING_VALUE", COUNT("ID") AS CNT '
    'FROM TRANSCRIPTS_CORTEX_FINAL '
    'GROUP BY "INTERACTION_RATING_VALUE" ORDER BY 2 DESC'
)
bdf = session.sql(bar_sql)

st.bar_chart(
    data=bdf,
    x="INTERACTION_RATING_VALUE",
    height=300,
    width=100,
)