In [None]:
# Import python packages
import streamlit as st
import pandas as pd

# We can also use Snowpark for our analyses!
# Obtain the active Snowpark session; later cells use `session` to read
# tables into pandas and to write results back.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


In [None]:
# Install the OpenAI Python client (imported a few cells below). No version is pinned here.
!pip install openai

In [None]:
import os

# Prefer a key supplied through the OPENAI_API_KEY environment variable so the
# secret never has to be saved inside the notebook. If the variable is unset
# (or empty), fall back to the placeholder, which must then be replaced by hand.
oai_key = os.environ.get('OPENAI_API_KEY') or '<YOUR OAI KEY HERE>'
print('Stored API key in *oai_key* var')

In [None]:
from openai import OpenAI

# Client used by later cells to request embeddings.
oai_client = OpenAI(api_key=oai_key)

# Smoke test: embed one sample string to confirm the key and model work.
response = oai_client.embeddings.create(
    input="Some example text to embed",
    model="text-embedding-3-small"
)

# Report the vector length and a short preview rather than dumping every value
# of the embedding into the cell output.
sample_embedding = response.data[0].embedding
print(f"Embedding dimension: {len(sample_embedding)}")
print(sample_embedding[:5])

In [None]:
# Pull the support-case table into a local pandas DataFrame, report its
# (rows, columns) shape, and preview the first ten rows.
SUPPORT_CASES_TABLE = 'TICKETS_DB.SUPPORT_SCHEMA.SUPPORT_CASES'
support_cases_table = session.table(SUPPORT_CASES_TABLE)
df = support_cases_table.to_pandas()
print(df.shape)
df.head(10)

In [None]:
-- Sum, over every support case, the token count that COUNT_TOKENS reports
-- for CASE_DETAILS under the 'llama3.1-70b' model.
SELECT
    SUM(SNOWFLAKE.CORTEX.COUNT_TOKENS('llama3.1-70b', CASE_DETAILS))
FROM TICKETS_DB.SUPPORT_SCHEMA.SUPPORT_CASES;

In [None]:
from snowflake.snowpark.exceptions import SnowparkSQLException

# If the embeddings table already exists, reuse it.
try:
    df = session.table('SUPPORT_TICKETS_OAI_EMBEDDINGS').to_pandas()

# Otherwise create embeddings and write them to the table.
# Only a Snowflake SQL failure (e.g. the table does not exist yet) triggers the
# rebuild; a bare `except:` would also swallow KeyboardInterrupt, OpenAI errors
# and typos, silently re-running the (paid) embedding job.
except SnowparkSQLException:
    # `df` still holds the frame loaded by the earlier support-cases cell,
    # because the assignment in the `try` block never completed.
    oai_embeddings = df.CASE_DETAILS.apply(
        lambda x: oai_client.embeddings.create(
            input=x,
            model="text-embedding-3-small",
        ).data[0].embedding
    )

    df['OAI_EMBEDDING'] = oai_embeddings
    session.write_pandas(df, 'SUPPORT_TICKETS_OAI_EMBEDDINGS', auto_create_table=True)

# Preview as the last expression of the cell so the notebook actually renders
# it (inside the `try` block the value was computed and then discarded).
df[['CASE_DETAILS', 'OAI_EMBEDDING']].head(5)

In [None]:
-- Creates (or replaces) the Cortex Search service SUPPORT_TICKET_SEARCH_OAI_EMBED
-- over rows selected from SUPPORT_TICKETS_OAI_EMBEDDINGS:
--   * text index on CASE_DETAILS
--   * vector index on OAI_EMB, i.e. OAI_EMBEDDING cast to VECTOR(FLOAT,1536)
--   * CATEGORY and CASE_TITLE declared as attributes
-- NOTE(review): 1536 presumably matches the output size of the embedding model
-- used to populate OAI_EMBEDDING - confirm.
-- NOTE(review): the service name is unqualified here, while the Python cell that
-- queries it looks it up under EMBEDDING_EVAL_DB.DATA - confirm both resolve to
-- the same database and schema.
CREATE OR REPLACE CORTEX SEARCH SERVICE SUPPORT_TICKET_SEARCH_OAI_EMBED
  TEXT INDEXES CASE_DETAILS
  VECTOR INDEXES OAI_EMB
  ATTRIBUTES CATEGORY, CASE_TITLE
  WAREHOUSE = LARGE
  TARGET_LAG = '24 hours'
  AS SELECT CASE_DETAILS, OAI_EMBEDDING::VECTOR(FLOAT,1536) as OAI_EMB, CATEGORY, CASE_TITLE FROM SUPPORT_TICKETS_OAI_EMBEDDINGS;

In [None]:
import os
from snowflake.core import Root
from snowflake.snowpark import Session

root = Root(session)

# Resolve the search service built on the user-supplied OpenAI vectors.
oai_service_schema = root.databases["EMBEDDING_EVAL_DB"].schemas["DATA"]
search_service_oai_embed = oai_service_schema.cortex_search_services[
    "SUPPORT_TICKET_SEARCH_OAI_EMBED"
]

# The question to search for.
user_query = "I had an issue with duplicate charges"

# The vector index is queried with an embedding of the question, produced by
# the same OpenAI model that is used for the stored case embeddings.
query_embedding = oai_client.embeddings.create(
    input=user_query,
    model="text-embedding-3-small",
).data[0].embedding

resp = search_service_oai_embed.search(
    multi_index_query={"OAI_EMB": [{"vector": query_embedding}]},
    columns=["CASE_DETAILS", "CATEGORY", "CASE_TITLE"],
    limit=10,
)
print(f"Query: {user_query}")

# Show the hits with their columns in reverse-alphabetical order.
results = pd.DataFrame(resp.results)
column_order = sorted(results.columns, reverse=True)
results[column_order]

In [None]:
-- Creates (or replaces) the Cortex Search service SUPPORT_TICKET_SEARCH_ARCTIC_EMBED
-- over rows selected from SUPPORT_TICKETS_OAI_EMBEDDINGS:
--   * search column: CASE_DETAILS
--   * CATEGORY and CASE_TITLE declared as attributes
--   * embedding model set to 'snowflake-arctic-embed-l-v2.0'
-- Only the text and attribute columns are selected; the stored OAI_EMBEDDING
-- column is not part of this service's query.
-- NOTE(review): the service name is unqualified here, while the Python cell that
-- queries it looks it up under EMBEDDING_EVAL_DB.DATA - confirm both resolve to
-- the same database and schema.
CREATE OR REPLACE CORTEX SEARCH SERVICE SUPPORT_TICKET_SEARCH_ARCTIC_EMBED
  ON CASE_DETAILS
  ATTRIBUTES CATEGORY, CASE_TITLE
  EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
  WAREHOUSE = LARGE
  TARGET_LAG = '24 hours'
  AS SELECT CASE_DETAILS, CATEGORY, CASE_TITLE FROM SUPPORT_TICKETS_OAI_EMBEDDINGS;

In [None]:
# Resolve the SUPPORT_TICKET_SEARCH_ARCTIC_EMBED search service.
arctic_service_schema = root.databases["EMBEDDING_EVAL_DB"].schemas["DATA"]
search_service_arctic_embed = arctic_service_schema.cortex_search_services[
    "SUPPORT_TICKET_SEARCH_ARCTIC_EMBED"
]

# The question to search for; this service takes the raw text directly.
user_query = "I had an issue with duplicate charges"

resp = search_service_arctic_embed.search(
    query=user_query,
    columns=["CASE_DETAILS", "CATEGORY", "CASE_TITLE"],
    limit=10,
)
print(f"Query: {user_query}")

# Show the hits with their columns in reverse-alphabetical order.
results = pd.DataFrame(resp.results)
column_order = sorted(results.columns, reverse=True)
results[column_order]

In [None]:
from snowflake.cortex import complete

# Feed the CASE_DETAILS of the most recent search results to the model as
# context and ask it for a summary.
retrieved_case_details = list(results.CASE_DETAILS)
complete(
    'claude-4-sonnet',
    f'Use the following context to summarize issues with duplicate charges: {retrieved_case_details}',
)

# ARCHIVE BELOW

In [None]:
# Draw up to 100 case descriptions to seed the question-generation prompt.
# Capping at len(df) avoids the ValueError that DataFrame.sample raises when
# asked for more rows than exist; the fixed random_state makes the sample (and
# therefore the prompt) reproducible across runs.
sample_size = min(100, len(df))
case_sample = list(df.sample(n=sample_size, random_state=42).CASE_DETAILS)

complete('claude-4-sonnet', f'''Using the following support case descriptions - 
                        come up with 10 questions someone may ask where the answers are likely in the Case descriptions. 
                        Include some basic questions with easily found answers but also some trickier questions. 
                        
                        Case Descriptions: {case_sample}''')

In [None]:
# Question to answer from retrieved support cases.
user_query = 'What should I do if my package shows as delivered but I never received it?'

# Retrieve the three best-matching cases and keep only their CASE_DETAILS text.
search_response = search_service_arctic_embed.search(
    query=user_query,
    columns=["CASE_DETAILS"],
    limit=3,
)
context = pd.DataFrame(search_response.results).CASE_DETAILS

# Ask the model to answer the question using the retrieved text as context.
complete('claude-4-sonnet', f'''Use the following context to answer the users question.
                                        user question : {user_query}
                                        context: {list(context)}''')