# Cortex AISQL: Reinventando SQL como lenguaje de consulta de IA para datos multimodales

In [None]:
-- Build the IMAGES table from the directory listing of the image stage.
-- Every staged file gets a FILE handle plus synthetic demo metadata:
--   created_at : 2025-01-01 00:00:00 plus a random offset of 0..13046400 seconds
--                (13046400 s = 151 days)
--   user_id    : random integer drawn from the range 0..200
-- All directory-table columns are carried along via `*`.
create or replace table images as
select
    to_file(file_url) as img_file,
    dateadd(
        second,
        uniform(0, 13046400, random()),
        to_timestamp('2025-01-01 00:00:00')
    ) as created_at,
    uniform(0, 200, random()) as user_id,
    *
from directory(@AISQL_DB.AISQL_SCHEMA.AISQL_IMAGE_FILES);

-- Preview ten emails with the body translated from English ('en') to Spanish ('es').
select
    user_id,
    ticket_id,
    created_at,
    SNOWFLAKE.CORTEX.TRANSLATE(content, 'en', 'es')
from emails
limit 10;

In [None]:
import streamlit as st
import pandas as pd
import altair as alt
import snowbooks_extras
from snowflake.snowpark.context import get_active_session

# Obtain the active Snowpark session; later cells run their queries through session.sql(...).
session = get_active_session()

## Identificación de problemas de clientes en formatos de texto e imagen

* Texto: Correos de soporte
* Imágenes: Informes de errores

Se usan los mismos operadores SQL, compatibles con todas las modalidades

*Función utilizada: AI_COMPLETE()*

In [None]:
-- Build INSIGHTS: one row per support item (screenshot or email) with an
-- AI-generated one-sentence summary. Because of `if not exists`, re-running
-- this cell is a no-op once the table exists.
create table if not exists insights as
with
-- Screenshots: summarise each image file. There is no source text for an
-- image, so the generated summary is reused as `content` (lateral reference
-- to the `summary` alias defined on the preceding select item).
image_insights as (
    select
        created_at,
        user_id,
        relative_path as ticket_id,
        img_file as input_file,
        file_url as input_file_url,
        AI_COMPLETE('pixtral-large', prompt('Resume el problema que se muestra en esta captura de pantalla en una oración concisa: {0}', img_file)) as summary,
        summary as content
    from images
),
-- Emails: summarise the email body; there is no file, so input_file is NULL
-- and input_file_url is the empty string.
email_insights as (
    select
        created_at,
        user_id,
        ticket_id::text as ticket_id,
        null as input_file,
        '' as input_file_url,
        content as content,
        AI_COMPLETE('claude-3-7-sonnet', prompt('Resume este problema en una frase concisa.
Si el usuario mencionó algo relacionado con sus preferencias musicales, conserva esa información: {0}', content)) as summary
    from emails
)
select 'Image' as source, created_at, user_id, ticket_id, input_file, input_file_url, content, summary
from image_insights
-- UNION ALL: the two branches are disjoint by construction (`source` differs),
-- so the implicit DISTINCT of a plain UNION would only add a dedup pass.
union all
select 'Email' as source, created_at, user_id, ticket_id, input_file, input_file_url, content, summary
from email_insights;

## Datos consolidados en formatos de texto, imagen y audio

Tipo de dato nativo FILE para consolidar todos los formatos en una sola tabla

In [None]:
-- Show the consolidated INSIGHTS rows, with `content` translated from
-- English ('en') to Spanish ('es'), sorted by input_file_url descending.
select
    user_id,
    source,
    input_file,
    summary,
    SNOWFLAKE.CORTEX.TRANSLATE(content, 'en', 'es'),
    input_file_url
from insights
order by
    input_file_url desc;

## Unión semántica (JOIN) de problemas con la biblioteca de soluciones

Une de forma eficiente los problemas de los clientes con las soluciones existentes mediante una relación evaluada por IA

*Función utilizada: ... JOIN ... ON AI_FILTER()*

In [None]:
-- Semantic join: pair each customer issue in INSIGHTS with the solution
-- articles that AI_FILTER judges able to address it. LEFT JOIN keeps issues
-- for which no article matches (SOLUCION is NULL for those rows).
-- Both displayed texts are translated from English ('en') to Spanish ('es').
select
    SNOWFLAKE.CORTEX.TRANSLATE(c.content, 'en', 'es') as "PROBLEMA DEL CLIENTE",
    SNOWFLAKE.CORTEX.TRANSLATE(s.solution, 'en', 'es') as "SOLUCION",
    c.created_at
from
    insights as c
left join
    solution_center_articles as s
    -- Every column is qualified with its table alias so the prompt arguments
    -- and the sort key cannot become ambiguous between the two tables.
    on AI_FILTER(prompt('Se le proporciona un problema de cliente y un artículo del centro de soluciones. Compruebe si el artículo de solución puede abordar las inquietudes del cliente. Le recordamos que debe comprobar si los detalles del error coinciden. Customer issues: {0}; \n\nSolution: {1}', c.content, s.solution))
order by
    c.created_at asc;

## Principales puntos de dolor agregados por mes

Obtén información agregada a partir de múltiples filas

*Función utilizada: AI_AGG()*

In [None]:
# Task description handed to AI_AGG; it is spliced into the SQL text below
# inside single quotes, so it must not itself contain a single quote.
AGGREGATE_PROMPT = """
Analice todas las revisiones de los tickets de soporte y proporcione una lista completa de todos los problemas mencionados.
Formatee su respuesta como una lista con viñetas de problemas, con su frecuencia aproximada en porcentaje.
"""

# Monthly rollup over a random sample of at most 200 INSIGHTS rows:
# ticket count, distinct users, and an AI_AGG digest of the summaries.
# Result is ordered by ticket count (then month), both descending.
sql = f"""
select 
    monthname(created_at) as month, 
    count(*) as total_tickets,
    count(distinct user_id) as unique_users,
    AI_AGG(summary,'{AGGREGATE_PROMPT}') as top_issues_reported,
    from (select * from insights order by random() limit 200)
    group by month
    order by total_tickets desc,month desc
"""

df = session.sql(sql).to_pandas()

# Render a detail panel for the first `rows_to_display` months only
# (the query already puts the busiest month first).
rows_to_display = 1
for row in df.head(rows_to_display).itertuples():
    st.subheader(f"Información agregada para {row.MONTH}")
    st.metric("Total de entradas",row.TOTAL_TICKETS)
    st.metric("Usuarios únicos", row.UNIQUE_USERS)
    st.subheader("Problemas principales")
    st.markdown(row.TOP_ISSUES_REPORTED)
    st.divider()

# Reshape to long form (one row per month/metric pair) so the metric can
# drive the bar colour.
df_long = pd.melt(
    df,
    id_vars='MONTH',
    value_vars=['TOTAL_TICKETS', 'UNIQUE_USERS'],
    var_name='Metric',
    value_name='Total',
)

# Horizontal bars: one band per month, bar length = Total, coloured by metric.
chart = (
    alt.Chart(df_long)
    .mark_bar()
    .encode(
        y=alt.Y('MONTH:N', sort='-x'),
        x=alt.X('Total:Q'),
        color=alt.Color('Metric:N', scale=alt.Scale(scheme='tableau10')),
        tooltip=['MONTH', 'Metric', 'Total'],
    )
    .properties(height=300)
)

st.altair_chart(chart, use_container_width=True)

## Clasificación

Crea una clasificación de etiquetas que pueda usarse en aplicaciones posteriores; por ejemplo, para entrenar modelos de ML.

*Función utilizada: AI_CLASSIFY()*

In [None]:
-- Prompt template for AI_FILTER; {0} is replaced by the row's summary.
SET FILTER_PROMPT = '
Estoy intentando averiguar si el cliente mencionó alguna preferencia de género musical en su comentario.
¿Este comentario menciona alguna preferencia de género musical específica del cliente?: {0}';

-- Instruction prefix concatenated in front of each summary for AI_CLASSIFY.
SET CLASSIFY_PROMPT = 'Por favor ayúdenme a clasificar la preferencia musical mencionada en este comentario: ';
-- Candidate labels as a single string; entries are separated by comma + space.
SET MUSIC_GENRES = 'Electronic/Dance Music (EDM), Jazz, Indie/Folk, Rock, Classical, World Music, Blues, Pop';

-- Keep only the sampled rows (random sample of at most 500) whose summary
-- AI_FILTER judges to mention a music-genre preference. Because of
-- `if not exists`, the sample is fixed once the table has been created.
create table if not exists filtered as
select *
from (select * from insights order by random() limit 500)
where AI_FILTER(prompt($FILTER_PROMPT, summary));

-- Classify each retained summary into one of the candidate genres.
select
    source,
    SNOWFLAKE.CORTEX.TRANSLATE(summary, 'en', 'es'),
    -- Split on ', ' (comma + space) to match the separator used in
    -- MUSIC_GENRES; splitting on ',' alone leaves a leading space on every
    -- label after the first. The first returned label is cast to text so it
    -- is a plain string rather than a JSON-quoted VARIANT value.
    AI_CLASSIFY($CLASSIFY_PROMPT || summary, SPLIT($MUSIC_GENRES, ', '))['labels'][0]::text as classified_label
from filtered;

In [None]:
# Load the classification result into pandas.
# NOTE(review): `AI_CLASSIFY` is not defined in the visible code — presumably
# it is the name of the preceding SQL cell, referenced as a result object; confirm.
df = AI_CLASSIFY.to_pandas()

# Count rows per classified label, as a two-column frame: Genre, Count.
genre_counts = (
    df['CLASSIFIED_LABEL']
    .value_counts()
    .rename_axis('Genre')
    .reset_index(name='Count')
)

# Pie chart: one arc per genre, arc angle proportional to its count.
chart = (
    alt.Chart(genre_counts)
    .mark_arc()
    .encode(
        theta=alt.Theta('Count:Q'),
        color=alt.Color('Genre:N'),
        tooltip=['Genre', 'Count'],
    )
    .properties(width=500, height=400)
)

st.subheader('Distribución de géneros')
st.altair_chart(chart, use_container_width=True)