# Batch Text Processing using LLMs and Fine-tuning

In [None]:
from snowflake.snowpark.context import get_active_session
import snowflake.snowpark.functions as F
import streamlit as st
import altair as alt
# Pick up the Snowpark session that is currently active for this notebook.
session = get_active_session()

# DataFrame over the support_tickets table. The bare name on the last line is
# there so the notebook cell displays it.
df_support_tickets = session.table("support_tickets")
df_support_tickets

## Automatic ticket categorization using LLM

In [None]:
# Instruction text that is prepended to every support request before it is
# sent to a model. It is written line by line so that the exact whitespace
# (including the trailing space on the first line and the final newline) is
# visible; the same text is reused for labelling, fine-tuning and inference.
prompt = (
    "You are an agent that helps organize requests that come to our support team. \n"
    "\n"
    "The request category is the reason why the customer reached out. "
    "These are the possible types of request categories:\n"
    "\n"
    # The closed set of categories the model is asked to choose from.
    "Roaming fees\n"
    "Slow data speed\n"
    "Lost phone\n"
    "Add new line\n"
    "Closing account\n"
    "\n"
    "Try doing it for this request and return only the request category only.\n"
)

### Use a large LLM (llama3.1-405b)

In [None]:
# Ask a large model (llama3.1-405b) to label every row of support_tickets with
# one of the request categories listed in `prompt`.
#
# The prompt is embedded in a single-quoted SQL string literal, so backslashes
# and single quotes are escaped first. Without this, adding an apostrophe to
# the prompt would produce invalid SQL. For the current prompt text the
# generated statement is unchanged.
escaped_prompt = prompt.replace("\\", "\\\\").replace("'", "''")

# NOTE: this is a regular (non-raw) f-string, so '\n' reaches Snowflake as a
# real newline character between the quotes; trim() uses it to strip leading
# and trailing newlines from the model's answer.
large_llm_response_sql = f""" select ticket_id, request,
trim(snowflake.cortex.complete('llama3.1-405b',concat('{escaped_prompt}',request)),'\n') as large_llm_response
from support_tickets"""

# Wrap the statement in a DataFrame; the bare name on the last line is there
# so the notebook cell displays the result.
df_large_llm_response = session.sql(large_llm_response_sql)
df_large_llm_response

### Use mistral-7b and compare responses

In [None]:
# Run the same categorization with the smaller mistral-7b model so its answers
# can be compared with the large model's.
#
# The prompt is embedded in a single-quoted SQL string literal, so backslashes
# and single quotes are escaped first. Without this, adding an apostrophe to
# the prompt would produce invalid SQL. For the current prompt text the
# generated statement is unchanged.
escaped_prompt = prompt.replace("\\", "\\\\").replace("'", "''")

# NOTE: regular (non-raw) f-string, so '\n' is sent to Snowflake as a real
# newline character; trim() strips it from both ends of the answer.
mistral_7b_response_sql = f""" select ticket_id,
trim(snowflake.cortex.complete('mistral-7b',concat('{escaped_prompt}',request)),'\n') as mistral_7b_response
from support_tickets"""

df_mistral_7b_response = session.sql(mistral_7b_response_sql)

# Join on ticket_id so each row carries the request plus both models' answers.
df_llms = df_large_llm_response.join(df_mistral_7b_response, 'ticket_id')
df_llms

## Fine-tuning 

### Generate datasets to fine-tune mistral-7b

In [None]:
# Build the fine-tuning dataset: one row per ticket holding the full prompt
# and the category returned by the large model.
#
# The prompt column is `prompt` immediately followed by the request text,
# i.e. the same concatenation the SQL cells in this notebook use when asking
# a model for a category. Previously a single space was inserted between the
# two parts, so the training prompts differed slightly from the prompts the
# models are actually queried with.
df_fine_tune = df_large_llm_response.with_column(
    "prompt", F.concat(F.lit(prompt), F.col("request"))
).select("ticket_id", "prompt", "large_llm_response")
df_fine_tune.write.mode('overwrite').save_as_table('support_tickets_finetune')

# Split the rows read back from the saved table roughly 80/20 into training
# and evaluation sets; the fixed seed keeps the split repeatable.
# NOTE(review): reading from the table rather than from df_fine_tune
# presumably avoids re-running the LLM query for the split; confirm.
train_df, eval_df = session.table("support_tickets_finetune").random_split(weights=[0.8, 0.2], seed=42)
train_df.write.mode('overwrite').save_as_table('support_tickets_train')
eval_df.write.mode('overwrite').save_as_table('support_tickets_eval')

# Confirmation banners shown once both tables have been written.
st.write("### :white_check_mark: Training dataset created and saved in *support_tickets_train* table successfully.")
st.write("### :white_check_mark: Evaluation dataset created and saved in *support_tickets_eval* table successfully.")

## Inference using fine-tuned model

In [None]:
# Categorize every support ticket with the fine-tuned model.
# NOTE(review): assumes a model named SUPPORT_TICKETS_FINETUNED_MISTRAL_7B has
# already been created; it is not created anywhere in the visible cells.
#
# The prompt is embedded in a single-quoted SQL string literal, so backslashes
# and single quotes are escaped first. Without this, adding an apostrophe to
# the prompt would produce invalid SQL. For the current prompt text the
# generated statement is unchanged.
escaped_prompt = prompt.replace("\\", "\\\\").replace("'", "''")

# NOTE: regular (non-raw) f-string, so '\n' is sent to Snowflake as a real
# newline character; trim() strips it from both ends of the answer.
sql = f"""select ticket_id, request,
trim(snowflake.cortex.complete('SUPPORT_TICKETS_FINETUNED_MISTRAL_7B',concat('{escaped_prompt}',request)),'\n') as category
from support_tickets"""

# The bare name on the last line is there so the notebook cell displays it.
df_fine_tuned_mistral_7b_response = session.sql(sql)
df_fine_tuned_mistral_7b_response

In [None]:
# Count how many tickets the fine-tuned model assigned to each category.
df = df_fine_tuned_mistral_7b_response.group_by('category').agg(
    F.count("*").as_('COUNT')
)
st.subheader("Number of requests per category")

# Pull the aggregated counts into pandas for Altair. The encodings below refer
# to the columns as CATEGORY and COUNT.
category_counts = df.to_pandas()

# One colour per category; the legend is suppressed because the y-axis already
# names each bar.
bar_color = alt.Color(
    'CATEGORY:N',
    scale=alt.Scale(scheme='category10'),
    legend=None,
)

# Horizontal bars, ordered by descending count (sort="-x").
chart = (
    alt.Chart(category_counts)
    .mark_bar()
    .encode(
        x=alt.X('COUNT:Q'),
        y=alt.Y('CATEGORY:N', sort="-x"),
        color=bar_color,
    )
    .properties(height=400)
)

st.altair_chart(chart, use_container_width=True)