In [None]:
# Import python packages
import streamlit as st
import pandas as pd
import altair as alt
import snowflake.snowpark.functions as F

# Fetch the currently active Snowpark session. Later cells use `session`
# for `session.table(...)` and `session.sql(...)` alongside the SQL cells.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


In [None]:
-- Preview five rows of the raw reviews table.
SELECT *
FROM MUSICAL_INSTRUMENTS_REVIEWS
LIMIT 5;

In [None]:
-- Build one row per product (asin): the mean OVERALL value, the mean Cortex
-- sentiment of its reviews, and all review texts joined with single spaces.
CREATE OR REPLACE TABLE summarized_reviews AS
SELECT
    asin,
    AVG(OVERALL) AS AVG_SCORE,
    AVG(SNOWFLAKE.CORTEX.SENTIMENT(reviewText)) AS AVG_SENTIMENT,
    LISTAGG(reviewText, ' ') AS concatenated_review_text
FROM MUSICAL_INSTRUMENTS_REVIEWS
GROUP BY asin

In [None]:
-- Preview five rows of the per-product aggregate table.
SELECT *
FROM summarized_reviews
LIMIT 5;

In [None]:
-- For five products, show the SUMMARIZE token count of the concatenated
-- reviews next to the Cortex-generated summary of the same text.
SELECT
    SNOWFLAKE.CORTEX.COUNT_TOKENS('SUMMARIZE', concatenated_review_text) AS CNT_TOKENS,
    SNOWFLAKE.CORTEX.SUMMARIZE(concatenated_review_text) AS summary
FROM summarized_reviews
LIMIT 5;

In [None]:
df = session.table("summarized_reviews")

# Only the two numeric columns are charted, so fetch just those. Pulling the
# large concatenated_review_text column to the client (and embedding it in the
# Altair chart spec) would be wasted transfer and memory.
df_pandas = df.select("AVG_SCORE", "AVG_SENTIMENT").to_pandas()


def _score_histogram(data, column, axis_title, chart_title):
    """Return a bar histogram (at most 10 bins) of one quantitative column.

    Args:
        data: pandas DataFrame holding the column to plot.
        column: name of the quantitative column to bin on the x axis.
        axis_title: label shown on the x axis.
        chart_title: title shown above the chart.

    Returns:
        An Altair chart with binned `column` on x and the row count on y.
    """
    return (
        alt.Chart(data)
        .mark_bar()
        .encode(
            alt.X(f"{column}:Q", bin=alt.Bin(maxbins=10), title=axis_title),
            alt.Y('count()', title='Frequency'),
        )
        .properties(title=chart_title)
    )


# Histogram of the average score per product
histogram1 = _score_histogram(
    df_pandas, 'AVG_SCORE', 'Average Score', 'Histogram of Avg Score Reviews'
)

# Histogram of the average sentiment per product
histogram2 = _score_histogram(
    df_pandas, 'AVG_SENTIMENT', 'Average Sentiment', 'Histogram of Average Sentiment'
)

# Concatenate the two histograms horizontally
combined_histograms = alt.hconcat(histogram1, histogram2)

# Use Streamlit to display the combined histograms
st.altair_chart(combined_histograms, use_container_width=True)

In [None]:
# Instruction text that is placed in front of each product's concatenated
# reviews, both when the fine-tuning prompts are built and when the
# fine-tuned model is queried in later cells.
prompt_text = """You are a sports commentator at the Olympic Games. Your task is to generate one-paragraph summaries of Amazon product reviews, capturing the excitement, energy, and competitive spirit of a sports event.

Guidelines:
- Summarize the reviews in a single paragraph.
- Use a tone that conveys enthusiasm, energy, and a sense of competition.
- Use sports-related metaphors and analogies to describe the product features and performance.
- Ensure the summary is clear, concise, and engaging."""

In [None]:
-- Ask the base mistral-large model to summarize three products.
-- The double-brace placeholder below is filled in by the notebook from the
-- Python variable prompt_text; the original literal '{prompt}' was never
-- substituted, so the model received no instructions at all.
-- Dollar quoting keeps the statement valid even if the prompt text contains
-- apostrophes, and the ' ' separator keeps the instructions from running
-- straight into the first review.
select trim(
         snowflake.cortex.complete(
           'mistral-large',
           concat($${{prompt_text}}$$, ' ', concatenated_review_text)
         ),
         '\n'
       ) as llm_response
from summarized_reviews
limit 3;

In [None]:
-- Pair each product's exciting_summary with its concatenated review text.
-- Only products present in both tables are kept (inner join on asin).
CREATE OR REPLACE TABLE REVIEWS4FINETUNING AS
SELECT
    ft.asin,
    sr.concatenated_review_text,
    ft.exciting_summary
FROM AMAZON_REVIEWS.PUBLIC.AMAZON_REVIEWS_FINETUNE AS ft
JOIN summarized_reviews AS sr
    ON sr.asin = ft.asin

In [None]:
-- Preview five rows of the fine-tuning source table.
SELECT *
FROM REVIEWS4FINETUNING
LIMIT 5;

In [None]:
# Prompt column: the instruction text, one space, then the product's reviews.
full_prompt = F.concat(
    F.lit(prompt_text),
    F.lit(" "),
    F.col("concatenated_review_text"),
)

df_fine_tune = session.table("REVIEWS4FINETUNING")
df_fine_tune_prompt = (
    df_fine_tune
    .with_column("prompt", full_prompt)
    .select("asin", "prompt", "exciting_summary")
)

# 80/20 split with a fixed seed, then report the size of each part.
train_df, eval_df = df_fine_tune_prompt.random_split(weights=[0.8, 0.2], seed=42)
print(train_df.count(), eval_df.count())

# Persist both parts, replacing any tables left over from an earlier run.
for split_df, table_name in ((train_df, 'reviews_train'), (eval_df, 'reviews_eval')):
    split_df.write.mode('overwrite').save_as_table(table_name)

In [None]:
# Print a single row of the persisted training table as a sanity check.
reviews_train_preview = session.table('reviews_train')
reviews_train_preview.show(1)

## Fine-tune Mistral 7B using Cortex


In [None]:
-- Start a Cortex fine-tuning job on mistral-7b. The first query supplies the
-- training rows (reviews_train), the second the evaluation rows (reviews_eval);
-- both expose exciting_summary under the column name "completion".
SELECT SNOWFLAKE.CORTEX.FINETUNE(
    'CREATE',
    'AMAZON_REVIEWS_FINETUNED_MISTRAL_7B',
    'mistral-7b',
    'SELECT prompt, exciting_summary as completion from reviews_train',
    'SELECT prompt, exciting_summary as completion from reviews_eval'
);

In [None]:
-- Describe a fine-tuning job, identified by its job ID.
-- NOTE(review): the ID below is hard-coded and looks like it belongs to one
-- particular run; presumably it has to be replaced with the ID of the job
-- started by the 'CREATE' call before this cell is re-run -- confirm.
select snowflake.cortex.finetune('DESCRIBE', 'CortexFineTuningWorkflow_eeb61b0f-6ffa-469e-a762-2550467154e8');

In [None]:
# Generate a summary for every product with the fine-tuned model.
fine_tuned_model_name = 'AMAZON_REVIEWS_FINETUNED_MISTRAL_7B'

# prompt_text is spliced into a single-quoted SQL literal, so escape
# backslashes and double any apostrophes; otherwise a wording change in the
# prompt could break (or alter) the generated statement.
escaped_prompt_text = prompt_text.replace("\\", "\\\\").replace("'", "''")

# The prompt is assembled as "<instructions> <reviews>" -- with the single
# space separator -- so it has the same shape as the `prompt` column the
# model was fine-tuned on (instructions, " ", concatenated_review_text).
sql_text = f"""
SELECT asin, concatenated_review_text,
       TRIM(snowflake.cortex.complete('{fine_tuned_model_name}', CONCAT('{escaped_prompt_text}', ' ', concatenated_review_text)), '\n') AS fine_tuned_mistral_7b_model_response
FROM summarized_reviews
"""

df_fine_tuned_mistral_7b_response = session.sql(sql_text)
df_fine_tuned_mistral_7b_response.show()

In [None]:
# Persist the generated summaries, replacing the table from any earlier run.
generated_writer = df_fine_tuned_mistral_7b_response.write.mode('overwrite')
generated_writer.save_as_table('reviews_generated_mistral_finetuned')

In [None]:
-- Preview five rows of the fine-tuned model's output.
SELECT *
FROM reviews_generated_mistral_finetuned
LIMIT 5;

## Apply *ROUGE (Recall-Oriented Understudy for Gisting Evaluation)* scores as a metric for evaluating summarization tasks with large language models (LLMs)

In [None]:
from rouge_score import rouge_scorer

# Scorer for ROUGE-1, ROUGE-2 and ROUGE-L, with stemming enabled.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Load the generated summaries and the reference summaries to score against.
# Only the held-out split (reviews_eval) is used: REVIEWS4FINETUNING also
# contains the rows written to reviews_train, i.e. the examples the model was
# fine-tuned on, and scoring those would inflate the ROUGE results.
df_generated_summaries = session.table("reviews_generated_mistral_finetuned")
df_eval_reviews = session.table("reviews_eval")

df_joined = df_generated_summaries.join(
    df_eval_reviews,
    df_generated_summaries["asin"] == df_eval_reviews["asin"],
)

# Collect the joined DataFrame to the client
joined_data = df_joined.select("EXCITING_SUMMARY", "FINE_TUNED_MISTRAL_7B_MODEL_RESPONSE").collect()

In [None]:
# reference versus generated summaries

# Display label -> key used by the scorer for that metric.
ROUGE_METRICS = {'ROUGE-1': 'rouge1', 'ROUGE-2': 'rouge2', 'ROUGE-L': 'rougeL'}


def _rouge_histogram(data, metric):
    """Return a binned bar chart of one ROUGE column.

    Args:
        data: pandas DataFrame with one column per ROUGE metric label.
        metric: column / metric label to plot, e.g. 'ROUGE-1'.

    Returns:
        An Altair chart with the binned metric on x and the row count on y.
    """
    return alt.Chart(data).mark_bar().encode(
        x=alt.X(f'{metric}:Q', bin=True, title=f'{metric} Score'),
        y=alt.Y('count()', title='Count')
    ).properties(
        title=f'{metric} Scores'
    )


rouge_scores = []
for row in joined_data:
    ref = row["EXCITING_SUMMARY"]
    gen = row["FINE_TUNED_MISTRAL_7B_MODEL_RESPONSE"]
    # A NULL reference or NULL model response cannot be scored; skip the row
    # instead of letting the scorer fail on a non-string value.
    if ref is None or gen is None:
        continue
    scores = scorer.score(ref, gen)
    rouge_scores.append(
        {label: scores[key].fmeasure for label, key in ROUGE_METRICS.items()}
    )

# Convert the list to a DataFrame. Passing `columns` keeps the three metric
# columns present even when no row could be scored.
df_rouge_scores = pd.DataFrame(rouge_scores, columns=list(ROUGE_METRICS))

# Create one Altair chart per metric
chart_rouge1, chart_rouge2, chart_rougeL = (
    _rouge_histogram(df_rouge_scores, metric) for metric in ROUGE_METRICS
)

# Combine charts side by side
combined_chart = alt.hconcat(chart_rouge1, chart_rouge2, chart_rougeL).resolve_scale(y='independent')

# Display the chart
st.altair_chart(combined_chart, use_container_width=True)

**ROUGE-1**

- **Definition:** Measures the overlap of unigrams (individual words) between the generated summary and the reference summary.
- **Interpretation:** A higher ROUGE-1 score indicates that the generated summary shares more words with the reference summary, suggesting it captures more of the important content.
- **Good value:** A ROUGE-1 score above 0.5 is generally considered good.

**ROUGE-2**

- **Definition:** Measures the overlap of bigrams (pairs of consecutive words) between the generated summary and the reference summary.
- **Interpretation:** A higher ROUGE-2 score indicates that the generated summary preserves more of the word sequences found in the reference summary, reflecting better fluency and coherence.
- **Good value:** A ROUGE-2 score above 0.3 is typically seen as good.

**ROUGE-L**

- **Definition:** Measures the longest common subsequence (LCS) between the generated summary and the reference summary.
- **Interpretation:** A higher ROUGE-L score indicates that the generated summary has a longer sequence of words in common with the reference summary, suggesting better structural similarity and more comprehensive coverage of the important content.
- **Good value:** A ROUGE-L score above 0.4 is considered good.