In [None]:
import pandas as pd
import transformers
import ast
import torch

In [None]:
# Load the two CSV inputs used by the rest of the notebook.
# `data` is later read through its 'topic_id', 'sentiment' and 'sequence' columns;
# `topic_representation` is later looked up through its 'Topic' column.
sequences_csv = 'data/sequences_topics_sentiment.csv'
topic_info_csv = 'data/topic_info.csv'

data = pd.read_csv(sequences_csv)
topic_representation = pd.read_csv(topic_info_csv)

In [None]:
# Mean sentiment per topic: a Series indexed by topic_id.
sentiment_by_topic = data['sentiment'].groupby(data['topic_id'])
avg_sentiment = sentiment_by_topic.mean()
print(avg_sentiment)

In [None]:
# Instruction header of the user prompt; the topic-building cell extends this string
# with one section per topic.
prompt = "".join([
    "The following are topics detected in reviews by users. ",
    "Give me a short summary of what the reviews say about the product using the following information. ",
    "Don't use numerical data, just give me a summary of a few lines.:\n\n",
])

In [None]:
# Construct the final prompt from each topic's representative words and its most positive and most negative sequences
# Assemble the per-topic part of the prompt: for every topic, its representative
# words, its average sentiment score, and its most negative / most positive sequence.
TOPIC_BULLET = "* Topic "  # marks where the per-topic sections start inside `prompt`

topic_sections = []
# sort=False keeps topics in order of first appearance in `data` (the order that
# data['topic_id'].unique() yields). Grouping once avoids re-filtering the whole
# frame for every topic; rows with a missing topic_id are skipped by groupby.
for topic_id, topic_data in data.groupby('topic_id', sort=False):
    keyword_rows = topic_representation.loc[topic_representation['Topic'] == topic_id]
    if keyword_rows.empty:
        # Fail with the offending id instead of an opaque positional IndexError.
        raise KeyError(f"topic_id {topic_id!r} has no matching row in topic_representation['Topic']")

    # The fourth column (position 3) of the matching row is read as the keyword list.
    # NOTE(review): presumably the keyword/representation column of the topic export — confirm.
    topic_keywords_str = keyword_rows.iloc[0, 3]
    # A list round-tripped through CSV comes back as its string repr; parse it safely.
    topic_keywords = ast.literal_eval(topic_keywords_str) if isinstance(topic_keywords_str, str) else topic_keywords_str

    # Most positive and most negative sequences of this topic (single label-based lookup,
    # no chained indexing).
    sentiment = topic_data['sentiment']
    positive_sequence = topic_data.loc[sentiment.idxmax(), 'sequence']
    negative_sequence = topic_data.loc[sentiment.idxmin(), 'sequence']

    # Average sentiment score, looked up explicitly by label.
    avg_sentiment_score = avg_sentiment.loc[topic_id]

    topic_sections.append(
        f"{TOPIC_BULLET}{topic_id}. Representative words = {', '.join(map(str, topic_keywords))}\n"
        f"  - Average sentiment score: {avg_sentiment_score:.4f}\n"
        f"  - Most negative sentence: {negative_sequence}\n"
        f"  - Most positive sentence: {positive_sequence}\n\n"
    )

# Keep only the instruction header (everything before the first topic bullet) so that
# re-running this cell replaces the topic sections instead of appending them again.
prompt_header = prompt.partition(TOPIC_BULLET)[0]
prompt = prompt_header + "".join(topic_sections)

In [None]:
# Chat-format input for the model: a system message that sets the summarizer role and
# explains the sentiment scale, followed by the user message carrying the built prompt.
system_instructions = (
    "You are a review summarizer, specialized in summerizing reviews about a serger sewing machine. "
    "You are going to help the user decide whether to buy a serger sewing machine or not. "
    "In order to do this, you will use topics identified in the reviews and the sentiment analysis of each topic, "
    "plus, the most negative and positive comment.. "
    "The sentiment analysis score goes from -1 to 1 (worst to best)"
)

messages = [
    dict(role="system", content=system_instructions),
    dict(role="user", content=prompt),
]

In [None]:
# Hugging Face model id used to write the summary.
model_id = "meta-llama/Llama-3.2-1B-Instruct"

# A CUDA GPU is required: stop right away when none is available
# (the error message is Spanish for "No GPU was recognized.").
if not torch.cuda.is_available():
    raise ValueError("No se reconoció GPU.")
device = "cuda"

# Load the model weights in bfloat16 and build the text-generation pipeline on the GPU.
load_options = {"torch_dtype": torch.bfloat16}
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs=load_options,
    device=device,
)

In [None]:
# Seed the random number generators before generating so that a fresh
# "Restart Kernel -> Run All" reproduces the same summary when the model's
# generation settings sample tokens.
# NOTE(review): GPU kernels may still introduce small nondeterminism — confirm if exact
# reproducibility matters.
GENERATION_SEED = 42
transformers.set_seed(GENERATION_SEED)

# Run the chat through the pipeline, allowing at most 500 newly generated tokens.
output = pipeline(messages, max_new_tokens=500)

In [None]:
# For chat input, 'generated_text' holds the conversation with the model's reply
# appended as the last message. Index from the end rather than a fixed position so
# this keeps working if more messages are added to `messages`.
assistant_content = output[0]['generated_text'][-1]['content']
print(assistant_content)