### ENV: Microsoft Fabric
### Model: OpenAI

In [None]:
import pandas as pd
import openai

In [None]:
# OPTION NOT USED IN THIS CASE:

# Alternative: use python-dotenv and keep the access credentials in a separate
# .env file instead of in the notebook (see the python-dotenv documentation).
# In Azure, the paid Azure OpenAI service is another option.
# The snippet below is wrapped in a bare string literal so that the code
# inside it is not executed; it is kept only as a reference.
'''
from dotenv import load_dotenv
import os

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
'''

In [None]:
# Since we're not using .env, the key is configured directly in this cell.
import os

# 1. Set up your API key (see below for how to obtain it).
# Prefer the OPENAI_API_KEY environment variable when it is set and non-empty,
# so a real key never has to be pasted into the notebook. Otherwise fall back
# to the placeholder below, which must be replaced by hand before calling the
# API.
openai.api_key = os.getenv("OPENAI_API_KEY") or "TU_API_KEY_AQUÍ"

In [1]:
# 2. Dataset: read the NPS results CSV, treating its first row as the header.
nps_csv_path = "abfss://Testing@onelake.dfs.fabric.microsoft.com/TestLake.Lakehouse/Files/nps_result.csv"
csv_reader = spark.read.format("csv").option("header", "true")
df = csv_reader.load(nps_csv_path)
# df is now a Spark DataFrame holding the CSV data found at nps_csv_path.


In [None]:


# 3. Group comments by score
# The pandas-style `groupby(...)[column].apply(...)` chain below only works on
# a pandas DataFrame: a Spark `groupby` result cannot be indexed with
# ["comment"]. Convert first when `df` offers `toPandas()`; a frame that is
# already pandas is used unchanged.
comments_pdf = df.toPandas() if hasattr(df, "toPandas") else df

# One row per score, with every comment of that score joined into one string.
agrupado = (
    comments_pdf.groupby("score")["comment"]
    .apply(lambda comments: " ".join(str(c) for c in comments))
    .reset_index()
)

# 4. Function to generate summary using OpenAI
def summarize_comments(texto, puntaje):
    """Summarize the customer comments of one score group with OpenAI.

    Args:
        texto: The comments to summarize, as a single string.
        puntaje: The score those comments were given; quoted in the prompt.

    Returns:
        The model's reply with surrounding whitespace removed, or a string of
        the form ``"Error: <details>"`` if the request raises an exception.
    """
    # The placeholders interpolate this function's own parameters
    # (puntaje / texto); any other name would be undefined in this scope.
    prompt = f"""
            I have a list of customer comments who gave a score of {puntaje}.
            Some comments are irrelevant or empty (such as "na", "??", ".", "...", etc.).
            Ignore those cases and generate a clear summary of the actual opinions.

            Commments:
            {texto}

            Summary:
            """
    try:
        answer = openai.ChatCompletion.create(
            # Use "gpt-3.5-turbo" if GPT-4 cannot be used.
            model="gpt-4",
            # Role of the message author (user, assistant or system).
            messages=[{"role": "user", "content": prompt.strip()}],
            # Creativity: 0 means very conservative, 1 means very creative.
            temperature=0.4,
            # Upper bound on the number of tokens the model may return.
            max_tokens=200,
        )
        return answer.choices[0].message["content"].strip()
    except Exception as e:
        # Best effort: report the failure as text so that one failing group
        # does not abort the summaries of the remaining groups.
        return f"Error: {e}"

# 5. Build the summary for each score group, one row at a time
def _summarize_row(row):
    """Summarize the joined comments of a single grouped row."""
    return summarize_comments(row["comment"], row["score"])


agrupado["resumen"] = agrupado.apply(_summarize_row, axis=1)

# 6. Show each score next to its generated summary
print(agrupado[["score", "resumen"]])
