Using OpenAI GPT-4 Turbo to perform sentiment analysis on return comments

In [1]:
import pandas as pd
import duckdb
from typing import Optional, Tuple
import openpyxl

Fetch data, load into duckdb table

For now, from CSV

In [None]:
def import_data(
        fname: str,
        clear=True,
        db_path='temp_db',
        tname='staging_table',
        ftype='csv',
        fetch_data: bool = True
        )-> Tuple[duckdb.DuckDBPyConnection, Optional[pd.DataFrame]]:
    """
    Load a CSV or Parquet file into a DuckDB table.

    fname = Path of the file to load
    clear = If True, replace the table if it already exists (useful for resetting data);
            if False, create the table only when it does not exist yet.
    db_path = Name of the DuckDB database (default: 'temp_db')
    tname = Name of the table to create (default: 'staging_table').
            Interpolated into the SQL as-is, so it must be a trusted identifier.
    ftype = Type of file to load ('csv' or 'parquet', default: 'csv')
    fetch_data = If True, also read the table back as a pandas DataFrame.

    Returns a tuple (connection, DataFrame or None). The connection is returned
    open; the caller is responsible for closing it.
    Raises ValueError if ftype is not 'csv' or 'parquet'.
    """
    # Validate before connecting so a bad argument never creates or opens the database file.
    if ftype not in ('csv', 'parquet'):
        raise ValueError("file_type must be 'csv' or 'parquet'")

    # handles:
    #   clearing table if clear is True
    #   create table if not exists
    #   import csv and parquet files
    func = 'read_csv_auto' if ftype == 'csv' else 'read_parquet'
    mode = 'OR REPLACE TABLE' if clear else 'TABLE IF NOT EXISTS'
    # Double any single quote so the path stays a single SQL string literal.
    fname_sql = str(fname).replace("'", "''")

    con = duckdb.connect(db_path)
    try:
        con.execute(f"""
            CREATE {mode} {tname} AS
            SELECT * FROM {func}('{fname_sql}')
        """)

        # if fetch_data is True, return the data as a DataFrame
        df: Optional[pd.DataFrame] = None
        if fetch_data:
            df = con.execute(f"SELECT * FROM {tname}").df()
    except Exception:
        # Do not leave an open handle behind when the load fails.
        con.close()
        raise

    return con, df


In [5]:
# run import_data
# Forward slashes keep this relative path valid on every OS; a backslash is
# only treated as a path separator on Windows.
file_path = 'data/anthro_return_comments_200_scored.csv'
con, df = import_data(file_path, db_path='first_db', clear=True)

In [17]:
# Re-open first_db and reload the staging table into pandas.
# NOTE(review): this rebinds `con`, so the handle returned by import_data in the
# earlier cell is never closed explicitly - confirm that is intended.
con = duckdb.connect('first_db')
df = con.execute("SELECT * FROM staging_table").df()
# Number of distinct RETURN_COMMENT values in the staging data.
comments = df['RETURN_COMMENT'].nunique()
comments

184

In [18]:
# Reload the staging table into pandas using the connection that is already
# open (no new duckdb.connect('first_db') needed), then release the handle.
df = con.execute("SELECT * FROM staging_table").df()
con.close()

In [8]:
# Summary statistics for the numeric columns of the staging data.
df.describe()


Unnamed: 0,sentiment_score,confidence_percent,confidence_lean,possible_brand_value
count,200.0,200.0,200.0,200.0
mean,2.635,79.9,-0.026,0.0
std,0.827769,7.861234,0.272018,0.0
min,1.0,50.0,-0.9,0.0
25%,2.0,75.0,0.0,0.0
50%,3.0,75.0,0.0,0.0
75%,3.0,90.0,0.0,0.0
max,5.0,95.0,0.8,0.0


In [None]:
# Preview the first rows of the staging data.
df.head()


Connect to ChatGPT to send the prompt and data for initial testing and analysis.

In [None]:
# Product-evaluation prompt using the START/END text template (one comment per request).
# Plain string: there are no placeholders to interpolate, so no f-prefix is needed.
# The Total_Sentiment examples are kept consistent with the definition: the
# positive average is either 0.0 or >= 4, the negative average is either 0.0 or <= 2.
prompt1 = """
You are a linguistic expert in product evaluation. Your job is to analyze customer return comments for **product feedback**, 
identify key and sub-themes related to product evaluation, and assign a sentiment score to each theme.

Requirements:
 Review the comment exclusively for *product evaluation* themes (e.g. fit, materials, quality, style).  
 Focus on the customers evaluation of the product itself.  
 Identify **1 to 4 unique themes**, each **1 to 5 words** long.  
 Avoid overly generic descriptions such as “product is bad.” Instead describe the causal themes. 
 Assign each theme a sentiment score from **1 to 5**:  
  • 1 = very negative  
  • 2 = negative  
  • 3 = neutral (no evaluative language)  
  • 4 = positive  
  • 5 = very positive  
 **Important:** purely descriptive size/fit comments without strong judgment 
   (e.g. “too small,” “sleeves too short”) → **2 (negative)**.  
 Output **only** this exact template—no shorthand numeric pairs like “1: 0” or “2: 3.”  
 For **Total_Sentiment**, output a tuple of two values:  
   1. **Average positive score** (average of all themes scored ≥4)  
   2. **Average negative score** (average of all themes scored ≤2)
   Use 0.0 when there are no themes in that group.  
 e.g. `(4.5, 1.5)` or `(0.0, 2.0)`


 
START
RETURN_NO:  
Theme 1: <theme>  
Sentiment 1: <score>  
Theme 2: <theme>  
Sentiment 2: <score>  
Theme 3: <theme>  
Sentiment 3: <score>  
Theme 4: <theme>  
Sentiment 4: <score>  
Total_Sentiment: (<avg_positive>, <avg_negative>)  
END
"""

In [9]:
# Batch product-evaluation prompt (JSON array in, JSON array out).
# This is a plain string, not an f-string, so braces are written singly: the
# example shown to the model must itself be valid JSON (single braces, commas
# between every key/value pair).
prompt1_test = """
You are a linguistic expert in product evaluation. You will receive a batch of return comments as a JSON array, each record with these input fields:

  • RETURN_NO  
  • RETURN_COMMENT  

You must process each record into one output row. Do not consider information from prior records for future records.
Return your answer as a single JSON array of objects, one object per input record, with exactly these keys:

  IDENTIFIER – a unique ID you generate for this row  
  RETURN_NO – copied from the input  
  RETURN_COMMENT – copied from the input  
  Theme 1 – product theme #1  
  Sentiment 1 – score for theme #1  
  Theme 2 – product theme #2 (or empty string)  
  Sentiment 2  – score for theme #2 (or 0)  
  Theme 3, Sentiment 3
  Theme 4, Sentiment 4
  Pos_mean– average of all positive scores (or 0 if no positive scores)
  Neg_mean– average of all negative scores (or 0 if no negative scores)
  Total_sentiment - "Some Positive, Some Negative", "Neutral", "Negative", "Very Positive", "Neutral and Negative" ... etc.

**IMPORTANT, if 0 positive or negative score, ensure mean is 0.0  

**Scoring rules (per theme):**  
1 = very negative  
2 = negative  
3 = neutral (no evaluative language)  
4 = positive  
5 = very positive  

**Size/Fit Exception**  
- Purely descriptive size/fit without judgment (“runs small,” “sleeves too short”) → **2 (negative)**  

**General rule**  
- Look for negative evaluative words (“not flattering,” “cheap feel,”, "unexpected") → score 1 or 2  
- Look for positive evaluative words (“lovely,” “beautiful color,” “soft fabric”) → score 4 or 5  

Here’s an example of the kind of JSON array you should output (for two input records):

[
  {
    "IDENTIFIER":"A1",
    "RETURN_NO":"001",
    "RETURN_COMMENT":"The colors are not as vibrant…",
    "Theme 1":"Color accuracy",
    "Sentiment 1":2,
    "Theme 2":"Packaging damage",
    "Sentiment 2":1,
    "Theme 3":"Quality perception",
    "Sentiment 3":2,
    "Theme 4":"",
    "Sentiment 4":0,
    "Pos_mean": 0,
    "Neg_mean": 1.66,
    "Total_sentiment": "Negative"
  },
  {
    "IDENTIFIER":"A2",
    "RETURN_NO":"002",
    "RETURN_COMMENT":"LOVELY, but runs EXTREMELY large!",
    "Theme 1":"Positive style",
    "Sentiment 1":4,
    "Theme 2":"Oversize fit",
    "Sentiment 2":2,
    "Theme 3":"",
    "Sentiment 3":0,
    "Theme 4":"",
    "Sentiment 4":0,
    "Pos_mean": 4,
    "Neg_mean": 2,
    "Total_sentiment": "Very Positive, Some Negative"
  }
]
"""


In [None]:
# Customer-satisfaction prompt using the START/END text template (one comment per request).
# Plain string: there are no placeholders to interpolate, so no f-prefix is needed.
# The output instruction names the text template (not JSON) because the fields
# below are START/END labelled lines, and the Total_Sentiment examples follow
# the definition (positive average 0.0 or >= 4, negative average 0.0 or <= 2).
prompt2 = """
You are a linguistic expert in customer satisfaction evaluation. Your job is to analyze customer return comments for **customer sentiment**, identify key and sub-themes expressing a customer’s satisfaction level, and assign a sentiment score to each theme.

Requirements:
 Review the comment exclusively for *customer satisfaction* themes (e.g. “larger than expected,” “did not arrive in time,” “not hip and cool”).  
 Focus on the customers tone and overall satisfaction, **not** on product attributes.  
 Identify **1 to 4 unique themes**, each **1 to 5 words** long.  
 Avoid overly generic descriptions such as “customer is unhappy.” Instead, describe the causal experience (e.g. “late delivery,” “felt underdressed”).  
 Assign each theme a sentiment score from **1 to 5**:  
   1 = very negative  
   2 = negative  
   3 = neutral (no evaluative language)  
   4 = positive  
   5 = very positive  
 **Exception:** purely descriptive size/fit comments without judgment (e.g. “too small,” “sleeves too short”) → **3 (neutral)**.  
 **Note:** Pay special attention to customer-expression feedback around modesty or style (e.g. “too sheer,” “see-through,” “didn’t make me look good”) as **negative satisfaction** themes.  
 **Do not** output any bare numeric pairs like “1:0” or “2:3.” Use only the full field labels below.  
 For **Total_Sentiment**, output a tuple of two floats:  
   1. **Average positive score** (mean of themes scored ≥4)  
   2. **Average negative score** (mean of themes scored ≤2)  
   Use `0.0` if there are no themes in one of those groups.  
   e.g. `(4.5, 1.5)` or `(0.0, 2.0)`

Output using exactly the following template:

START  
Theme 1: <theme>  
Sentiment 1: <score>  
Theme 2: <theme>  
Sentiment 2: <score>  
Theme 3: <theme>  
Sentiment 3: <score>  
Theme 4: <theme>  
Sentiment 4: <score>  
Total_Sentiment: (<avg_positive>, <avg_negative>)  
END
"""


In [None]:
# Batch customer-satisfaction prompt (JSON array in, JSON array out).
# This is a plain string, not an f-string, so braces are written singly: the
# example shown to the model must itself be valid JSON.
prompt2_test = """
You are a linguistic expert in customer satisfaction evaluation. You will receive a JSON array of return comments, each record with:

  • RETURN_NO  
  • RETURN_COMMENT  

You must process each record into one output row. Do not consider information from prior records for future records.
Return your answer as a single JSON array of objects, one object per input record, with exactly these keys:

  IDENTIFIER       – unique ID you generate (e.g. “C1”, “C2”, …)  
  RETURN_NO        – copied from the input  
  RETURN_COMMENT   – copied from the input  
  Theme 1          – satisfaction theme #1 (1–5 words)  
  Sentiment 1      – score for theme #1 (1–5)  
  Theme 2          – satisfaction theme #2 or ""  
  Sentiment 2      – its score or 0  
  Theme 3, Sentiment 3  
  Theme 4, Sentiment 4  
  Pos_mean– average of all positive scores (or 0 if no positive scores)
  Neg_mean– average of all negative scores (or 0 if no negative scores)
  Total_sentiment - "Some Positive, Some Negative", "Neutral", "Negative", "Very Positive", "Neutral and Negative" ... etc.

  **IMPORTANT, if 0 positive or negative score, ensure mean is 0.0

**Scoring rules (per theme):**  
 1 = very negative  
 2 = negative  
 3 = neutral (no evaluative language)  
 4 = positive  
 5 = very positive  

**Key Considrations**  
 • Purely descriptive size/fit (“runs small,” “sleeves too short”) → **3 (neutral)**
 • Quality Issues (“Missing Buttons,” “Package was damaged”) → **1 (Very Negative)**  

**Focus**  
 • Only on the customer’s satisfaction and experience—**not** on product attributes.  
 • Pay special attention to style/modesty feedback (“too sheer,” “didn’t make me look good”) as negative themes.  

**Output example** (for two records):

[
  {
    "IDENTIFIER":"C1",
    "RETURN_NO":"N001",
    "RETURN_COMMENT":"Dress arrived late and felt underdressed at event.",
    "Theme 1":"Late delivery",
    "Sentiment 1":2,
    "Theme 2":"Felt underdressed",
    "Sentiment 2":1,
    "Theme 3":"",
    "Sentiment 3":0,
    "Theme 4":"",
    "Sentiment 4":0,
    "Pos_mean":0.0,
    "Neg_mean":1.5,
    "Total_sentiment":"Negative"
  },
  {
    "IDENTIFIER":"C2",
    "RETURN_NO":"N002",
    "RETURN_COMMENT":"Too large but can also see the back ruffles from the front which make you look like you have gills",
    "Theme 1":"you look like have gills",
    "Sentiment 1":1,
    "Theme 2":"Too large",
    "Sentiment 2":3,
    "Theme 3":"",
    "Sentiment 3":0,
    "Theme 4":"",
    "Sentiment 4":0,
    "Pos_mean":0.0,
    "Neg_mean":1.0,
    "Total_sentiment":"Very Negative"
  }
]
"""

New function to send the prompt and the DataFrame to ChatGPT and parse the evaluation it returns.

In [11]:
import os
import json
import logging
import pandas as pd
import openai
import re
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the environment.
load_dotenv()

# OPEN_API_KEY is the name this notebook has always used and is still checked
# first; OPENAI_API_KEY is accepted as a fallback because that is the name the
# OpenAI SDK and documentation use.
openai.api_key = os.getenv("OPEN_API_KEY") or os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise EnvironmentError("Please set the OPEN_API_KEY (or OPENAI_API_KEY) environment variable")

def parse_gpt_response(response: str, debug: bool = True) -> dict:
    """
    Parse one GPT response block in the custom START/END template into a dictionary.

    Recognised lines are "Theme <n>: <text>", "Sentiment <n>: <text>" and
    "Total_Sentiment: <text>"; every other line is ignored.

    response = Text of a single response block (START/END markers optional).
    debug = If True, print the parsed dictionary.

    Returns a dict keyed "Theme <n>", "Sentiment <n>" and "Total_Sentiment".
    All values are the stripped strings found after the colon (no numeric conversion).
    """
    result = {}
    for raw_line in response.split('\n'):
        line = raw_line.strip()
        # START/END are only delimiters when they stand alone as a token.
        # Removing the substrings anywhere would damage theme text that merely
        # contains those letters (e.g. "WEEKEND", "TRENDY").
        if line in ('START', 'END'):
            continue
        line = re.sub(r"^START\s+", "", line)
        line = re.sub(r"\s+END$", "", line)
        if not line:
            continue
        # Match Theme/Sentiment/Total_Sentiment
        theme_match = re.match(r"Theme (\d+): (.+)", line)
        sentiment_match = re.match(r"Sentiment (\d+): (.+)", line)
        total_match = re.match(r"Total_Sentiment: (.+)", line)
        if theme_match:
            idx, value = theme_match.groups()
            result[f"Theme {idx}"] = value.strip()
        elif sentiment_match:
            idx, value = sentiment_match.groups()
            result[f"Sentiment {idx}"] = value.strip()
        elif total_match:
            result["Total_Sentiment"] = total_match.group(1).strip()
    if debug:
        print("Parsed single response block:", result)
    return result

def ai_format_df(prompt: str, df: pd.DataFrame, debug: bool = True) -> pd.DataFrame:
    """
    Send `prompt` plus the JSON-records version of `df` to the chat model and
    return the model's reply as a new DataFrame.

    prompt = Instruction text sent as the first user message.
    df = Records to evaluate; serialised with df.to_json(orient="records").
    debug = If True, print the prompt, the raw reply and the parsed data.

    The reply is parsed as JSON (after removing Markdown code fences). If it is
    not valid JSON even after trailing commas are removed, it is treated as
    START/END template text and parsed with parse_gpt_response.

    Raises ValueError if the model returns no text.
    """
    df_json = df.to_json(orient="records")
    if debug:
        print("Prompt sent to model:\n", prompt)
    messages = [
        {"role": "system", "content": "You are a helpful assistant that takes a JSON array and returns a modified JSON array."},
        {"role": "user", "content": prompt},
        {"role": "user", "content": df_json}
    ]
    resp = openai.chat.completions.create(
        model="gpt-4-turbo",
        messages=messages,
        temperature=0.1,
    )
    # The message content may be None; normalise it so the empty-response check
    # below raises the intended ValueError instead of an AttributeError.
    content = (resp.choices[0].message.content or "").strip()
    if debug:
        print("Raw response from OpenAI:\n", content)
    if not content:
        raise ValueError("Empty response from OpenAI")
    logging.debug(f"AI response: {content}")

    # Strip Markdown code-fence markers (``` or ```json) if present.
    content_clean = re.sub(r"^```(?:json)?\s*|```$", "", content, flags=re.MULTILINE).strip()
    if debug:
        print("Cleaned response before JSON parse:\n", content_clean)

    # Try to parse as JSON, else use custom parser
    try:
        try:
            data = json.loads(content_clean)
        except json.JSONDecodeError:
            # Only repair trailing commas when the text is not already valid
            # JSON: the pattern also matches ", ]" / ", }" inside string values
            # and would silently alter comment text in a valid reply.
            repaired = re.sub(r",\s*([\]}])", r"\1", content_clean).strip()
            data = json.loads(repaired)
        if isinstance(data, dict):
            data = [data]
        if debug:
            print("Parsed data from response (JSON):\n", data)
        result_df = pd.DataFrame(data)
    except json.JSONDecodeError as e:
        if debug:
            print("Response is not valid JSON, using custom parser. Error:", e)
        # Split only on END lines; splitting on the bare substring would cut
        # words such as "WEEKEND" in half.
        blocks = [b for b in re.split(r"(?m)^[ \t]*END[ \t]*$", content) if b.strip()]
        parsed = [parse_gpt_response(block, debug=debug) for block in blocks]
        if debug:
            print("Parsed data from response (custom):\n", parsed)
        result_df = pd.DataFrame(parsed)

    # The prompts ask for one output row per input record; flag silent drift.
    if len(result_df) != len(df):
        logging.warning(
            "Model returned %d rows for %d input records", len(result_df), len(df)
        )
    return result_df

Function to run both product and customer sentiment analysis.

In [None]:
# sample_df = df.iloc[list(range(10)) + list(range(25, 50)) + list(range(150, 170))]

In [None]:
# sample_df = sample_df[['RETURN_NO', 'RETURN_COMMENT']].copy()
# sample_df.head()
# sample_df.describe()

Unnamed: 0,RETURN_NO,RETURN_COMMENT
count,55,55
unique,54,53
top,N172323333,They arrive broken
freq,2,2


In [None]:
# simple_df = sample_df.iloc[:10].copy()
# simple_df.head()

In [None]:

# product_eval = ai_format_df(prompt1_test, sample_df, debug=True)
# results_path = r"C:\Code\URBN\review_analyzer\results\big_test_product_eval_results.csv"
# product_eval.to_csv(results_path, index=False)
# print(product_eval.head())


Prompt sent to model:
 
You are a linguistic expert in product evaluation. You will receive a batch of return comments as a JSON array, each record with these input fields:

  • RETURN_NO  
  • RETURN_COMMENT  

You must process each record into one output row. Do not consider information from prior records for future records.
Return your answer as a single JSON array of objects, one object per input record, with exactly these keys:

  IDENTIFIER – a unique ID you generate for this row  
  RETURN_NO – copied from the input  
  RETURN_COMMENT – copied from the input  
  Theme 1 – product theme #1  
  Sentiment 1 – score for theme #1  
  Theme 2 – product theme #2 (or empty string)  
  Sentiment 2  – score for theme #2 (or 0)  
  Theme 3, Sentiment 3
  Theme 4, Sentiment 4
  Pos_mean– average of all positive scores (or 0 if no positive scores)
  Neg_mean– average of all negative scores (or 0 if no negative scores)
  Total_sentiment - "Some Positive, Some Negative", "Nuetral", "Negative",

In [None]:

# customer_eval = ai_format_df(prompt2_test, sample_df, debug=True)
# results_path = r"C:\Code\URBN\review_analyzer\results\big_test_customer_eval_results.csv"
# customer_eval.to_csv(results_path, index=False)
# print(customer_eval.head())


Prompt sent to model:
 
You are a linguistic expert in customer satisfaction evaluation. You will receive a JSON array of return comments, each record with:

  • RETURN_NO  
  • RETURN_COMMENT  

You must process each record into one output row. Do not consider information from prior records for future records.
Return your answer as a single JSON array of objects, one object per input record, with exactly these keys:

  IDENTIFIER       – unique ID you generate (e.g. “C1”, “C2”, …)  
  RETURN_NO        – copied from the input  
  RETURN_COMMENT   – copied from the input  
  Theme 1          – satisfaction theme #1 (1–5 words)  
  Sentiment 1      – score for theme #1 (1–5)  
  Theme 2          – satisfaction theme #2 or ""  
  Sentiment 2      – its score or 0  
  Theme 3, Sentiment 3  
  Theme 4, Sentiment 4  
  Pos_mean– average of all positive scores (or 0 if no positive scores)
  Neg_mean– average of all negative scores (or 0 if no negative scores)
  Total_sentiment - "Some Positiv

In [12]:
def batch_ai_format_df(prompt, df, batch_size=50, debug=False):
    """
    Run ai_format_df over `df` in consecutive slices of `batch_size` rows and
    concatenate the per-batch results into one DataFrame with a fresh index.

    prompt = Prompt passed unchanged to every ai_format_df call.
    df = Input records.
    batch_size = Maximum number of rows per request (must be >= 1).
    debug = Forwarded to ai_format_df.

    Returns the concatenated results; an empty DataFrame when `df` has no rows
    (no request is sent in that case).
    Raises ValueError if batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    # pd.concat raises on an empty list, so handle the no-rows case explicitly.
    if len(df) == 0:
        return pd.DataFrame()
    results = [
        ai_format_df(prompt, df.iloc[start:start + batch_size], debug=debug)
        for start in range(0, len(df), batch_size)
    ]
    return pd.concat(results, ignore_index=True)

def _evaluate_and_store(df, prompt, db_path, table_name, batch_size, debug):
    """Evaluate `df` in batches and store the result as `table_name` in the DuckDB file `db_path`."""
    evaluated = batch_ai_format_df(prompt, df, batch_size=batch_size, debug=debug)
    con = duckdb.connect(db_path)
    try:
        # Register the frame under an explicit name instead of relying on
        # DuckDB resolving a Python local variable by name from the SQL text.
        con.register("_evaluated_frame", evaluated)
        con.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM _evaluated_frame")
    finally:
        # Always release the database handle, including when the write fails.
        con.close()
    return evaluated


# Evaluate product comments in batches and insert into duckdb
def evaluate_and_store_product(df, prompt, db_path='first_db', table_name='product_evaluated', batch_size=50, debug=False):
    """
    Run the product-evaluation prompt over `df` and (re)create `table_name`
    in the DuckDB database at `db_path` from the result.

    table_name is interpolated into the SQL as-is, so it must be a trusted identifier.
    Returns the evaluated DataFrame.
    """
    return _evaluate_and_store(df, prompt, db_path, table_name, batch_size, debug)


# Evaluate customer comments in batches and insert into duckdb
def evaluate_and_store_customer(df, prompt, db_path='first_db', table_name='customer_evaluated', batch_size=50, debug=False):
    """
    Run the customer-satisfaction prompt over `df` and (re)create `table_name`
    in the DuckDB database at `db_path` from the result.

    table_name is interpolated into the SQL as-is, so it must be a trusted identifier.
    Returns the evaluated DataFrame.
    """
    return _evaluate_and_store(df, prompt, db_path, table_name, batch_size, debug)

# Example usage:
# product_eval = evaluate_and_store_product(sample_df, prompt1_test, debug=True)
# customer_eval = evaluate_and_store_customer(sample_df, prompt2_test, debug=True)


In [13]:
# Send only the two fields the prompts describe. The staging data also holds
# earlier scoring columns (e.g. sentiment_score, confidence_percent) that would
# otherwise be serialised into every request and shown to the model.
return_comments = df[['RETURN_NO', 'RETURN_COMMENT']].copy()
product_eval = evaluate_and_store_product(return_comments, prompt1_test, debug=True)
customer_eval = evaluate_and_store_customer(return_comments, prompt2_test, debug=True)

Prompt sent to model:
 
You are a linguistic expert in product evaluation. You will receive a batch of return comments as a JSON array, each record with these input fields:

  • RETURN_NO  
  • RETURN_COMMENT  

You must process each record into one output row. Do not consider information from prior records for future records.
Return your answer as a single JSON array of objects, one object per input record, with exactly these keys:

  IDENTIFIER – a unique ID you generate for this row  
  RETURN_NO – copied from the input  
  RETURN_COMMENT – copied from the input  
  Theme 1 – product theme #1  
  Sentiment 1 – score for theme #1  
  Theme 2 – product theme #2 (or empty string)  
  Sentiment 2  – score for theme #2 (or 0)  
  Theme 3, Sentiment 3
  Theme 4, Sentiment 4
  Pos_mean– average of all positive scores (or 0 if no positive scores)
  Neg_mean– average of all negative scores (or 0 if no negative scores)
  Total_sentiment - "Some Positive, Some Negative", "Nuetral", "Negative",

In [None]:
# # create small sample set
# set = [1,6,165,190,194]
# sample_df = df.iloc[set]

In [None]:
# sample_df.head()

In [None]:
# sample_df = sample_df[['RETURN_NO', 'RETURN_COMMENT']].copy()
# product_eval = ai_format_df(prompt1_test, sample_df, debug=False)
# product_eval.head()

In [None]:
# results_path = r"C:\Code\URBN\review_analyzer\results\product_eval_results.csv"
# product_eval.to_csv(results_path, index=False)
# print(product_eval.head())


In [None]:
# sample_df = sample_df[['RETURN_NO', 'RETURN_COMMENT']].copy()
# product_eval = ai_format_df(prompt2_test, sample_df, debug=False)
# product_eval.head()
# results_path = r"C:\Code\URBN\review_analyzer\results\product_eval_results.csv"
# product_eval.to_csv(results_path, index=False)
# print(product_eval.head())


In [14]:
def export_evaluations_to_excel(db_path='first_db', product_table='product_evaluated', customer_table='customer_evaluated', out_path='evaluation_results.xlsx'):
    """
    Export product and customer evaluation tables from DuckDB to a single Excel file with two sheets.

    db_path = DuckDB database file to read from.
    product_table = Table written to the 'Product' sheet.
    customer_table = Table written to the 'Customer' sheet.
    out_path = Destination .xlsx path; missing parent directories are created.

    Table names are interpolated into the SQL as-is, so they must be trusted identifiers.
    """
    from pathlib import Path
    import duckdb
    import pandas as pd
    with duckdb.connect(db_path) as con:
        product_df = con.execute(f"SELECT * FROM {product_table}").df()
        customer_df = con.execute(f"SELECT * FROM {customer_table}").df()
    # ExcelWriter does not create directories; make sure e.g. 'results/' exists.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with pd.ExcelWriter(out_path, engine='openpyxl') as writer:
        product_df.to_excel(writer, sheet_name='Product', index=False)
        customer_df.to_excel(writer, sheet_name='Customer', index=False)
    print(f"Exported to {out_path}")

# Example usage:
# export_evaluations_to_excel(db_path='first_db', out_path='evaluation_results.xlsx')


In [15]:
# Write both evaluation tables from first_db into one workbook under results/.
export_evaluations_to_excel(db_path='first_db', out_path='results/evaluation_results_gpt_4.0_turbo.xlsx')


Exported to results/evaluation_results_gpt_4.0_turbo.xlsx


In [16]:
# Final cleanup of the notebook-level connection.
# NOTE(review): when the notebook is run top to bottom, `con` was already closed
# in the data-loading section - confirm this second close is still needed.
con.close()