In [None]:
# Installing dependencies
#
# NOTE: these are shell commands, not Python. Run them in a terminal before
# starting Jupyter so the notebook kernel is launched from the environment
# that has the packages installed.

python --version

# Create and activate the virtual environment FIRST so that the packages
# below are installed into it rather than into the global interpreter.
python -m venv myenv
source myenv/bin/activate  # On Linux/Mac
# myenv\Scripts\activate   # On Windows (run this INSTEAD of the `source` line)

python -m pip install --upgrade pip
pip install pandas
pip install openai
pip install tqdm
pip install openpyxl  # needed by pandas DataFrame.to_excel to write .xlsx files

In [None]:
import pandas as pd
from openai import OpenAI
import logging
from tqdm import tqdm

import os

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Set up API keys
# The key is read from the OPENAI_API_KEY environment variable so that the
# credential is never stored in the notebook source or its saved outputs.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the shell that launches "
        "Jupyter before running this cell."
    )

# Initialize OpenAI client
openai_client = OpenAI(api_key=openai_api_key)

# Load the CSV file
df = pd.read_csv("extracted_reviews.csv")

# Clean the data: missing reviews become empty strings, and every remaining
# value is coerced to str so the later `.strip()` check is always valid.
df['Review Text'] = df['Review Text'].fillna('').astype(str)

def get_value(text, key):
    """Return the text that follows ``"<key>:"`` on the same line of ``text``.

    The first occurrence of ``key`` immediately followed by a colon is
    located; everything after the colon up to the next newline (or the end of
    ``text`` when there is no newline) is returned with surrounding whitespace
    stripped.

    Args:
        text: The string to search.
        key: The label to look for, without the trailing colon.

    Returns:
        The stripped value, or the string ``"N/A"`` when ``"<key>:"`` does not
        occur in ``text``.
    """
    marker = key + ":"
    try:
        value_start = text.index(marker) + len(marker)
    except ValueError:
        # Label not present anywhere in the text.
        return "N/A"

    # Keep only the remainder of the line the label was found on.
    first_line, _, _ = text[value_start:].partition("\n")
    return first_line.strip()

def analyze_text(text, review_number, product_name):
    """Ask the chat model to analyse one review and parse its labelled reply.

    A prompt embedding ``text`` is sent through the module-level
    ``openai_client``; the reply is expected to contain one ``Label: value``
    line per requested field, and each value is pulled out with ``get_value``.

    Args:
        text: The review text inserted into the prompt.
        review_number: Stored unchanged under ``"Review Number"``.
        product_name: Stored unchanged under ``"Product Name"``.

    Returns:
        A dict with the keys ``"Review Number"``, ``"Product Name"``,
        ``"Main Points"``, ``"Visual Aspects"``, ``"Sentiment of Review"``,
        ``"Keywords from Review"`` and ``"Image Generation Prompt"``; or
        ``None`` if any exception is raised while calling the API or reading
        the reply (the exception is logged at ERROR level).
    """
    prompt = f"""
    Analyze the following product review and extract the specified information.
    Provide the output in a structured format as described below.

    Review: {text}

    1. Main Points: Provide a concise summary of the review's main points in 1-2 sentences.
    2. Visual Aspects: List any visual aspects of the product mentioned in the review (e.g., color, shape, size). If none are mentioned, respond with "None".
    3. Sentiment of Review: Determine the overall sentiment of the review (Positive or Negative).
    4. Keywords from Review: List the top keywords that best describe this review, separated by commas.
    5. Image Generation Prompt: Create a detailed prompt for an AI image generator based on the review. Focus on visual elements and the overall sentiment.

    Output Format:
    Main Points: <summary>
    Visual Aspects: <aspects or None>
    Sentiment of Review: <Positive or Negative>
    Keywords from Review: <keywords>
    Image Generation Prompt: <prompt>

    Begin!
    """

    system_message = {
        "role": "system",
        "content": "You are a product review analyst. Extract key details and format the output as requested.",
    }
    user_message = {"role": "user", "content": prompt}

    # Labels the model was asked to emit, in the order they appear in the result.
    extracted_fields = (
        "Main Points",
        "Visual Aspects",
        "Sentiment of Review",
        "Keywords from Review",
        "Image Generation Prompt",
    )

    try:
        completion = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[system_message, user_message],
        )
        reply = completion.choices[0].message.content.strip()

        # Parse the structured content
        result = {
            "Review Number": review_number,
            "Product Name": product_name,
        }
        for field in extracted_fields:
            result[field] = get_value(reply, field)
        return result

    except Exception as e:
        # Any failure is reported to the caller as None.
        logging.error(f"Error during API call: {e}")
        return None

# One output record per review that has usable text.
processed_data = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
    review_text, product_name = row['Review Text'], row['Product Name']

    # Rows without non-blank string text are skipped and produce no record.
    if not (isinstance(review_text, str) and review_text.strip()):
        logging.warning(f"Skipping row {idx} due to invalid review text")
        continue

    # Review numbers are the row index shifted to start at 1.
    review_number = idx + 1
    analysis_result = analyze_text(review_text, review_number, product_name)

    if not analysis_result:
        # Keep a placeholder row so a failed review is still visible in the output.
        logging.warning(f"Failed to analyze review {review_number}")
        analysis_result = {
            "Review Number": review_number,
            "Product Name": product_name,
            **dict.fromkeys(
                [
                    "Main Points",
                    "Visual Aspects",
                    "Sentiment of Review",
                    "Keywords from Review",
                    "Image Generation Prompt",
                ],
                "Analysis Failed",
            ),
        }

    processed_data.append(analysis_result)

# Create and save the CSV
df_processed = pd.DataFrame(processed_data)
df_processed.to_csv("processed_reviews.csv", index=False, encoding="utf-8")

print("Processed data has been saved to `processed_reviews.csv`")


In [7]:
import pandas as pd
from openai import OpenAI
from tqdm import tqdm

import os

# Initialize the OpenAI client
# The key is read from the OPENAI_API_KEY environment variable; an empty
# string must not be passed as the credential and a real key must never be
# stored in the notebook source.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the shell that launches "
        "Jupyter before running this cell."
    )
client = OpenAI(api_key=api_key)

# Read the CSV file
file_path = "processed_reviews.csv"
df = pd.read_csv(file_path)

# Extract relevant columns
df_features = df[['Product Name', 'Keywords from Review']]

# Rows whose analysis failed carry the literal placeholder "Analysis Failed"
# in the keyword column; drop them so the placeholder is not sent to the model
# as if it were a keyword. A product with no successfully analysed review is
# therefore left out of the summary.
df_features = df_features[df_features['Keywords from Review'] != "Analysis Failed"]

# Group by product and aggregate keywords into a single list
# (`astype(str)` guards `join` against a keyword cell parsed as a number).
df_features = (
    df_features
    .groupby('Product Name')['Keywords from Review']
    .apply(lambda x: ', '.join(x.dropna().astype(str).unique()))
    .reset_index()
)

def generate_product_features(product_name, keywords):
    """Request a categorised feature list for one product from the chat model.

    Args:
        product_name: Product name inserted into the prompt.
        keywords: Keyword text inserted into the prompt.

    Returns:
        The content of the first choice in the model's reply, returned as-is.
        Exceptions raised by the API call are not caught here.
    """
    prompt = f"""
    Given the following product and associated keywords, generate a structured list of key features:

    Product: {product_name}
    Keywords: {keywords}

    Format the response as:
    - **Friendly Use:**
    - **Age Limit:**
    - **Material:**
    - **Design:**
    - **Features:**
    - **Comfort:**
    - **Controls:**
    - **Battery Life:**
    - **Noise Cancellation:**
    - **Connectivity:**
    - **Durability:**
    - **Customization:**
    - **Price:**
    
    Only include categories relevant to the product.
    """

    conversation = [
        {"role": "system", "content": "You are an expert at structuring product reviews into key feature summaries."},
        {"role": "user", "content": prompt},
    ]

    # Uses the module-level `client`.
    completion = client.chat.completions.create(model="gpt-4", messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content

# Process each product and store results
output_list = []

# Walk the two columns in lockstep; one API call is made per product.
product_keyword_pairs = zip(df_features['Product Name'], df_features['Keywords from Review'])
for product_name, keywords in tqdm(product_keyword_pairs, total=len(df_features)):
    output_list.append(
        {
            "Product Name": product_name,
            "Structured Features": generate_product_features(product_name, keywords),
        }
    )

# Convert to DataFrame and save to a new file
output_file = "structured_product_features.xlsx"
output_df = pd.DataFrame(output_list)
output_df.to_excel(output_file, index=False)

print(f"Structured features saved to {output_file}")

100%|██████████| 3/3 [00:48<00:00, 16.28s/it]


Structured features saved to structured_product_features.xlsx
