In [None]:
import requests
import json
import math

# --- Configuration ---
# URL that every prompt is POSTed to (the generate route of an Ollama server on localhost).
OLLAMA_API_URL = "http://localhost:11434/api/generate"
# Model identifier sent as the "model" field of each request payload.
# NOTE(review): the value is masked in this copy of the notebook; presumably it
# should be a quoted model-name string known to the Ollama server — confirm before running.
MODEL_NAME = ***************
# Number of reviews packed into a single aspect-extraction prompt.
REVIEWS_PER_BATCH = 75

In [None]:
# --- Prompt Templates ---
# Stage 1 prompt: asks the model for at most 10 aspects from one batch of reviews
# and for a JSON reply with a list of strings under the key "aspects".
# It is rendered with str.format(reviews_text=...), so any literal "{" or "}"
# added to this text later (e.g. a JSON example) must be doubled ("{{" / "}}").
EXTRACTION_PROMPT_TEMPLATE = """
You are an expert market research analyst AI. Your task is to identify the primary aspects or themes from a list of customer product reviews.
An "aspect" is a specific feature, quality, or topic that customers frequently discuss.
RULES:
1. Analyze the following list of reviews provided between the [START DATA] and [END DATA] tags.
2. Identify no more than 10 of the most frequently discussed aspects.
3. Merge semantically similar topics. For example, 'too thick' and 'heavy' should be combined into a single aspect like 'bulkiness and size'.
4. The name for each aspect should be a short, clear, and descriptive noun phrase.
5. Provide your output *only* in JSON format, as a single list of strings under the key "aspects". Do not add any other explanations or conversational text.
[START DATA]
{reviews_text}
[END DATA]
"""

# Stage 2 prompt: asks the model to merge the aspects collected from all batches
# into a final top-10 list, again as JSON under the key "aspects".
# It is rendered with str.format(raw_aspects_list=...), where the value is a
# comma-separated string of double-quoted aspect names. As with any str.format
# template, literal braces added to this text must be doubled.
CONSOLIDATION_PROMPT_TEMPLATE = """
You are an expert data analyst AI. Your task is to clean up and consolidate a list of product aspects generated from multiple batches of reviews.
RULES:
1. Analyze the following list of raw aspects provided between the [START DATA] and [END DATA] tags.
2. Merge similar or duplicate aspects into a single, canonical aspect. For example, merge 'durability', 'protection', and 'strength' into 'protection and durability'.
3. Ensure each final aspect name is a short, clear, and descriptive noun phrase.
4. Return the final, consolidated list of the top 10 most important aspects.
5. Provide your output *only* in JSON format, as a single list of strings under the key "aspects". Do not add any other explanations or text.
[START DATA]
{raw_aspects_list}
[END DATA]
"""

In [None]:
def call_ollama(prompt):
    """Send a prompt to the local Ollama API and return the extracted aspect list.

    The request is non-streaming and asks Ollama for JSON-formatted output. The
    generated text (the ``response`` field of the API reply) is parsed as JSON
    and the value stored under its ``"aspects"`` key is returned.

    This is a best-effort call: every failure is reported with ``print`` and
    converted into an empty list, so a caller looping over batches keeps going.

    Args:
        prompt: Full prompt text to send to the model.

    Returns:
        The list found under ``"aspects"`` in the model's JSON output, or ``[]``
        when the HTTP request fails, the reply has no usable ``response``
        field, the generated text is not valid JSON, or the decoded JSON is
        not an object holding a list under ``"aspects"``.
    """
    try:
        payload = {
            "model": MODEL_NAME,
            "prompt": prompt,
            "stream": False,
            "format": "json" # Ollama's JSON mode is great for this!
        }
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=300)
        response.raise_for_status()

        # The actual JSON content is a string inside the 'response' key
        response_content = json.loads(response.json()['response'])
    except requests.exceptions.RequestException as e:
        print(f"Error calling Ollama API: {e}")
        return []
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON response: {e}")
        print(f"Raw response: {response.text}")
        return []
    except (KeyError, TypeError) as e:
        # The API reply had no 'response' entry, or it was not a JSON string
        # (e.g. an error payload); treat it like any other failed call.
        print(f"Unexpected Ollama response structure: {e!r}")
        return []

    # JSON mode guarantees valid JSON, not the requested shape: the model may
    # still emit a bare list or scalar instead of an object.
    if not isinstance(response_content, dict):
        print(f"Expected a JSON object with an 'aspects' key, got: {response_content!r}")
        return []

    aspects = response_content.get("aspects", [])
    # Callers extend a list with this value; a bare string here would be
    # silently split into single characters, so reject anything but a list.
    if not isinstance(aspects, list):
        print(f"Expected 'aspects' to be a list, got: {aspects!r}")
        return []
    return aspects

In [None]:
def run_aspect_discovery(reviews, batch_size=None):
    """Discover product aspects from reviews via batched extraction and consolidation.

    Stage 1 splits ``reviews`` into consecutive batches, renders each batch
    into ``EXTRACTION_PROMPT_TEMPLATE`` as a dash-prefixed list and collects
    whatever ``call_ollama`` returns. Batches that yield nothing are reported
    and skipped. Stage 2 renders every collected aspect into
    ``CONSOLIDATION_PROMPT_TEMPLATE`` and asks the model for the final list.
    Progress is printed along the way.

    Args:
        reviews: Sequence of review texts; must support ``len()`` and slicing.
        batch_size: Number of reviews per extraction prompt. When omitted, the
            current value of ``REVIEWS_PER_BATCH`` is used (looked up at call
            time, so editing the configuration cell takes effect immediately).

    Returns:
        The list returned by ``call_ollama`` for the consolidation prompt, or
        ``[]`` when no batch produced any aspect.

    Raises:
        ValueError: If the effective batch size is smaller than 1.
    """
    if batch_size is None:
        batch_size = REVIEWS_PER_BATCH
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    print(f"Processing {len(reviews)} reviews in total.")
    num_batches = math.ceil(len(reviews) / batch_size)
    all_raw_aspects = []

    # --- STAGE 1: BATCH PROCESSING ---
    print(f"\n--- Starting Stage 1: Extracting aspects from {num_batches} batches ---")
    for i in range(num_batches):
        start_index = i * batch_size
        batch_reviews = reviews[start_index:start_index + batch_size]

        # Join reviews into a single string for the prompt
        reviews_text = "\n".join(f"- {review}" for review in batch_reviews)
        prompt = EXTRACTION_PROMPT_TEMPLATE.format(reviews_text=reviews_text)

        print(f"Processing batch {i+1}/{num_batches}...")
        batch_aspects = call_ollama(prompt)

        if batch_aspects:
            print(f"  -> Found aspects: {batch_aspects}")
            all_raw_aspects.extend(batch_aspects)
        else:
            print(f"  -> No aspects found or error in batch {i+1}.")

    # --- STAGE 2: CONSOLIDATION ---
    print("\n--- Starting Stage 2: Consolidating all found aspects ---")
    if not all_raw_aspects:
        print("No raw aspects were extracted. Cannot consolidate.")
        return []

    print(f"Consolidating {len(all_raw_aspects)} raw aspects...")
    # json.dumps yields the same "name" form as plain quoting for ordinary
    # names, but also escapes embedded quotes/backslashes so one odd aspect
    # cannot break the quoted list handed to the model.
    raw_aspects_text = ", ".join(
        json.dumps(str(aspect), ensure_ascii=False) for aspect in all_raw_aspects
    )
    consolidation_prompt = CONSOLIDATION_PROMPT_TEMPLATE.format(raw_aspects_list=raw_aspects_text)

    return call_ollama(consolidation_prompt)


if __name__ == "__main__":
    # Run the full two-stage discovery and report the outcome. Inside a
    # notebook kernel __name__ is "__main__", so this executes whenever the
    # cell is run, not only when the code is launched as a script.
    final_product_aspects = run_aspect_discovery(all_reviews)

    # Banner: blank line, rule, title, rule.
    print("\n" + "=" * 50, "✅ FINAL CONSOLIDATED ASPECTS:", "=" * 50, sep="\n")

    if not final_product_aspects:
        print("Failed to generate final aspects.")
    else:
        # Numbered list, counting from 1.
        for position, aspect in enumerate(final_product_aspects, start=1):
            print(f"{position}. {aspect}")

In [None]:
# Run the two-stage aspect discovery and keep the consolidated list for display.
# NOTE(review): `all_reviews` is not defined in the visible cells — it must be
# created by an earlier cell before this one is run. The `__main__` block in the
# previous cell makes the same call, so running both repeats every model request.
final_product_aspects = run_aspect_discovery(all_reviews)

In [None]:
# Banner: blank line, rule, title, rule.
print("\n" + "=" * 50, "✅ FINAL CONSOLIDATED ASPECTS:", "=" * 50, sep="\n")

if not final_product_aspects:
    # An empty result means extraction or consolidation produced nothing.
    print("Failed to generate final aspects.")
else:
    # Numbered list, counting from 1.
    for position, aspect in enumerate(final_product_aspects, start=1):
        print(f"{position}. {aspect}")