In [2]:
import json

# Load Yelp reviews from JSON file
with open("yelp_sample_preprocessed.json", "r") as file:
    yelp_data = json.load(file)

# Extract review texts
reviews = [entry["preprocessed_text"] for entry in yelp_data]  


In [3]:
import cohere

# Initialize Cohere API
co = cohere.Client("3OX8jQ6maGWiw4KCbiQn95SS0t8aaDrhzwy318Xk")  # Replace with your key

In [4]:

def clean_topics(topics):
    """Remove punctuation and unnecessary text from topic names."""
    return [topic.strip().rstrip(".") for topic in topics if len(topic) > 1]

In [5]:
def extract_main_topics(text):
    """Extracts exactly 3 main topics from the given text."""
    response = co.generate(
        model="command",
        prompt=f"""Analyze the following customer review and extract **exactly 3** key topics.
        - Topics should summarize the **core themes** of the review.
        - Ensure they are **concise and relevant** (e.g., 'Service' instead of 'Service quality').
        - Avoid generic words like 'good' or 'bad.'
        - Output only the topics as a **comma-separated list**.

        Review: "{text}"

        Main Topics:
        """,
        max_tokens=15,
        temperature=0.2,
        stop_sequences=["\n"]
    )

    try:
        topics = response.generations[0].text.strip().split(",")
        return clean_topics(topics)
    except Exception:
        return ["Error extracting topics"]


In [6]:
import re


def clean_topicsm(topics_with_scores):
    """Cleans extracted topics and ensures proper formatting."""
    cleaned_topics = {}
    
    for item in topics_with_scores:
        match = re.match(r"(.*)\((\d+)%\)", item.strip())  # Extract topic & score
        if match:
            topic, score = match.groups()
            cleaned_topics[topic.strip()] = int(score)
    
    return cleaned_topics

def extract_main_topicsm(text):
    """Extracts 3 main topics with relevance scores from the given text."""
    response = co.generate(
        model="command",
        prompt=f"""
        Analyze the following customer review and extract **exactly 3** key topics along with relevance scores.
        - Topics should summarize the **core themes** of the review.
        - Assign a relevance percentage (0-100%) to each topic, based on its importance in the review.
        - Format the response as: Topic1 (X%), Topic2 (Y%), Topic3 (Z%)

        Review: "{text}"

        Main Topics:
        """,
        max_tokens=50
    )
    
    try:
        topics = response.generations[0].text.strip().split(",")  # Get raw topics
        return clean_topicsm(topics)  # Extract and clean topics with scores
    except Exception:
        return {"Error extracting topics": 100}  # Default if extraction fails


In [7]:
def extract_subtopics(text, topic):
    """Extracts 1-3 concise, one-word subtopics for a given main topic."""
    response = co.generate(
        model="command",
        prompt=f"""For the topic '{topic}', extract **1 relevant subtopics** based on the review.
        - Each subtopic should be **one word** (e.g., 'Speed' instead of 'Fast Delivery').
        - Avoid generic words like 'good' or 'bad.'
        - Output only the subtopics as a **comma-separated list**.

        Review: "{text}"

        Subtopics:
        """,
        max_tokens=10,
        temperature=0.2,
        stop_sequences=["\n"]
    )

    try:
        subtopics = response.generations[0].text.strip().split(",")
        return clean_topics(subtopics[:3])  # Max 3 subtopics
    except Exception:
        return ["Error extracting subtopics"]

In [8]:
def process_review(text):
    """Processes a single review to extract topics, subtopics, and emotion."""
    main_topics = extract_main_topics(text)
    subtopics = {topic: extract_subtopics(text, topic) for topic in main_topics}

    return {
        "review": text,
        "topics": {
            "main": main_topics,
            "subtopics": subtopics
        }
    }


In [9]:
# Process and print results for the first 5 reviews
for i, review in enumerate(reviews[:5]):  
    result = process_review(review)
    
    print(f"\n**Review {i+1}**")
    print("Original Text:", result["review"])
    print("Extracted Topics:", result["topics"])


**Review 1**
Original Text: yum great local restaurant made name serf delicious food snickerdoodle pancake delicious specialize various kind egg benedict delicious
Extracted Topics: {'main': ['Food', 'Diversity', 'Specialization'], 'subtopics': {'Food': ['Taste', 'Variety', 'Egg Benedict'], 'Diversity': ['Diversity', 'Food', 'Pancakes'], 'Specialization': ['Specialization', 'Variety', 'Deliciousness']}}

**Review 2**
Original Text: soooo good great food great service rice bowl amazing wow kind people exeptional food would come
Extracted Topics: {'main': ['Service', 'Food Quality', 'Experience'], 'subtopics': {'Service': ['Speed', 'Kindness', 'Food Quality'], 'Food Quality': ['Quality', 'Service', 'Experience'], 'Experience': ['Service', 'Food', 'Experience']}}

**Review 3**
Original Text: went saturday afternoon everyone friendly informed special margoli cut hair great friendly gave best cut ive ever wasnt easy cause hadnt cut since fall atmosphere relaxing comparison another salon ma

In [10]:
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import datetime

In [11]:
# Download VADER for sentiment analysis (only needed once)
nltk.download('vader_lexicon')

# Initialize Sentiment Analyzer
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\smrit\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [12]:
def analyze_sentiment(text):
    """Uses VADER sentiment analysis to return a score from -100 to +100."""
    sentiment = sia.polarity_scores(text)  # Returns pos, neu, neg, and compound scores
    return int(sentiment["compound"] * 100)  # Scale to -100 to +100

In [13]:
def compute_adorescore(text, topics):
    """Computes Adorescore using weighted sentiment and topic relevance."""
    overall_sentiment = analyze_sentiment(text)
    
    topic_breakdown = {}
    weighted_sum = 0
    total_weight = sum(topics.values())  # Sum of topic relevance scores

    for topic, weight in topics.items():
        topic_sentiment = analyze_sentiment(topic)  # Get sentiment of topic itself
        weighted_score = (topic_sentiment + overall_sentiment) / 2  # Average topic + review sentiment
        weighted_score *= (weight / 100)  # Adjust by topic relevance

        topic_breakdown[topic] = round(weighted_score, 2)
        weighted_sum += weighted_score

    # Compute final Adorescore
    adorescore = round(weighted_sum / total_weight * 100) if total_weight > 0 else 0

    return {
        "adorescore": {
            "overall": adorescore,
            "breakdown": topic_breakdown
        },
        "timestamp": datetime.datetime.now().isoformat()
    }

In [14]:
for i, review in enumerate(reviews[:10]):  
    main_topics = extract_main_topicsm(review)  # Extract topics with relevance scores
    adorescore_data = compute_adorescore(review, main_topics)

    print(f"\n**Review {i+1}**")
    print("Original Text:", review)
    print("Extracted Topics:", main_topics)
    print("Adorescore:", adorescore_data["adorescore"]["overall"])
    print("Topic Breakdown:", adorescore_data["adorescore"]["breakdown"])
    print("Timestamp:", adorescore_data["timestamp"])



**Review 1**
Original Text: yum great local restaurant made name serf delicious food snickerdoodle pancake delicious specialize various kind egg benedict delicious
Extracted Topics: {'Food Quality': 70, 'Local Vibe': 20, 'Specialist Menu': 10, 'accounting for the largest percentage': 70}
Adorescore: 48
Topic Breakdown: {'Food Quality': 33.6, 'Local Vibe': 9.6, 'Specialist Menu': 4.8, 'accounting for the largest percentage': 33.6}
Timestamp: 2025-02-26T00:44:37.429892

**Review 2**
Original Text: soooo good great food great service rice bowl amazing wow kind people exeptional food would come
Extracted Topics: {'Food': 80, 'Service': 10, 'General Impression': 10}
Adorescore: 50
Topic Breakdown: {'Food': 38.8, 'Service': 4.85, 'General Impression': 5.95}
Timestamp: 2025-02-26T00:44:39.418319

**Review 3**
Original Text: went saturday afternoon everyone friendly informed special margoli cut hair great friendly gave best cut ive ever wasnt easy cause hadnt cut since fall atmosphere relaxin