In [1]:

import spacy
import re
from collections import Counter

# --- 1. Setup and Sample Data ---
try:
    nlp = spacy.load("en_core_web_sm")
    print("spaCy model 'en_core_web_sm' loaded successfully.")
except OSError:
    print("ERROR: spaCy model 'en_core_web_sm' not found.")
    print("Please run: python -m spacy download en_core_web_sm")
    exit()

# Sample Amazon-style product reviews
reviews = [
    "I love the new Sonic X headphones. The battery life lasts forever and the sound quality is amazing. Highly recommend this product from TechCorp.",
    "This Ultra-Wipe Cleaner arrived broken and leaked everywhere. The packaging was terrible. I am extremely disappointed and will return it.",
    "The Cozy Blanket is the best purchase of the year! It's super soft and the crimson color is vibrant. A truly wonderful experience.",
    "My delivery of the Alpha Laptop was delayed by two weeks, which was frustrating. The laptop itself is fine, but the service was terrible.",
    "Excellent value for the Stellar Mixer. It makes baking so much easier. No complaints about the quality or performance.",
]

# --- 2. Rule-Based Sentiment Keywords ---
# Define simple lists of keywords for sentiment scoring
POSITIVE_WORDS = ["love", "amazing", "recommend", "best", "super", "vibrant", "wonderful", "excellent", "easier", "no complaints", "great", "fantastic"]
NEGATIVE_WORDS = ["broken", "leaked", "terrible", "disappointed", "return", "frustrating", "delayed", "worst"]

# --- 3. Analysis Function ---

def analyze_review(text):
    """
    Performs NER using spaCy and rule-based sentiment analysis.
    """
    doc = nlp(text)
    
    # 3.1. Named Entity Recognition (NER)
    
    # Extract entities. We are interested in potential brands/products (ORG, PRODUCT)
    entities = {}
    
    # Use spaCy's built-in entities
    for ent in doc.ents:
        # Check if the entity type is relevant for product/brand names
        if ent.label_ in ['ORG', 'PRODUCT', 'GPE']:
            # GPE (Geo-Political Entity) sometimes catches brand names
            # ORG (Organization) is often used for brands/companies
            entities[ent.text] = ent.label_
            
    # Simple rule for potential custom brand/product names:
    # Look for capitalized words not at the start of a sentence that aren't already identified
    potential_products = set()
    for token in doc:
        # Check if the token is Title Case (e.g., Ultra-Wipe, Cozy Blanket)
        # and not a stop word or punctuation
        if token.pos_ not in ('PUNCT', 'SPACE') and token.text[0].isupper() and token.text.lower() not in [e.lower() for e in entities]:
             potential_products.add(token.text)

    # 3.2. Rule-Based Sentiment Analysis
    
    positive_count = 0
    negative_count = 0
    
    # Tokenize and score based on keyword lists
    for token in doc:
        token_text = token.text.lower()
        if token_text in POSITIVE_WORDS:
            positive_count += 1
        elif token_text in NEGATIVE_WORDS:
            negative_count += 1
            
    # Determine overall sentiment
    if positive_count > negative_count:
        sentiment = "Positive"
    elif negative_count > positive_count:
        sentiment = "Negative"
    else:
        sentiment = "Neutral/Mixed"
        
    return {
        "text": text,
        "sentiment": sentiment,
        "positive_score": positive_count,
        "negative_score": negative_count,
        "spaCy_entities": entities,
        "potential_products": potential_products
    }

# --- 4. Execution and Output ---
print("-" * 50)
print("PRODUCT REVIEW NLP ANALYSIS")
print("-" * 50)

for i, review in enumerate(reviews):
    print(f"\nReview {i+1}:")
    results = analyze_review(review)
    
    print(f"  Text: {results['text']}")
    print(f"  --- NER Results ---")
    
    # Print the combined list of extracted entities
    product_brands = {**results['spaCy_entities']}
    # Add potential products to the display dictionary
    for p in results['potential_products']:
        product_brands[p] = "Potential Product"

    # Separate names and types for cleaner output
    names = ', '.join(product_brands.keys()) if product_brands else 'None Found'
    
    print(f"  Extracted Entities: {names}")
    
    print(f"  --- Sentiment Analysis (Rule-Based) ---")
    print(f"  Sentiment: {results['sentiment']} (Pos Score: {results['positive_score']}, Neg Score: {results['negative_score']})")

print("\n" + "-" * 50)


spaCy model 'en_core_web_sm' loaded successfully.
--------------------------------------------------
PRODUCT REVIEW NLP ANALYSIS
--------------------------------------------------

Review 1:
  Text: I love the new Sonic X headphones. The battery life lasts forever and the sound quality is amazing. Highly recommend this product from TechCorp.
  --- NER Results ---
  Extracted Entities: TechCorp, X, Highly, I, The, Sonic
  --- Sentiment Analysis (Rule-Based) ---
  Sentiment: Positive (Pos Score: 3, Neg Score: 0)

Review 2:
  Text: This Ultra-Wipe Cleaner arrived broken and leaked everywhere. The packaging was terrible. I am extremely disappointed and will return it.
  --- NER Results ---
  Extracted Entities: Wipe, Ultra, I, This, Cleaner, The
  --- Sentiment Analysis (Rule-Based) ---
  Sentiment: Negative (Pos Score: 0, Neg Score: 5)

Review 3:
  Text: The Cozy Blanket is the best purchase of the year! It's super soft and the crimson color is vibrant. A truly wonderful experience.
  ---