In [1]:
# Install and import all required libraries
!pip install -q selenium beautifulsoup4 pandas scikit-learn nltk requests

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.7/9.7 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m512.0/512.0 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import json
import os
import random
import re
import sys
import time
import warnings
from collections import defaultdict
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd

# Suppress all Python warnings for the rest of the session (keeps cell output short).
warnings.filterwarnings('ignore')

# --- Package Installation ---
# Import the third-party dependencies; if any one is missing, install the
# whole set with pip into the current interpreter and import again.
try:
    import pandas
    import nltk
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    import requests
    from bs4 import BeautifulSoup
    import cloudscraper
    # The scraper parses every page with BeautifulSoup(..., 'lxml'), so the
    # parser must be checked here too; otherwise a missing lxml would never
    # trigger the install below and would only surface as a scraping failure.
    import lxml  # noqa: F401
except ImportError:
    print("Installing required packages...")
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                           "pandas", "nltk", "vaderSentiment", "requests",
                           "beautifulsoup4", "lxml", "cloudscraper"])
    print("Packages installed successfully.")
    import pandas
    import nltk
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    import requests
    from bs4 import BeautifulSoup
    import cloudscraper
    import lxml  # noqa: F401

# Download NLTK resources
nltk.download('stopwords', quiet=True)
nltk.download('vader_lexicon', quiet=True)


# ============================================================================
# STAGE 1: ADVANCED MULTI-METHOD SCRAPER
# ============================================================================

class ProductScraper:
    """
    Multi-strategy scraper for Amazon India product pages.

    ``scrape_product`` tries the following strategies in order and stops at
    the first one that yields at least one review:
    1. Cloudscraper (bypasses basic Cloudflare protection)
    2. A plain ``requests`` session with browser-like headers and cookies
    3. The mobile site with a mobile user agent
    4. The dedicated product-reviews page

    Every strategy returns either ``None`` or a dict with the keys
    ``product_name`` (str), ``price`` (float, 0.0 when not found),
    ``reviews`` (list of str) and ``ratings`` (list of float, parallel to
    ``reviews``).
    """

    # Hosts used to build the mobile and review-page URLs.
    DESKTOP_HOST = 'www.amazon.in'
    MOBILE_HOST = 'm.amazon.in'

    # A price scraped from page markup is only accepted strictly inside this range.
    MIN_PRICE = 100
    MAX_PRICE = 1000000

    # Rating recorded for a review whose star rating cannot be found or parsed.
    DEFAULT_RATING = 3.0

    # Review bodies shorter than this many characters are discarded.
    MIN_REVIEW_LENGTH = 20

    # URL path shapes that carry the 10-character ASIN.
    _ASIN_PATTERN = re.compile(r'/(?:dp|gp/product|gp/aw/d|product-reviews)/([A-Z0-9]{10})')

    # Patterns tried, in order, against the visible text of a rating element.
    _RATING_TEXT_PATTERNS = (
        r'(\d+\.?\d*)\s*(?:out\s*of|stars?)',
        r'(\d+\.?\d*)\s*stars?',
        r'star-(\d+)',
    )

    def __init__(self):
        # Method 1: CloudScraper (best for Cloudflare)
        self.scraper = cloudscraper.create_scraper(
            browser={
                'browser': 'chrome',
                'platform': 'windows',
                'mobile': False
            }
        )

        # Method 2: Regular requests with enhanced headers
        self.session = requests.Session()

        self.desktop_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0'
        ]

        self.mobile_agents = [
            'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
            'Mozilla/5.0 (Linux; Android 13; SM-S911B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36'
        ]

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    def _get_headers(self, mobile=False):
        """Return browser-like request headers with a randomly chosen user agent.

        Args:
            mobile: pick the user agent from ``mobile_agents`` instead of
                ``desktop_agents``; the static cookie is only added for desktop.
        """
        agents = self.mobile_agents if mobile else self.desktop_agents

        # 'Accept-Encoding' is deliberately not set here: the session supplies
        # a default that matches the decoders actually installed. Advertising
        # 'br' without a Brotli decoder can yield a body that cannot be parsed.
        headers = {
            'User-Agent': random.choice(agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
            'Cache-Control': 'max-age=0',
            'TE': 'trailers'
        }

        # Add cookies to appear more legitimate
        if not mobile:
            headers['Cookie'] = 'session-id=262-1234567-1234567; ubid-acbin=262-1234567-1234567'

        return headers

    def _clean_text(self, text: str) -> str:
        """Collapse all whitespace runs to single spaces; falsy input gives ''."""
        if not text:
            return ""
        return " ".join(text.split())

    @classmethod
    def _extract_asin(cls, url: str) -> Optional[str]:
        """Return the ASIN embedded in a product URL, or None when there is none."""
        match = cls._ASIN_PATTERN.search(url or '')
        return match.group(1) if match else None

    @classmethod
    def _parse_price_text(cls, text: str) -> Optional[float]:
        """Parse a displayed price such as '\u20b974,999.00'.

        Returns the number, or None when no number is present or it falls
        outside the (MIN_PRICE, MAX_PRICE) sanity range.
        """
        cleaned = re.sub(r'[\u20b9$,\s]', '', text or '')
        match = re.search(r'(\d+\.?\d*)', cleaned)
        if not match:
            return None
        price = float(match.group(1))
        if cls.MIN_PRICE < price < cls.MAX_PRICE:
            return price
        return None

    @staticmethod
    def _price_from_json_ld(data) -> float:
        """Return the first positive ``offers.price`` in decoded JSON-LD, else 0.0.

        Accepts a single object or a list of objects; ``offers`` may itself be
        an object or a list of objects.
        """
        nodes = data if isinstance(data, list) else [data]
        for node in nodes:
            if not isinstance(node, dict):
                continue
            offers = node.get('offers')
            if isinstance(offers, dict):
                offers = [offers]
            if not isinstance(offers, list):
                continue
            for offer in offers:
                if not isinstance(offer, dict):
                    continue
                try:
                    price = float(str(offer.get('price', 0)).replace(',', ''))
                except (TypeError, ValueError):
                    continue
                if price > 0:
                    return price
        return 0.0

    @classmethod
    def _parse_rating(cls, text: str, css_classes: str = '') -> Optional[float]:
        """Parse a star rating from element text, falling back to CSS classes.

        Args:
            text: visible text of the rating element, e.g. '4.0 out of 5 stars'.
            css_classes: space-joined class names, e.g. 'a-icon a-star-5'.

        Returns:
            The rating as a float, or None when nothing matches.
        """
        for pattern in cls._RATING_TEXT_PATTERNS:
            match = re.search(pattern, text or '', re.IGNORECASE)
            if match:
                return float(match.group(1))
        # 'star-N' is a class-name convention, so it must also be looked up
        # in the class attribute and not only in the visible text.
        match = re.search(r'star-(\d+)', css_classes or '', re.IGNORECASE)
        if match:
            return float(match.group(1))
        return None

    # ------------------------------------------------------------------
    # HTML extraction
    # ------------------------------------------------------------------

    def _extract_product_name(self, soup) -> str:
        """Return the product title, or 'Unknown Product' when none is found."""
        selectors = [
            '#productTitle',
            'h1#title',
            'h1.product-title',
            'span#productTitle',
            'h1[class*="product"]'
        ]

        for selector in selectors:
            try:
                elem = soup.select_one(selector)
                if elem:
                    text = self._clean_text(elem.get_text())
                    if text and len(text) > 5:
                        return text
            except Exception:
                continue

        # Last resort: find any h1 with product-like text
        try:
            for h1 in soup.find_all('h1'):
                text = self._clean_text(h1.get_text())
                if text and len(text) > 10:
                    return text
        except Exception:
            pass

        return "Unknown Product"

    def _extract_price(self, soup) -> float:
        """Return the product price, or 0.0 when it cannot be determined."""
        # Method 1: Standard price selectors
        price_selectors = [
            'span.a-price-whole',
            'span.a-price span.a-offscreen',
            '#priceblock_ourprice',
            '#priceblock_dealprice',
            'span.a-color-price',
            '.a-price .a-offscreen'
        ]

        for selector in price_selectors:
            try:
                for elem in soup.select(selector):
                    price = self._parse_price_text(elem.get_text())
                    if price is not None:
                        return price
            except Exception:
                continue

        # Method 2: Search for price in scripts (JSON-LD)
        try:
            scripts = soup.find_all('script', type='application/ld+json')
        except Exception:
            scripts = []

        for script in scripts:
            raw = script.string or script.get_text()
            if not raw:
                continue
            try:
                data = json.loads(raw)
            except (TypeError, ValueError):
                continue
            price = self._price_from_json_ld(data)
            if price > 0:
                return price

        return 0.0

    def _first_match_text(self, element, selectors) -> Optional[str]:
        """Return the cleaned text of the first selector that matches, else None."""
        for selector in selectors:
            found = element.select_one(selector)
            if found is not None:
                return self._clean_text(found.get_text())
        return None

    def _extract_rating(self, review_elem) -> float:
        """Return the rating of one review element, or DEFAULT_RATING."""
        rating_selectors = [
            '[data-hook="review-star-rating"]',
            '.review-rating',
            'i[class*="star"]',
            '[class*="star-rating"]'
        ]

        for selector in rating_selectors:
            rating_elem = review_elem.select_one(selector)
            if rating_elem is None:
                continue
            classes = rating_elem.get('class') or []
            if isinstance(classes, str):
                classes = [classes]
            parsed = self._parse_rating(rating_elem.get_text(), ' '.join(classes))
            # Only the first rating element found is consulted.
            return self.DEFAULT_RATING if parsed is None else parsed

        return self.DEFAULT_RATING

    def _extract_reviews_and_ratings(self, soup) -> Tuple[List[str], List[float]]:
        """Return parallel lists of review texts and star ratings.

        Container selectors are tried in order; the first one that yields any
        review wins. Reviews shorter than MIN_REVIEW_LENGTH are skipped and a
        review without a parseable rating gets DEFAULT_RATING.
        """
        reviews: List[str] = []
        ratings: List[float] = []

        # Strategy 1: Standard review divs
        review_containers = [
            '[data-hook="review"]',
            '.review',
            '[id*="customer-review"]',
            '.a-section.review'
        ]
        text_selectors = [
            '[data-hook="review-body"]',
            '.review-text',
            '.review-text-content',
            '[class*="review-text"]'
        ]

        for container_selector in review_containers:
            for review_elem in soup.select(container_selector):
                try:
                    review_text = self._first_match_text(review_elem, text_selectors)
                    if not review_text or len(review_text) < self.MIN_REVIEW_LENGTH:
                        continue
                    rating = self._extract_rating(review_elem)
                except Exception:
                    # A malformed review must not abort the remaining ones.
                    continue

                reviews.append(review_text)
                ratings.append(rating)

            if reviews:  # If we found reviews, don't try other selectors
                break

        return reviews, ratings

    def _build_result(self, soup, url: Optional[str] = None) -> Optional[Dict]:
        """Assemble the result dict from a parsed page.

        When the page itself has no reviews and ``url`` is given, the dedicated
        reviews page is tried. Returns None when no reviews are found at all.
        """
        product_name = self._extract_product_name(soup)
        price = self._extract_price(soup)
        reviews, ratings = self._extract_reviews_and_ratings(soup)

        if not reviews and url:
            reviews, ratings = self._try_reviews_page(url)

        if not reviews:
            return None

        return {
            'product_name': product_name,
            'price': price,
            'reviews': reviews,
            'ratings': ratings
        }

    # ------------------------------------------------------------------
    # Scraping strategies
    # ------------------------------------------------------------------

    def _try_mobile_version(self, url: str) -> Optional[Dict]:
        """Fetch the mobile site with a mobile user agent; None on any failure."""
        mobile_url = url.replace(self.DESKTOP_HOST, self.MOBILE_HOST)
        print(f"  - Trying mobile version...")

        try:
            response = self.session.get(mobile_url, headers=self._get_headers(mobile=True), timeout=15)
            if response.status_code != 200:
                return None
            soup = BeautifulSoup(response.content, 'lxml')
            return self._build_result(soup)
        except Exception as exc:
            # Best effort: report and let the caller move to the next strategy.
            print(f"  - Mobile version failed: {str(exc)[:50]}")
            return None

    def _try_reviews_page(self, url: str) -> Tuple[List[str], List[float]]:
        """Fetch reviews from the dedicated reviews page of the product.

        Returns two empty lists when the URL has no ASIN or when every
        candidate reviews URL fails, is blocked, or contains no reviews.
        """
        asin = self._extract_asin(url)
        if not asin:
            return [], []

        # Try different review URL formats
        review_urls = [
            f"https://{self.DESKTOP_HOST}/product-reviews/{asin}/",
            f"https://{self.DESKTOP_HOST}/{asin}/product-reviews/",
            f"https://{self.MOBILE_HOST}/product-reviews/{asin}/"
        ]

        for review_url in review_urls:
            print(f"  - Trying reviews page: {review_url[:50]}...")
            time.sleep(random.uniform(1, 2))

            try:
                # Try with cloudscraper first
                response = self.scraper.get(review_url, timeout=15)
                if response.status_code != 200:
                    continue
                # Check if blocked
                if 'captcha' in response.text.lower():
                    continue
                soup = BeautifulSoup(response.content, 'lxml')
                reviews, ratings = self._extract_reviews_and_ratings(soup)
            except Exception as exc:
                print(f"  - Reviews page failed: {str(exc)[:50]}")
                continue

            if reviews:
                return reviews, ratings

        return [], []

    def scrape_product(self, url: str) -> Optional[Dict]:
        """Run every strategy in order and return the first result with reviews.

        Returns:
            The result dict described on the class, or None when all
            strategies fail.
        """
        print(f"▶ Attempting to scrape: {url[:60]}...")

        methods = [
            ('Cloudscraper (Desktop)', lambda: self._scrape_with_cloudscraper(url, False)),
            ('Enhanced Requests', lambda: self._scrape_with_requests(url)),
            ('Mobile Version', lambda: self._try_mobile_version(url)),
            ('Reviews Page Direct', lambda: self._scrape_reviews_only(url))
        ]
        last_index = len(methods) - 1

        for index, (method_name, method_func) in enumerate(methods):
            try:
                print(f"  - Method: {method_name}")
                result = method_func()

                if result and result.get('reviews'):
                    print(f"  - ✓ Success with {method_name}: {len(result['reviews'])} reviews")
                    return result

                # Pause before the next strategy; pointless after the last one.
                if index < last_index:
                    time.sleep(random.uniform(2, 4))

            except Exception as e:
                print(f"  - {method_name} failed: {str(e)[:50]}")
                continue

        print(f"  - ⛔ All methods failed for this URL")
        return None

    def _scrape_with_cloudscraper(self, url: str, mobile: bool = False) -> Optional[Dict]:
        """Scrape the product page through cloudscraper.

        ``mobile`` is accepted for interface compatibility and is not used.
        """
        response = self.scraper.get(url, timeout=20)

        if response.status_code != 200:
            return None

        soup = BeautifulSoup(response.content, 'lxml')

        # Check if blocked
        page_text = soup.get_text().lower()
        if 'captcha' in page_text or 'robot' in page_text[:500]:
            return None

        # If no reviews on main page, the reviews page is tried as well
        return self._build_result(soup, url)

    def _scrape_with_requests(self, url: str) -> Optional[Dict]:
        """Scrape the product page with the plain session and desktop headers."""
        time.sleep(random.uniform(1, 3))

        response = self.session.get(url, headers=self._get_headers(), timeout=15)

        if response.status_code != 200:
            return None

        soup = BeautifulSoup(response.content, 'lxml')

        if 'captcha' in response.text.lower():
            return None

        return self._build_result(soup, url)

    def _scrape_reviews_only(self, url: str) -> Optional[Dict]:
        """Get reviews from the reviews page, then product info if reachable."""
        reviews, ratings = self._try_reviews_page(url)

        if not reviews:
            return None

        # Get basic product info from main page if possible
        try:
            response = self.scraper.get(url, timeout=10)
            soup = BeautifulSoup(response.content, 'lxml')
            product_name = self._extract_product_name(soup)
            price = self._extract_price(soup)
        except Exception:
            product_name = "Product"
            price = 0.0

        return {
            'product_name': product_name,
            'price': price,
            'reviews': reviews,
            'ratings': ratings
        }


# ============================================================================
# CSV Creation
# ============================================================================

def create_csv_from_data(products_data: List[Dict], filepath: str) -> Optional[pd.DataFrame]:
    """Flatten scraped products into one row per review and save them as CSV.

    Args:
        products_data: dicts with the keys 'product_name', 'price', 'reviews'
            and 'ratings'; 'reviews' and 'ratings' are paired positionally.
        filepath: destination path of the CSV file.

    Returns:
        The DataFrame that was written, or None when writing failed.
    """
    print(f"\n▶ Creating CSV file at '{filepath}'...")

    columns = ['product_name', 'price', 'rating', 'review_text']
    rows = [
        {
            'product_name': product['product_name'],
            'price': product['price'],
            'rating': rating,
            'review_text': review
        }
        for product in products_data
        for review, rating in zip(product['reviews'], product['ratings'])
    ]

    # Explicit columns keep the header row even when there are no reviews, so
    # the file can always be read back.
    df = pd.DataFrame(rows, columns=columns)
    try:
        df.to_csv(filepath, index=False)
    except Exception as e:
        print(f"  - Error saving CSV: {e}")
        return None

    print(f" ✓ Successfully saved {len(df)} reviews to '{filepath}'")
    return df


# ============================================================================
# STAGE 2: SENTIMENT ANALYSIS
# ============================================================================

class SentimentAnalyzer:
    """Thin wrapper around VADER that labels text Positive / Negative / Neutral."""

    # Compound scores at or beyond these bounds are labelled Positive / Negative.
    POSITIVE_THRESHOLD = 0.05
    NEGATIVE_THRESHOLD = -0.05

    _RESULT_COLUMNS = [
        'product',
        'review',
        'original_rating',
        'vader_sentiment',
        'vader_compound_score',
    ]

    def __init__(self):
        self.sia = SentimentIntensityAnalyzer()

    @staticmethod
    def _is_blank(text) -> bool:
        """True for None, '' and whitespace-only text."""
        return not text or not str(text).strip()

    def get_compound_score(self, text: str) -> float:
        """Return the VADER compound score of ``text`` (0.0 for blank text)."""
        if self._is_blank(text):
            return 0.0
        return self.sia.polarity_scores(str(text))['compound']

    def predict(self, text: str) -> Dict:
        """Classify ``text``.

        Returns:
            A dict with 'sentiment' (label), 'confidence' (the VADER
            pos/neg/neu proportion matching the label) and 'compound_score'.
            Blank text, including whitespace-only text, is Neutral with
            confidence 1.0.
        """
        if self._is_blank(text):
            return {'sentiment': 'Neutral', 'confidence': 1.0, 'compound_score': 0.0}

        scores = self.sia.polarity_scores(str(text))
        compound_score = scores['compound']

        if compound_score >= self.POSITIVE_THRESHOLD:
            sentiment_label, confidence = 'Positive', scores['pos']
        elif compound_score <= self.NEGATIVE_THRESHOLD:
            sentiment_label, confidence = 'Negative', scores['neg']
        else:
            sentiment_label, confidence = 'Neutral', scores['neu']

        return {
            'sentiment': sentiment_label,
            'confidence': float(confidence),
            'compound_score': float(compound_score)
        }

    def evaluate_on_data(self, products_data: List[Dict]) -> pd.DataFrame:
        """Score every review of every product.

        Args:
            products_data: dicts with 'product_name', 'reviews' and 'ratings'.

        Returns:
            One row per review with the columns product, review,
            original_rating, vader_sentiment and vader_compound_score. The
            columns are present even when there are no reviews.
        """
        print("\n▶ Applying VADER sentiment analysis...")

        records = []
        for product in products_data:
            for review, rating in zip(product['reviews'], product['ratings']):
                prediction = self.predict(review)
                records.append({
                    'product': product['product_name'],
                    'review': review,
                    'original_rating': rating,
                    'vader_sentiment': prediction['sentiment'],
                    'vader_compound_score': prediction['compound_score'],
                })

        df = pd.DataFrame(records, columns=self._RESULT_COLUMNS)

        print(f" ✓ Processed {len(df)} total reviews.")
        if len(df) > 0:
            print("\n ✓ Sentiment Distribution:")
            print(df['vader_sentiment'].value_counts())

        return df


# ============================================================================
# STAGE 3: ASPECT-BASED ANALYSIS
# ============================================================================

class AspectAnalyzer:
    """Group review sentiment by product aspect using keyword matching.

    A review counts as a mention of an aspect when one of the aspect's
    keywords starts a word in the review. The score attributed to the aspect
    is the compound score of the whole review, not of the matching sentence.
    """

    # Keywords this short must match a whole word; longer ones may be a word
    # prefix (so 'charge' also matches 'charger', 'lag' matches 'laggy').
    _WHOLE_WORD_MAX_LEN = 2

    POSITIVE_THRESHOLD = 0.05
    NEGATIVE_THRESHOLD = -0.05

    def __init__(self, sentiment_analyzer: "SentimentAnalyzer"):
        # Any object exposing get_compound_score(text) -> float works here.
        self.analyzer = sentiment_analyzer
        self.aspect_keywords = {
            'battery': ['battery', 'charge', 'charging', 'power', 'backup', 'mah', 'drain'],
            'camera': ['camera', 'photo', 'picture', 'lens', 'zoom', 'video', 'selfie'],
            'screen': ['screen', 'display', 'brightness', 'amoled', 'refresh'],
            'performance': ['performance', 'speed', 'fast', 'slow', 'lag', 'processor', 'ram'],
            'design': ['design', 'look', 'build', 'weight', 'premium', 'slim'],
            'price': ['price', 'cost', 'expensive', 'cheap', 'value', 'worth'],
            'sound': ['sound', 'audio', 'speaker', 'volume', 'music'],
            'software': ['software', 'os', 'ui', 'update', 'bloatware', 'apps']
        }

    @classmethod
    def _compile_keywords(cls, keywords):
        """Build one regex matching any keyword at the start of a word.

        A keyword may not be preceded by a letter, which stops 'os' from
        matching inside 'cost' or 'lag' inside 'flagship'. A preceding digit
        is allowed so that '5000mah' still matches 'mah'. Returns None for an
        empty keyword list.
        """
        alternatives = []
        for keyword in keywords:
            keyword = str(keyword).lower()
            if not keyword:
                continue
            fragment = re.escape(keyword)
            if len(keyword) <= cls._WHOLE_WORD_MAX_LEN:
                fragment += r'(?![a-z])'
            alternatives.append(fragment)

        if not alternatives:
            return None
        return re.compile(r'(?<![a-z])(?:' + '|'.join(alternatives) + r')')

    def analyze_aspects(self, product_data: Dict) -> Dict[str, Dict]:
        """Summarise sentiment per aspect for one product.

        Args:
            product_data: dict whose 'reviews' entry is a list of review texts.

        Returns:
            A dict keyed by aspect name. Each value holds
            'avg_compound_score', 'sentiment' and 'mention_count'. Aspects
            that no review mentions are omitted; no reviews gives ``{}``.
        """
        reviews = product_data.get('reviews', [])
        if not reviews:
            return {}

        # Compiled per call so that edits to self.aspect_keywords take effect.
        patterns = {
            aspect: self._compile_keywords(keywords)
            for aspect, keywords in self.aspect_keywords.items()
        }

        aspect_scores = defaultdict(list)
        for review in reviews:
            review_lower = str(review).lower()
            compound_score = self.analyzer.get_compound_score(review)

            for aspect, pattern in patterns.items():
                if pattern is not None and pattern.search(review_lower):
                    aspect_scores[aspect].append(compound_score)

        aspect_analysis = {}
        for aspect, scores in aspect_scores.items():
            avg_score = float(np.mean(scores))

            if avg_score >= self.POSITIVE_THRESHOLD:
                sentiment_label = 'Positive'
            elif avg_score <= self.NEGATIVE_THRESHOLD:
                sentiment_label = 'Negative'
            else:
                sentiment_label = 'Neutral'

            aspect_analysis[aspect] = {
                'avg_compound_score': avg_score,
                'sentiment': sentiment_label,
                'mention_count': len(scores),
            }

        return aspect_analysis


# ============================================================================
# STAGE 4: LLM RECOMMENDATIONS
# ============================================================================

class GeminiAnalyzer:
    """Ask the Gemini API for alternative-product recommendations.

    The API key is taken from the ``api_key`` argument or, when that is empty,
    from the ``GEMINI_API_KEY`` environment variable. Keys can be created at
    https://aistudio.google.com/app/api-keys. Do not paste a key into the
    source.
    """

    DEFAULT_MODEL = "gemini-2.0-flash-exp"
    API_BASE = "https://generativelanguage.googleapis.com/v1beta/models"

    def __init__(self, api_key: Optional[str] = None, model: str = DEFAULT_MODEL):
        """
        Args:
            api_key: Gemini API key; falls back to the GEMINI_API_KEY
                environment variable when omitted or empty.
            model: model name used in the generateContent endpoint.
        """
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY", "")
        self.model = model
        # The key is sent as a request header, not in the URL, so it cannot
        # leak through error messages that echo the request URL.
        self.api_url = f"{self.API_BASE}/{model}:generateContent"

    def _build_prompt(self, product_data: Dict, aspect_data: Dict) -> str:
        """Render the recommendation prompt for one product.

        Aspects are listed from lowest to highest average compound score. A
        missing or non-positive price is shown as 'Unknown'.
        """
        aspect_summary = []
        if aspect_data:
            sorted_aspects = sorted(aspect_data.items(), key=lambda x: x[1]['avg_compound_score'])
            for aspect, data in sorted_aspects:
                aspect_summary.append(
                    f"  - {aspect.upper()}: {data['sentiment']} (Score: {data['avg_compound_score']:.3f}, {data['mention_count']} mentions)"
                )

        name = str(product_data.get('product_name') or 'Unknown Product')[:100]
        price = product_data.get('price')
        if isinstance(price, (int, float)) and price > 0:
            price_text = f"₹{price:,.0f}"
        else:
            # The scraper reports 0.0 when no price was found.
            price_text = "Unknown"

        sentiment_block = "\n".join(aspect_summary) if aspect_summary else 'Limited review data'

        lines = [
            "You are a tech product expert. Analyze this smartphone and recommend 3 BETTER alternatives currently available in India.",
            "",
            "CURRENT PRODUCT:",
            f"Name: {name}",
            f"Price: {price_text}",
            "",
            "USER REVIEW SENTIMENT ANALYSIS:",
            sentiment_block,
            "",
            "TASK:",
            "Find 3 similar smartphones that are BETTER alternatives. For each alternative, provide:",
            "1. Product name and current price in India (₹)",
            "2. 2-3 lines explaining WHY it's better (specifically address the weak points above, especially camera if negative)",
            "3. Key improvements over the current product",
            "",
            "Format your response clearly with product names as headers.",
        ]
        return "\n".join(lines)

    def get_recommendations(self, product_data: Dict, aspect_data: Dict) -> str:
        """Return the model's recommendations as text.

        Never raises: a missing key, an HTTP error, an empty response or any
        other failure is reported as a message string starting with a warning
        sign.
        """
        if not self.api_key:
            return "⚠ No Gemini API key provided. Set GEMINI_API_KEY environment variable."

        print("\n▶ Generating AI recommendations...")

        try:
            payload = {
                "contents": [{"parts": [{"text": self._build_prompt(product_data, aspect_data)}]}],
                "generationConfig": {
                    "temperature": 0.7,
                    "topK": 40,
                    "topP": 0.95,
                    "maxOutputTokens": 2048,
                },
                "tools": [{"google_search": {}}]
            }

            response = requests.post(
                self.api_url,
                headers={
                    'Content-Type': 'application/json',
                    'x-goog-api-key': self.api_key,
                },
                json=payload,
                timeout=90
            )
            response.raise_for_status()

            result = response.json()
            print(f"  - API Response Status: {response.status_code}")

            # Extract text from response
            candidates = result.get('candidates', [])
            if not candidates:
                print(f"  - ⚠ No candidates in response")
                return "⚠ No recommendations generated (API returned empty response)"

            parts = candidates[0].get('content', {}).get('parts', [])
            if not parts:
                print(f"  - ⚠ No parts in content")
                return "⚠ No recommendations generated (empty content)"

            # Combine all text parts
            full_text = "".join(part['text'] + "\n" for part in parts if 'text' in part)

            if not full_text.strip():
                print(f"  - ⚠ Empty text in response")
                # Print raw response for debugging
                print(f"  - Raw response: {json.dumps(result, indent=2)[:500]}")
                return "⚠ No recommendations generated (text extraction failed)"

            print(f"  - ✓ Generated {len(full_text)} characters of recommendations")
            return full_text.strip()

        except requests.exceptions.HTTPError as e:
            # e.response can be None, so test the value, not the attribute.
            error_detail = e.response.text if e.response is not None else str(e)
            print(f"  - ⛔ API Error: {error_detail[:200]}")
            return f"⚠ API Error: {error_detail[:200]}"
        except Exception as e:
            print(f"  - ⛔ Error: {str(e)}")
            return f"⚠ Error generating recommendations: {str(e)[:200]}"


# ============================================================================
# MAIN EXECUTION
# ============================================================================

def main():
    """Run the whole pipeline for one product URL read from standard input.

    Stages: scrape the product page, save the reviews to CSV, run VADER
    sentiment analysis, aggregate sentiment per aspect, and ask Gemini for
    alternative products. Progress and results are printed; nothing is returned.
    """
    csv_path = 'scraped_product_data.csv'
    banner = "=" * 80
    rule = "─" * 80

    print(banner)
    print("ENHANCED AMAZON PRODUCT SCRAPER & ANALYZER")
    print(banner)

    PRODUCT_URL = input("\nEnter Amazon India product URL: ").strip()

    if not PRODUCT_URL:
        print("❌ No URL provided")
        return

    # --- STAGE 1: Web Scraping ---
    print("\n" + banner)
    print("STAGE 1: MULTI-METHOD WEB SCRAPING")
    print(banner)

    scraper = ProductScraper()
    product_data = scraper.scrape_product(PRODUCT_URL)

    if not product_data or not product_data.get('reviews'):
        print("\n❌ Failed to scrape reviews from all methods.")
        print("\nPossible solutions:")
        print("1. Use a VPN or different IP address")
        print("2. Try a different product URL")
        print("3. Run the script from a cloud service (Google Colab, etc.)")
        print("4. Use Amazon Product Advertising API (requires approval)")
        return

    products_list = [product_data]
    # create_csv_from_data returns None when the file could not be written.
    csv_saved = create_csv_from_data(products_list, csv_path) is not None

    # --- STAGE 2: Sentiment Analysis ---
    print("\n" + banner)
    print("STAGE 2: SENTIMENT ANALYSIS")
    print(banner)

    sentiment_analyzer = SentimentAnalyzer()
    # Called for the sentiment distribution it prints.
    sentiment_analyzer.evaluate_on_data(products_list)

    # --- STAGE 3: Aspect Analysis ---
    print("\n" + banner)
    print("STAGE 3: ASPECT-BASED ANALYSIS")
    print(banner)

    aspect_analyzer = AspectAnalyzer(sentiment_analyzer)
    aspects = aspect_analyzer.analyze_aspects(product_data)

    if aspects:
        print(f"\n▶ Analysis for: {product_data['product_name']}")
        print(f"  {'Aspect':<14} | {'Mentions':<8} | {'Score':<8} | {'Sentiment'}")
        print("  " + "-" * 50)

        emoji_by_sentiment = {'Positive': '😊', 'Neutral': '😐'}
        by_mentions = sorted(aspects.items(), key=lambda x: x[1]['mention_count'], reverse=True)
        for aspect, data in by_mentions:
            emoji = emoji_by_sentiment.get(data['sentiment'], '😞')
            print(f"  {aspect.upper():<14} | {data['mention_count']:<8} | {data['avg_compound_score']:<8.3f} | {data['sentiment']} {emoji}")

    # --- STAGE 4: AI Recommendations ---
    print("\n" + banner)
    print("STAGE 4: AI RECOMMENDATIONS")
    print(banner)

    gemini = GeminiAnalyzer()
    recommendations = gemini.get_recommendations(product_data, aspects)

    print("\n" + rule)
    print("📱 ALTERNATIVE PRODUCT RECOMMENDATIONS:")
    print(rule)
    print(recommendations)
    print(rule)

    print(f"\n{banner}")
    print(f"✅ ANALYSIS COMPLETE!")
    print(f"   • Scraped: {len(product_data.get('reviews', []))} reviews")
    print(f"   • Analyzed: {len(aspects)} product aspects")
    if csv_saved:
        print(f"   • CSV saved: {csv_path}")
    else:
        print(f"   • CSV not saved: writing {csv_path} failed (see error above)")
    print(f"{banner}")


# Start the interactive pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

ENHANCED AMAZON PRODUCT SCRAPER & ANALYZER

Enter Amazon India product URL: https://www.amazon.in/Galaxy-S24-FE-Graphite-Processor/dp/B0F1YQDGL9/ref=pd_sbs_d_sccl_1_4/521-6229103-3245537?pd_rd_w=KM0q9&content-id=amzn1.sym.6d240404-f8ea-42f5-98fe-bf3c8ec77086&pf_rd_p=6d240404-f8ea-42f5-98fe-bf3c8ec77086&pf_rd_r=18FFJYGBX5BYN57AC59C&pd_rd_wg=wsTmc&pd_rd_r=09ff76bc-efc4-42a9-ba7a-a14c7682e430&pd_rd_i=B0F1YQDGL9&psc=1

STAGE 1: MULTI-METHOD WEB SCRAPING
▶ Attempting to scrape: https://www.amazon.in/Galaxy-S24-FE-Graphite-Processor/dp/B0...
  - Method: Cloudscraper (Desktop)
  - ✓ Success with Cloudscraper (Desktop): 8 reviews

▶ Creating CSV file at 'scraped_product_data.csv'...
 ✓ Successfully saved 8 reviews to 'scraped_product_data.csv'

STAGE 2: SENTIMENT ANALYSIS

▶ Applying VADER sentiment analysis...
 ✓ Processed 8 total reviews.

 ✓ Sentiment Distribution:
vader_sentiment
Positive    7
Negative    1
Name: count, dtype: int64

STAGE 3: ASPECT-BASED ANALYSIS

‚ñ∂ Analys