# In [1]: (Jupyter notebook cell)
import pandas as pd
from textblob import TextBlob
import nltk

# Download the NLTK 'punkt' resource for TextBlob; quiet=True keeps the
# downloader from printing progress messages.
# NOTE(review): this executes at import time and presumably needs network
# access the first time it runs — confirm that is acceptable for this script.
nltk.download('punkt', quiet=True)

# Step 2: Load a sample dataset of reviews
def create_sample_dataset():
    """Build the demo DataFrame of ten hard-coded product reviews.

    Returns:
        A DataFrame with two columns, in this order: 'review_text' (the raw
        review string) and 'review_id' (1-based sequential identifier).
    """
    sample_reviews = [
        "This product is absolutely amazing! I love it.",
        "Worst purchase ever. Completely disappointed.",
        "It's okay, not great but not terrible either.",
        "Excellent quality and fast delivery. Highly recommended!",
        "The product broke after 2 days. Very poor quality.",
        "Good value for money. Satisfied with my purchase.",
        "Terrible customer service and the product doesn't work.",
        "Best thing I've bought this year! Works perfectly.",
        "Average product, nothing special.",
        "I'm so happy with this purchase! Exceeded my expectations.",
    ]

    # Identifiers start at 1 and follow the order of the list above.
    review_ids = range(1, len(sample_reviews) + 1)

    columns = {
        'review_text': sample_reviews,
        'review_id': review_ids,
    }
    return pd.DataFrame(columns)

# Step 3: Preprocess text (convert to lowercase)
def preprocess_text(text):
    """Normalise a review: stringify, lowercase, trim surrounding whitespace.

    Non-string inputs are passed through ``str()`` first, so any value is
    accepted and a string is always returned.
    """
    as_string = str(text)
    lowered = as_string.lower()
    return lowered.strip()

# Step 4-5: Apply TextBlob sentiment analyzer and extract polarity/subjectivity
def analyze_sentiment(text):
    """Score *text* with TextBlob and return its sentiment measures.

    Returns:
        A ``(polarity, subjectivity)`` tuple.
        polarity lies in [-1, 1]: -1 is negative, 1 is positive.
        subjectivity lies in [0, 1]: 0 is objective, 1 is subjective.
    """
    scores = TextBlob(text).sentiment
    return scores.polarity, scores.subjectivity

# Step 6: Classify sentiment based on polarity
def classify_sentiment(polarity, threshold=0.1):
    """Map a polarity score to a sentiment label.

    Args:
        polarity: Numeric polarity score (TextBlob produces values in
            [-1, 1]).
        threshold: Half-width of the neutral band around zero. Scores
            strictly above ``threshold`` are 'Positive', scores strictly
            below ``-threshold`` are 'Negative', everything else (including
            the boundaries and NaN) is 'Neutral'. Defaults to 0.1.

    Returns:
        'Positive', 'Negative' or 'Neutral'.

    Raises:
        ValueError: If ``threshold`` is negative, because the positive and
            negative ranges would then overlap.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    if polarity > threshold:
        return 'Positive'
    if polarity < -threshold:
        return 'Negative'
    return 'Neutral'

# Step 7: Analyze overall sentiment
def analyze_overall_sentiment(df):
    """Summarise sentiment labels and scores across all reviews.

    Args:
        df: DataFrame with a 'sentiment' column holding the labels
            'Positive', 'Negative' or 'Neutral', plus 'polarity' and
            'subjectivity' columns.

    Returns:
        A dict with the total review count, the count and percentage of
        each label, and the mean polarity and subjectivity. For an empty
        DataFrame the percentages are 0.0 instead of raising
        ZeroDivisionError; the averages are whatever ``Series.mean()``
        yields for an empty column.
    """
    total_reviews = len(df)

    # One pass over the column instead of building a filtered frame per label.
    label_counts = df['sentiment'].value_counts()

    def count_of(label):
        # Plain int so the summary holds native Python numbers.
        return int(label_counts.get(label, 0))

    def percent_of(count):
        # Guard the empty-DataFrame case, where the share is undefined.
        if total_reviews == 0:
            return 0.0
        return (count / total_reviews) * 100

    positive_count = count_of('Positive')
    negative_count = count_of('Negative')
    neutral_count = count_of('Neutral')

    return {
        'Total Reviews': total_reviews,
        'Positive Reviews': positive_count,
        'Negative Reviews': negative_count,
        'Neutral Reviews': neutral_count,
        'Positive %': percent_of(positive_count),
        'Negative %': percent_of(negative_count),
        'Neutral %': percent_of(neutral_count),
        'Average Polarity': df['polarity'].mean(),
        'Average Subjectivity': df['subjectivity'].mean(),
    }

# Main execution
def _print_banner(title, leading_blank=True):
    """Print *title* framed by two 60-character '=' rules."""
    rule = "=" * 60
    print("\n" + rule if leading_blank else rule)
    print(title)
    print(rule)


def main():
    """Run the full demo pipeline and report the results.

    Loads the sample reviews, lowercases them, scores each one with
    TextBlob, labels it, prints a per-review and an overall report, and
    writes 'sentiment_analysis_detailed.csv' and
    'sentiment_analysis_summary.csv' to the current working directory.
    """
    _print_banner("SENTIMENT ANALYSIS USING TEXTBLOB", leading_blank=False)

    # Load dataset
    print("\nStep 2: Loading sample dataset...")
    df = create_sample_dataset()
    print(f"Loaded {len(df)} reviews")

    # Preprocess text
    print("\nStep 3: Preprocessing text (converting to lowercase)...")
    df['cleaned_text'] = df['review_text'].apply(preprocess_text)

    # Apply sentiment analysis; score each review once, then split the
    # (polarity, subjectivity) pairs into their own columns.
    print("\nStep 4-5: Applying TextBlob sentiment analysis...")
    scores = [analyze_sentiment(text) for text in df['cleaned_text']]
    df['polarity'] = [polarity for polarity, _ in scores]
    df['subjectivity'] = [subjectivity for _, subjectivity in scores]

    # Classify sentiment
    print("\nStep 6: Classifying sentiments...")
    df['sentiment'] = df['polarity'].apply(classify_sentiment)

    # Display results
    _print_banner("SENTIMENT ANALYSIS RESULTS")

    print("\nDetailed Review Analysis:")
    preview_length = 50
    for row in df.itertuples(index=False):
        text = row.review_text
        # Only mark the preview with an ellipsis when text was actually cut.
        if len(text) > preview_length:
            preview = text[:preview_length] + "..."
        else:
            preview = text
        print(f"\nReview {row.review_id}:")
        print(f"  Text: {preview}")
        print(f"  Polarity: {row.polarity:.3f}")
        print(f"  Subjectivity: {row.subjectivity:.3f}")
        print(f"  Sentiment: {row.sentiment}")

    # Analyze overall sentiment
    _print_banner("OVERALL SENTIMENT ANALYSIS")

    overall_stats = analyze_overall_sentiment(df)

    for key, value in overall_stats.items():
        if '%' in key:
            print(f"{key}: {value:.2f}%")
        elif key in ('Average Polarity', 'Average Subjectivity'):
            print(f"{key}: {value:.3f}")
        else:
            print(f"{key}: {value}")

    # Step 8: Save results
    print("\nStep 8: Saving results to CSV files...")

    # Escape sequences keep the symbols intact even if the file is re-encoded.
    check_mark = "\u2713"  # check mark

    # Save detailed results
    detailed_df = df[['review_id', 'review_text', 'cleaned_text',
                      'polarity', 'subjectivity', 'sentiment']]
    detailed_df.to_csv('sentiment_analysis_detailed.csv', index=False)
    print(f"{check_mark} Detailed results saved to 'sentiment_analysis_detailed.csv'")

    # Save summary statistics
    summary_df = pd.DataFrame([overall_stats])
    summary_df.to_csv('sentiment_analysis_summary.csv', index=False)
    print(f"{check_mark} Summary statistics saved to 'sentiment_analysis_summary.csv'")

    # Display color-coded summary
    _print_banner("COLOR-CODED SENTIMENT SUMMARY")

    markers = {
        'Positive': "\U0001F7E2",  # green circle
        'Negative': "\U0001F534",  # red circle
        'Neutral': "\U0001F7E1",   # yellow circle
    }

    print("\nSentiment Distribution:")
    for sentiment, marker in markers.items():
        # Reuse the figures already computed for the overall report.
        count = overall_stats[f'{sentiment} Reviews']
        percentage = overall_stats[f'{sentiment} %']
        bar = marker * int(percentage / 5)  # One marker per 5 percentage points
        print(f"{sentiment}: {bar} {percentage:.1f}% ({count} reviews)")

    _print_banner("Analysis complete!")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

SENTIMENT ANALYSIS USING TEXTBLOB

Step 2: Loading sample dataset...
Loaded 10 reviews

Step 3: Preprocessing text (converting to lowercase)...

Step 4-5: Applying TextBlob sentiment analysis...

Step 6: Classifying sentiments...

SENTIMENT ANALYSIS RESULTS

Detailed Review Analysis:

Review 1:
  Text: This product is absolutely amazing! I love it....
  Polarity: 0.625
  Subjectivity: 0.750
  Sentiment: Positive

Review 2:
  Text: Worst purchase ever. Completely disappointed....
  Polarity: -0.875
  Subjectivity: 0.875
  Sentiment: Negative

Review 3:
  Text: It's okay, not great but not terrible either....
  Polarity: 0.200
  Subjectivity: 0.750
  Sentiment: Positive

Review 4:
  Text: Excellent quality and fast delivery. Highly recomm...
  Polarity: 0.467
  Subjectivity: 0.713
  Sentiment: Positive

Review 5:
  Text: The product broke after 2 days. Very poor quality....
  Polarity: -0.520
  Subjectivity: 0.780
  Sentiment: Negative

Review 6:
  Text: Good value for money. Satisfied w