# Mental Health Sentiment Analyzer

**Student:** Marco Landeo (mlandeo@stevens.edu)

This notebook performs sentiment analysis on text data to identify mental health-related sentiment trends.


In [1]:
# Import required modules
import os
import tempfile

import matplotlib.pyplot as plt
import pandas as pd

from text_dataset import TextDataset
from sentiment_analyzer import SentimentAnalyzer
from utils import calculate_accuracy, generate_report

print("All modules imported successfully!")


All modules imported successfully!


## Step 1: Load and Preprocess Data


In [2]:
# Load the dataset, reporting load failures in a readable form.
# Each handler prints its message and then re-raises: every later cell needs
# `dataset`, so continuing after a failed load would either hit a NameError
# further down or silently reuse a stale `dataset` left in the kernel by an
# earlier run.
try:
    dataset = TextDataset('sample_data.csv')
    print(f"Dataset loaded: {dataset}")
    print(f"Number of texts: {len(dataset.raw_texts)}")
except FileNotFoundError as e:
    print(f"Error: {e}")
    print("Please ensure sample_data.csv exists in the current directory.")
    raise  # stop here; downstream cells cannot run without the dataset
except ValueError as e:
    print(f"Error: {e}")
    raise  # stop here; downstream cells cannot run without the dataset


Dataset loaded: TextDataset(file_path='sample_data.csv', num_texts=10)
Number of texts: 10


In [3]:
# Run the dataset's preprocessing step, then report the number of cleaned
# texts and show the first one as a sample.
dataset.preprocess()
cleaned_texts = dataset.cleaned_texts
print(f"Preprocessed {len(cleaned_texts)} texts")
print(f"Sample cleaned text: {cleaned_texts[0]}")


Preprocessed 10 texts
Sample cleaned text: i feel great today and everything is wonderful


## Step 2: Perform Sentiment Analysis


In [None]:
# Wrap the dataset in a SentimentAnalyzer and show its repr.
# The repr is printed before analyze_all() runs.
analyzer = SentimentAnalyzer(dataset)
print(f"Analyzer created: {analyzer}")

# Run the analysis over every text and report how many results were produced.
analyzer.analyze_all()
num_analyzed = len(analyzer.results)
print(f"Analysis complete! Analyzed {num_analyzed} texts")


Analyzer created: SentimentAnalyzer(dataset=TextDataset(file_path='sample_data.csv', num_texts=10), analyzed_texts=0)
Analysis complete! Analyzed 10 texts


In [5]:
# Print one indented "label: count" line per entry in the analyzer's
# sentiment_counts mapping.
print("Sentiment Distribution:")
counts_by_label = analyzer.sentiment_counts
for label, total in counts_by_label.items():
    print(f"  {label}: {total}")


Sentiment Distribution:
  positive: 4
  neutral: 3
  negative: 3


## Step 3: Visualize Results


In [6]:
# Ask the analyzer to render its results to an image file, then confirm.
figure_path = 'sentiment_distribution.png'
analyzer.visualize_results(figure_path)
print("Visualization saved!")


Visualization saved to sentiment_distribution.png
Visualization saved!


## Step 4: Generate Report


In [7]:
# Write the analysis report to a JSON file; a ValueError from
# generate_report is reported instead of propagating.
report_path = 'sentiment_report.json'
try:
    generate_report(analyzer, report_path)
    print("Report generated successfully!")
except ValueError as report_error:
    print(f"Error generating report: {report_error}")


Report saved to sentiment_report.json
Report generated successfully!


## Step 5: Calculate Accuracy (if ground truth is available)


In [None]:
# Accuracy demonstration.
# The "actual" labels below are hard-coded sample values, not real ground
# truth; in real use they would be loaded alongside the dataset.

# Pull the predicted label out of every analysis result.
predictions = list(map(lambda entry: entry['sentiment'], analyzer.results))

# Ten hand-written demonstration labels.
actual = (
    ['positive', 'neutral', 'negative', 'positive', 'neutral']
    + ['negative', 'positive', 'negative', 'positive', 'negative']
)

# A ValueError from calculate_accuracy is reported rather than propagated.
try:
    accuracy = calculate_accuracy(predictions, actual)
    print(f"Accuracy: {accuracy:.2%}")
    print("Note: This uses sample labels for demonstration. Use real ground truth in practice.")
except ValueError as accuracy_error:
    print(f"Error calculating accuracy: {accuracy_error}")


Accuracy: 100.00%


## Step 6: Interactive Testing (While Loop)


In [None]:
# Interactive sentiment analysis (while loop example)
def interactive_sentiment_loop(sentiment_analyzer, read_line=input, write=print):
    """Prompt for sentences and report each one's sentiment until the user quits.

    Args:
        sentiment_analyzer: Object exposing ``analyze_sentiment(text)``; its
            return value is shown after the ``Sentiment:`` label.
        read_line: Callable that receives the prompt string and returns the
            line the user typed. Defaults to the built-in ``input``.
        write: Callable that receives each output line. Defaults to ``print``.

    Returns:
        int: The number of sentences that were analyzed.

    The loop ends when the user types ``quit`` (any letter case, surrounding
    whitespace ignored) or when ``read_line`` raises ``EOFError`` or
    ``KeyboardInterrupt``.
    """
    separator = "-" * 40
    write("Interactive Sentiment Analysis")
    write("Type 'quit' to exit")
    write(separator)

    analyzed = 0
    while True:
        try:
            user_input = read_line("Enter a sentence to analyze (or 'quit' to exit): ")
        except (EOFError, KeyboardInterrupt):
            # Input stream ended or the user interrupted: leave the loop
            # cleanly instead of aborting the cell with a traceback.
            break

        # Strip before comparing so "quit " or " QUIT" also exits rather than
        # being analyzed as a sentence.
        stripped = user_input.strip()
        if stripped.lower() == 'quit':
            break

        if stripped:
            # The text is passed to the analyzer exactly as typed.
            sentiment = sentiment_analyzer.analyze_sentiment(user_input)
            write(f"Sentiment: {sentiment}")
            analyzed += 1
        else:
            write("Please enter a valid sentence.")
        write(separator)

    write("Exiting interactive mode.")
    return analyzed


# Only start the session when the analyzer from Step 2 exists in this kernel.
if 'analyzer' not in globals():
    print("Error: 'analyzer' object not found. Please run Step 2 first.")
else:
    interactive_sentiment_loop(analyzer)


Interactive Sentiment Analysis
Type 'quit' to exit
----------------------------------------


Enter a sentence to analyze (or 'quit' to exit):  quit


Exiting interactive mode.


## Step 7: Demonstrate Advanced Features


In [10]:
# Generator demo: pull batches of three texts from the dataset and report
# each batch's size, stopping after the third batch to keep the output short.
print("Using generator to process batches:")
max_batches_shown = 3
batch_count = 0  # stays 0 if the generator yields nothing
for batch_count, batch in enumerate(dataset.batch_generator(batch_size=3), start=1):
    print(f"Batch {batch_count}: {len(batch)} texts")
    if batch_count >= max_batches_shown:
        break


Using generator to process batches:
Batch 1: 3 texts
Batch 2: 3 texts
Batch 3: 3 texts


In [11]:
# Demonstrate operator overloading (__add__)
# A one-row CSV is written to a temporary file, loaded as a second
# TextDataset, and merged with the main dataset using `+`.
print("Demonstrating operator overloading:")
print(f"Original dataset: {len(dataset.raw_texts)} texts")

temp_file = None  # path of the temporary CSV once it has been created
try:
    # delete=False keeps the file on disk after the `with` block closes it,
    # so TextDataset can reopen it by path; it is removed in `finally` below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
        temp_file = f.name
        f.write("text\n")
        f.write("Additional text for merging\n")

    dataset2 = TextDataset(temp_file)
    merged_dataset = dataset + dataset2
    print(f"Merged dataset: {len(merged_dataset.raw_texts)} texts")
except Exception as e:
    print(f"Error in merging: {e}")
finally:
    # Always remove the temporary file, including when loading or merging
    # failed, so the demo does not leave stray files behind.
    if temp_file is not None:
        try:
            os.unlink(temp_file)
        except OSError as cleanup_error:
            print(f"Warning: could not remove temporary file {temp_file}: {cleanup_error}")


Demonstrating operator overloading:
Original dataset: 10 texts
Merged dataset: 11 texts


In [None]:
# Word-frequency demo: fetch the word -> count mapping from the dataset and
# list the five entries with the highest counts.
word_freq = dataset.get_word_frequencies()
print("Top 5 most frequent words:")
# Sort (word, count) pairs by count, largest first.
sorted_words = sorted(word_freq.items(), key=lambda pair: pair[1], reverse=True)
top_five = sorted_words[:5]
for term, occurrences in top_five:
    print(f"  {term}: {occurrences}")


Top 5 most frequent words:
  and: 5
  about: 4
  this: 3
  i: 2
  feel: 2


In [None]:
# Show the value returned by get_sentiment_categories() together with its
# Python type.
categories = dataset.get_sentiment_categories()
categories_type = type(categories)
print(f"Sentiment categories: {categories}")
print(f"Type: {categories_type}")


Sentiment categories (immutable tuple): ('positive', 'neutral', 'negative')
Type: <class 'tuple'>


In [14]:
# Print every summary statistic on its own indented line; float values are
# rounded to two decimals, everything else is shown as-is.
stats = analyzer.get_summary_statistics()
print("Summary Statistics:")
for stat_name, stat_value in stats.items():
    line = (
        f"  {stat_name}: {stat_value:.2f}"
        if isinstance(stat_value, float)
        else f"  {stat_name}: {stat_value}"
    )
    print(line)


Summary Statistics:
  total_texts: 10
  positive_percentage: 40.00
  neutral_percentage: 30.00
  negative_percentage: 30.00
