# Mental Health Sentiment Analyzer

**Student:** Marco Landeo (mlandeo@stevens.edu)

This notebook performs sentiment analysis on text data to identify mental health-related sentiment trends.


In [1]:
# Import required modules
import os
import tempfile

import matplotlib.pyplot as plt
import pandas as pd

from sentiment_analyzer import SentimentAnalyzer
from text_dataset import TextDataset
from utils import calculate_accuracy, generate_report

print("All modules imported successfully!")


All modules imported successfully!


## Step 1: Load and Preprocess Data


In [2]:
# Load the dataset, failing fast if it cannot be read.
# Each handler prints a readable hint and then re-raises: every later cell
# needs `dataset`, so carrying on after a failed load would either hit a
# NameError further down or silently reuse a stale `dataset` left in the
# kernel by an earlier run.
try:
    dataset = TextDataset('sample_data.csv')
    print(f"Dataset loaded: {dataset}")
    print(f"Number of texts: {len(dataset.raw_texts)}")
except FileNotFoundError as e:
    print(f"Error: {e}")
    print("Please ensure sample_data.csv exists in the current directory.")
    raise
except ValueError as e:
    print(f"Error: {e}")
    raise


Dataset loaded: TextDataset(file_path='sample_data.csv', num_texts=10)
Number of texts: 10


In [3]:
# Preprocess the text data and show one cleaned example.
# preprocess() is called for its effect on `dataset`; `cleaned_texts` is read
# afterwards to report the result.
dataset.preprocess()
print(f"Preprocessed {len(dataset.cleaned_texts)} texts")

# Only index into the cleaned texts when there is at least one, so an empty
# dataset produces a message instead of an IndexError.
if len(dataset.cleaned_texts) > 0:
    print(f"Sample cleaned text: {dataset.cleaned_texts[0]}")
else:
    print("No cleaned texts available to sample.")


Preprocessed 10 texts
Sample cleaned text: i feel great today and everything is wonderful


## Step 2: Perform Sentiment Analysis


In [4]:
# Create the SentimentAnalyzer around the already-loaded dataset (the analyzer
# is handed the dataset object rather than a file path). The repr is printed
# before any analysis has run, so it reflects the analyzer's initial state.
analyzer = SentimentAnalyzer(dataset)
print(f"Analyzer created: {analyzer}")

# Run the analysis over the dataset; `analyzer.results` is read right after to
# report how many texts were analyzed.
# NOTE(review): presumably iterates over the dataset's texts internally --
# confirm in sentiment_analyzer.py.
analyzer.analyze_all()
print(f"Analysis complete! Analyzed {len(analyzer.results)} texts")


Analyzer created: SentimentAnalyzer(dataset=TextDataset(file_path='sample_data.csv', num_texts=10), analyzed_texts=0)
Analysis complete! Analyzed 10 texts


In [5]:
# Show how many texts fell into each sentiment label: a header line followed
# by one indented "label: count" line per entry in analyzer.sentiment_counts.
print(
    "Sentiment Distribution:",
    *(f"  {label}: {total}" for label, total in analyzer.sentiment_counts.items()),
    sep="\n",
)


Sentiment Distribution:
  positive: 4
  neutral: 2
  negative: 4


## Step 3: Visualize Results


In [6]:
# Create the visualizations, passing 'sentiment_distribution.png' as the
# output file name.
# NOTE(review): the recorded output suggests visualize_results() already prints
# its own "Visualization saved to ..." line, so the print below is a second
# confirmation -- confirm in sentiment_analyzer.py.
analyzer.visualize_results('sentiment_distribution.png')
print("Visualization saved!")


Visualization saved to sentiment_distribution.png
Visualization saved!


## Step 4: Generate Report


In [7]:
# Write the analysis report to 'sentiment_report.json'.
# A ValueError from generate_report() is reported as a message rather than a
# traceback; the success line is printed only when no such error occurred.
try:
    generate_report(analyzer, 'sentiment_report.json')
except ValueError as err:
    print(f"Error generating report: {err}")
else:
    print("Report generated successfully!")


Report saved to sentiment_report.json
Report generated successfully!


## Step 5: Calculate Accuracy (if ground truth available)


In [8]:
# Demonstration of calculate_accuracy() only.
# No ground-truth labels are loaded here: `actual` is a copy of the predicted
# labels, so the reported accuracy is 100% by construction and says nothing
# about the analyzer's quality. Replace `actual` with real labels in practice.
predictions = [entry['sentiment'] for entry in analyzer.results]
actual = list(predictions)  # stand-in for real labels loaded from the dataset

try:
    accuracy = calculate_accuracy(predictions, actual)
    print(f"Accuracy: {accuracy:.2%}")
except ValueError as err:
    print(f"Error calculating accuracy: {err}")


Accuracy: 100.00%


## Step 6: Interactive Testing (While Loop)


In [None]:
# Interactive sentiment analysis (while loop example).
# Prompts for sentences and prints the sentiment of each one until the user
# types 'quit' (any letter case, surrounding whitespace ignored).
# NOTE: Make sure to run the previous cells first to create the 'analyzer' object!

# Guard for a fresh kernel: without the analyzer from Step 2 there is nothing
# to query, so explain what to run instead of failing with a NameError.
if 'analyzer' not in globals():
    print("ERROR: 'analyzer' object not found!")
    print("Please run the previous cells (especially Step 2) to create the analyzer first.")
    print("The analyzer is created in the cell: '## Step 2: Perform Sentiment Analysis'")
else:
    print("Interactive Sentiment Analysis")
    print("Type 'quit' to exit")
    print("-" * 40)

    while True:
        try:
            user_input = input("Enter a sentence to analyze (or 'quit' to exit): ")
        except EOFError:
            # stdin was closed (e.g. the code is run with piped or no input):
            # end the session cleanly instead of surfacing a traceback.
            break

        # Normalise once so that ' quit ' and 'QUIT' also end the session.
        command = user_input.strip()
        if command.lower() == 'quit':
            break

        if command:  # Non-empty input: analyze the sentence exactly as typed
            sentiment = analyzer.analyze_sentiment(user_input)
            print(f"Sentiment: {sentiment}")
        else:
            print("Please enter a valid sentence.")
        print("-" * 40)

    print("Exiting interactive mode.")


Interactive Sentiment Analysis
Type 'quit' to exit
----------------------------------------


Enter a sentence to analyze (or 'quit' to exit):  I feel terribly sad


Sentiment: negative
----------------------------------------


Enter a sentence to analyze (or 'quit' to exit):  I feel nuetral


Sentiment: neutral
----------------------------------------


Enter a sentence to analyze (or 'quit' to exit):  I feel weird


Sentiment: neutral
----------------------------------------


Enter a sentence to analyze (or 'quit' to exit):  I feel happy sometimes


Sentiment: positive
----------------------------------------


Enter a sentence to analyze (or 'quit' to exit):  quit


Exiting interactive mode.


## Step 7: Demonstrate Advanced Features


In [None]:
# Demonstrate the generator: pull batches of up to 3 texts from the dataset
# and report the size of each, stopping after the third batch.
print("Using generator to process batches:")
batch_count = 0  # stays 0 if the generator yields nothing
for batch_count, batch in enumerate(dataset.batch_generator(batch_size=3), start=1):
    print(f"Batch {batch_count}: {len(batch)} texts")
    if batch_count >= 3:  # Limit output
        break


In [None]:
# Demonstrate operator overloading (__add__): `dataset + dataset2` yields a
# merged dataset whose `raw_texts` length is reported below.
print("Demonstrating operator overloading:")
print(f"Original dataset: {len(dataset.raw_texts)} texts")

# Build a one-row CSV in a temporary file to act as the second dataset.
# delete=False keeps the file on disk after the `with` block closes it so
# TextDataset can open it by path; the `finally` clause then removes it even
# when loading or merging fails, so no temp file is left behind.
temp_file = None
try:
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as tmp_csv:
        # Record the path before writing so cleanup still happens if a write fails.
        temp_file = tmp_csv.name
        tmp_csv.write("text\n")
        tmp_csv.write("Additional text for merging\n")

    dataset2 = TextDataset(temp_file)
    merged_dataset = dataset + dataset2
    print(f"Merged dataset: {len(merged_dataset.raw_texts)} texts")
except Exception as e:
    # Demo cell: report any failure as a message rather than a traceback.
    print(f"Error in merging: {e}")
finally:
    if temp_file is not None and os.path.exists(temp_file):
        os.unlink(temp_file)


In [None]:
# Demonstrate word frequencies (mutable dict): rank the (word, count) pairs by
# count, highest first, and list the first five.
word_freq = dataset.get_word_frequencies()
print("Top 5 most frequent words:")
sorted_words = sorted(
    word_freq.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
top_five = sorted_words[:5]
for word, freq in top_five:
    print(f"  {word}: {freq}")


In [None]:
# Demonstrate immutable tuple: fetch the sentiment categories from the dataset
# and print both the value and its runtime type.
categories = dataset.get_sentiment_categories()
print(
    f"Sentiment categories (immutable tuple): {categories}",
    f"Type: {type(categories)}",
    sep="\n",
)


In [None]:
# Display summary statistics: one indented "key: value" line per entry, with
# float values rounded to two decimals and everything else printed as-is.
stats = analyzer.get_summary_statistics()
print("Summary Statistics:")
for key, value in stats.items():
    print(f"  {key}: {value:.2f}" if isinstance(value, float) else f"  {key}: {value}")
