In [1]:
import pandas as pd
import numpy as np
import json
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import textwrap
import re

# For sentiment analysis
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Set plotly template
pio.templates.default = "plotly_white"

print("📚 Libraries loaded successfully!")


📚 Libraries loaded successfully!


In [2]:
# Load the Truth Social posts data
print("📊 Loading Truth Social posts data...")

df = pd.read_csv('../data/truth_social_posts_final_2025_cleaned.csv')
df['date_parsed'] = pd.to_datetime(df['date_parsed'])

print(f"✅ Loaded {len(df):,} posts")
print(f"📅 Date range: {df['date_parsed'].min()} to {df['date_parsed'].max()}")

# Initialize VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

print("🎭 Sentiment analyzer initialized!")

df.head()


📊 Loading Truth Social posts data...
✅ Loaded 3,492 posts
📅 Date range: 2025-01-01 10:48:00 to 2025-07-18 23:17:00
🎭 Sentiment analyzer initialized!


Unnamed: 0,speaker,handle,platform,post_url,image_url,deleted_flag,content_text,content_links,date_parsed,is_retweet,is_media_only
0,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,,,2025-07-18 23:17:00,False,True
1,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,,,2025-07-18 22:03:00,False,True
2,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,We have fulfilled so many of our promises…✅One...,,2025-07-18 21:39:00,False,False
3,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,RT@realDonaldTrumpEverybody should watch Sean ...,[{'url': 'https://truthsocial.com/@realDonaldT...,2025-07-18 20:51:00,False,False
4,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,,,2025-07-18 20:46:00,False,True


In [3]:
# Prepare text for sentiment analysis (keep emojis for better sentiment detection)
def clean_text_for_sentiment(text):
    """Clean text for sentiment analysis - keeps emojis but removes noise"""
    if pd.isna(text) or text == '':
        return None
    
    # Remove RT prefix if present
    if text.startswith('RT@'):
        # Find the end of the RT attribution
        parts = text.split(' ', 1)
        if len(parts) > 1:
            text = parts[1]
        else:
            return None
    
    # Remove URLs but keep everything else including emojis
    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
    
    # Clean up extra whitespace
    text = re.sub(r'\s+', ' ', text).strip()
    
    # Only return text that has some content
    if len(text) < 3:
        return None
    
    return text

# Calculate sentiment scores using multiple methods
def calculate_sentiment_metrics(text):
    """Calculate sentiment using both VADER and TextBlob"""
    if not text:
        return {
            'vader_compound': np.nan, 'vader_pos': np.nan, 'vader_neu': np.nan, 'vader_neg': np.nan,
            'textblob_polarity': np.nan, 'textblob_subjectivity': np.nan,
            'sentiment_category': 'Unknown'
        }
    
    try:
        # VADER sentiment (good for social media)
        vader_scores = analyzer.polarity_scores(text)
        
        # TextBlob sentiment
        blob = TextBlob(text)
        textblob_polarity = blob.sentiment.polarity
        textblob_subjectivity = blob.sentiment.subjectivity
        
        # Determine overall sentiment category based on VADER compound score
        compound = vader_scores['compound']
        if compound >= 0.05:
            sentiment_category = 'Positive'
        elif compound <= -0.05:
            sentiment_category = 'Negative'
        else:
            sentiment_category = 'Neutral'
        
        return {
            'vader_compound': vader_scores['compound'],
            'vader_pos': vader_scores['pos'],
            'vader_neu': vader_scores['neu'],
            'vader_neg': vader_scores['neg'],
            'textblob_polarity': textblob_polarity,
            'textblob_subjectivity': textblob_subjectivity,
            'sentiment_category': sentiment_category
        }
    except Exception as e:
        print(f"Error analyzing sentiment: {e}")
        return {
            'vader_compound': np.nan, 'vader_pos': np.nan, 'vader_neu': np.nan, 'vader_neg': np.nan,
            'textblob_polarity': np.nan, 'textblob_subjectivity': np.nan,
            'sentiment_category': 'Error'
        }

print("🔧 Sentiment analysis functions defined!")


🔧 Sentiment analysis functions defined!


In [4]:
# Apply sentiment cleaning to all posts
print("🧹 Cleaning text for sentiment analysis...")
df['sentiment_text'] = df['content_text'].apply(clean_text_for_sentiment)

# Filter to posts with sentiment-analyzable text
sentiment_df = df[df['sentiment_text'].notna()].copy()

print(f"Posts available for sentiment analysis: {len(sentiment_df)}")
print(f"Percentage of total posts: {len(sentiment_df)/len(df)*100:.1f}%")

# Perform sentiment analysis
print("🎭 Performing sentiment analysis...")
print("   This may take a few minutes...")

# Apply sentiment analysis
sentiment_results = sentiment_df['sentiment_text'].apply(calculate_sentiment_metrics)

# Extract results into separate columns
sentiment_columns = ['vader_compound', 'vader_pos', 'vader_neu', 'vader_neg', 
                    'textblob_polarity', 'textblob_subjectivity', 'sentiment_category']

for col in sentiment_columns:
    sentiment_df[col] = sentiment_results.apply(lambda x: x[col])

# Filter out posts with sentiment analysis errors
sentiment_df = sentiment_df[sentiment_df['sentiment_category'] != 'Error'].copy()
sentiment_df = sentiment_df.dropna(subset=['vader_compound', 'textblob_polarity', 'textblob_subjectivity'])

print(f"✅ Sentiment analysis completed!")
print(f"📊 Final dataset: {len(sentiment_df)} posts with complete sentiment data")

# Show sentiment distribution
print("\n🎯 SENTIMENT DISTRIBUTION:")
sentiment_counts = sentiment_df['sentiment_category'].value_counts()
for category, count in sentiment_counts.items():
    percentage = count/len(sentiment_df)*100
    print(f"   {category}: {count:,} posts ({percentage:.1f}%)")

# Show statistics
print("\n📈 SENTIMENT STATISTICS:")
print(f"   VADER Compound: {sentiment_df['vader_compound'].mean():.3f} ± {sentiment_df['vader_compound'].std():.3f}")
print(f"   TextBlob Polarity: {sentiment_df['textblob_polarity'].mean():.3f} ± {sentiment_df['textblob_polarity'].std():.3f}")
print(f"   TextBlob Subjectivity: {sentiment_df['textblob_subjectivity'].mean():.3f} ± {sentiment_df['textblob_subjectivity'].std():.3f}")

sentiment_df.head()


🧹 Cleaning text for sentiment analysis...
Posts available for sentiment analysis: 1847
Percentage of total posts: 52.9%
🎭 Performing sentiment analysis...
   This may take a few minutes...


✅ Sentiment analysis completed!
📊 Final dataset: 1847 posts with complete sentiment data

🎯 SENTIMENT DISTRIBUTION:
   Positive: 1,190 posts (64.4%)
   Negative: 435 posts (23.6%)
   Neutral: 222 posts (12.0%)

📈 SENTIMENT STATISTICS:
   VADER Compound: 0.340 ± 0.655
   TextBlob Polarity: 0.177 ± 0.299
   TextBlob Subjectivity: 0.458 ± 0.248


Unnamed: 0,speaker,handle,platform,post_url,image_url,deleted_flag,content_text,content_links,date_parsed,is_retweet,is_media_only,sentiment_text,vader_compound,vader_pos,vader_neu,vader_neg,textblob_polarity,textblob_subjectivity,sentiment_category
2,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,We have fulfilled so many of our promises…✅One...,,2025-07-18 21:39:00,False,False,We have fulfilled so many of our promises…✅One...,0.7717,0.132,0.868,0.0,0.298611,0.472222,Positive
3,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,RT@realDonaldTrumpEverybody should watch Sean ...,[{'url': 'https://truthsocial.com/@realDonaldT...,2025-07-18 20:51:00,False,False,should watch Sean Hannity tonight. He really g...,0.0,0.0,1.0,0.0,0.25,0.2,Neutral
5,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,I pledged that we would bring back American li...,,2025-07-18 19:53:00,False,False,I pledged that we would bring back American li...,0.8555,0.224,0.776,0.0,0.02,0.348095,Positive
6,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,BREAKING NEWS: We have just filed a POWERHOUSE...,,2025-07-18 18:51:00,False,False,BREAKING NEWS: We have just filed a POWERHOUSE...,-0.6548,0.116,0.743,0.141,0.038889,0.575,Negative
7,Donald Trump,@realDonaldTrump,Truth Social,https://truthsocial.com/@realDonaldTrump/posts...,https://media-cdn.factba.se/realdonaldtrump-tr...,False,Congresswoman Monica De La Cruz is doing an in...,,2025-07-18 18:48:00,False,False,Congresswoman Monica De La Cruz is doing an in...,0.9909,0.358,0.606,0.036,0.319444,0.541307,Positive


In [5]:
# Create sentiment color mapping
print("🎨 Setting up 3D visualization...")

# Define colors for sentiment categories
sentiment_colors = {
    'Positive': '#2E8B57',    # Sea Green
    'Negative': '#DC143C',    # Crimson
    'Neutral': '#4169E1',     # Royal Blue
    'Unknown': '#808080'      # Gray
}

# Add color column
sentiment_df['color'] = sentiment_df['sentiment_category'].map(sentiment_colors)

print(f"📊 Visualization ready for {len(sentiment_df)} posts")
print("   Axes:")
print(f"     X: TextBlob Polarity (range: {sentiment_df['textblob_polarity'].min():.3f} to {sentiment_df['textblob_polarity'].max():.3f})")
print(f"     Y: TextBlob Subjectivity (range: {sentiment_df['textblob_subjectivity'].min():.3f} to {sentiment_df['textblob_subjectivity'].max():.3f})")
print(f"     Z: VADER Compound (range: {sentiment_df['vader_compound'].min():.3f} to {sentiment_df['vader_compound'].max():.3f})")


🎨 Setting up 3D visualization...
📊 Visualization ready for 1847 posts
   Axes:
     X: TextBlob Polarity (range: -1.000 to 1.000)
     Y: TextBlob Subjectivity (range: 0.000 to 1.000)
     Z: VADER Compound (range: -0.996 to 0.997)


In [6]:
# Create hover text with post content and sentiment information
print("📝 Creating hover text with sentiment details...")

def create_sentiment_hover_text(row, wrap_width=60):
    """Create rich hover text for each post with sentiment information"""
    # Truncate content if too long
    content = row['sentiment_text'] if pd.notna(row['sentiment_text']) else row['content_text']
    if pd.notna(content) and len(content) > 200:
        content = content[:200] + "..."
    
    # Wrap the content to avoid overflow in hover
    if pd.notna(content):
        wrapped_lines = textwrap.wrap(content, width=wrap_width)
        content_multiline = "<br>".join(wrapped_lines)
    else:
        content_multiline = "[No text content]"
    
    # Format date
    date_str = row['date_parsed'].strftime('%Y-%m-%d %H:%M')
    
    # Format sentiment scores
    polarity = row['textblob_polarity']
    subjectivity = row['textblob_subjectivity']
    vader_compound = row['vader_compound']
    sentiment_cat = row['sentiment_category']
    
    hover_text = (
        f"<b>Date:</b> {date_str}<br>"
        f"<b>Speaker:</b> {row['speaker']}<br>"
        f"<b>Sentiment:</b> {sentiment_cat}<br>"
        f"<b>Polarity:</b> {polarity:.3f} (TB)<br>"
        f"<b>Subjectivity:</b> {subjectivity:.3f} (TB)<br>"
        f"<b>Compound:</b> {vader_compound:.3f} (VADER)<br>"
        f"<b>Content:</b><br>{content_multiline}"
    )
    
    return hover_text

# Apply to dataframe
sentiment_df['hover_text'] = sentiment_df.apply(create_sentiment_hover_text, axis=1)

print(f"✅ Hover text created for {len(sentiment_df)} posts")
print("\n📋 Sample hover text:")
print(sentiment_df['hover_text'].iloc[0])


📝 Creating hover text with sentiment details...
✅ Hover text created for 1847 posts

📋 Sample hover text:
<b>Date:</b> 2025-07-18 21:39<br><b>Speaker:</b> Donald Trump<br><b>Sentiment:</b> Positive<br><b>Polarity:</b> 0.299 (TB)<br><b>Subjectivity:</b> 0.472 (TB)<br><b>Compound:</b> 0.772 (VADER)<br><b>Content:</b><br>We have fulfilled so many of our promises…✅One Big Beautiful<br>Bill✅Laken Riley Act✅TAKE IT DOWN Act✅Rescissions Act of<br>2025✅HALT Fentanyl Act✅GENIUS Act


In [7]:
# Create the 3D interactive scatter plot
print("🎨 Creating 3D interactive polarity visualization...")

# Create the main 3D scatter plot
fig = go.Figure()

# Add points for each sentiment category
for sentiment_cat in ['Positive', 'Negative', 'Neutral']:
    if sentiment_cat not in sentiment_df['sentiment_category'].values:
        continue
        
    mask = sentiment_df['sentiment_category'] == sentiment_cat
    data = sentiment_df[mask]
    
    if len(data) == 0:
        continue
    
    fig.add_trace(go.Scatter3d(
        x=data['textblob_polarity'],
        y=data['textblob_subjectivity'],
        z=data['vader_compound'],
        mode='markers',
        name=f"{sentiment_cat} ({len(data)} posts)",
        marker=dict(
            size=4,
            color=sentiment_colors[sentiment_cat],
            opacity=0.7,
            line=dict(width=0.5, color='darkgray')
        ),
        text=data['hover_text'],
        hovertemplate='%{text}<extra></extra>',  # Hide the trace name in hover
        hoverinfo='text'
    ))

# Update layout for better visualization
fig.update_layout(
    title=dict(
        text="🎭 3D Interactive Polarity Analysis - Truth Social Posts<br><sub>Hover over points to see sentiment details | X=Polarity, Y=Subjectivity, Z=VADER Compound</sub>",
        x=0.5,
        font=dict(size=20)
    ),
    scene=dict(
        xaxis_title="TextBlob Polarity (-1=Negative, +1=Positive)",
        yaxis_title="TextBlob Subjectivity (0=Objective, 1=Subjective)",
        zaxis_title="VADER Compound (-1=Negative, +1=Positive)",
        camera=dict(
            eye=dict(x=1.3, y=1.3, z=1.3)  # Good initial viewing angle
        ),
        bgcolor="rgba(240,240,240,0.1)",
        xaxis=dict(
            gridcolor="lightgray", 
            gridwidth=1,
            range=[-1, 1],  # Standard polarity range
            zeroline=True,
            zerolinewidth=2,
            zerolinecolor="black"
        ),
        yaxis=dict(
            gridcolor="lightgray", 
            gridwidth=1,
            range=[0, 1]   # Standard subjectivity range
        ),
        zaxis=dict(
            gridcolor="lightgray", 
            gridwidth=1,
            range=[-1, 1], # Standard VADER range
            zeroline=True,
            zerolinewidth=2,
            zerolinecolor="black"
        )
    ),
    width=1400,
    height=900,
    font=dict(size=12),
    legend=dict(
        x=0.02,
        y=0.98,
        bgcolor="rgba(255,255,255,0.9)",
        bordercolor="gray",
        borderwidth=1,
        font=dict(size=11)
    )
)

print("✅ 3D polarity scatter plot created!")
print(f"   📊 {len(sentiment_df)} posts plotted across 3 sentiment dimensions")
print("   🖱️  Interactive features:")
print("      - Hover over points to see sentiment details and post content")
print("      - Click and drag to rotate the plot")
print("      - Scroll to zoom in/out")
print("      - Click legend items to show/hide sentiment categories")
print("      - Zero lines show neutral sentiment boundaries")

# Display the plot
fig.show()


🎨 Creating 3D interactive polarity visualization...
✅ 3D polarity scatter plot created!
   📊 1847 posts plotted across 3 sentiment dimensions
   🖱️  Interactive features:
      - Hover over points to see sentiment details and post content
      - Click and drag to rotate the plot
      - Scroll to zoom in/out
      - Click legend items to show/hide sentiment categories
      - Zero lines show neutral sentiment boundaries


In [8]:
# Save the interactive plot as HTML file
print("💾 Saving interactive polarity visualization...")

# Save 3D plot
fig.write_html("../images/3d_polarity_analysis_interactive.html")
print("✅ 3D interactive polarity plot saved to '../images/3d_polarity_analysis_interactive.html'")

# Save the sentiment analysis dataset
sentiment_export = sentiment_df[[
    'speaker', 'handle', 'content_text', 'sentiment_text', 'date_parsed',
    'vader_compound', 'vader_pos', 'vader_neu', 'vader_neg',
    'textblob_polarity', 'textblob_subjectivity', 'sentiment_category'
]].copy()

sentiment_export.to_csv('../data/truth_social_posts_with_sentiment.csv', index=False)
print("✅ Sentiment analysis dataset saved to '../data/truth_social_posts_with_sentiment.csv'")

# Create sentiment analysis summary
sentiment_summary = {
    'analysis_info': {
        'total_posts_analyzed': len(sentiment_df),
        'analysis_date': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
        'sentiment_tools': ['VADER', 'TextBlob'],
        'date_range': {
            'start': sentiment_df['date_parsed'].min().strftime('%Y-%m-%d'),
            'end': sentiment_df['date_parsed'].max().strftime('%Y-%m-%d')
        }
    },
    'sentiment_distribution': {
        cat: {
            'count': int(count),
            'percentage': float(count/len(sentiment_df)*100)
        } for cat, count in sentiment_counts.items()
    },
    'sentiment_statistics': {
        'vader_compound': {
            'mean': float(sentiment_df['vader_compound'].mean()),
            'std': float(sentiment_df['vader_compound'].std()),
            'min': float(sentiment_df['vader_compound'].min()),
            'max': float(sentiment_df['vader_compound'].max())
        },
        'textblob_polarity': {
            'mean': float(sentiment_df['textblob_polarity'].mean()),
            'std': float(sentiment_df['textblob_polarity'].std()),
            'min': float(sentiment_df['textblob_polarity'].min()),
            'max': float(sentiment_df['textblob_polarity'].max())
        },
        'textblob_subjectivity': {
            'mean': float(sentiment_df['textblob_subjectivity'].mean()),
            'std': float(sentiment_df['textblob_subjectivity'].std()),
            'min': float(sentiment_df['textblob_subjectivity'].min()),
            'max': float(sentiment_df['textblob_subjectivity'].max())
        }
    }
}

# Save summary
with open('../data/sentiment_analysis_summary.json', 'w') as f:
    json.dump(sentiment_summary, f, indent=2)
    
print("✅ Sentiment analysis summary saved to '../data/sentiment_analysis_summary.json'")

print("\n🎉 3D Interactive Polarity Analysis Complete!")
print("📁 Files created:")
print("   📊 3d_polarity_analysis_interactive.html - Interactive 3D visualization")
print("   📄 truth_social_posts_with_sentiment.csv - Data with sentiment analysis")
print("   📋 sentiment_analysis_summary.json - Analysis summary statistics")
print("\n🖱️  Open the HTML file in your browser to interact with the visualization!")
print("\n💡 INTERPRETATION GUIDE:")
print("   🟢 Green (Positive): VADER compound ≥ 0.05")
print("   🔴 Red (Negative): VADER compound ≤ -0.05")
print("   🔵 Blue (Neutral): -0.05 < VADER compound < 0.05")
print("   📊 X-axis: TextBlob polarity (emotional tone)")
print("   📊 Y-axis: TextBlob subjectivity (objective vs personal opinion)")
print("   📊 Z-axis: VADER compound (overall sentiment strength)")


💾 Saving interactive polarity visualization...
✅ 3D interactive polarity plot saved to '../images/3d_polarity_analysis_interactive.html'


✅ Sentiment analysis dataset saved to '../data/truth_social_posts_with_sentiment.csv'
✅ Sentiment analysis summary saved to '../data/sentiment_analysis_summary.json'

🎉 3D Interactive Polarity Analysis Complete!
📁 Files created:
   📊 3d_polarity_analysis_interactive.html - Interactive 3D visualization
   📄 truth_social_posts_with_sentiment.csv - Data with sentiment analysis
   📋 sentiment_analysis_summary.json - Analysis summary statistics

🖱️  Open the HTML file in your browser to interact with the visualization!

💡 INTERPRETATION GUIDE:
   🟢 Green (Positive): VADER compound ≥ 0.05
   🔴 Red (Negative): VADER compound ≤ -0.05
   🔵 Blue (Neutral): -0.05 < VADER compound < 0.05
   📊 X-axis: TextBlob polarity (emotional tone)
   📊 Y-axis: TextBlob subjectivity (objective vs personal opinion)
   📊 Z-axis: VADER compound (overall sentiment strength)
