# Twitter Climate Sentiment - Visualizations

This notebook generates visualizations from the processed tweet data:
1. Geographic heatmap of negative sentiment
2. Sentiment by weather event type
3. Top negative locations with weather metrics
4. Daily sentiment trend

In [None]:
# Imports
import pandas as pd
import folium
from folium.plugins import HeatMap
import plotly.express as px
import plotly.graph_objects as go
from pyhive import hive
import os

print('Libraries loaded successfully')

In [None]:
# Connect to Hive
# The connection settings default to a local HiveServer2 instance and can be
# overridden without editing the notebook via the HIVE_HOST, HIVE_PORT and
# HIVE_DATABASE environment variables.
conn = hive.Connection(
    host=os.environ.get('HIVE_HOST', 'localhost'),
    port=int(os.environ.get('HIVE_PORT', '10000')),
    database=os.environ.get('HIVE_DATABASE', 'default')
)

print('Connected to Hive')

## 1. Geographic Heatmap of Negative Sentiment

In [None]:
# Pull geotagged tweets labelled as negative (capped at 1000 rows).
# The LIMIT has no ORDER BY, so which rows come back is up to Hive.
query = """
SELECT 
    geo_lat,
    geo_lon,
    sentiment_score,
    text_clean,
    weather_event_type
FROM final_analytics
WHERE 
    sentiment_label = 'negative'
    AND geo_lat IS NOT NULL
    AND geo_lon IS NOT NULL
LIMIT 1000
"""

# Run the query over the open Hive connection and load it into a DataFrame
df_negative = pd.read_sql(sql=query, con=conn)

print(f'Loaded {len(df_negative)} negative sentiment tweets')

# Preview the first rows as the cell output
df_negative.head()

In [None]:
# Create heatmap
# Base map centred on the coordinates [39.8, -98.5] at zoom level 4.
m = folium.Map(location=[39.8, -98.5], zoom_start=4)

# Prepare heatmap data: [lat, lon, weight]
# The weight is the magnitude of the sentiment score. HeatMap refuses data
# containing NaN, so rows with a missing/non-numeric coordinate or score are
# dropped before the layer is built.
heat_points = (
    df_negative[['geo_lat', 'geo_lon', 'sentiment_score']]
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
)
skipped_rows = len(df_negative) - len(heat_points)
if skipped_rows:
    print(f'Skipped {skipped_rows} rows with missing coordinates or sentiment score')

heat_data = (
    heat_points
    .assign(sentiment_score=heat_points['sentiment_score'].abs())
    .values
    .tolist()
)

# Add heatmap layer
HeatMap(
    heat_data,
    radius=15,
    blur=25,
    max_zoom=13,
    gradient={0.4: 'blue', 0.6: 'yellow', 0.8: 'orange', 1.0: 'red'}
).add_to(m)

# Save (create the output directory on first use)
output_path = '../visualizations/negative_sentiment_heatmap.html'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
m.save(output_path)

print(f'Heatmap saved to {output_path}')

# Render the map inline as the cell output
m

## 2. Sentiment by Weather Event Type

In [None]:
# Aggregate tweet count and mean sentiment per weather event type,
# ordered by ascending average sentiment.
query = """
SELECT 
    weather_event_type,
    COUNT(*) as tweet_count,
    AVG(sentiment_score) as avg_sentiment
FROM final_analytics
WHERE weather_event_type IS NOT NULL
GROUP BY weather_event_type
ORDER BY avg_sentiment
"""

# Run the aggregation over the open Hive connection
df_events = pd.read_sql(sql=query, con=conn)

print(f'Loaded {len(df_events)} event types')

# Show the aggregated table as the cell output
df_events

In [None]:
# Create bar chart
# One bar per weather event type; bar height and colour both encode the
# average sentiment, and the tweet count is printed above each bar.
fig = px.bar(
    df_events,
    x='weather_event_type',
    y='avg_sentiment',
    color='avg_sentiment',
    color_continuous_scale=['red', 'yellow', 'green'],
    labels={
        'avg_sentiment': 'Average Sentiment Score',
        'weather_event_type': 'Weather Event Type'
    },
    title='Average Sentiment by Weather Event Type',
    text='tweet_count'
)

fig.update_traces(textposition='outside')
fig.update_layout(height=600, showlegend=False)

# Save
# Create the output directory here as well so this cell does not depend on
# the heatmap cell having been run first.
html_path = '../visualizations/sentiment_by_event.html'
png_path = '../visualizations/sentiment_by_event.png'
os.makedirs(os.path.dirname(html_path), exist_ok=True)

fig.write_html(html_path)
# NOTE: static image export needs a plotly image backend (e.g. kaleido) installed.
fig.write_image(png_path, width=1200, height=800)

print('Chart saved')
fig.show()

## 3. Top Negative Locations

In [None]:
# Query top negative locations
# Locations are grouped by coordinates rounded to two decimals plus place name,
# and the ten groups with the most negative tweets are kept.
# NOTE: MAX(weather_event_type) is the lexicographically greatest event type in
# the group, not necessarily the most frequent one.
query = """
SELECT 
    ROUND(geo_lat, 2) as lat,
    ROUND(geo_lon, 2) as lon,
    place_name,
    COUNT(*) as negative_count,
    AVG(sentiment_score) as avg_sentiment,
    AVG(weather_temp_c) as avg_temp,
    AVG(weather_precip_mm) as avg_precip,
    AVG(weather_wind_speed_ms) as avg_wind,
    MAX(weather_event_type) as primary_event
FROM final_analytics
WHERE sentiment_label = 'negative'
GROUP BY ROUND(geo_lat, 2), ROUND(geo_lon, 2), place_name
ORDER BY negative_count DESC
LIMIT 10
"""

df_top_negative = pd.read_sql(query, conn)
print(f'Top {len(df_top_negative)} negative locations')

# Style the table
# The degree sign is written as an escape so the source stays ASCII-only and
# cannot be corrupted by an encoding round-trip of the notebook file.
styled = df_top_negative.style.background_gradient(
    subset=['avg_sentiment'],
    cmap='RdYlGn'
).format({
    'avg_sentiment': '{:.3f}',
    'avg_temp': '{:.1f}\u00b0C',
    'avg_precip': '{:.1f}mm',
    'avg_wind': '{:.1f}m/s'
})

# Save
# Create the output directory here as well so this cell does not depend on
# the heatmap cell having been run first. doctype_html=True writes a complete
# HTML document including a charset declaration, so browsers decode the
# degree sign correctly when the file is opened on its own.
table_path = '../visualizations/top_negative_locations.html'
os.makedirs(os.path.dirname(table_path), exist_ok=True)
styled.to_html(table_path, doctype_html=True)
print('Table saved')

# Show the raw (unstyled) table as the cell output
df_top_negative

## 4. Daily Sentiment Trend

In [None]:
# Aggregate mean sentiment and tweet volume per processing date,
# in chronological order.
query = """
SELECT 
    processing_date,
    AVG(sentiment_score) as avg_sentiment,
    COUNT(*) as tweet_count
FROM final_analytics
GROUP BY processing_date
ORDER BY processing_date
"""

# Run the aggregation over the open Hive connection
df_daily = pd.read_sql(sql=query, con=conn)

print(f'Loaded {len(df_daily)} days of data')

# Show the per-day table as the cell output
df_daily

In [None]:
# Create line chart
# One point per processing date showing that day's average sentiment.
fig = px.line(
    df_daily,
    x='processing_date',
    y='avg_sentiment',
    title='Daily Average Sentiment Trend',
    labels={
        'avg_sentiment': 'Average Sentiment Score',
        'processing_date': 'Date'
    },
    markers=True
)

# Add neutral line
# Dashed reference at y=0 separating positive from negative averages.
fig.add_hline(
    y=0,
    line_dash='dash',
    line_color='gray',
    annotation_text='Neutral'
)

fig.update_layout(height=500)

# Save
# Create the output directory here as well so this cell does not depend on
# the heatmap cell having been run first.
series_path = '../visualizations/sentiment_time_series.html'
os.makedirs(os.path.dirname(series_path), exist_ok=True)
fig.write_html(series_path)
print('Time series saved')

fig.show()

In [None]:
# Release the Hive connection, then report where the outputs were written
conn.close()

for message in (
    '\nAll visualizations generated successfully!',
    'Output directory: ../visualizations/',
):
    print(message)