# Week 2: Sentiment Analysis & Integration

This notebook performs sentiment analysis on tweets and integrates the results with CTA ridership and 311 complaint data.


In [None]:
# Import necessary libraries
import sys
from pathlib import Path

sys.path.append('../src')

import numpy as np
import pandas as pd

from sentiment.sentiment_analyzer import analyze_tweets, SentimentAnalyzer
from sentiment.aggregate_sentiment import aggregate_by_day
from sentiment.integrate_data import integrate_all_data


## 1. Sentiment Analysis

Analyze sentiment of tweets using VADER and TextBlob.


In [None]:
# Input and output locations for this stage (both live in data/cleaned)
tweets_in = '../data/cleaned/tweets.csv'
tweets_out = '../data/cleaned/tweets_with_sentiment.csv'

# Read the cleaned tweets from disk and report how many rows were loaded
df_tweets = pd.read_csv(tweets_in)
print(f"Loaded {len(df_tweets)} tweets")

# Run the project's sentiment analysis over the loaded tweets;
# the result is kept under a new name so df_tweets stays untouched here
df_tweets_analyzed = analyze_tweets(df_tweets)

# Persist the analyzed tweets as CSV, omitting the DataFrame index column
df_tweets_analyzed.to_csv(tweets_out, index=False)
print("\nSentiment analysis complete!")


## 2. Sentiment Aggregation

Aggregate tweets by day and sentiment category.


In [None]:
# Aggregate the analyzed tweets with the project's aggregate_by_day helper
daily_sentiment = aggregate_by_day(df_tweets_analyzed)

# Display results (first 10 rows only, to keep the output short)
print("Daily Sentiment Summary:")
print(daily_sentiment.head(10))

# Save aggregated data.
# DataFrame.to_csv does not create missing parent directories, and this is the
# first write into ../data/combined in the notebook, so make sure it exists.
daily_sentiment_path = Path('../data/combined/daily_sentiment.csv')
daily_sentiment_path.parent.mkdir(parents=True, exist_ok=True)
daily_sentiment.to_csv(daily_sentiment_path, index=False)


## 3. Data Integration

Merge sentiment data with CTA ridership and 311 complaints.


In [None]:
# Integrate all datasets.
# NOTE(review): integrate_all_data() is called with no arguments, so it cannot
# receive the in-memory frames built in this notebook; presumably it reads its
# inputs from disk -- confirm against sentiment.integrate_data.
combined_df = integrate_all_data()

# Display results: shape, column names, and a preview of the first rows
print("Combined Data Summary:")
print(f"Shape: {combined_df.shape}")
print(f"\nColumns: {list(combined_df.columns)}")
print("\nFirst few rows:")  # plain literal: no placeholders, so no f-prefix
print(combined_df.head())

# Save combined data.
# Create the output directory first because DataFrame.to_csv does not create
# missing parent directories and would raise OSError otherwise.
combined_path = Path('../data/combined/combined_data.csv')
combined_path.parent.mkdir(parents=True, exist_ok=True)
combined_df.to_csv(combined_path, index=False)
print("\nData integration complete!")
