# Week 3: Correlation & Visualization

This notebook performs correlation analysis and creates exploratory visualizations.


In [None]:
# Import necessary libraries
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from visualization.correlation_analysis import calculate_correlations, generate_correlation_report
from visualization.visualizations import (
    plot_sentiment_vs_ridership,
    plot_complaint_sentiment_heatmap,
    plot_time_series,
    plot_sentiment_distribution,
    create_correlation_matrix
)


## 1. Load Combined Data


In [None]:
# Read the combined dataset from disk, then replace the 'date' column with
# its parsed datetime equivalent (the column keeps its original position).
df = pd.read_csv('../data/combined/combined_data.csv')
df = df.assign(date=pd.to_datetime(df['date']))

# Summarize what was loaded: row count, covered date span, and column names.
print(
    f"Loaded {len(df)} records",
    f"Date range: {df['date'].min()} to {df['date'].max()}",
    f"\nColumns: {list(df.columns)}",
    sep="\n",
)

# Last expression in the cell, so the notebook renders the first rows.
df.head()


## 2. Correlation Analysis


In [None]:
# Calculate correlations
# Passes the combined DataFrame loaded earlier to the project helper imported
# from visualization.correlation_analysis. The result is kept in a
# notebook-level variable so the report step below can consume it.
correlation_results = calculate_correlations(df)

# Generate and display report
# NOTE(review): presumably `report` is a plain string, since it is handed
# straight to print(); confirm against generate_correlation_report.
report = generate_correlation_report(correlation_results)
print(report)


## 3. Create Visualizations


In [None]:
# Create correlation matrix
# Calls the project helper from visualization.visualizations on the combined
# DataFrame and prints whatever it returns under a heading.
# NOTE(review): the helper lives in the plotting module, so it may also draw a
# figure as a side effect; confirm what it returns before relying on the
# printed form.
corr_matrix = create_correlation_matrix(df)
print("Correlation Matrix:")
print(corr_matrix)


In [None]:
# Plot sentiment vs. ridership
# The helper's return value is kept in `fig1` and then shown explicitly.
# NOTE(review): confirm the return type. If this is a matplotlib Figure,
# Figure.show() under the notebook's inline (non-GUI) backend emits a
# "cannot show the figure" warning; a plotly Figure's .show() renders normally.
fig1 = plot_sentiment_vs_ridership(df)
fig1.show()


In [None]:
# Plot time series
# The helper's return value is kept in `fig2` and then shown explicitly.
# NOTE(review): confirm the return type. If this is a matplotlib Figure,
# Figure.show() under the notebook's inline (non-GUI) backend emits a
# "cannot show the figure" warning; a plotly Figure's .show() renders normally.
fig2 = plot_time_series(df)
fig2.show()


In [None]:
# Plot complaint heatmap
# The helper's return value is kept in `fig3` and then shown explicitly.
# NOTE(review): confirm the return type. If this is a matplotlib Figure,
# Figure.show() under the notebook's inline (non-GUI) backend emits a
# "cannot show the figure" warning; a plotly Figure's .show() renders normally.
fig3 = plot_complaint_sentiment_heatmap(df)
fig3.show()
