# In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from pathlib import Path
from matplotlib.dates import DateFormatter
from matplotlib.colors import LogNorm

# Configuration: input file and output folder, both relative to the
# directory the script is started from.
IMAGES_DIR = Path('../images')
DATA_PATH = Path('..') / 'data' / 'processed_global_data.csv'
IMAGES_DIR.mkdir(exist_ok=True, parents=True)  # create the output folder if needed

# Plot styling applied to every matplotlib/seaborn figure below.
plt.style.use('ggplot')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.dpi': 300,
    # Applied after sns.set_palette, so tab10 is the line colour cycle in effect.
    'axes.prop_cycle': plt.cycler(color=plt.cm.tab10.colors),
})

# Helper function
def save_viz(filename):
    """Write the current matplotlib figure into IMAGES_DIR, then close it.

    Args:
        filename: Name of the image file (including extension) to create
            inside IMAGES_DIR.

    The figure is saved with a tight bounding box on a white background and
    the destination path is printed once the figure has been closed.
    """
    destination = IMAGES_DIR / filename
    plt.savefig(destination, facecolor='white', bbox_inches='tight')
    plt.close()
    print(f"Saved: {destination}")

# Load processed data
try:
    # Keep only the call that can raise FileNotFoundError inside the try body.
    df = pd.read_csv(DATA_PATH, parse_dates=['Date'])
except FileNotFoundError:
    print("Processed data not found. Run data processing script first!")
    # Nothing below can run without the data. `exit()` is an interactive
    # helper added by the `site` module and exits with status 0; raise
    # SystemExit with a non-zero status so the failure is visible to callers.
    raise SystemExit(1) from None
else:
    print("Data loaded successfully!")
    print(f"Data range: {df['Date'].min().date()} to {df['Date'].max().date()}")

# Global Analysis: sum every metric over all rows sharing the same date.
trend_columns = ['Confirmed', 'Deaths', 'Recovered']
global_daily = df.groupby('Date')[trend_columns].sum().reset_index()

# 1. Enhanced Global Trends
# One line per metric on a shared logarithmic y axis.
trend_fig, ax = plt.subplots(figsize=(16, 8))
for metric in trend_columns:
    ax.plot(global_daily['Date'], global_daily[metric], label=metric, linewidth=2)

ax.xaxis.set_major_formatter(DateFormatter("%b %Y"))  # e.g. "Mar 2020"
ax.set_title('Global COVID-19 Spread (Log Scale)', fontsize=14)
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Cases (Log Scale)', fontsize=12)
ax.set_yscale('log')
ax.legend()
save_viz('1_global_trends_log.png')

# 2. Daily New Cases Heatmap
# Daily increase of the cumulative count per country. diff() subtracts
# consecutive rows, so each country's rows are put in date order first;
# assigning the result back aligns on the index, leaving df's own row order
# untouched. The first row of every country has no predecessor and becomes 0.
# NOTE(review): assumes one row per country and date — confirm the processed
# file has no sub-national rows, otherwise diff() mixes regions.
df['New Cases'] = (
    df.sort_values(['Country/Region', 'Date'])
      .groupby('Country/Region')['Confirmed']
      .diff()
      .fillna(0)
)

plt.figure(figsize=(18, 10))
pivot = df.pivot_table(index='Country/Region', columns='Date', values='New Cases', aggfunc='sum')
# Rank countries by their total of daily new cases and keep the 15 largest.
top_countries = pivot.sum(axis=1).sort_values(ascending=False).index[:15]
heatmap_data = pivot.loc[top_countries]
# Label the x axis with plain dates instead of full timestamps.
heatmap_data.columns = heatmap_data.columns.strftime('%Y-%m-%d')
# NOTE(review): zero or negative daily values have no logarithm, so those
# cells are expected to be left uncoloured under LogNorm — confirm visually.
sns.heatmap(heatmap_data, cmap='YlOrRd', norm=LogNorm())
plt.title('Daily New Cases Heatmap (Top 15 Countries)', fontsize=14)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Country', fontsize=12)
save_viz('2_daily_cases_heatmap.png')

# 3. Mortality vs Recovery Rate
# Largest recorded value of each metric per country, restricted to countries
# with more than 10,000 confirmed cases.
country_peaks = df.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered']].max()
country_data = country_peaks[country_peaks['Confirmed'] > 10000]
confirmed_peak = country_data['Confirmed']
# Both rates are percentages of the confirmed count.
country_data['Mortality Rate'] = country_data['Deaths'] / confirmed_peak * 100
country_data['Recovery Rate'] = country_data['Recovered'] / confirmed_peak * 100

plt.figure(figsize=(12, 12))
# Marker size and colour both encode the confirmed count.
marker_options = dict(
    size='Confirmed',
    hue='Confirmed',
    palette='viridis',
    sizes=(50, 500),
    alpha=0.7,
)
sc = sns.scatterplot(data=country_data, x='Mortality Rate', y='Recovery Rate',
                     **marker_options)

sc.set_title('Mortality Rate vs Recovery Rate (Countries >10k Cases)', fontsize=14)
sc.set_xlabel('Mortality Rate (%)', fontsize=12)
sc.set_ylabel('Recovery Rate (%)', fontsize=12)
# Reference lines at 50 % recovery and 5 % mortality.
sc.axhline(50, color='grey', linestyle='--', linewidth=0.8)
sc.axvline(5, color='grey', linestyle='--', linewidth=0.8)
# Place the legend outside the axes, to the right of the plot.
sc.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
save_viz('3_mortality_recovery_scatter.png')

# 4. Growth Factor Analysis
# 7-day moving average of the cumulative confirmed count (column kept as is).
global_daily['7d_MA'] = global_daily['Confirmed'].rolling(7).mean()
# The day-over-day change of that average equals the 7-day mean of daily new
# cases: (C[t] - C[t-7]) / 7.
smoothed_new_cases = global_daily['7d_MA'].diff()
# Growth factor = smoothed new cases today / smoothed new cases yesterday.
# Values above 1.0 mean new cases are accelerating, below 1.0 decelerating,
# which is what the 1.0 threshold line drawn below refers to. A plain
# pct_change() of the cumulative average is a growth *rate* that hovers
# around 0 and never relates to that threshold.
# A zero denominator yields +/-inf; turn those into gaps in the line.
global_daily['Growth Factor'] = (
    (smoothed_new_cases / smoothed_new_cases.shift(1))
    .replace([np.inf, -np.inf], np.nan)
)

plt.figure(figsize=(16, 8))
plt.plot(global_daily['Date'], global_daily['Growth Factor'],
         color='#2ecc71', linewidth=2)
plt.fill_between(global_daily['Date'], global_daily['Growth Factor'],
                 alpha=0.2, color='#2ecc71')
plt.axhline(1.0, color='#e74c3c', linestyle='--', label='Critical Threshold (1.0)')
plt.title('7-Day Moving Average Growth Factor', fontsize=14)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Growth Factor', fontsize=12)
plt.legend()
save_viz('4_growth_factor.png')

# 5. Interactive Plotly Visualization
# Same three global series as an interactive HTML line chart.
interactive_path = IMAGES_DIR / "5_interactive_plot.html"

fig = px.line(
    global_daily,
    x='Date',
    y=['Confirmed', 'Deaths', 'Recovered'],
    labels={'value': 'Cases', 'variable': 'Metric'},
    template='plotly_white',
    title='Interactive Global COVID-19 Trends',
)

layout_options = {
    'hovermode': "x unified",
    'legend_title_text': 'Metric',
    'xaxis_title': "Date",
    'yaxis_title': "Cases",
    'yaxis_type': "log",
}
fig.update_layout(**layout_options)

fig.write_html(interactive_path)
print(f"Saved: {interactive_path}")

# 6. Normalized Country Comparison
# Hard-coded population figures for the countries to compare.
pop_data = {
    'Country/Region': ['US', 'India', 'Brazil', 'Russia', 'UK', 'France', 'Germany'],
    'Population': [331_000_000, 1_393_000_000, 213_000_000,
                   144_000_000, 67_000_000, 65_000_000, 83_000_000],
}
pop_df = pd.DataFrame(pop_data)

# Inner merge: only countries present in both tables survive.
merged = country_data.reset_index().merge(pop_df, on='Country/Region')
# A country whose name has no exact match in country_data is dropped
# silently by the merge; report it so a missing bar can be traced back to
# a naming mismatch or to the country not being in country_data.
unmatched = sorted(set(pop_df['Country/Region']) - set(merged['Country/Region']))
if unmatched:
    print(f"Warning: no case data matched for: {', '.join(unmatched)}")
merged['Cases_per_100k'] = (merged['Confirmed'] / merged['Population']) * 1e5

if merged.empty:
    # Nothing to draw; avoid writing a blank chart.
    print("Skipping 6_cases_per_capita.png: no countries matched the population table.")
else:
    plt.figure(figsize=(14, 8))
    sns.barplot(x='Cases_per_100k', y='Country/Region', data=merged,
                palette='rocket_r')
    plt.title('Confirmed Cases per 100k Population', fontsize=14)
    plt.xlabel('Cases per 100k', fontsize=12)
    plt.ylabel('Country', fontsize=12)
    save_viz('6_cases_per_capita.png')

# 7. Time Series Comparison (Enhanced)
# Cumulative confirmed cases for a fixed set of countries, one colour each.
countries = ['US', 'India', 'Brazil', 'Russia', 'UK']
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

plt.figure(figsize=(16, 8))
for country, color in zip(countries, colors):
    # Sort by date so the line is drawn chronologically whatever the row
    # order of the input file is.
    country_df = df[df['Country/Region'] == country].sort_values('Date')
    if country_df.empty:
        # Without this check a misspelt or absent country would still get a
        # legend entry for a line that has no points.
        print(f"Warning: no rows found for '{country}'; skipping.")
        continue
    plt.plot(country_df['Date'], country_df['Confirmed'],
             label=country, color=color, linewidth=2)
    plt.fill_between(country_df['Date'], country_df['Confirmed'],
                     alpha=0.1, color=color)

plt.title('Country Comparison: Cumulative Cases', fontsize=14)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Confirmed Cases', fontsize=12)
plt.yscale('log')
plt.legend()
save_viz('7_country_comparison.png')

print("\nAll visualizations generated successfully!")