In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from datetime import datetime
import numpy as np

In [10]:
# Events drawn as dashed vertical markers on every figure.
#   key   : event date as an ISO "YYYY-MM-DD" string
#   value : (annotation label, hex line color, vertical text position)
# NOTE(review): add_event_lines unpacks the third tuple element into `_` and
# never reads it, so the vertical text positions below currently have no
# effect on the rendered plots.
# NOTE(review): colors appear to group events by category (quarantine status,
# legislation/scandal, case/variant milestones, vaccine approvals) -- confirm.
events = {
    "2020-03-17": ("First ECQ ongoing; PH under 'state of calamity'", "#FF1493", 1.0),
    "2020-03-25": ("Bayanihan Act signed", "#FF0505", 1.1),
    "2020-04-07": ("ECQ in MM extended to Apr 30", "#FF1493", 1.0),
    "2020-04-24": ("ECQ in MM extended to May 15", "#FF1493", 1.1),
    "2020-05-12": ("IATF puts MECQ in MM", "#FF1493", 1.0),
    "2020-05-26": ("MM mayors agreed GCQ", "#FF1493", 1.1),
    "2020-06-01": ("GCQ starts in MM", "#FF1493", 1.0),
    "2020-08-02": ("100k cases surpassed in PH", "#000000", 1.0),
    "2020-08-04": ("MECQ imposed again in MM", "#FF1493", 1.0),
    "2020-08-19": ("PhilHealth scandal", "#FF0505", 1.1),
    "2020-09-18": ("Bayanihan 2 Act signed", "#FF0505", 1.0),
    "2020-09-28": ("All PH provinces infected", "#000000", 1.1),
    "2020-12-19": ("Alpha in UK", "#000000", 1.0),
    "2021-01-09": ("DOH monitors Beta/Delta overseas", "#000000", 1.0),
    "2021-01-13": ("Alpha in PH", "#000000", 2.0),
    "2021-01-14": ("Pfizer approved", "#FFD700", 3.0),
    "2021-01-28": ("AstraZeneca approved", "#FFD700", 1.0),
    "2021-03-02": ("Beta in Pasay", "#000000", 1.1),
    "2021-03-12": ("Gamma in PH", "#000000", 1.0)
}

In [11]:
def get_formatted_metric_name(metric_type):
    """Return the human-readable label for a metric key.

    ``"clustering_coefficient"`` maps to ``"Clustering Coefficient"``; every
    other value falls back to ``"Network Density"``.
    """
    is_clustering = metric_type == "clustering_coefficient"
    return "Clustering Coefficient" if is_clustering else "Network Density"

In [12]:
def add_event_lines(ax, start_date, end_date, ylim):
    """Draw a dashed vertical line and a rotated label for each event in range.

    Events come from the module-level ``events`` mapping
    (ISO date string -> ``(label, color, position)``). The third tuple element
    is ignored here.

    Parameters
    ----------
    ax : matplotlib Axes
        Target axes; only ``axvline`` and ``annotate`` are called on it.
    start_date, end_date : anything accepted by ``pd.to_datetime``
        Inclusive bounds; events outside this window are skipped.
    ylim : sequence of two numbers
        Y-limits of ``ax``. ``ylim[1]`` is the data-space anchor of each label.

    Returns
    -------
    None

    Notes
    -----
    An earlier revision computed a staggered vertical text position for events
    less than five days apart, but the value was never passed to ``annotate``.
    That dead computation is removed. Label collisions are handled only by the
    per-label horizontal offsets below, so the drawn output is unchanged.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Labels that sit right next to another event get a larger horizontal
    # nudge (in points) so the rotated texts do not overlap.
    label_x_offsets = {
        "Pfizer approved": 14,
        "MECQ imposed again in MM": 14,
    }
    default_x_offset = 5

    # Parse every event date exactly once and keep only those in the window.
    in_range = []
    for date, (label, color, _) in events.items():
        event_date = pd.to_datetime(date)
        if start_date <= event_date <= end_date:
            in_range.append((event_date, label, color))

    # Draw in chronological order.
    in_range.sort(key=lambda item: item[0])

    for event_date, label, color in in_range:
        ax.axvline(x=event_date, color=color, linestyle='--', linewidth=2.5, alpha=0.8, zorder=5)

        # Anchor at the top of the data range; the offset (in points) lifts
        # the text above the axes and shifts it right of the line.
        ax.annotate(label,
                    xy=(event_date, ylim[1]),
                    xytext=(label_x_offsets.get(label, default_x_offset), 10),
                    textcoords='offset points',
                    rotation=45,
                    ha='left',
                    va='bottom',
                    fontsize=8,
                    zorder=6)

In [13]:
def plot_individual_graph(file_path, metric_type):
    """Render one CSV time series as a PNG with event markers.

    Parameters
    ----------
    file_path : str
        Path to a CSV with a ``date`` column; the second column is plotted.
        The basename is split on ``_`` and must have at least four parts:
        part 1 is the data type, part 2 the threshold, part 3 the window
        (e.g. ``15day``) followed by the extension.
    metric_type : str
        Metric key; used for the axis/caption label and as the output
        filename prefix.

    Side effects
    ------------
    Writes ``{metric_type}_{data_type}_{threshold}_{days}.png`` relative to the
    current working directory.

    Raises
    ------
    IndexError
        If the basename has fewer than four ``_``-separated parts.
    """
    # Read data
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])

    # Extract file information from path
    file_name = os.path.basename(file_path)
    parts = file_name.split('_')
    data_type = parts[1].upper()  # MSV or RSV
    threshold = parts[2]   # threshold value
    days_filename = parts[3].split('.')[0]  # keep as "15day" for filename
    days_display = days_filename.replace("day", " days")  # "15 days" for display

    metric_name = get_formatted_metric_name(metric_type)

    # Explicit figure/axes handles so the figure can be closed reliably.
    fig, ax = plt.subplots(figsize=(15, 8))
    try:
        ax.plot(df['date'], df[df.columns[1]], linewidth=2)

        # Capture y-limits after plotting; the event labels anchor to the top.
        ylim = ax.get_ylim()
        # min/max instead of first/last row: correct for unsorted input, and
        # an empty frame yields NaT bounds (no events) instead of IndexError.
        add_event_lines(ax, df['date'].min(), df['date'].max(), ylim)

        ax.grid(True, alpha=0.3)
        ax.set_xlabel('Date')
        ax.set_ylabel(f'{metric_name} Value')

        # Caption below the axes instead of a title above it, where the
        # rotated event labels are drawn.
        fig.text(0.5, 0.02,
                 f'{metric_name} ({data_type}) – {threshold} Threshold ({days_display})',
                 ha='center',
                 va='center',
                 fontsize=12)

        fig.tight_layout()
        fig.subplots_adjust(bottom=0.10)

        # Save plot with original filename format (without space)
        fig.savefig(f'{metric_type}_{data_type.lower()}_{threshold}_{days_filename}.png')
    finally:
        # Always release the figure, even if plotting or saving fails;
        # this function is called in a loop over many files.
        plt.close(fig)

In [14]:
def plot_merged_graph(files, metric_type, data_type, days):
    """Overlay several threshold series on one figure and save it as a PNG.

    Parameters
    ----------
    files : iterable of str
        CSV paths, each with a ``date`` column; the second column of each is
        plotted. The threshold shown in the legend is the third
        ``_``-separated part of each basename. Files are drawn in sorted
        path order.
    metric_type : str
        Metric key; used for the label and as the output filename prefix.
    data_type : str
        Data-type token used in the caption (upper-cased) and output filename.
    days : str
        Window token such as ``"15day"``; shown as ``"15 days"`` in the caption.

    Side effects
    ------------
    Writes ``{metric_type}_{data_type}_merged_{days}.png`` relative to the
    current working directory.

    Raises
    ------
    ValueError
        If ``files`` is empty.
    """
    ordered_files = sorted(files)
    if not ordered_files:
        # Fail before opening a figure; previously this surfaced as an
        # UnboundLocalError on `df` after the (empty) loop.
        raise ValueError("plot_merged_graph requires at least one input file")

    # Color map for thresholds
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

    # Format days strings
    days_filename = days  # keep as "15day" for filename
    days_display = days.replace("day", " days")  # "15 days" for display

    metric_name = get_formatted_metric_name(metric_type)

    fig, ax = plt.subplots(figsize=(15, 8))
    try:
        date_bounds = []
        for i, file_path in enumerate(ordered_files):
            df = pd.read_csv(file_path)
            df['date'] = pd.to_datetime(df['date'])

            # Extract threshold from filename
            threshold = os.path.basename(file_path).split('_')[2]

            ax.plot(df['date'], df[df.columns[1]],
                    label=f'Threshold {threshold}',
                    linewidth=2,
                    # Wrap around the palette so more than four files
                    # no longer raise IndexError.
                    color=colors[i % len(colors)])

            date_bounds.append(df['date'].min())
            date_bounds.append(df['date'].max())

        # Event window spans every plotted series, not just the last file.
        # NaT bounds (from empty files) are dropped.
        valid_bounds = [bound for bound in date_bounds if pd.notna(bound)]
        if valid_bounds:
            add_event_lines(ax, min(valid_bounds), max(valid_bounds), ax.get_ylim())

        ax.grid(True, alpha=0.3)
        ax.set_xlabel('Date')
        ax.set_ylabel(f'{metric_name} Value')
        ax.legend(bbox_to_anchor=(0.89, 1), loc='upper left')

        # Caption below the axes instead of a title above it, where the
        # rotated event labels are drawn.
        fig.text(0.5, 0.02,
                 f'{metric_name} ({data_type.upper()}) – All Thresholds ({days_display})',
                 ha='center',
                 va='center',
                 fontsize=12)

        fig.tight_layout()
        fig.subplots_adjust(bottom=0.10)

        # Save plot with original filename format (without space)
        fig.savefig(f'{metric_type}_{data_type}_merged_{days_filename}.png')
    finally:
        # Always release the figure, even if reading, plotting or saving fails.
        plt.close(fig)

In [15]:
# Run the visualizations
def generate_all_visualizations():
    """Generate every individual and merged figure for both metrics.

    For each metric directory, one figure is produced per ``.csv`` file. Then
    one merged figure is produced per (data type, window) combination that has
    at least one matching CSV. Takes no arguments and returns ``None``; output
    PNGs are written by the plotting helpers.
    """
    # Define base directories
    cluscoeff_dir = "../../gt_netdense_cluscoeff/gt_cluscoeff_rsvmsv_15or30day"
    netdense_dir = "../../gt_netdense_cluscoeff/gt_netdense_rsvmsv_15or30day"

    sources = [(cluscoeff_dir, "clustering_coefficient"),
               (netdense_dir, "network_density")]

    # List each directory once and keep only CSVs. The merged selection used
    # to skip the extension check, so any stray file whose name contained the
    # tokens (e.g. an image or checkpoint) would have been passed to read_csv.
    csv_files = {
        directory: sorted(f for f in os.listdir(directory) if f.endswith(".csv"))
        for directory, _ in sources
    }

    # Generate individual graphs
    for directory, metric in sources:
        for file in csv_files[directory]:
            plot_individual_graph(os.path.join(directory, file), metric)

    # Generate merged graphs
    for directory, metric in sources:
        for data_type in ['msv', 'rsv']:
            for days in ['15day', '30day']:
                # Substring match on the filename, as before.
                # NOTE(review): this assumes no filename contains both
                # data-type tokens -- confirm against the data directory.
                files = [
                    os.path.join(directory, f) for f in csv_files[directory]
                    if data_type in f and days in f
                ]
                if files:
                    plot_merged_graph(files, metric, data_type, days)

In [16]:
# Entry point: render every individual and merged figure. The plotting helpers
# save their PNGs with relative filenames, i.e. into the current working directory.
generate_all_visualizations()