In [25]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from datetime import datetime
import numpy as np

In [26]:
# Line colour for each threshold series. Keys are the threshold strings that
# the plotting functions parse out of the CSV file names (e.g. '0.5'); a
# threshold without an entry here raises KeyError when it is plotted.
VIBRANT_COLORS = {
    '0.4': '#00C9A7',  # Teal green
    '0.5': '#8338EC',  # Bright purple
    '0.6': '#3A86FF',  # Bright blue
    '0.8': '#FB5607'   # Bright orange
}

In [27]:
# Timeline events drawn as dashed vertical marker lines on the plots.
# Maps an ISO date string (YYYY-MM-DD) to a tuple of
# (label text, line colour, vertical text position).
# NOTE(review): add_event_lines unpacks the third tuple element but never
# uses it, so the position values currently have no effect on label placement.
# NOTE(review): colours appear to group events by theme (pink: quarantine
# status, red: legislation/scandal, black: case and variant milestones,
# gold: vaccine approvals) - inferred from the labels, confirm with the author.
events = {
    "2020-03-17": ("First ECQ ongoing; PH under 'state of calamity'", "#FF1493", 1.0),
    "2020-03-25": ("Bayanihan Act signed", "#FF0505", 1.1),
    "2020-04-07": ("ECQ in MM extended to Apr 30", "#FF1493", 1.0),
    "2020-04-24": ("ECQ in MM extended to May 15", "#FF1493", 1.1),
    "2020-05-12": ("IATF puts MECQ in MM", "#FF1493", 1.0),
    "2020-05-26": ("MM mayors agreed GCQ", "#FF1493", 1.1),
    "2020-06-01": ("GCQ starts in MM", "#FF1493", 1.0),
    "2020-08-02": ("100k cases surpassed in PH", "#000000", 1.0),
    "2020-08-04": ("MECQ imposed again in MM", "#FF1493", 1.0),
    "2020-08-19": ("PhilHealth scandal", "#FF0505", 1.1),
    "2020-09-18": ("Bayanihan 2 Act signed", "#FF0505", 1.0),
    "2020-09-28": ("All PH provinces infected", "#000000", 1.1),
    "2020-12-19": ("Alpha in UK", "#000000", 1.0),
    "2021-01-09": ("DOH monitors Beta/Delta overseas", "#000000", 1.0),
    "2021-01-13": ("Alpha in PH", "#000000", 2.0),
    "2021-01-14": ("Pfizer approved", "#FFD700", 3.0),
    "2021-01-28": ("AstraZeneca approved", "#FFD700", 1.0),
    "2021-03-02": ("Beta in Pasay", "#000000", 1.1),
    "2021-03-12": ("Gamma in PH", "#000000", 1.0)
}

In [28]:
def add_event_lines(ax, start_date, end_date, ylim):
    """Mark every timeline event inside a date window on ``ax``.

    Each entry of the module-level ``events`` mapping whose date lies within
    ``[start_date, end_date]`` (both ends inclusive) gets a dashed vertical
    line in the event's colour plus a 45-degree label anchored at ``ylim[1]``.

    Args:
        ax: Axes-like object providing ``axvline`` and ``annotate``.
        start_date: First date of the window; anything ``pd.to_datetime`` accepts.
        end_date: Last date of the window; anything ``pd.to_datetime`` accepts.
        ylim: Indexable whose item ``1`` is the y-value the labels are anchored to.
    """
    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)

    # Keep only the events that fall inside the window (inclusive bounds).
    in_window = {
        date_key: entry
        for date_key, entry in events.items()
        if window_start <= pd.to_datetime(date_key) <= window_end
    }

    # Labels that sit right next to a neighbouring event's label and are
    # therefore pushed further to the right to avoid overlapping text.
    shifted_labels = ("Pfizer approved", "MECQ imposed again in MM")

    # Draw in order of the date keys.
    for date_key in sorted(in_window):
        label, color, _unused_pos = in_window[date_key]
        when = pd.to_datetime(date_key)

        ax.axvline(
            x=when,
            color=color,
            linestyle='--',
            linewidth=2.5,
            alpha=0.8,
            zorder=5,
        )

        x_offset = 14 if label in shifted_labels else 5

        # Rotated label, nudged right/up from the top of the marker line.
        ax.annotate(
            label,
            xy=(when, ylim[1]),
            xytext=(x_offset, 10),
            textcoords='offset points',
            rotation=45,
            ha='left',
            va='bottom',
            fontsize=8,
            zorder=6,
        )

In [29]:
def plot_individual_metric(file_path, metric_type):
    """Plot one metric CSV as a time series with event markers and save a PNG.

    Group name, threshold and window length are taken from the
    underscore-separated parts of the file name (parts 1, 2 and 3), i.e. names
    shaped like ``<metric>_<group>_<threshold>threshold_<N>day.csv`` - that
    shape is inferred from the parsing below.  The figure is written to the
    current working directory as
    ``individual_<metric_type>_<group>_<threshold>_<N>days.png``.

    Args:
        file_path: Path to a CSV with a ``date`` column; the values to plot
            are read from the second column (by position).
        metric_type: ``"netdense"`` selects the "Network Density" labels; any
            other value is labelled "Clustering Coefficient".

    Raises:
        IndexError: If the file name has fewer than four ``_``-separated parts.
        KeyError: If the parsed threshold has no entry in ``VIBRANT_COLORS``.
    """
    # Parse the file name and resolve the colour before any figure exists, so
    # a malformed name or unknown threshold fails without allocating a figure.
    file_name = os.path.basename(file_path)
    parts = file_name.split('_')
    group_name = parts[1]
    threshold = parts[2].replace('threshold', '')
    # '15day.csv' -> '15day' -> '15 days'
    days = parts[3].split('.')[0].replace('day', ' days')
    line_color = VIBRANT_COLORS[threshold]

    # Read data
    df = pd.read_csv(file_path)
    df['date'] = pd.to_datetime(df['date'])

    metric_display = "Network Density" if metric_type == "netdense" else "Clustering Coefficient"

    fig, ax = plt.subplots(figsize=(15, 8))
    try:
        ax.plot(df['date'], df.iloc[:, 1],
                linewidth=2,
                color=line_color)

        # Event window covers the whole plotted date range.  min/max (instead
        # of first/last row) keeps this correct for CSVs that are not sorted
        # by date; an empty file simply gets no event markers.
        if not df.empty:
            add_event_lines(ax, df['date'].min(), df['date'].max(), ax.get_ylim())

        # Customize plot
        ax.grid(True, alpha=0.3)
        ax.set_xlabel('Date')
        ax.set_ylabel(f'{metric_display} Value')

        # Title at bottom with proper formatting
        fig.text(0.5, 0.02,
                 f'{metric_display} - {group_name} - {threshold} Threshold ({days})',
                 ha='center',
                 va='center',
                 fontsize=12)

        # tight_layout first, then reserve room at the bottom for the title
        fig.tight_layout()
        fig.subplots_adjust(bottom=0.10)

        # Save plot in current directory with clean filename
        save_name = f'individual_{metric_type}_{group_name}_{threshold}_{days.replace(" ", "")}.png'
        fig.savefig(save_name)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise figures pile up while looping over many files.
        plt.close(fig)

In [30]:
def plot_group_metrics(group_folder, days_period):
    """Create individual and merged threshold plots for one group folder.

    For each metric (``netdense``, ``cluscoeff``) every CSV in
    ``group_folder`` whose name starts with the metric, contains
    ``'<days_period>day'`` and ends with ``.csv`` is plotted on its own via
    ``plot_individual_metric`` and then overlaid, one line per threshold, in a
    merged figure saved to the current working directory as
    ``merged_<metric>_<group>_<days_period>day.png``.  Metrics without any
    matching file are skipped.

    Args:
        group_folder: Directory named ``<prefix>_<group name>``; everything
            after the first underscore is used as the group name.
        days_period: Window length used to select files, e.g. ``15`` or ``30``.

    Raises:
        IndexError: If the folder name contains no underscore.
        KeyError: If a file's threshold has no entry in ``VIBRANT_COLORS``.
    """
    # normpath drops a trailing separator, which would otherwise make
    # basename() return an empty string.
    group_name = os.path.basename(os.path.normpath(group_folder)).split('_', 1)[1]

    # Define metrics to plot
    metrics = ['netdense', 'cluscoeff']

    for metric in metrics:
        # Find all relevant files for this metric and day period.
        # NOTE(review): the period test is a plain substring match, so e.g.
        # days_period=5 would also select '15day' files.
        files = []
        for file in os.listdir(group_folder):
            if (file.startswith(metric) and
                    f'{days_period}day' in file and
                    file.endswith('.csv')):
                csv_path = os.path.join(group_folder, file)
                files.append(csv_path)
                # Also create individual plot
                plot_individual_metric(csv_path, metric)

        if not files:
            continue

        # Create the merged plot
        fig, ax = plt.subplots(figsize=(15, 8))
        try:
            date_columns = []

            # Plot each threshold
            for file in sorted(files):
                df = pd.read_csv(file)
                df['date'] = pd.to_datetime(df['date'])
                date_columns.append(df['date'])

                # Extract threshold from the file name only, so directory
                # names containing 'threshold' or '_' cannot interfere.
                threshold = os.path.basename(file).split('threshold')[0].split('_')[-1]

                ax.plot(df['date'], df.iloc[:, 1],
                        label=f'Threshold {threshold}',
                        linewidth=2,
                        color=VIBRANT_COLORS[threshold])

            # Event window spans the dates of ALL plotted series, not just
            # the file that happened to be read last.
            all_dates = pd.concat(date_columns)
            if not all_dates.empty:
                add_event_lines(ax, all_dates.min(), all_dates.max(), ax.get_ylim())

            # Customize plot
            ax.grid(True, alpha=0.3)
            ax.set_xlabel('Date')

            metric_name = "Network Density" if metric == "netdense" else "Clustering Coefficient"
            ax.set_ylabel(f'{metric_name} Value')

            ax.legend(bbox_to_anchor=(0.89, 1), loc='upper left')

            # Title at bottom (keeping original group name format)
            fig.text(0.5, 0.02,
                     f'{metric_name} - {group_name} - All Thresholds ({days_period} days)',
                     ha='center',
                     va='center',
                     fontsize=12)

            # tight_layout first, then reserve room at the bottom for the title
            fig.tight_layout()
            fig.subplots_adjust(bottom=0.10)

            # Save plot in current directory
            save_name = f'merged_{metric}_{group_name}_{days_period}day.png'
            fig.savefig(save_name)
        finally:
            # Release the figure even if plotting or saving failed.
            plt.close(fig)

In [31]:
def generate_all_plots(base_dir="../../gt_netdense_cluscoeff", day_periods=(15, 30)):
    """Run ``plot_group_metrics`` for every group folder under ``base_dir``.

    A group folder is a sub-directory whose name starts with ``'pc'``.  Folders
    are processed in sorted order so repeated runs behave identically
    (``os.listdir`` returns entries in arbitrary order), and plain files that
    merely start with ``'pc'`` are ignored.

    Args:
        base_dir: Directory containing the group folders.
        day_periods: Iterable of window lengths; each one is passed to
            ``plot_group_metrics`` for every folder.
    """
    # Get all group folders
    group_folders = sorted(
        os.path.join(base_dir, entry)
        for entry in os.listdir(base_dir)
        if entry.startswith('pc') and os.path.isdir(os.path.join(base_dir, entry))
    )

    # Process each group
    for folder in group_folders:
        print(f"Processing {os.path.basename(folder)}")
        for days_period in day_periods:
            plot_group_metrics(folder, days_period)

In [32]:
# Run the full batch: prints one "Processing ..." line per group folder and
# writes the individual and merged PNG files into the current working directory.
generate_all_plots()

Processing pc4_MSVFaceWearing&Others-0.5
Processing pc1_RSVSymptoms&NewNormalProtocols1-0.6
Processing pc3_RSVFaceWearing&Others-0.5
Processing pc1_MSVSymptoms-0.5
Processing pc1_RSVSymptoms&NewNormalProtocols-0.5
Processing pc3_RSVSymptoms&NewNormalProtocols2-0.6
Processing pc4_MSVSymptoms&NewNormalProtocols-0.6
