# Temporal Analysis

This notebook analyzes time-based statistics and scraping performance:
- Scraping timeline and duration
- Download intervals and rates
- Performance efficiency metrics
- Time-based insights and patterns

## Setup and Data Loading

In [None]:
import sys
sys.path.append('../visualizers')
sys.path.append('../utils')

from temporal_stats import create_combined_temporal_analysis
from data_loader import load_report_data, get_temporal_metrics, get_overview_metrics, format_duration
from plot_helpers import apply_global_style, display_config

# Apply the shared plotting style from plot_helpers before any figure is shown.
apply_global_style()

# Load the report once; every later cell reads from `data`.
# The path is relative, so it resolves against the kernel's working directory.
data = load_report_data('../../sample_report.json')
print("Data loaded successfully!")
# `data` is used like a dict here; 'Unknown' is printed when the key is absent.
print(f"Generated at: {data.get('generated_at', 'Unknown')}")

## Performance Metrics Overview

Key performance indicators:

In [None]:
# Build the chart collection once; the cells below index `charts` by key,
# so this cell has to run before any of them.
charts = create_combined_temporal_analysis(data)

# NOTE(review): `.show(config=...)` looks like the Plotly Figure API — confirm in temporal_stats.
performance_fig = charts['performance_metrics']
performance_fig.show(config=display_config())

## Scraping Timeline

Visual timeline of the scraping operation:

In [None]:
# Look up the timeline figure in `charts` and render it with the shared display config.
timeline_fig = charts['scraping_timeline']
timeline_fig.show(config=display_config())

## Scraping Efficiency

Overall efficiency gauge:

In [None]:
# Look up the efficiency gauge in `charts` and render it with the shared display config.
efficiency_fig = charts['efficiency_gauge']
efficiency_fig.show(config=display_config())

## Download Interval Analysis

Average time between downloads:

In [None]:
# Look up the interval-analysis figure in `charts` and render it with the shared display config.
interval_fig = charts['interval_analysis']
interval_fig.show(config=display_config())

## Time Duration Breakdown

Total duration in different time units:

In [None]:
# Look up the time-breakdown figure in `charts` and render it with the shared display config.
breakdown_fig = charts['time_breakdown']
breakdown_fig.show(config=display_config())

## Rate Comparison Analysis

Actual vs theoretical download rates:

In [None]:
# Look up the rate-comparison figure in `charts` and render it with the shared display config.
rate_fig = charts['rate_comparison']
rate_fig.show(config=display_config())

## Temporal Statistics Report

Comprehensive temporal analysis:

In [None]:
# Extract the metrics used by this report. `duration_hours`, `total_images`
# and `avg_interval_seconds` are also read by the "Performance Insights"
# cell below, so their names must stay as they are.
temporal_metrics = get_temporal_metrics(data)
overview_metrics = get_overview_metrics(data)

duration_hours = temporal_metrics['duration_hours']
total_images = overview_metrics['total_images']
avg_interval_seconds = temporal_metrics['avg_interval_seconds']

print("=== TEMPORAL STATISTICS ===")
print(f"Start Time: {temporal_metrics['earliest']}")
print(f"End Time: {temporal_metrics['latest']}")
print(f"Total Duration: {format_duration(duration_hours)}")
print(f"Total Images: {total_images:,}")

print("\n=== DOWNLOAD INTERVALS ===")
print(f"Average Interval: {avg_interval_seconds:.2f} seconds")
print(f"Average Interval: {temporal_metrics['avg_interval_minutes']:.3f} minutes")

print("\n=== PERFORMANCE RATES ===")
if duration_hours > 0:
    # Derive the per-minute and per-second rates from the hourly rate.
    images_per_hour = total_images / duration_hours
    images_per_minute = images_per_hour / 60
    images_per_second = images_per_minute / 60

    print(f"Images per Hour: {images_per_hour:.1f}")
    print(f"Images per Minute: {images_per_minute:.2f}")
    print(f"Images per Second: {images_per_second:.3f}")

    # Theoretical maximum: one image every `avg_interval_seconds`, sustained
    # for a full hour. Efficiency is the actual hourly rate as a percentage
    # of that ceiling.
    if avg_interval_seconds > 0:
        theoretical_max_per_hour = 3600 / avg_interval_seconds
        efficiency = (images_per_hour / theoretical_max_per_hour) * 100

        print("\n=== EFFICIENCY ANALYSIS ===")
        print(f"Theoretical Max Rate: {theoretical_max_per_hour:.1f} images/hour")
        print(f"Actual Rate: {images_per_hour:.1f} images/hour")
        print(f"Efficiency: {efficiency:.1f}%")

        if efficiency >= 90:
            print("🟢 Excellent efficiency - optimal performance")
        elif efficiency >= 70:
            print("🟡 Good efficiency - minor optimization possible")
        else:
            print("🔴 Low efficiency - performance optimization needed")
    else:
        # Say why the section is missing instead of silently omitting it.
        print("\nEfficiency analysis skipped: average interval is not positive.")
else:
    # Without a positive duration no rate can be computed; avoid leaving the
    # section header with nothing underneath it.
    print("Rates unavailable: total duration is not positive.")

## Performance Insights

Analysis and recommendations based on temporal patterns:

In [None]:
# Reads `duration_hours`, `avg_interval_seconds` and `total_images` from the
# "Temporal Statistics Report" cell; run that cell first.
print("=== PERFORMANCE INSIGHTS ===")

# Time efficiency analysis.
# A non-positive duration is treated as "no data" (matching the `> 0` guard
# in the statistics cell) rather than being reported as a very fast run.
if duration_hours <= 0:
    print("⚠️ No duration recorded - operation length not classified")
elif duration_hours < 1:
    print("⚡ Very fast scraping operation (< 1 hour)")
elif duration_hours < 24:
    print(f"🕐 Moderate duration operation ({duration_hours:.1f} hours)")
else:
    days = duration_hours / 24
    print(f"📅 Extended operation ({days:.1f} days)")

# Interval analysis.
# A non-positive interval is likewise "no data", not an aggressive rate.
if avg_interval_seconds <= 0:
    print("⚠️ No interval data available - scraping rate not classified")
elif avg_interval_seconds < 5:
    print("⚡ Very aggressive scraping rate (< 5s intervals)")
    print("   Consider: Rate limiting compliance, server load impact")
elif avg_interval_seconds < 30:
    print("⚖️  Moderate scraping rate (5-30s intervals)")
    print("   Good balance between speed and server courtesy")
else:
    print("🐌 Conservative scraping rate (> 30s intervals)")
    print("   Consider: Potential for optimization if resources allow")

# Volume analysis
if total_images > 50000:
    print("📈 Large-scale dataset collection")
    print("   Excellent for training robust models")
elif total_images > 10000:
    print("📊 Medium-scale dataset collection")
    print("   Good foundation for model development")
else:
    print("📋 Small-scale dataset collection")
    print("   Consider expanding for better model performance")

print("\n=== OPTIMIZATION RECOMMENDATIONS ===")

if duration_hours > 0 and total_images > 0:
    # Images per hour decides which recommendation tier is printed.
    rate = total_images / duration_hours
    if rate < 100:
        print("🔧 Performance Optimization Suggestions:")
        print("   - Implement parallel processing")
        print("   - Optimize network requests")
        print("   - Review delay/timeout settings")
    elif rate < 1000:
        print("⚡ Moderate Performance - Consider:")
        print("   - Batch processing improvements")
        print("   - Connection pooling")
    else:
        print("🚀 Excellent Performance - Maintain:")
        print("   - Current optimization strategies")
        print("   - Monitor for consistency")
else:
    # Keep the section from rendering as a bare header when no rate exists.
    print("No recommendations: download rate could not be computed.")

## Export Options

Save temporal analysis charts:

In [None]:
# Uncomment to save charts
# The *_fig variables are created by the chart cells above, so run those first.
# from plot_helpers import save_plot
# 
# save_plot(performance_fig, 'performance_metrics', 'png', width=1200, height=400)
# save_plot(timeline_fig, 'scraping_timeline', 'png', width=1000, height=400)
# save_plot(efficiency_fig, 'efficiency_gauge', 'png', width=600, height=500)
# save_plot(interval_fig, 'interval_analysis', 'png', width=800, height=500)
# NOTE(review): breakdown_fig had no export line; the size below is assumed — adjust as needed.
# save_plot(breakdown_fig, 'time_breakdown', 'png', width=800, height=500)
# save_plot(rate_fig, 'rate_comparison', 'png', width=800, height=500)
# 
# NOTE(review): the destination is decided by save_plot — confirm it matches this message.
# print("Temporal analysis charts saved to visualizations/output/")