In [None]:
# Mount Google Drive at /content/drive; the csv_file paths in the search
# configs all live under this mount point (/content/drive/MyDrive/...).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Config Search 2: BEPS 1.0
# - 'search_number' / 'search_name' are printed in the report header by basic_overview().
# - 'csv_file' is the path load_data() reads.
# - 'critical_periods' maps a short key to (start, end, label).
#   critical_period_analysis() keeps posts with start <= creation_time < end,
#   so a boundary date shared by two consecutive periods is counted only once.
#   The four windows below are contiguous from 2013-01-01 to 2017-01-01.
config_search2_beps = {
    'search_number': 2,
    'search_name': 'BEPS 1.0',
    'csv_file': '/content/drive/MyDrive/data/search_2.csv',
    'critical_periods': {
        'early': ('2013-01-01', '2015-01-01', 'Early development (2013-2014)'),
        'consultation': ('2015-01-01', '2015-10-01', 'Pre-release (Jan-Sep 2015)'),
        'release': ('2015-10-01', '2015-12-01', 'Final Reports Release (Oct-Nov 2015)'),
        'post_release': ('2015-12-01', '2017-01-01', 'Post-release (Dec 2015-2016)'),
    },
}

# Config Search 3: TCJA
# 'critical_periods' values are (start, end, label); critical_period_analysis()
# treats start as inclusive and end as exclusive. The five windows below are
# contiguous from 2017-07-01 to 2025-11-01.
config_search3_tcja = {
    'search_number': 3,
    'search_name': 'TCJA',
    'csv_file': '/content/drive/MyDrive/data/search_3.csv',
    'critical_periods': {
        'pre_spike': ('2017-07-01', '2017-11-01', 'Pre-legislative (Jul-Oct 2017)'),
        'spike': ('2017-11-01', '2018-01-01', 'Legislative passage (Nov-Dec 2017)'),
        'post_spike': ('2018-01-01', '2019-01-01', 'Implementation (2018)'),
        'fade': ('2019-01-01', '2024-01-01', 'Strategic fade (2019-2023)'),
        'resurgence': ('2024-01-01', '2025-11-01', '2024-2025 resurgence'),
    },
}

# Config Search 4: Tariffs Trump 1.0
# 'critical_periods' values are (start, end, label); critical_period_analysis()
# treats start as inclusive and end as exclusive. The five windows below are
# contiguous from 2018-01-01 to 2021-01-01.
config_search4_tariffs_trump1 = {
    'search_number': 4,
    'search_name': 'Tariffs Trump 1.0',
    'csv_file': '/content/drive/MyDrive/data/search_4.csv',
    'critical_periods': {
        'announcement': ('2018-01-01', '2018-04-01', 'Initial announcements (Jan-Mar 2018)'),
        'escalation_1': ('2018-04-01', '2018-07-01', 'First escalation (Apr-Jun 2018)'),
        'escalation_2': ('2018-07-01', '2019-01-01', 'Peak escalation (Jul-Dec 2018)'),
        'sustained': ('2019-01-01', '2020-01-01', 'Sustained phase (2019)'),
        'covid': ('2020-01-01', '2021-01-01', 'COVID period (2020)'),
    },
}

# Config Search 5: BEPS 2.0
# 'critical_periods' values are (start, end, label); critical_period_analysis()
# treats start as inclusive and end as exclusive. The five windows below are
# contiguous from 2019-01-01 to 2025-11-01.
config_search5_beps2 = {
    'search_number': 5,
    'search_name': 'BEPS 2.0 / Pillar Two',
    'csv_file': '/content/drive/MyDrive/data/search_5.csv',
    'critical_periods': {
        'development': ('2019-01-01', '2021-05-01', 'Development phase (2019-Apr 2021)'),
        'g7_breakthrough': ('2021-05-01', '2021-07-01', 'G7 Agreement (May-Jun 2021)'),
        'g20_confirmation': ('2021-07-01', '2021-11-01', 'G20 Confirmation (Jul-Oct 2021)'),
        'post_political': ('2021-11-01', '2023-01-01', 'Post-political window (Nov 2021-2022)'),
        'implementation': ('2023-01-01', '2025-11-01', 'Implementation phase (2023-2025)'),
    },
}

# Config Search 6: Tariffs Biden/Trump 2.0
# 'critical_periods' values are (start, end, label); critical_period_analysis()
# treats start as inclusive and end as exclusive. The five windows below are
# contiguous from 2021-01-01 to 2025-11-01.
config_search6_tariffs_biden_trump2 = {
    'search_number': 6,
    'search_name': 'Tariffs Biden/Trump 2.0',
    'csv_file': '/content/drive/MyDrive/data/search_6.csv',
    'critical_periods': {
        'biden_transition': ('2021-01-01', '2022-01-01', 'Biden transition (2021)'),
        'biden_sustained': ('2022-01-01', '2023-01-01', 'Biden sustained (2022)'),
        'biden_late': ('2023-01-01', '2024-11-01', 'Biden late period (2023-Oct 2024)'),
        'trump_transition': ('2024-11-01', '2025-01-01', 'Trump 2.0 transition (Nov-Dec 2024)'),
        'trump_2_active': ('2025-01-01', '2025-11-01', 'Trump 2.0 active (2025)'),
    },
}

# Every search config to report on; the run cell iterates this list in order
# and calls analyze() on each entry.
CONFIGS = [
    config_search2_beps,
    config_search3_tcja,
    config_search4_tariffs_trump1,
    config_search5_beps2,
    config_search6_tariffs_biden_trump2,
]

In [None]:
import pandas as pd
import numpy as np

def load_data(csv_file):
    """Read a posts CSV and attach the date columns the reports rely on.

    Args:
        csv_file: Path (or file-like object) handed straight to ``pd.read_csv``.
            The file must contain a ``creation_time`` column.

    Returns:
        The loaded DataFrame with ``creation_time`` parsed to datetimes and two
        derived columns appended: ``year`` (calendar year) and ``year_month``
        (monthly period).
    """
    posts = pd.read_csv(csv_file)

    # Parse once, then derive both helper columns from the parsed series.
    created = pd.to_datetime(posts['creation_time'])
    posts['creation_time'] = created
    posts['year'] = created.dt.year
    posts['year_month'] = created.dt.to_period('M')
    return posts

def basic_overview(df, config):
    """Print the report header and overall volume figures for one search.

    Args:
        df: Posts DataFrame with a datetime ``creation_time`` column.
        config: Search config dict; ``search_number`` and ``search_name`` are
            read for the header line.

    Prints the total post count, the first/last post dates, the span in months
    and the average posts per month. When there are no usable timestamps the
    date lines are replaced by a short notice, and when the span is shorter
    than one day the average is reported as ``n/a`` instead of dividing by zero.
    """
    days_per_month = 30.44  # mean month length: 365.25 / 12, rounded

    print(f"\n{'='*60}")
    print(f"SEARCH {config['search_number']}: {config['search_name']}")
    print(f"{'='*60}")
    print(f"Total posts: {len(df):,}")

    first_post = df['creation_time'].min()
    last_post = df['creation_time'].max()
    if pd.isna(first_post):
        # Empty frame (or every timestamp missing): min()/max() are NaT, so
        # there is no date range or rate to report.
        print("Date range: n/a (no dated posts)")
        return

    print(f"Date range: {first_post.date()} to {last_post.date()}")

    total_days = (last_post - first_post).days
    total_months = total_days / days_per_month
    print(f"Duration: {total_months:.1f} months")
    if total_months > 0:
        print(f"Average: {len(df)/total_months:.1f} posts/month")
    else:
        # .days is 0 when first and last post are under 24h apart; dividing
        # by the resulting 0.0 months would raise ZeroDivisionError.
        print("Average: n/a (date range shorter than one day)")

def yearly_breakdown(df):
    """Print how many posts fall in each year, oldest year first.

    Args:
        df: Posts DataFrame with a ``year`` column.

    Each line shows the year, its post count and that count as a percentage
    of all rows in ``df``.
    """
    print("\nPosts by year:")

    total_posts = len(df)
    counts_by_year = df['year'].value_counts().sort_index()
    for year, count in counts_by_year.items():
        share = count / total_posts * 100
        print(f"  {year}: {count} posts ({share:.1f}%)")

def critical_period_analysis(df, periods):
    print(f"\nCritical periods:")
    print(f"{'Period':<45} {'Posts':>8} {'Months':>8} {'Posts/Mo':>10} {'Avg React':>10}")
    print("-" * 85)
    
    for key, (start, end, label) in periods.items():
        period_df = df[(df['creation_time'] >= start) & (df['creation_time'] < end)]
        if len(period_df) > 0:
            days = (pd.to_datetime(end) - pd.to_datetime(start)).days
            months = days / 30.44
            ppm = len(period_df) / months if months > 0 else 0
            avg_eng = period_df['statistics.reaction_count'].mean()
            print(f"{label:<45} {len(period_df):>8} {months:>8.1f} {ppm:>10.1f} {avg_eng:>10.1f}")

def engagement_metrics(df):
    """Print mean, median and maximum for each engagement column present.

    Args:
        df: Posts DataFrame. The reaction, comment and share count columns
            under the ``statistics.`` prefix are reported when they exist;
            any that are absent are skipped without error.
    """
    metric_columns = (
        'statistics.reaction_count',
        'statistics.comment_count',
        'statistics.share_count',
    )

    print(f"\nEngagement metrics:")
    print(f"{'Metric':<20} {'Mean':>10} {'Median':>10} {'Max':>12}")
    print("-" * 55)

    for col in metric_columns:
        if col not in df.columns:
            continue
        values = df[col]
        # 'statistics.reaction_count' -> 'Reaction Count'
        name = col.replace('statistics.', '').replace('_', ' ').title()
        mean, median, peak = values.mean(), values.median(), values.max()
        print(f"{name:<20} {mean:>10.1f} {median:>10.1f} {peak:>12,.0f}")

def top_actors(df, n=10):
    """Print the ``n`` pages with the most posts, ranked from 1.

    Args:
        df: Posts DataFrame with a ``post_owner.name`` column.
        n: How many pages to list (default 10).

    Page names are cut to 40 characters. Each share is the page's post count
    as a percentage of all rows in ``df``.
    """
    print(f"\nTop {n} posting pages:")

    total_posts = len(df)
    leaders = df['post_owner.name'].value_counts().head(n)
    rank = 0
    for page, count in leaders.items():
        rank += 1
        share = count / total_posts * 100
        print(f"  {rank}. {page[:40]}: {count} posts ({share:.1f}%)")

def top_posts(df, n=5):
    """Print the ``n`` posts with the highest reaction counts.

    Args:
        df: Posts DataFrame with ``statistics.reaction_count``,
            ``post_owner.name`` and datetime ``creation_time`` columns.
        n: How many posts to list (default 5).

    Each line shows the rank, the owning page's name cut to 30 characters,
    the reaction count and the post date. A post whose owner name is missing
    is shown as ``(unknown page)``; slicing the NaN directly would raise
    ``TypeError``.
    """
    print(f"\nTop {n} posts by engagement:")

    ranked = df.nlargest(n, 'statistics.reaction_count')
    for i, (_, row) in enumerate(ranked.iterrows(), 1):
        owner = row['post_owner.name']
        # Missing names come through as NaN (a float), which cannot be sliced.
        owner = '(unknown page)' if pd.isna(owner) else str(owner)[:30]
        print(f"  {i}. {owner} - {row['statistics.reaction_count']:,.0f} reactions ({row['creation_time'].date()})")

def analyze(config):
    """Load one search's CSV and print every report section for it.

    Args:
        config: Search config dict providing ``csv_file`` and
            ``critical_periods`` (plus the header fields that
            ``basic_overview`` reads).

    Returns:
        The DataFrame produced by ``load_data`` for this search.
    """
    posts = load_data(config['csv_file'])

    # Sections that need values from the config.
    basic_overview(posts, config)
    yearly_breakdown(posts)
    critical_period_analysis(posts, config['critical_periods'])

    # Sections that only need the posts themselves.
    for report in (engagement_metrics, top_actors, top_posts):
        report(posts)

    return posts

In [None]:
# Run analysis for all searches
# Each search is processed in CONFIGS order; the DataFrame returned by
# analyze() is not kept.
for config in CONFIGS:
    try:
        analyze(config)
    except FileNotFoundError:
        # Only a missing CSV is tolerated; any other exception still propagates.
        print(f"\nSkipping Search {config['search_number']}: file not found")
    # Blank line after each search's report (or its skip notice).
    print()