# Earnings Surprise Tracker & Post-Earnings Drift (FTSE 350)

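This notebook computes earnings surprises (reported vs. consensus EPS) and studies average returns 1, 5, 20 and 60 trading days after announcements.
By default it runs on the bundled sample data; switch `PRICES_SOURCE` to `'yfinance'` to download live prices (internet required).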

In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, sys, os
from datetime import datetime
# Where daily prices come from: 'sample' reads the bundled CSV, 'yfinance'
# downloads them (internet required).
PRICES_SOURCE = 'sample'   # 'sample' or 'yfinance'

# Earnings events (one row per announcement) and the ticker universe.
earnings = pd.read_csv('../data/sample_earnings.csv', parse_dates=['Date'])
tickers = pd.read_csv('../data/sample_ftse350.csv')['Ticker'].tolist()

if PRICES_SOURCE == 'sample':
    prices = pd.read_csv('../data/sample_prices.csv', parse_dates=['Date']).set_index('Date')
else:
    import yfinance as yf
    start = '2020-01-01'
    end = datetime.today().strftime('%Y-%m-%d')
    # Download every ticker in the universe plus any that only appear in the
    # earnings file, so each event can be matched to a price column.
    all_tickers = sorted(set(tickers) | set(earnings['Ticker'].tolist()))
    # auto_adjust=False is passed explicitly: newer yfinance releases default
    # to auto_adjust=True, in which case no 'Adj Close' column is returned and
    # the selection below raises KeyError.
    prices = yf.download(
        all_tickers, start=start, end=end, progress=False, auto_adjust=False
    )['Adj Close']
    if isinstance(prices, pd.Series):
        # A lone Series is named after the price field, not the ticker; relabel
        # it so the per-ticker column lookup further down can find it.
        if len(all_tickers) == 1:
            prices = prices.rename(all_tickers[0])
        prices = prices.to_frame()
    prices = prices.dropna(how='all')
    # Flatten tuple column labels down to their second element.
    # NOTE(review): assumes tuple labels are (field, ticker) - confirm against
    # the installed yfinance version.
    prices.columns = [c if isinstance(c, str) else c[1] for c in prices.columns]

# forward_return locates dates with searchsorted, which is only meaningful on
# an ascending index.
prices = prices.sort_index()

def compute_surprise(row):
    """Return the earnings surprise as a fraction of the consensus magnitude.

    Args:
        row: Mapping-like record exposing ``'ConsensusEPS'`` and
            ``'ReportedEPS'``.

    Returns:
        ``(reported - consensus) / abs(consensus)``, or NaN when the consensus
        is zero or either figure is missing.
    """
    consensus = row['ConsensusEPS']
    reported = row['ReportedEPS']
    # A zero consensus has no meaningful percentage surprise.
    if consensus == 0:
        return np.nan
    if pd.isna(consensus) or pd.isna(reported):
        return np.nan
    # Dividing by the magnitude keeps the sign correct when consensus is negative.
    return (reported - consensus) / abs(consensus)

def bucket_surprise(s):
    """Classify a fractional surprise into a labelled bucket.

    Args:
        s: Surprise as a fraction (0.05 means +5%); may be missing.

    Returns:
        ``'NA'`` for a missing value, the positive label above +5%, the
        negative label below -5%, and the neutral label otherwise (both
        boundaries are neutral).
    """
    if pd.isna(s):
        label = 'NA'
    elif s > 0.05:
        label = 'Positive (>+5%)'
    elif s < -0.05:
        label = 'Negative (<-5%)'
    else:
        label = 'Neutral (-5%..+5%)'
    return label

def forward_return(series, date, h):
    """Return the simple return from the event date to ``h`` observations later.

    The event is aligned to the first index entry on or after ``date`` (the
    next available trading day when ``date`` itself has no price).

    Args:
        series: Price series indexed by date. The index must be sorted
            ascending, because positions are located with ``searchsorted``.
        date: Event date.
        h: Horizon, counted in rows of ``series`` from the aligned date.

    Returns:
        ``series[aligned + h] / series[aligned] - 1.0``, or NaN when the
        aligned date or the horizon falls outside the series, or when the base
        price is zero.
    """
    n = len(series)
    # With the default side='left' this is the position of `date` if present,
    # otherwise of the first later entry - one lookup covers both cases.
    start = series.index.searchsorted(date)
    if start >= n:
        return np.nan
    end = start + h
    # Reject horizons outside the data; a negative position would otherwise
    # wrap around to the end of the series via iloc.
    if end < 0 or end >= n:
        return np.nan
    base = series.iloc[start]
    # A zero base would produce inf and distort any average taken downstream.
    if base == 0:
        return np.nan
    return series.iloc[end] / base - 1.0

# Forward-return horizons, counted in rows of each ticker's price series.
horizons = [1, 5, 20, 60]
earnings['Surprise'] = earnings.apply(compute_surprise, axis=1)
earnings['Bucket'] = earnings['Surprise'].apply(bucket_surprise)

horizon_cols = [f'+{h}d' for h in horizons]
# Clean each ticker's price series once instead of once per event and horizon.
clean_prices = {}
rows = []
for _, r in earnings.iterrows():
    ticker = r['Ticker']
    if ticker in prices.columns:
        if ticker not in clean_prices:
            clean_prices[ticker] = prices[ticker].dropna()
        px = clean_prices[ticker]
        s = {col: forward_return(px, r['Date'], h) for col, h in zip(horizon_cols, horizons)}
    else:
        # No price history for this ticker: keep the event, leave returns empty.
        s = dict.fromkeys(horizon_cols, np.nan)
    rows.append(s)
# Reuse the earnings index so the column-wise concat lines rows up by label,
# and fix the columns so an empty earnings table still yields every horizon.
fwd = pd.DataFrame(rows, index=earnings.index, columns=horizon_cols)

result = pd.concat([earnings, fwd], axis=1)
# Create the output folder on first run so the export does not fail.
os.makedirs('../outputs', exist_ok=True)
result.to_csv('../outputs/event_metrics.csv', index=False)

# Mean forward return per surprise bucket, transposed so that rows are
# horizons and columns are buckets.
agg = result.groupby('Bucket')[[f'+{h}d' for h in horizons]].mean().T
# Convert the '+5d'-style labels to integer day counts for a numeric x-axis.
# Stripping the '+' and 'd' characters directly avoids a str.replace chain
# whose '+' pattern depends on the pandas regex default.
agg.index = [int(label.strip('+d')) for label in agg.index]

plt.figure()
# Fixed order keeps the legend stable; events bucketed as 'NA' are not drawn.
for bucket in ['Positive (>+5%)', 'Neutral (-5%..+5%)', 'Negative (<-5%)']:
    if bucket in agg.columns:
        agg[bucket].sort_index().plot(label=bucket)
plt.legend()
plt.title('Average Post-Earnings Drift by Surprise Bucket')
plt.xlabel('Days after earnings')
plt.ylabel('Average return')
plt.tight_layout()
plt.show()