In [25]:
import pandas as pd
import numpy as np

def calculate_decay_weight(days_old, half_life=7):
    """
    Calculate exponential decay weight based on age.
    
    Parameters:
    - days_old: Number of days since recommendation
    - half_life: Number of days for weight to decay to 50% (default: 7 days)
    
    Returns weight between 0 and 1
    """
    return np.exp(-np.log(2) * days_old / half_life)

# Load the data
df = pd.read_csv('sp500_ibes_recommendations_2020_2024.csv')

# Convert date columns to datetime
df['anndats'] = pd.to_datetime(df['anndats'])
df['ireccd'] = df['ireccd']-3

print("Processing analyst recommendations with exponential decay...")
print(f"Total recommendations: {len(df)}")

# Parameters for decay
half_life = 7  # Days for weight to decay to 50%
lookback_days = 30  # Consider recommendations from past 30 days

# Get all unique month-ends in the data
all_months = pd.date_range(
    start=df['anndats'].min().replace(day=1),
    end=df['anndats'].max() + pd.offsets.MonthEnd(0),
    freq='M'
)

print(f"Using exponential decay with half_life={half_life} days, lookback={lookback_days} days")

# Storage for monthly aggregations
monthly_results = []

# Process each month-end
for month_end in all_months:
    # Get lookback window
    lookback_start = month_end - pd.Timedelta(days=lookback_days)
    
    # Filter recommendations within lookback window
    month_recs = df[(df['anndats'] > lookback_start) & (df['anndats'] <= month_end)].copy()
    
    if len(month_recs) == 0:
        continue
    
    # Calculate days old for each recommendation (relative to month end)
    month_recs['days_old'] = (month_end - month_recs['anndats']).dt.days
    
    # Calculate decay weights
    month_recs['decay_weight'] = month_recs['days_old'].apply(
        lambda x: calculate_decay_weight(x, half_life=half_life)
    )
    
    # Group by permno and calculate weighted mean
    grouped = month_recs.groupby(['permno', 'ticker_crsp'])
    
    for (permno, ticker), group in grouped:
        # Calculate weighted mean recommendation
        weighted_mean = (group['ireccd'] * group['decay_weight']).sum()
        
        # Calculate unweighted mean for comparison
        unweighted_mean = group['ireccd'].mean()
        
        # Effective count (sum of weights)
        effective_count = group['decay_weight'].sum()
        
        monthly_results.append({
            'permno': permno,
            'ticker': ticker,
            'date': month_end,
            'weighted_mean_recommendation': weighted_mean,
            'unweighted_mean_recommendation': unweighted_mean,
            'num_recommendations': len(group),
            'effective_recommendation_count': effective_count
        })

# Create DataFrame
monthly_mean = pd.DataFrame(monthly_results)

print(f"\nGenerated {len(monthly_mean)} monthly records")

# Sort by permno and date to calculate changes
monthly_mean = monthly_mean.sort_values(['permno', 'date']).reset_index(drop=True)

# Calculate the change in weighted mean recommendation from previous month
monthly_mean['recommendation_change'] = monthly_mean.groupby('permno')['weighted_mean_recommendation'].diff()

# Also calculate unweighted change for comparison
monthly_mean['unweighted_change'] = monthly_mean.groupby('permno')['unweighted_mean_recommendation'].diff()

print("\nCalculating significant changes...")

# Filter for stocks with absolute change >= 1
significant_changes = monthly_mean[monthly_mean['recommendation_change'].abs() >= 1.0].copy()

# Reorder columns
significant_changes = significant_changes[[
    'permno', 'date', 'ticker', 
    'weighted_mean_recommendation', 'recommendation_change',
    'unweighted_mean_recommendation', 'unweighted_change',
    'num_recommendations', 'effective_recommendation_count'
]]

# Sort by date and absolute change (largest changes first)
significant_changes = significant_changes.sort_values(
    ['date', 'recommendation_change'], 
    key=lambda x: x if x.name != 'recommendation_change' else abs(x),
    ascending=[True, False]
).reset_index(drop=True)

# Display results
print("\n" + "="*80)
print("Significant Recommendation Changes (±1 or more) - WITH EXPONENTIAL DECAY")
print("="*80)
print(significant_changes.head(20))
print(f"\nShape: {significant_changes.shape}")
print(f"\nDate range: {significant_changes['date'].min()} to {significant_changes['date'].max()}")
print(f"Total significant changes: {len(significant_changes)}")
print(f"Unique stocks with significant changes: {significant_changes['permno'].nunique()}")

# Summary statistics
print("\n" + "="*80)
print("Summary Statistics (Weighted):")
print(f"Mean absolute change: {significant_changes['recommendation_change'].abs().mean():.3f}")
print(f"Max upgrade (negative change): {significant_changes['recommendation_change'].min():.3f}")
print(f"Max downgrade (positive change): {significant_changes['recommendation_change'].max():.3f}")

# Count upgrades vs downgrades
upgrades = (significant_changes['recommendation_change'] <= -1).sum()
downgrades = (significant_changes['recommendation_change'] >= 1).sum()
print(f"\nUpgrades (change <= -1): {upgrades}")
print(f"Downgrades (change >= 1): {downgrades}")

# Compare weighted vs unweighted
print("\n" + "="*80)
print("Comparison: Weighted vs Unweighted Changes")
print(f"Correlation between weighted and unweighted changes: "
      f"{significant_changes['recommendation_change'].corr(significant_changes['unweighted_change']):.3f}")

# Show sample where weighted differs from unweighted
sample_diff = significant_changes.copy()
sample_diff['abs_difference'] = abs(sample_diff['recommendation_change'] - sample_diff['unweighted_change'])
sample_diff = sample_diff.nlargest(5, 'abs_difference')

print("\nTop 5 cases where weighted differs most from unweighted:")
print(sample_diff[['permno', 'ticker', 'date', 'weighted_mean_recommendation', 
                   'unweighted_mean_recommendation', 'recommendation_change', 
                   'unweighted_change']].to_string(index=False))

# Distribution of changes
print("\n" + "="*80)
print("Distribution of Weighted Recommendation Changes:")
print(significant_changes['recommendation_change'].describe())

# # Save results
# print("\n" + "="*80)
# print("Saving results...")

# # Save significant changes
# significant_changes.to_csv('significant_recommendation_changes_decay.csv', index=False)
# print("Saved: significant_recommendation_changes_decay.csv")

# Save full monthly means with changes
monthly_mean_output = monthly_mean[[
    'permno', 'date', 'ticker',
    'weighted_mean_recommendation', 'recommendation_change',
    'unweighted_mean_recommendation', 'unweighted_change',
    'num_recommendations', 'effective_recommendation_count'
]]
monthly_mean_output.to_csv('monthly_mean_recommendations_decay.csv', index=False)
print("Saved: monthly_mean_recommendations_decay.csv")

print("\n" + "="*80)
print("SUMMARY")
print("="*80)
print(f"Half-life: {half_life} days")
print(f"Lookback window: {lookback_days} days")
print(f"Total monthly records: {len(monthly_mean)}")
print(f"Significant changes (±1): {len(significant_changes)}")
print(f"Unique stocks tracked: {monthly_mean['permno'].nunique()}")

Processing analyst recommendations with exponential decay...
Total recommendations: 27407
Using exponential decay with half_life=7 days, lookback=30 days


  all_months = pd.date_range(



Generated 16960 monthly records

Calculating significant changes...

Significant Recommendation Changes (±1 or more) - WITH EXPONENTIAL DECAY
    permno       date ticker  weighted_mean_recommendation  \
0    12060 2020-01-31     GE                     -2.546394   
1    14593 2020-01-31   AAPL                     -2.359376   
2    15488 2020-01-31   PYPL                     -2.638611   
3    80286 2020-01-31   TSCO                     -2.455617   
4    92611 2020-01-31      V                     -2.410168   
5    48486 2020-01-31   LRCX                     -2.061447   
6    75241 2020-01-31    PXD                     -2.107094   
7    75591 2020-01-31    IEX                      1.811447   
8    12308 2020-01-31   CHTR                     -1.905724   
9    79686 2020-01-31   DECK                     -2.000000   
10   75510 2020-01-31   ADBE                     -1.845900   
11   84769 2020-01-31    APH                      0.742997   
12   59328 2020-01-31   INTC                      1

In [23]:
monthly_mean[monthly_mean['date']=='2020-03-31']

Unnamed: 0,permno,ticker,date,weighted_mean_recommendation,unweighted_mean_recommendation,num_recommendations,effective_recommendation_count,recommendation_change,unweighted_change
1,10104,ORCL,2020-03-31,-0.631912,-1.333333,3,0.518696,-0.493900,-0.333333
87,10138,TROW,2020-03-31,0.500000,1.000000,1,0.500000,0.556608,2.000000
105,10145,ACD,2020-03-31,-0.410168,-0.333333,3,1.863030,0.042694,0.666667
132,10516,ADM,2020-03-31,0.000000,0.000000,1,0.056608,-0.226431,-2.000000
155,10696,FI,2020-03-31,-1.101349,-1.000000,3,1.101349,-1.101349,-1.000000
...,...,...,...,...,...,...,...,...,...
16733,93089,VRSK,2020-03-31,0.000000,0.000000,1,0.905724,0.092875,1.000000
16755,93096,DG,2020-03-31,-1.304753,-1.000000,3,1.320614,0.335917,1.000000
16790,93132,FTNT,2020-03-31,0.000000,0.000000,2,0.922950,1.031267,1.000000
16836,93246,GNRC,2020-03-31,-1.219014,-2.000000,1,0.609507,-0.313290,0.000000
