In [6]:
import pandas as pd
import numpy as np

# Build a monthly panel of analyst recommendations: one row per stock
# (permno / ticker) and calendar month, holding the mean recommendation code
# and the number of recommendations announced in that month. The panel is
# printed for inspection and written to monthly_mean_recommendations.csv.

# Load the data from the first script
df = pd.read_csv('sp500_ibes_recommendations_2020_2024.csv')

# Convert date columns to datetime
df['anndats'] = pd.to_datetime(df['anndats'])

# Create year-month column for grouping
df['year_month'] = df['anndats'].dt.to_period('M')

# Calculate mean recommendation score by ticker and month
# Lower is better (1=Strong Buy, 5=Sell)
#
# groupby() silently discards every row whose key is NaN. A recommendation
# with no permno or no announcement date cannot be assigned to a stock-month,
# so those rows are dropped explicitly here. A missing ticker_crsp is only a
# missing label (permno still identifies the stock), so dropna=False keeps
# those recommendations in the panel instead of losing them.
monthly_mean = (
    df.dropna(subset=['permno', 'anndats'])
    .groupby(['permno', 'ticker_crsp', 'year_month'], dropna=False)
    .agg(
        mean_recommendation=('ireccd', 'mean'),   # average recommendation code
        num_recommendations=('ireccd', 'count'),  # counts non-null codes only
    )
    .reset_index()
    .rename(columns={'ticker_crsp': 'ticker'})
)

# Convert year_month to last day of month (YYYY-MM-DD format).
# MonthEnd(0) leaves a date that already is a month end unchanged and rolls
# any other date forward to one, so the result is the month's last day
# regardless of which edge of the period to_timestamp() anchors on.
monthly_mean['date'] = monthly_mean['year_month'].dt.to_timestamp('M') + pd.offsets.MonthEnd(0)
monthly_mean = monthly_mean.drop(columns='year_month')

# Reorder columns: permno, date, ticker, mean_recommendation, num_recommendations
monthly_mean = monthly_mean[['permno', 'date', 'ticker', 'mean_recommendation', 'num_recommendations']]

# Sort by date and permno
monthly_mean = monthly_mean.sort_values(['date', 'permno']).reset_index(drop=True)

# Display results
print("Mean Recommendation Score by Month")
print("="*60)
print(monthly_mean.head(20))
print(f"\nShape: {monthly_mean.shape}")
print(f"\nDate range: {monthly_mean['date'].min()} to {monthly_mean['date'].max()}")
print(f"Total unique stocks: {monthly_mean['permno'].nunique()}")
print(f"Average recommendations per stock-month: {monthly_mean['num_recommendations'].mean():.2f}")

# Check for any anomalies
# A stock-month whose rows all have a null ireccd gets count 0 and a NaN mean.
no_valid_code = monthly_mean['num_recommendations'] == 0
print("\n" + "="*60)
print("Checking for anomalies:")
print(f"Rows with num_recommendations = 0: {no_valid_code.sum()}")
print(f"Rows with NaN mean_recommendation: {monthly_mean['mean_recommendation'].isna().sum()}")
print(f"Rows with missing ticker: {monthly_mean['ticker'].isna().sum()}")

# Show any problematic rows if they exist
if no_valid_code.any():
    print("\nRows with num_recommendations = 0:")
    print(monthly_mean[no_valid_code])

# Save to CSV
monthly_mean.to_csv('monthly_mean_recommendations.csv', index=False)
print("\nFile saved: monthly_mean_recommendations.csv")

Mean Recommendation Score by Month
    permno       date ticker  mean_recommendation  num_recommendations
0    10107 2020-01-31   MSFT             2.000000                    1
1    10138 2020-01-31   TROW             2.000000                    1
2    10145 2020-01-31    ACD             2.000000                    1
3    10516 2020-01-31    ADM             5.000000                    1
4    10696 2020-01-31     FI             3.000000                    2
5    11308 2020-01-31     KO             2.000000                    1
6    11404 2020-01-31     ED             4.000000                    2
7    11618 2020-01-31   FAST             3.000000                    1
8    11674 2020-01-31    DTE             2.500000                    2
9    11762 2020-01-31    ETN             2.000000                    3
10   11786 2020-01-31   SIVB             2.000000                    1
11   11850 2020-01-31    XON             4.000000                    1
12   11891 2020-01-31    MGM             3

In [8]:
# Inspect every stock-month row stamped with the March 2020 month-end date.
monthly_mean.loc[monthly_mean['date'] == '2020-03-31']

Unnamed: 0,permno,date,ticker,mean_recommendation,num_recommendations
594,10104,2020-03-31,ORCL,1.666667,3
595,10138,2020-03-31,TROW,4.000000,1
596,10145,2020-03-31,ACD,2.666667,3
597,10516,2020-03-31,ADM,3.000000,1
598,10696,2020-03-31,FI,2.000000,3
...,...,...,...,...,...
964,93089,2020-03-31,VRSK,3.000000,1
965,93096,2020-03-31,DG,2.000000,3
966,93132,2020-03-31,FTNT,3.000000,2
967,93246,2020-03-31,GNRC,1.000000,1
