In [4]:
import wrds
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt

In [5]:
# Open the WRDS session; `conn` is the handle the later `conn.raw_sql(...)`
# call sends its query through.
conn = wrds.Connection()

Loading library list...
Done


In [2]:
# SQL against comp.funda: selects gvkey, datadate and `at` for rows passing the
# indfmt/datafmt/popsrc/consol filters, with datadate between 1961-12-31 and
# 2002-12-31 (both bounds inclusive).
# NOTE(review): `at` is presumably Compustat total assets - confirm.
ASSETG_query = """
SELECT gvkey, datadate, at
FROM comp.funda
WHERE indfmt='INDL' 
AND datafmt='STD' 
AND popsrc='D' 
AND consol='C' 
AND datadate >= '1961-12-31' 
AND datadate <= '2002-12-31'
"""
# SQL against comp.secd: selects gvkey, datadate and prccd for rows whose
# datadate falls in a June between 1961-06-30 and 2002-06-30 (inclusive).
# NOTE(review): no cell visible in this notebook executes this query - confirm
# whether it is still needed.
june_query = """
SELECT gvkey, datadate, prccd
FROM comp.secd
WHERE datadate >= '1961-06-30' 
AND datadate <= '2002-06-30'
AND EXTRACT(MONTH FROM datadate) = 6
"""

In [6]:
def calculate_assetg(group):
    """Return a copy of ``group`` with lagged ``at`` and the ASSETG growth rate.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single firm containing an ``at`` column. The rows are
        shifted by position, so they must already be in chronological order.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) with three extra columns:
        ``at_lag2`` and ``at_lag1`` (``at`` shifted by two and one rows) and
        ``ASSETG = (at_lag1 - at_lag2) / at_lag2``.  ``ASSETG`` is NaN wherever
        a lag is missing or ``at_lag2`` is zero.
    """
    at_lag2 = group['at'].shift(2)
    at_lag1 = group['at'].shift(1)

    # Mask a zero base so the ratio becomes NaN rather than +/-inf; the
    # ``at_lag2`` column itself keeps the raw zero for downstream filters.
    denominator = at_lag2.where(at_lag2 != 0)

    # ``assign`` builds a new frame: functions handed to ``groupby.apply``
    # should not mutate the group they are given.
    return group.assign(
        at_lag2=at_lag2,
        at_lag1=at_lag1,
        ASSETG=(at_lag1 - at_lag2) / denominator,
    )

In [7]:
# Execute ASSETG_query over the open WRDS connection; the result is kept in
# ASSETG_data and handled as a pandas DataFrame by the cells that follow.
ASSETG_data = conn.raw_sql(ASSETG_query)

In [8]:
# Tidy the raw pull in one pass: parse datadate, drop rows with any missing
# value, order each firm's rows chronologically, and keep only December dates.
ASSETG_data = (
    ASSETG_data
    .assign(datadate=lambda frame: pd.to_datetime(frame['datadate']))
    .dropna()
    .sort_values(['gvkey', 'datadate'])
    .loc[lambda frame: frame['datadate'].dt.month == 12]
)

In [None]:
# Build the same at_lag2 / at_lag1 / ASSETG columns that calculate_assetg
# produces, but with vectorised per-firm shifts.  This keeps `gvkey` as an
# ordinary column and the index flat on every pandas version, which
# `groupby('gvkey').apply(...)` does not guarantee.
ASSETG_data = ASSETG_data.copy()
fiscal_year = ASSETG_data['datadate'].dt.year
for lag in (2, 1):
    lagged_at = ASSETG_data.groupby('gvkey')['at'].shift(lag)
    lagged_year = fiscal_year.groupby(ASSETG_data['gvkey']).shift(lag)
    # A positional shift is only a true "lag years ago" value when the row it
    # came from is exactly `lag` calendar years earlier; otherwise the firm has
    # a gap in its December observations and the lag is left missing.
    ASSETG_data[f'at_lag{lag}'] = lagged_at.where(fiscal_year - lagged_year == lag)

ASSETG_data['ASSETG'] = (
    (ASSETG_data['at_lag1'] - ASSETG_data['at_lag2']) / ASSETG_data['at_lag2']
)

# Keep only rows where both lags exist and are non-zero.
has_usable_lags = (
    ASSETG_data['at_lag1'].notna() & (ASSETG_data['at_lag1'] != 0)
    & ASSETG_data['at_lag2'].notna() & (ASSETG_data['at_lag2'] != 0)
)
ASSETG_data = ASSETG_data[has_usable_lags].copy()

# Cross-sectional mean / median / std of ASSETG for each datadate.  The
# dropna() result is assigned (it was previously discarded), so dates with an
# undefined statistic are excluded from annual_stats itself.
annual_stats = (
    ASSETG_data.groupby('datadate')['ASSETG']
    .agg(['mean', 'median', 'std'])
    .dropna()
)
annual_stats

In [10]:
# Label each row with the 0-9 decile of its ASSETG value.
# NOTE(review): the cut points come from the pooled sample (all dates at once),
# not from each year's cross-section - confirm this is intended.
assetg_values = ASSETG_data['ASSETG']
ASSETG_data['decile'] = pd.qcut(assetg_values, q=10, labels=False)

In [11]:
def has_valid_period(df, gvkey, center_year, back_years=5, forward_years=4):
    """Tell whether ``gvkey`` has a row for every year of a window.

    The window runs from ``center_year - back_years`` through
    ``center_year + forward_years`` inclusive, and a year counts as present
    when at least one row of ``df`` for that ``gvkey`` has a ``datadate`` in it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``gvkey`` column and a datetime ``datadate`` column.
    gvkey : object
        Firm identifier compared against ``df['gvkey']``.
    center_year : int
        Year the window is anchored on.
    back_years, forward_years : int
        Number of years the window extends before and after ``center_year``.

    Returns
    -------
    bool
        True when no year of the window is missing for this firm.
    """
    first_year = center_year - back_years
    last_year = center_year + forward_years

    row_years = df['datadate'].dt.year
    in_window = (df['gvkey'] == gvkey) & (row_years >= first_year) & (row_years <= last_year)
    observed_years = set(row_years[in_window])

    required_years = set(range(first_year, last_year + 1))
    return required_years <= observed_years

In [12]:
# For every firm and every center year whose full window
# [center_year - 5, center_year + 4] is present, record the mean ASSETG of the
# firm's window rows within each decile label.
#
# The panel is split by firm once and window membership is tested with a set
# lookup, instead of re-scanning the whole panel for each (firm, year) pair.
results = []
back_years, forward_years = 5, 4  # same window as has_valid_period's defaults

# Work on a flat index so `gvkey` is unambiguously a column when grouping.
panel = ASSETG_data.reset_index(drop=True)

for gvkey, firm_rows in panel.groupby('gvkey', sort=False):
    firm_years = firm_rows['datadate'].dt.year
    years_present = set(firm_years)

    # A window can only be complete if it lies inside the firm's own year span,
    # so other center years are skipped without changing the output.
    earliest_center = int(firm_years.min()) + back_years
    latest_center = int(firm_years.max()) - forward_years

    for center_year in range(earliest_center, latest_center + 1):
        window_start = center_year - back_years
        window_end = center_year + forward_years
        if not years_present.issuperset(range(window_start, window_end + 1)):
            continue

        period_data = firm_rows[(firm_years >= window_start) & (firm_years <= window_end)]
        for decile in period_data['decile'].unique():
            decile_data = period_data[period_data['decile'] == decile]
            avg_assetg = decile_data['ASSETG'].mean()
            results.append({'gvkey': gvkey, 'center_year': center_year, 'decile': decile, 'average_ASSETG': avg_assetg})

KeyboardInterrupt: 

In [None]:
# Plot the per-date mean, median and standard deviation held in annual_stats.
fig, ax = plt.subplots(figsize=(12, 6))

# Mean and median are drawn multiplied by `display_scale` so they are visible
# next to the standard deviation; the legend says so explicitly, otherwise the
# curves read as raw growth rates.
display_scale = 10
ax.plot(annual_stats.index, annual_stats['mean'] * display_scale,
        label=f'Average Asset Growth Rate (x{display_scale})', color='blue')
ax.plot(annual_stats.index, annual_stats['median'] * display_scale,
        label=f'Median Asset Growth Rate (x{display_scale})', color='green')
ax.plot(annual_stats.index, annual_stats['std'], label='std', color='red')

# Take the year range in the title from the data actually plotted rather than
# hard-coding it, so the title cannot drift from the sample.
if len(annual_stats):
    year_span = f' ({annual_stats.index.min().year}-{annual_stats.index.max().year})'
else:
    year_span = ''

# Labels and title
ax.set_xlabel('Year')
ax.set_ylabel('Asset Growth Rate')
ax.set_title(f'Average and Median Annual Asset Growth Rates{year_span} with Standard Deviation')
ax.legend()

# Show the plot
ax.grid(True)
plt.show()