In [None]:
import wrds
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt

In [None]:
# Open the WRDS session; `conn` is reused below to execute the SQL query.
# NOTE(review): presumably this prompts for (or reads stored) WRDS credentials — confirm.
conn = wrds.Connection()

In [None]:
# SQL text executed later via `conn.raw_sql`: pulls firm identifier (gvkey),
# record date (datadate) and `at` (the asset figure ASSETG is built from)
# from comp.funda for datadates between 1961-12-31 and 2002-12-31 inclusive.
# NOTE(review): the indfmt/datafmt/popsrc/consol conditions look like the usual
# Compustat screens that keep one record per firm and date — confirm.
query = """
SELECT gvkey, datadate, at
FROM comp.funda
WHERE indfmt='INDL' 
AND datafmt='STD' 
AND popsrc='D' 
AND consol='C' 
AND datadate >= '1961-12-31' 
AND datadate <= '2002-12-31'
"""

In [None]:
# Run the query through the open WRDS connection; the result is used as a
# pandas DataFrame (columns gvkey, datadate, at) by every later cell.
data = conn.raw_sql(query)

In [None]:
# Parse `datadate` into pandas datetimes so the `.dt` accessor can be used below.
parsed_datadate = pd.to_datetime(data['datadate'])
data = data.assign(datadate=parsed_datadate)

In [None]:
# Clean the raw pull in one pass:
#   1. drop rows with any missing value,
#   2. order rows by firm and then by date (the row-based lags computed later
#      rely on this ordering),
#   3. keep only observations whose datadate falls in December.
data = (
    data
    .dropna()
    .sort_values(['gvkey', 'datadate'])
    .loc[lambda frame: frame['datadate'].dt.month == 12]
)

In [None]:
def calculate_assetg(group):
    """Return a copy of ``group`` with lagged assets and the asset growth rate.

    Three columns are added, in this order:

    * ``at_lag2`` -- ``at`` shifted down by two rows,
    * ``at_lag1`` -- ``at`` shifted down by one row,
    * ``ASSETG``  -- ``(at_lag1 - at_lag2) / at_lag2``.

    The lags are positional (row-based), not calendar-based: the caller must
    pass the rows of a single firm, sorted chronologically, one row per
    period. The first two rows have no two-row lag, so their ``ASSETG`` is
    NaN. A zero ``at_lag2`` yields inf/NaN from the division; this function
    does not filter such rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame containing an ``at`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the original columns plus the three columns above.
    """
    at_lag1 = group['at'].shift(1)
    at_lag2 = group['at'].shift(2)

    # Build a new frame instead of writing columns onto `group`: mutating the
    # object handed over by groupby.apply is unsupported by pandas, and it
    # would also silently alter a frame passed in directly by the caller.
    return group.assign(
        at_lag2=at_lag2,
        at_lag1=at_lag1,
        ASSETG=(at_lag1 - at_lag2) / at_lag2,
    )

In [None]:
# Compute the lagged-asset columns and ASSETG firm by firm.
# - group_keys=False keeps the original row index, so `gvkey` does not end up
#   as both an index level and a column (which makes a later groupby('gvkey')
#   or reset_index() fail with an ambiguity/duplicate error on recent pandas).
# - Selecting the columns explicitly keeps `gvkey` inside each group passed to
#   calculate_assetg without relying on the deprecated implicit inclusion of
#   the grouping column.
filtered_data = (
    data
    .groupby('gvkey', group_keys=False)[data.columns.tolist()]
    .apply(calculate_assetg)
)

# Keep only rows where neither lagged asset value is exactly zero; a zero
# at_lag2 would make ASSETG infinite/undefined. Rows whose lags are NaN pass
# this comparison and stay in the frame with ASSETG = NaN.
nonzero_lags = (filtered_data['at_lag1'] != 0) & (filtered_data['at_lag2'] != 0)
filtered_data = filtered_data[nonzero_lags]

In [None]:
# Cross-sectional mean, median and standard deviation of ASSETG per datadate.
# The dropna() result is assigned back (it returns a new frame rather than
# modifying in place), so dates with a missing statistic -- e.g. the earliest
# dates, where no firm has two lags yet -- are removed from the table that the
# plotting cell uses, not just from the displayed output.
annual_stats = (
    filtered_data
    .groupby('datadate')['ASSETG']
    .agg(['mean', 'median', 'std'])
    .dropna()
)
annual_stats

In [None]:
# Line chart of the per-date ASSETG statistics, all on one axes.
fig, ax = plt.subplots(figsize=(12, 6))

# (column in annual_stats, legend label, line colour)
series_styles = [
    ('mean', 'Average Asset Growth Rate', 'blue'),
    ('median', 'Median Asset Growth Rate', 'green'),
    ('std', 'std', 'red'),
]
for column, label, color in series_styles:
    ax.plot(annual_stats.index, annual_stats[column], label=label, color=color)

# Axis labels, title and legend
ax.set_xlabel('Year')
ax.set_ylabel('Asset Growth Rate')
# NOTE(review): the title says 1968-2002, but the query pulls datadates from
# 1961-12-31 onward — confirm which range is intended.
ax.set_title('Average and Median Annual Asset Growth Rates (1968-2002) with Standard Deviation')
ax.legend()

# Grid on, then render the figure
ax.grid(True)
plt.show()