In [92]:
import wrds
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import datetime as dt
from collections import defaultdict

In [2]:
# Open the WRDS session that the raw_sql data pulls further down run against.
# NOTE(review): presumably authenticates via a stored pgpass entry or an
# interactive prompt — confirm before running this notebook unattended.
conn = wrds.Connection()

Loading library list...
Done


We filter on DATAFMT='STD', INDFMT='INDL', CONSOL='C' and POPSRC='D' to retrieve the standardized (as opposed to restated), consolidated (as opposed to pro-forma) data presented in the industrial format (as opposed to the financial-services format) for domestic companies (as opposed to international firms), i.e., U.S. and Canadian firms.

PRCC_C: close market price, CSHO: net number of all common shares

Importing linking table to convert permno to gvkey

In [3]:
# Annual Compustat total assets (AT) per gvkey. The four format filters keep
# the standardized, consolidated, industrial-format, domestic records.
ASSETG_query = """
SELECT gvkey, datadate, at
FROM comp.funda
WHERE indfmt='INDL'
AND datafmt='STD'
AND popsrc='D'
AND consol='C'
AND datadate >= '1961-12-31'
AND datadate <= '2002-12-31'
"""

# One June row per stock and year from the CRSP monthly file. The monthly file
# is dated on the last trading day of the month, which is often June 28/29
# rather than June 30, so we filter on the month only; requiring day = 30
# silently drops every year in which June 30 is not a trading day.
June_query = """
SELECT permno, date, shrout
FROM crsp.msf
WHERE date >= '1962-06-01' -- first June that follows the ASSETG sample start
AND date <= '2003-06-30' -- last June that follows the ASSETG sample end
AND EXTRACT(MONTH FROM date) = 6 -- June month-end observation only
"""

# CRSP/Compustat link table used to translate gvkey to permno. Restricting to
# the researched link types (LU, LC) and primary links (P, C) keeps a gvkey
# from being matched to several securities at once.
Link_query = """
SELECT Gvkey, Lpermno, Linkdt, Linkenddt
FROM crsp.ccmxpf_linktable
WHERE linktype IN ('LU', 'LC')
AND linkprim IN ('P', 'C')
"""

Calculates ASSETG (asset growth from lag 2 to lag 1) and L2ASSETG (asset growth from lag 3 to lag 2)

In [5]:
def calculate_assetg(group):
    """Add lagged total assets and lagged asset-growth rates to one firm's rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single firm with an ``at`` column, already sorted in time
        order. Lags are positional, so each row is treated as one period after
        the row before it.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) with five extra columns:
        ``at_lag3``, ``at_lag2``, ``at_lag1``,
        ``ASSETG = (at_lag1 - at_lag2) / at_lag2`` and
        ``L2ASSETG = (at_lag2 - at_lag3) / at_lag3``.
        Rows without enough history hold NaN in the affected columns.
    """
    assets = group['at']
    at_lag1 = assets.shift(1)
    at_lag2 = assets.shift(2)
    at_lag3 = assets.shift(3)

    # assign() builds a copy: mutating the object handed over by
    # groupby.apply is unsupported by pandas and can leak into the caller.
    return group.assign(
        at_lag3=at_lag3,
        at_lag2=at_lag2,
        at_lag1=at_lag1,
        ASSETG=(at_lag1 - at_lag2) / at_lag2,
        L2ASSETG=(at_lag2 - at_lag3) / at_lag3,
    )

In [41]:
# grab data
ASSETG_data = conn.raw_sql(ASSETG_query)
June_data = conn.raw_sql(June_query)
daily_rets = pd.read_csv('daily_rets.csv')
linking_table = conn.raw_sql(Link_query)

Convert the closing price to its absolute value, keep only the end-of-June entries, merge them onto the shares-outstanding DataFrame, and calculate market value (MV = shrout × prc).

In [42]:
# prc can be negative in the source file (NOTE(review): presumably the CRSP
# marker for a bid/ask average — confirm); only its magnitude is a price.
daily_rets['prc'] = daily_rets['prc'].abs()
daily_rets['date'] = pd.to_datetime(daily_rets['date'])

# Keep the last available June quote for every stock and year. Requiring the
# calendar day to be exactly 30 would discard every year in which June 30 is
# not a trading day.
june_rows = daily_rets[daily_rets['date'].dt.month == 6].sort_values(['permno', 'date'])
daily_rets = june_rows.groupby(['permno', june_rows['date'].dt.year.rename('year')]).tail(1)

# Attach the June price and return to the June shares-outstanding rows.
June_data['date'] = pd.to_datetime(June_data['date'])
merged_data = pd.merge(
    June_data,
    daily_rets[['permno', 'date', 'prc', 'ret']],
    on=['permno', 'date'],
    how='left',
)

# Drop rows with any missing field (including unmatched dates), then compute
# market value as shares outstanding times price.
merged_data = merged_data.dropna()
merged_data['MV'] = merged_data['shrout'] * merged_data['prc']

Creates a year column, sorts by gvkey and date, keeps only December observations (possibly redundant),
applies the lag shifts and calculates ASSETG, drops observations whose lagged assets are missing or zero, and assigns ASSETG deciles.

In [43]:
def _decile_within_year(values):
    """Rank one year's ASSETG cross-section into deciles (0 = lowest growth)."""
    if values.nunique() < 10:
        # Too few distinct values to cut ten bins; leave the year unassigned.
        return pd.Series(np.nan, index=values.index)
    return pd.qcut(values, 10, labels=False, duplicates='drop')


ASSETG_data['datadate'] = pd.to_datetime(ASSETG_data['datadate'])
ASSETG_data['year'] = ASSETG_data['datadate'].dt.year

# Chronological order within each firm is required by the positional lags,
# and only December observations are kept.
ASSETG_data = ASSETG_data.sort_values(['gvkey', 'datadate'])
ASSETG_data = ASSETG_data[ASSETG_data['datadate'].dt.month == 12]

# group_keys=False keeps the original flat row index; otherwise newer pandas
# versions prepend gvkey as an index level that duplicates the gvkey column.
ASSETG_data = ASSETG_data.groupby('gvkey', group_keys=False).apply(calculate_assetg)

# Require all three asset lags to be present and non-zero so both growth
# rates are finite.
lagged_assets = ASSETG_data[['at_lag1', 'at_lag2', 'at_lag3']]
has_valid_lags = lagged_assets.notna().all(axis=1) & lagged_assets.ne(0).all(axis=1)
ASSETG_data = ASSETG_data[has_valid_lags].copy()

# Sort into deciles year by year. A single cut over the pooled sample would
# compare a firm with firms from other years instead of with its own annual
# cross-section.
ASSETG_data['decile'] = ASSETG_data.groupby('year')['ASSETG'].transform(_decile_within_year)

Merging linking table to associate MV dates to ASSETG table

In [44]:
# Clean the link table: a row without a permno cannot link anything.
# A missing link end date marks a link that is still in force, so it is
# replaced with today's date. Left as NaT it would fail every
# "datadate <= linkenddt" comparison and be removed by any later dropna(),
# discarding all firms whose link is still active.
still_active_until = pd.Timestamp.today().normalize()
linking_table = linking_table.dropna(subset=['lpermno']).assign(
    linkdt=lambda links: pd.to_datetime(links['linkdt']),
    linkenddt=lambda links: pd.to_datetime(links['linkenddt']).fillna(still_active_until),
)

# Flatten the index left behind by the groupby step, then attach every
# candidate link to each firm-year (validity dates are applied afterwards).
ASSETG_data = ASSETG_data.reset_index(drop=True)
ASSETG_data = ASSETG_data.merge(linking_table, on='gvkey')

Merge the ASSETG table (now carrying the permno–gvkey links) with the MV table and add the "June" date. TODO: fix the loss of data in this step.

In [46]:
# Keep a link only for fiscal-year ends that fall inside its validity window
# (both bounds inclusive).
link_is_valid = ASSETG_data['datadate'].between(ASSETG_data['linkdt'], ASSETG_data['linkenddt'])
ASSETG_data = ASSETG_data[link_is_valid].copy()

# Market value is measured in the June after the fiscal-year end. MV_date is
# the nominal June 30 of that year, built in one vectorized call.
ASSETG_data['MV_date'] = pd.to_datetime(
    pd.DataFrame({'year': ASSETG_data['datadate'].dt.year + 1, 'month': 6, 'day': 30})
)

# Join on permno and June *year* rather than on the exact date: the June
# observation in merged_data may be dated on an earlier trading day than the
# 30th, and an exact-date join would silently drop those years.
june_values = merged_data.assign(
    date=pd.to_datetime(merged_data['date']),
    _june_year=lambda frame: frame['date'].dt.year,
)
ASSETG_data = (
    ASSETG_data
    .assign(_mv_year=ASSETG_data['MV_date'].dt.year)
    .merge(
        june_values,
        left_on=['lpermno', '_mv_year'],
        right_on=['permno', '_june_year'],
        how='left',
    )
    .drop(columns=['_mv_year', '_june_year'])
    # Rows without a June match (or with any other missing field) are dropped.
    .dropna()
)

Calculates yearly returns for June date

In [55]:
# Order rows by firm and June date so that "previous row" means "previous
# available June" for the same gvkey.
ASSETG_data = ASSETG_data.set_index(['gvkey', 'date']).sort_index()

# Price change versus the previous available June. fill_method=None avoids
# relying on pandas' implicit forward-fill default.
# NOTE(review): prc is an unadjusted price here, so splits and dividends are
# not reflected — confirm whether an adjusted return series should be used.
price_change = ASSETG_data.groupby('gvkey')['prc'].pct_change(1, fill_method=None)

# A change is a *yearly* return only if the previous observation is exactly
# one year earlier; changes spanning a gap are blanked out (and then dropped).
june_year = pd.Series(
    ASSETG_data.index.get_level_values('date').year.to_numpy(),
    index=ASSETG_data.index,
)
years_since_previous = june_year.groupby(level='gvkey').diff()
ASSETG_data['yearly_return'] = price_change.where((years_since_previous == 1).to_numpy())

# The first observation of every firm has no return; drop it together with
# any other incomplete row and restore gvkey/date as ordinary columns.
ASSETG_data = ASSETG_data.dropna().reset_index()

  ASSETG_data['yearly_return'] = ASSETG_data.groupby('gvkey')['prc'].pct_change(1)


Creates a nested dictionary with this structure: gvkey → year → {ASSETG, L2ASSETG, MV, decile, yearly_return}

In [86]:
# Nested lookup: data_dict[gvkey][year] -> dict of that firm-year's measures.
# If a (gvkey, year) pair occurs more than once, the last row wins.
measure_names = ['ASSETG', 'L2ASSETG', 'MV', 'decile', 'yearly_return']
data_dict = defaultdict(lambda: defaultdict(dict))
firm_year_rows = ASSETG_data[['gvkey', 'year'] + measure_names].itertuples(index=False, name=None)
for firm, fiscal_year, *measures in firm_year_rows:
    data_dict[firm][fiscal_year] = dict(zip(measure_names, measures))

For each unique gvkey: check that the candidate center year has a complete 10-year window (4 years back through 5 years forward) and take the decile of the center year. Then, for each year in the window, add that year's ASSETG to a running sum keyed by (center-year decile, year offset), count the contributions, and compute the averages.

In [62]:
def has_complete_data_for_period(data_dict, gvkey, center_year, years_back=4, years_forward=5):
    """Tell whether a firm has an entry for every year of an event window.

    Parameters
    ----------
    data_dict : mapping
        ``data_dict[gvkey]`` is a mapping keyed by year.
    gvkey : hashable
        Firm identifier to look up.
    center_year : int
        Year the window is centered on.
    years_back : int, default 4
        Number of years before ``center_year`` that must be present.
    years_forward : int, default 5
        Number of years after ``center_year`` that must be present.

    Returns
    -------
    bool
        True when every year from ``center_year - years_back`` through
        ``center_year + years_forward`` (inclusive) is a key of
        ``data_dict[gvkey]``; False otherwise, including when the firm is
        unknown.
    """
    # .get() avoids creating an empty entry for an unknown firm when
    # data_dict is a defaultdict.
    firm_years = data_dict.get(gvkey)
    if not firm_years:
        return False
    return all(
        center_year + year_offset in firm_years
        for year_offset in range(-years_back, years_forward + 1)
    )

ASSETG decile Panel

In [None]:
# Averaging logic: event-time mean of ASSETG, keyed by the decile of the
# center year and by the offset (-4 ... +5) from that year.
sum_data = defaultdict(lambda: defaultdict(int))
count_data = defaultdict(lambda: defaultdict(int))
average_data = defaultdict(lambda: defaultdict(float))

# Count incomplete windows instead of printing one line per firm-year.
incomplete_windows = 0

for gvkey in tqdm(ASSETG_data['gvkey'].unique(), desc='Processing stocks'):
    firm_years = data_dict[gvkey]
    # Only years with 4 earlier and 5 later years inside the firm's span can
    # be the center of a full window.
    min_year = min(firm_years) + 4
    max_year = max(firm_years) - 5
    for center_year in range(min_year, max_year + 1):
        if not has_complete_data_for_period(data_dict, gvkey, center_year):
            incomplete_windows += 1
            continue

        center_year_decile = firm_years[center_year]['decile']
        # Every year of the window is present (checked above).
        for year_offset in range(-4, 6):
            assetg_value = firm_years[center_year + year_offset]['ASSETG']
            sum_data[center_year_decile][year_offset] += assetg_value
            count_data[center_year_decile][year_offset] += 1

# Each (decile, offset) cell with a sum also has a count of at least one, so
# the averages are computed once, after all sums are final.
for decile, sums_by_offset in sum_data.items():
    for year_offset, total in sums_by_offset.items():
        average_data[decile][year_offset] = total / count_data[decile][year_offset]

print(f"Skipped {incomplete_windows} firm-years with an incomplete 10-year window")

Creates ASSETG decile panel

In [40]:
# One row per year offset, one column per decile. A (decile, offset) cell
# that was never filled is reported as NaN rather than 0.0, so "no data" is
# not mistaken for a zero average; .get() also keeps this cell from
# inserting new keys into average_data.
data_for_df = {
    year_offset: [
        average_data.get(decile, {}).get(year_offset, np.nan)
        for decile in range(10)
    ]
    for year_offset in range(-4, 6)
}

df = pd.DataFrame.from_dict(
    data_for_df,
    orient='index',
    columns=[f'Decile {i}' for i in range(10)],
)
df.index.name = 'Year Offset'
# Spread between the highest and lowest asset-growth deciles.
df['9-0 Spread'] = df['Decile 9'] - df['Decile 0']

EW & VW Raw Return Portfolios (TODO: not enough data)

In [101]:
# Event-time average returns per (center-year decile, year offset), both
# equal-weighted and value-weighted.
total_market_cap = defaultdict(float)
annual_weights = defaultdict(lambda: defaultdict(float))
weighted_sum_data = defaultdict(lambda: defaultdict(float))
# Holds the SUM OF WEIGHTS per cell (the value-weighted denominator).
weighted_count_data = defaultdict(lambda: defaultdict(float))
value_weighted_average_data = defaultdict(lambda: defaultdict(float))

equal_weight_sum_data = defaultdict(lambda: defaultdict(float))
equal_weight_count_data = defaultdict(lambda: defaultdict(int))
equal_weight_average_data = defaultdict(lambda: defaultdict(float))

# Total market value of all firms in each year.
for gvkey, yearly_data in data_dict.items():
    for year, attributes in yearly_data.items():
        total_market_cap[year] += attributes['MV']

# Each firm's share of that year's total market value.
for gvkey, yearly_data in data_dict.items():
    for year, attributes in yearly_data.items():
        if total_market_cap[year] > 0:
            annual_weights[year][gvkey] = attributes['MV'] / total_market_cap[year]

for gvkey in tqdm(ASSETG_data['gvkey'].unique(), desc='Processing stocks'):
    firm_years = data_dict[gvkey]
    min_year = min(firm_years) + 4
    max_year = max(firm_years) - 5

    for center_year in range(min_year, max_year + 1):
        if not has_complete_data_for_period(data_dict, gvkey, center_year):
            continue

        center_year_decile = firm_years[center_year]['decile']
        # Every year of the window is present (checked above).
        for year_offset in range(-4, 6):
            year = center_year + year_offset
            yearly_return_value = firm_years[year]['yearly_return']
            weight = annual_weights[year][gvkey]

            # Value-weighted: accumulate weight * return and the weight
            # itself. Dividing by the number of observations instead would
            # not yield a weighted average.
            weighted_sum_data[center_year_decile][year_offset] += yearly_return_value * weight
            weighted_count_data[center_year_decile][year_offset] += weight

            # Equal-weighted: plain sum and observation count.
            equal_weight_sum_data[center_year_decile][year_offset] += yearly_return_value
            equal_weight_count_data[center_year_decile][year_offset] += 1

for decile in equal_weight_sum_data:
    for year_offset in equal_weight_sum_data[decile]:
        # Equal-weighted average: sum of returns / number of observations.
        if equal_weight_count_data[decile][year_offset] > 0:
            equal_weight_average_data[decile][year_offset] = (
                equal_weight_sum_data[decile][year_offset]
                / equal_weight_count_data[decile][year_offset]
            )

        # Value-weighted average: sum of weight * return / sum of weights.
        if weighted_count_data[decile][year_offset] > 0:
            value_weighted_average_data[decile][year_offset] = (
                weighted_sum_data[decile][year_offset]
                / weighted_count_data[decile][year_offset]
            )


Financial & Return Characteristics

In [None]:
# Averaging logic: per-decile means of ASSETG, L2ASSETG and MV, measured in
# the center year of every complete 10-year window.
average_data = defaultdict(float)
sum_data = defaultdict(float)
count_data = defaultdict(float)

average_data_L2ASSETG = defaultdict(float)
sum_data_L2ASSETG = defaultdict(float)
count_data_L2ASSETG = defaultdict(float)

average_data_MV = defaultdict(float)
sum_data_MV = defaultdict(float)
count_data_MV = defaultdict(float)

# Count incomplete windows instead of printing one line per firm-year.
incomplete_windows = 0

for gvkey in tqdm(ASSETG_data['gvkey'].unique(), desc='Processing stocks'):
    firm_years = data_dict[gvkey]
    min_year = min(firm_years) + 4
    max_year = max(firm_years) - 5
    for center_year in range(min_year, max_year + 1):
        if not has_complete_data_for_period(data_dict, gvkey, center_year):
            incomplete_windows += 1
            continue

        center = firm_years[center_year]
        center_year_decile = center['decile']

        # Accumulate sum and count for the center year.
        sum_data[center_year_decile] += center['ASSETG']
        count_data[center_year_decile] += 1

        sum_data_L2ASSETG[center_year_decile] += center['L2ASSETG']
        count_data_L2ASSETG[center_year_decile] += 1

        # The MV accumulator must receive MV, not L2ASSETG.
        sum_data_MV[center_year_decile] += center['MV']
        count_data_MV[center_year_decile] += 1

for decile in sum_data:
    if count_data[decile] > 0:
        average_data[decile] = sum_data[decile] / count_data[decile]
    if count_data_L2ASSETG[decile] > 0:
        average_data_L2ASSETG[decile] = sum_data_L2ASSETG[decile] / count_data_L2ASSETG[decile]
    if count_data_MV[decile] > 0:
        average_data_MV[decile] = sum_data_MV[decile] / count_data_MV[decile]

print(f"Skipped {incomplete_windows} firm-years with an incomplete 10-year window")

Create Financial & Return Characteristics Panel

In [28]:
# Characteristics panel: one row per decile holding its mean ASSETG and
# mean L2ASSETG.
decile_labels = [f'Decile {i}' for i in range(10)]
panel_columns = ['Average ASSETG', 'Average L2ASSETG']

data_for_df = [
    [average_data[rank], average_data_L2ASSETG[rank]]
    for rank in range(10)
]

df = pd.DataFrame(data_for_df, columns=panel_columns, index=decile_labels)