# Cleaning Return Series for "Death of Factors"

The goal of this notebook is to go through all of the CSV files and compile one long dataset of returns. Each row of the dataset should hold:

metadata columns describing the strategy (`Asset Class`, `Descriptor`, `Country`) - the month-end date (`Month_date`) - the monthly percentage return (`Month_ret`)

The file Data/Index.csv lists all of the factors that we're studying. The factors are a mix of time-series and cross-sectional strategies, and they span several asset classes. Most of the data comes from the AQR data library.

## Cross Sectional Value + Momentum in Other Asset Classes

In [72]:
import pandas as pd
from pandas.tseries.offsets import MonthEnd
import numpy as np
from beakerx import *
from beakerx.object import beakerx
import seaborn as sns
import matplotlib.pyplot as plt

def drop_missing(self, var_name, inplace = True):
    """Return the rows of this frame whose ``var_name`` entry is not null.

    The function is attached to ``pd.DataFrame`` right after its definition,
    so it can be called as ``df.drop_missing('col')``.

    ``inplace`` is accepted but never read: the selection is always returned
    and the frame itself is left untouched.
    """
    has_value = self[var_name].notna()
    return self.loc[has_value]
pd.DataFrame.drop_missing = drop_missing

MASTER_INDEX = ['Asset Class', 'Descriptor', 'Country']

In [73]:
# Load the AQR value/momentum CSV, discard the columns that are not used
# below, and rename the date column to the notebook-wide name
aqr_mom_value = (
    pd.read_csv('../Data/CSV/aqr_val_momentum.csv', sep = ',')
      .drop(columns = ['VAL', 'MOM', 'VAL^SS', 'MOM^SS', 'VAL^AA', 'MOM^AA'])
      .rename(index = str, columns = {'DATE': 'Month_date'})
)

# Parse the date text, then snap each date onto the last day of its month
parsed_dates = pd.to_datetime(aqr_mom_value['Month_date'], format = '%m/%d/%Y')
aqr_mom_value['Month_date'] = parsed_dates + MonthEnd(0)

In [74]:
# Reshape wide -> long: one row per (month, source column)
aqr_mom_value_melt = aqr_mom_value.melt(id_vars = 'Month_date')

# Returns are text carrying a '%' sign: strip it and rescale to a fraction
pct_text = aqr_mom_value_melt['value'].str.replace('%', '')
aqr_mom_value_melt['value'] = pd.to_numeric(pct_text) / 100
aqr_mom_value_melt = aqr_mom_value_melt.drop_missing('value')

# Source column names are split on '_' into at most three pieces:
# piece 0 is the strategy code, piece 2 is the country / asset code
var_names = aqr_mom_value_melt['variable'].str.split('_', n = 2, expand = True)
aqr_mom_value_melt['Descriptor'] = var_names[0]
aqr_mom_value_melt['Country_Asset'] = var_names[2]

# Lookup table: country / asset code -> (Country, Asset Class)
conversion_dict = {'Country_Asset': ['US90', 'UK90', 'ROE90', 'JP90', 'EQ', 'FX', 'FI', 'COM'],
                   'Country': ['USA', 'GBR', 'EUR', 'JPN', 'WLD', 'WLD', 'WLD', 'WLD'],
                   'Asset Class': ['Equities', 'Equities', 'Equities', 'Equities', 'Equities', 'FX', 'Bonds', 'Cmd']}
conversion_frame = pd.DataFrame(conversion_dict).set_index('Country_Asset')

# Left-join the lookup onto the long frame by country / asset code
aqr_mom_value_melt = aqr_mom_value_melt.set_index('Country_Asset').join(conversion_frame)

# Readable strategy names; any code other than VALLS / MOMLS becomes NaN
descriptor_labels = {'VALLS': 'Value', 'MOMLS': 'Momentum'}
aqr_mom_value_melt['Descriptor'] = [descriptor_labels.get(code, np.nan)
                                    for code in aqr_mom_value_melt['Descriptor']]

# Final column names; the join key and the raw column name are no longer needed
aqr_mom_value_melt = (
    aqr_mom_value_melt
    .rename(index = str, columns = {'value': 'Month_ret'})
    .reset_index()
    .drop(columns = ['Country_Asset', 'variable'])
)

In [75]:
# Keep only the non-equity rows here, so that equity value and momentum can
# come from the longer-history UMD / Value series instead
is_equity = aqr_mom_value_melt['Asset Class'] == 'Equities'
aqr_mom_value_melt = aqr_mom_value_melt.loc[~is_equity]

In [76]:
def check_data(df):
    """Validate that ``df`` follows the long layout used throughout this notebook.

    Checks, in order:

    1. the columns are exactly 'Asset Class', 'Country', 'Descriptor',
       'Month_date' and 'Month_ret' (in any order);
    2. 'Month_date' is a timezone-naive datetime64 column. Any time
       resolution is accepted, not only nanoseconds;
    3. no two rows share the same
       ('Asset Class', 'Country', 'Descriptor', 'Month_date') key.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to validate. The key fields must still be ordinary columns,
        i.e. not yet moved into the index.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If any of the checks above fails; the message names the failed check.
    """
    expected_columns = ['Asset Class', 'Country', 'Descriptor', 'Month_date', 'Month_ret']
    found_columns = sorted(df.columns.tolist())
    assert found_columns == expected_columns, \
        'expected columns {}, got {}'.format(expected_columns, found_columns)

    # Test the dtype kind rather than the literal '<M8[ns]' string, so datetime
    # columns stored at another resolution are not rejected
    assert pd.api.types.is_datetime64_dtype(df['Month_date']), \
        'Month_date must be timezone-naive datetime64, got {}'.format(df['Month_date'].dtype)

    key_columns = ['Asset Class', 'Country', 'Descriptor', 'Month_date']
    n_duplicates = int(df.reset_index().duplicated(key_columns).sum())
    assert n_duplicates == 0, \
        '{} rows repeat an existing {} key'.format(n_duplicates, key_columns)
check_data(aqr_mom_value_melt)

In [77]:
# Move the strategy metadata columns into the index
aqr_mom_value_melt = aqr_mom_value_melt.set_index(MASTER_INDEX)

In [78]:
aqr_mom_value_melt

## Momentum, Value, Quality, and Betting Against Beta for All Countries

In [79]:
# Read the four AQR equity factor files (quality, value, cross-sectional
# momentum, betting against beta), in that order
aqr_qual, aqr_val, aqr_mom, aqr_bab = map(pd.read_csv, (
    '../Data/CSV/aqr_quality.csv',
    '../Data/CSV/aqr_value.csv',
    '../Data/CSV/aqr_cs_momentum.csv',
    '../Data/CSV/aqr_bab.csv',
))

In [80]:
aqr_qual.head()

In [81]:
aqr_val.head()

In [82]:
aqr_mom.head()

In [83]:
aqr_bab.head()

In [84]:
def transform_aqr_csv_file(data, strategy_name):
    """Reshape one wide AQR equity factor frame into the long master layout.

    Parameters
    ----------
    data : pd.DataFrame
        Raw frame as read from CSV: a 'DATE' column formatted '%m/%d/%Y' plus
        one column per country holding returns as text with a '%' sign.
        The frame is not modified.
    strategy_name : str
        Label written to the 'Descriptor' column (e.g. 'Value', 'Momentum').

    Returns
    -------
    pd.DataFrame
        Long frame indexed by MASTER_INDEX with two columns: 'Month_date'
        (snapped to month end) and 'Month_ret' (the percentage divided by 100).
        Rows with a missing return are dropped.

    Raises
    ------
    AssertionError
        Raised by check_data when the reshaped frame does not match the
        expected layout.
    """
    # Derive the month-end date without writing a new column into the caller's frame
    month_end = pd.to_datetime(data['DATE'], format = '%m/%d/%Y') + MonthEnd(0)
    data_melt = data.drop(['DATE'], axis = 1)\
                    .assign(Month_date = month_end)\
                    .melt(id_vars = 'Month_date')

    # Strip the '%' sign and rescale to a fraction, then discard empty cells
    data_melt['value'] = pd.to_numeric(data_melt['value'].str.replace('%', '')) / 100
    data_melt = data_melt.drop_missing('value').reset_index(drop = True)

    # Make some new variables
    data_melt = data_melt.rename(columns = {'variable': 'Country', 'value': 'Month_ret'})
    data_melt['Asset Class'] = 'Equities'
    # Use the caller's label so every file is tagged with its own strategy;
    # a shared label would make the concatenated frames collide on the key
    data_melt['Descriptor'] = strategy_name

    # Validate, then index
    check_data(data_melt)
    return data_melt.set_index(MASTER_INDEX)

In [85]:
# Raw frames and their strategy labels, paired by position
aqr_raw_frames = [aqr_val, aqr_mom, aqr_qual, aqr_bab]
aqr_names = ['Value', 'Momentum', 'Quality', 'Betting Against Beta']

# One long-format frame per strategy
long_frames = [transform_aqr_csv_file(raw_frame, label)
               for raw_frame, label in zip(aqr_raw_frames, aqr_names)]

In [86]:
all_cleaned_aqr_equity_cross_section_strategies = pd.concat(long_frames)

In [87]:
all_cleaned_aqr_equity_cross_section_strategies.head(100)

## Time Series Momentum

In [88]:
# Load the wide time-series-momentum file; the 'TSMOM' column is dropped
# before melting to one row per (month, source column)
aqr_ts = pd.read_csv('../Data/CSV/aqr_ts_momentum.csv', sep = ',')\
           .rename(index = str, columns = {'Date': 'Month_date'})
aqr_ts_melt = aqr_ts.drop(columns = ['TSMOM']).melt(id_vars = 'Month_date')

# The text after '^' in each source column name is the asset-class code
var_names = aqr_ts_melt['variable'].str.split('^', n = 1, expand = True)

asset_class_dict = {'CM': 'Cmd',
                    'EQ': 'Equities',
                    'FI': 'Bonds',
                    'FX': 'FX'}

# Direct dict indexing: an unrecognised code raises KeyError rather than
# passing through as a missing value
asset_classes = [asset_class_dict[code] for code in var_names.iloc[:, 1]]
returns = pd.to_numeric(aqr_ts_melt['value'].str.replace('%', '')) / 100
month_ends = pd.to_datetime(aqr_ts_melt['Month_date'], format = '%m/%d/%Y') + MonthEnd(0)

aqr_ts_melt = (
    aqr_ts_melt
    .assign(**{'Descriptor': 'TS Momentum',
               'Asset Class': asset_classes,
               'Country': 'WLD',
               'value': returns,
               'Month_date': month_ends})
    .drop(columns = ['variable'])
    .rename(index = str, columns = {'value': 'Month_ret'})
)

check_data(aqr_ts_melt)

In [89]:
aqr_ts_melt = aqr_ts_melt.set_index(MASTER_INDEX)

In [90]:
aqr_ts_melt

## Carry Trade

In [97]:
# Load the carry series; the 'HML' column is used as the monthly return
carry = pd.read_csv('../Data/CSV/adrien_carry.csv', sep = ',')
carry = carry.rename(columns = {'Dates': 'Month_date', 'HML': 'Month_ret'})

# Snap to month end like every other series in this notebook, so rows line up
# on Month_date after concatenation (dates already on a month end are unchanged).
# NOTE(review): '%y' is a two-digit year; confirm it resolves to the intended
# century for the earliest dates in the file.
carry['Month_date'] = pd.to_datetime(carry['Month_date'], format = '%m/%d/%y') + MonthEnd(0)

carry['Descriptor'] = 'Carry'
carry['Asset Class'] = 'FX'
carry['Country'] = 'WLD'
carry = carry.set_index(MASTER_INDEX)

In [98]:
carry.tail()

## Combine all the data

In [99]:
# Stack every cleaned series into one long frame (column order is left as-is)
combined_factors = pd.concat([aqr_mom_value_melt, aqr_ts_melt, all_cleaned_aqr_equity_cross_section_strategies, carry], sort = False)

# The context manager closes the HDF5 file even if the write raises
with pd.HDFStore('../Data/data.h5') as store:
    store.put('all_factors', combined_factors)

In [100]:
combined_factors.shape

(39820, 1)

In [101]:
combined_factors.head()