In [13]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from tqdm import tqdm
from dateutil.relativedelta import relativedelta

In [14]:
from xquant.backtest.backtest import run_backtest
from xquant.portfolio import Portfolio
from xquant.util import *

In [15]:
import plotly.express as px

In [16]:
industry_index = pd.read_csv('data\\WIND_II_industry_index.csv', index_col=['Date'], parse_dates=['Date'])
total_returns = pd.read_csv('data\\quarterly_total_returns_II.csv', index_col=['Date'], parse_dates=['Date'])
earnings = pd.read_csv('data\\quarterly_earnings_II.csv', index_col=['Date'], parse_dates=['Date'])
benchmark = pd.read_csv('data\\csi_300.csv', index_col=['date'], parse_dates=['date'])

In [17]:
START = pd.Timestamp('20100401')
END = pd.Timestamp('20191231')
SECTORS = list(industry_index.columns)

## Momentum Strategy

At each rebalance date, build zero-cost portfolios that longs industries with strong momentum (U) financed by shorting industries with weak momentum (D).

$H_{0}:r_{U}-r_{D}=0$

In [18]:
def get_momentum(industry, date, look_back) -> float:
    '''calculates the momentum of an industry'''

    # skip one month to avoid factors like bid-ask spread
    end_date = date - relativedelta(months=1)
    start_date = end_date - relativedelta(months=look_back)

    try:
        # calculate return
        period = industry_index[industry].loc[start_date:end_date]
        start_price = period.head(1)[0]
        end_price = period.tail(1)[0]
    except IndexError:
        print(date, start_date, end_date)
        print(period)
        print('\n')

    momentum = (end_price/start_price) - 1

    return momentum

In [19]:
def get_mom_rank(industry, date, look_back) -> float:
    '''calculates the numeric rank of an industry's momentum among peers'''
    mom_list = [get_momentum(sector, date, look_back) for sector in SECTORS]
    mom_dict = dict(zip(SECTORS, mom_list))

    series = pd.Series(mom_dict).dropna()
    df = pd.DataFrame(series, columns=['momentum'])
    df.sort_values(by=['momentum'], ascending=False, inplace=True)
    df['rank'] = np.arange(1,len(df)+1)
    
    rank = df.at[industry, 'rank']
    return rank

In [20]:
def get_cap_portfolio(prices, include, funds) -> Portfolio:

    total_cap = prices[include].sum()
    weights = [prices[industry]/total_cap for industry in include]

    budgets = [funds*weight for weight in weights] # budget available for each industry
    
    shares = np.divide(budgets, prices[include].values)
    shares_dict = dict(zip(include, shares))

    portfolio = Portfolio(long=shares_dict, short={}, cash=0)
    
    return portfolio

In [21]:
def stock_selection_mom(funds, date, look_back) -> Portfolio:

    long_include, short_include = [],[]
        
    for industry in SECTORS:
        mom_rank = get_mom_rank(industry, date, look_back)
        if mom_rank <= 8:
            long_include.append(industry)
        elif mom_rank >= 17:
            short_include.append(industry)

    prices = industry_index.loc[closest_trading_day(date, industry_index.index, 'bfill')]    

    # equal weighted portfolio    
    long_shares = [funds/len(long_include)/prices[i] for i in long_include]
    long_stocks = dict(zip(long_include, long_shares))
    long_portfolio = Portfolio(long=long_stocks, short={}, cash=0)
    short_shares = [funds/len(short_include)/prices[j] for j in short_include]
    short_stocks = dict(zip(short_include, short_shares))
    short_portfolio = Portfolio(long=short_stocks, short={}, cash=0)

    # cap weighted portfolio
    # long_portfolio = get_cap_portfolio(prices, long_include, funds)
    # short_portfolio = get_cap_portfolio(prices, short_include, funds)

    return long_portfolio, short_portfolio

In [22]:
def calculate_return_mom(date, look_back, holding_period):
    # at any given time, the number of portfolios this strategy holds is equal to holding_period
    sub_longs, sub_shorts = [], []

    for i in range(1, holding_period+1):
        look_back_end = date - relativedelta(months=i)
        look_back_start = look_back_end - relativedelta(months=look_back)

        sub_long, sub_short = stock_selection_mom(100, look_back_start, look_back)
        sub_longs.append(sub_long)
        sub_shorts.append(sub_short)
    
    long_total_val, short_total_val = 0, 0
    for l, s  in zip(sub_longs, sub_shorts):
        long_total_val += l.get_net_liquidation(date, industry_index)
        short_total_val += s.get_net_liquidation(date, industry_index)
    
    long_return = long_total_val / (100 * holding_period) - 1
    short_return = short_total_val / (100 * holding_period) - 1

    long_short_excess = long_return - short_return
    return long_short_excess

In [45]:
chart_r = pd.DataFrame(columns=['H=3','H=6','H=9','H=12','H=18','H=24'], index=['L=3','L=6','L=9','L=12','L=18','L=24'])
chart_r.index.rename('Look-back', inplace=True)
chart_t = chart_r.copy()

In [46]:
for col in tqdm(chart_r.columns):
    for idx in chart_r.index:

        l_b, h_p = int(idx[2:]), int(col[2:])
        long_short_excess = []
        for month in pd.date_range(start='20050101', end='20210501', freq='M'):
            month = (month+relativedelta(days=1))
            long_short_excess.append(calculate_return_mom(date=month, look_back=l_b, holding_period=h_p))

        cumulative = np.prod([i+1 for i in long_short_excess])
        n_days = (pd.Timestamp('20050101') - pd.Timestamp('20210501')).days
        annualized = (cumulative + 1) ** (365/n_days) - 1
        t_stat = stats.ttest_1samp(long_short_excess, 0).statistic

        chart_r.at[idx, col] = annualized
        chart_t.at[idx, col] = t_stat

100%|██████████| 6/6 [4:09:52<00:00, 2498.72s/it]


In [70]:
chart_r

Unnamed: 0_level_0,H=3,H=6,H=9,H=12,H=18,H=24
Look-back,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
L=3,-0.0283434,-0.106247,-0.155857,-0.167946,-0.212265,-0.187806
L=6,-0.181452,-0.214069,-0.209842,-0.238656,-0.249849,-0.145846
L=9,-0.196763,-0.225141,-0.257367,-0.283568,-0.230285,-0.0765599
L=12,-0.0968398,-0.122177,-0.0970074,-0.0363347,-0.000287665,-3.90332e-07
L=18,3.71141e-08,-1.38667e-09,9.6912e-11,3.07927e-11,1.77636e-15,0.0
L=24,-4.44089e-15,0.0,0.0,0.0,0.0,0.0


In [49]:
long_short_excess = []

for month in pd.date_range(start='20050101', end='20210501', freq='M'):
    month = (month+relativedelta(days=1))
    long_short_excess.append(calculate_return_mom(date=month, look_back=6, holding_period=6))

In [65]:
cumulative = np.prod([i+1 for i in long_short_excess])
n_days = (pd.Timestamp('20210501') - pd.Timestamp('20050101')).days
annualized = (cumulative + 1) ** (365/n_days) - 1
t_stat = stats.ttest_1samp(long_short_excess, 0).statistic

In [108]:
def reverse_annualized(num):
    cum = (num+1)**(-5964/365)-1
    ann = (cum + 1) ** (365/5964) - 1
    return ann

In [114]:
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [109]:
chart_r = pd.read_csv('D:\\Downloads\\return.csv', index_col='Look-back')

In [110]:
for col in chart_r.columns:
    chart_r[col] = chart_r[col].apply(reverse_annualized)

In [115]:
chart_r

Unnamed: 0_level_0,H=3,H=6,H=9,H=12,H=18,H=24
Look-back,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
L=3,0.02917,0.11888,0.18463,0.20184,0.26946,0.23123
L=6,0.22168,0.27238,0.26557,0.31347,0.33306,0.17075
L=9,0.24496,0.29056,0.34656,0.3958,0.29918,0.08291
L=12,0.10722,0.13918,0.10743,0.0377,0.00029,0.0
L=18,0.0,0.0,0.0,0.0,0.0,0.0
L=24,0.0,0.0,0.0,0.0,0.0,0.0


In [116]:
chart_t

Unnamed: 0_level_0,H=3,H=6,H=9,H=12,H=18,H=24
Look-back,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
L=3,0.14256,2.28432,3.50044,3.98821,4.51476,4.20773
L=6,2.70019,3.09205,2.99074,3.19325,3.33353,2.37683
L=9,2.61958,2.8257,3.04318,3.17821,2.89137,1.42666
L=12,1.73993,1.91335,1.68623,0.95534,-1.08095,-2.97202
L=18,-1.1796,-2.19327,-3.03636,-3.5365,-4.57326,-5.71907
L=24,-4.11578,-4.61826,-4.95325,-5.25819,-6.34736,-7.56473


In [117]:
chart_r.to_csv('D:\\Downloads\\return.csv')
chart_t.to_csv('D:\\Downloads\\t_stat.csv')

In [19]:
# chart = pd.DataFrame(columns=['H=3','H=6','H=9','H=12','H=18','H=24'], index=['L=3','L=6','L=9','L=12','L=18','L=24'])
chart = pd.DataFrame(columns=['H=1','H=2','H=4','H=6','H=8'], index=['L=1','L=2','L=4','L=6','L=8'])
chart.index.rename('Look-back', inplace=True)

In [48]:
chart

Unnamed: 0_level_0,H=1,H=2,H=4,H=6,H=8
Look-back,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
L=1,"[0.012077497603871223, 0.12125008137385307, -0...","[0.036940124344882364, -0.006526120748145736, ...","[0.07601561975665783, 0.03566824702038551, -0....","[0.06659472380904541, -0.01858393740326636, -0...","[0.10291119281698968, -0.0511432760952627, 0.0..."
L=2,"[-0.003973236043363304, 0.11995214122560294, -...","[-0.027723653583886287, -0.0259461038937292, 0...","[-0.06521130067990555, 0.08283287040142895, -0...","[-0.07960007621053589, -0.10469688924672671, 0...","[-0.0878592115718263, -0.024953890456110628, -..."
L=4,"[0.02016729530411121, 0.007772937568838012, -0...","[0.0304098526790908, -0.06798429354568025, -0....","[0.05971937949836503, -0.22923815957353977, 0....","[0.04349884405655535, -0.0782925183531884, -0....","[0.08850921237373255, 0.027335812352578648, 0...."
L=6,"[0.007179747536274372, 0.060568870934066044, -...","[0.04771061953794509, -0.07402043885760323, 0....","[0.11434944243842526, 0.05891882845157115, 0.1...","[0.10153909004934802, -0.04575899403601991, -0...","[0.12823683239360073, 0.0842889138487044, 0.09..."
L=8,"[0.007980041539910276, 0.11303221878842207, -0...","[0.04872983398508646, -0.006523931985509046, 0...","[0.11055443013696531, 0.16901432262069438, -0....","[0.08388927357139964, 0.015696011889722028, -0...","[0.100664231672007, -0.0395144916172846, 0.041..."


In [57]:
for look_back in tqdm(chart.index):
    for holding_period in chart.columns:
        l_b, h_p = int(look_back[2:]), int(holding_period[2:])
        # print(f'Look Back {l_b}, Holding Period {h_p}')
        chart.at[look_back, holding_period] = calculate_return_mom(l_b, h_p, frequency='w')

100%|██████████| 5/5 [34:52<00:00, 418.60s/it]


In [38]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [69]:
fig = make_subplots(rows=5, cols=5)

for i in range(0,5):
    for j in range(0,5):
        idx = chart.index[i]
        clm = chart.columns[j]
        long_short_excess = chart.at[idx,clm]
        portfolio_value = [1] + [val+1 for val in long_short_excess]
        fig.append_trace(go.Scatter(y=portfolio_value, 
                                    line={"dash":"solid","color":"blue"}, 
                                    text=f"{idx}, {clm}",
                                    textposition='top left'),
                                    row=i+1, col=j+1)

In [70]:
fig.update_layout(height=750, width=1000, showlegend=False, title_text='Industry Momentum')
fig.show()

In [None]:
from scipy.stats.mstats import winsorize

In [76]:
PERIODS = total_returns.index

In [77]:
def get_cape(industry, date) -> float:
    '''calculates the absolute (i.e. raw) Shiller-CAPE ratio of an industry'''
    df_earnings = earnings.loc[:date]
    df_tot_rtns = total_returns.loc[:date].tail(1)

    # if there is insufficient data, raise error
    if len(df_earnings) < 20:
        raise Exception('Insufficient data, need at least 5 years (20 quarters) to calculate CAPE')
    
    df_earnings = df_earnings.tail(20)
    earning = df_earnings.mean()[industry]
    tot_rtn = df_tot_rtns[industry].values[0]
    
    cape = tot_rtn/earning
    return cape

In [78]:
def get_relative_cape(industry, date, n_period=40) -> float:
    '''calculates the relative Shiller-CAPE ratio of an industry'''
    periods = PERIODS[PERIODS<=date][-n_period:]
    if len(periods) < n_period:
        raise Exception('Insufficient data, need at least 10 years to calculate Relative CAPE')

    capes = [get_cape(industry, period) for period in periods]
    capes = list(winsorize(capes, limits=[0.05,0.05]))
    relative_cape = capes[-1] / np.mean(capes)

    return relative_cape

In [79]:
def get_relative_cape_rank(industry, date, n_period=40) -> float:
    '''calculates the numeric rank of an industry's relative Shiller-CAPE ratio among peers'''
    rel_capes = [get_relative_cape(sector, date, n_period) for sector in SECTORS]
    rel_capes_dict = dict(zip(SECTORS, rel_capes))

    series = pd.Series(rel_capes_dict).fillna(99)
    df = pd.DataFrame(series, columns=['rel_cape'])
    df.sort_values(by=['rel_cape'], inplace=True)
    df['rank'] = np.arange(1,len(df)+1)
    
    rank = df.at[industry, 'rank']
    return rank

In [93]:
def stock_selection_cape(funds, date, n_period=20) -> Portfolio:
    long_include, short_include = [],[]
    
    for industry in SECTORS:
        rel_cape_rank = get_relative_cape_rank(industry, date, n_period) # NEED TO ADJUST TO 40
        if rel_cape_rank <= 8:
            long_include.append(industry)
        elif rel_cape_rank >= 17:
            short_include.append(industry)
    
    prices = industry_index.loc[closest_trading_day(date, industry_index.index, 'bfill')]
    
    long_portfolio = get_cap_portfolio(prices, long_include, funds)
    short_portfolio = get_cap_portfolio(prices, short_include, funds)

    return long_portfolio, short_portfolio

In [86]:
def calculate_return_cape(holding_period):
    current = START
    sub_end = current + relativedelta(months=holding_period, days=-1)
    long_short_excess = []

    while sub_end <= END:
        long_portfolio, short_portfolio = stock_selection_cape(100, current)

        long_start_val = long_portfolio.get_net_liquidation(current, industry_index)
        long_end_val = long_portfolio.get_net_liquidation(sub_end, industry_index)
        long_sub_return = (long_end_val / long_start_val) - 1

        short_start_val = short_portfolio.get_net_liquidation(current, industry_index)
        short_end_val = short_portfolio.get_net_liquidation(sub_end, industry_index)
        short_sub_return = (short_end_val / short_start_val) - 1

        long_short_excess.append(long_sub_return - short_sub_return)
        current += relativedelta(months=holding_period)
        sub_end = current + relativedelta(months=holding_period, days=-1)

    cumulative = np.prod([i+1 for i in long_short_excess])
    n_days = (sub_end - START).days
    annualized = (cumulative + 1) ** (365/n_days) - 1

    # t_stat = stats.ttest_1samp(long_short_excess, 0).statistic

    return long_short_excess
    return (annualized, t_stat)

In [96]:
chart_cape = pd.DataFrame(columns=['H=3','H=6','H=9','H=12','H=18','H=24'], index=[0])/

In [105]:
for c in tqdm(chart_cape.columns):
    holding_period = int(c[2:])
    long_short_excess = calculate_return_cape(holding_period)
    chart_cape.at[0, c] = long_short_excess

100%|██████████| 6/6 [08:54<00:00, 89.10s/it]


In [117]:
fig_cape = make_subplots(rows=1, cols=6)

for i in range(0,6):
    clm = chart_cape.columns[i]
    long_short_excess = chart_cape.at[0,clm]
    portfolio_value = [1] + [val+1 for val in long_short_excess]
    fig_cape.append_trace(go.Scatter(y=portfolio_value, 
                                line={"dash":"solid","color":"blue"}, 
                                text=clm,
                                textposition='top left'),
                                row=1, col=i+1)

fig_cape.update_layout(height=300, width=1200, showlegend=False, title_text='Shiller-CAPE')
fig_cape.show()

In [33]:
import plotly.express as px

In [9]:
fig_earning_price = make_subplots(rows=5, cols=5, subplot_titles=list(earnings.columns))

for i in range(0,5):
    for j in range(0,5):
        idx = i*5 + j

        if idx == 24:
            break

        industry = industry_index.columns[idx]
        industry_earnings = earnings.groupby(earnings.index.year).sum()[industry]
        # industry_earnings.index = industry_earnings.index
        industry_prices = industry_index.groupby(industry_index.index.year).median()[industry]

        # print(industry_earnings / industry_earnings.values[0])
        # print(industry_prices / industry_prices.values[0])

        fig_earning_price.append_trace(go.Scatter(y=industry_earnings / industry_earnings.values[0], 
                                       line={"dash":"solid","color":"blue"}, 
                                       text=f"{industry}",
                                       textposition='top left'),
                                       row=i+1, col=j+1)

        fig_earning_price.append_trace(go.Scatter(y=industry_prices / industry_prices.values[0], 
                                       line={"dash":"solid","color":"red"}, 
                                       text=f"{industry}",
                                       textposition='top left'),
                                       row=i+1, col=j+1)

In [12]:
fig_earning_price.update_layout(height=1000, width=1200, showlegend=False, title_text='Industry Price Earnings')
fig_earning_price.show()

In [14]:
earnings.groupby(by=[earnings.index.year, earnings.index.quarter]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Energy,Materials,Capital Goods,Commercial & Professional Services,Transportation,Automobiles & Components,Consumer Durables & Apparel,Consumer Services,Media,Retailing,...,"Pharmaceuticals, Biotechnology & Life Sciences",Banks,Diversified Financials,Insurance,Real Estate,Software & Services,Technology Hardware & Equipment,Semiconductors & Semiconductor Equipment,Communication Services,Utilities
Date,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2000,2,5.972076,15.186744,8.751967,4.178666,17.614799,11.288770,8.609824,14.719118,10.934229,45.271197,...,18.151786,9.665824,26.748105,0.000000,13.418073,17.230575,26.938839,12.719782,23.307415,32.322856
2000,3,12.650682,17.330448,13.302690,26.588816,26.231553,22.204382,13.029329,17.093985,17.031509,44.529205,...,20.929642,19.683987,28.758792,0.000000,17.180330,19.142693,32.480169,14.090676,23.387686,36.069569
2000,4,5.420317,9.949109,16.833495,2.214421,15.640733,-0.757291,4.268903,7.045339,10.898168,24.696177,...,11.594049,6.628335,14.267026,0.000000,7.118425,9.849835,18.433704,5.367999,6.068384,21.091991
2001,1,5.425082,9.397958,16.101879,2.067485,15.773734,-0.767993,3.662832,6.984535,10.601983,26.265183,...,11.780025,5.905304,15.631068,0.000000,6.947157,8.531303,19.139223,5.106072,6.012362,20.029150
2001,2,32.445388,19.164030,20.311506,3.400954,30.655776,7.511990,2.941433,14.663898,21.082712,50.745355,...,23.286014,5.781357,21.702638,0.000000,15.625334,14.388864,20.644381,11.624610,11.548501,36.010432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019,4,55.504284,64.928326,29.991791,9.725006,48.476079,143.035403,113.079633,83.693537,18.302389,33.341361,...,56.221647,179.527864,100.977190,91.525689,101.848691,20.491856,27.763703,10.701228,19.435516,63.492594
2020,1,75.447263,91.069692,43.242339,20.530826,69.997857,208.131795,174.648288,144.598968,31.344137,119.329159,...,82.410630,269.750507,137.916612,119.436579,133.476820,28.212436,48.156367,16.046148,26.689540,101.936071
2020,2,96.587563,113.876539,51.893573,20.904650,77.705658,235.360942,191.938093,134.377770,30.951326,110.413514,...,105.746442,336.574753,143.879705,138.814944,264.662237,34.764301,49.634442,16.076070,24.620587,105.721097
2020,3,5.425099,15.393829,-46.048617,2.742255,-0.042462,18.856786,15.663129,4.692017,3.829467,-13.555367,...,28.308544,104.600824,43.436494,27.167240,16.900303,1.279970,8.871469,5.548792,-142.318740,14.656557


In [5]:
volumes = pd.read_csv('D:\\Repositories\\cicc\\Small Cap\\data\\volume.csv', parse_dates=['date'], index_col='date')

In [6]:
month_begin_index = [day+relativedelta(days=1) for day in pd.date_range(start='20090101',end='20201231',freq='M',closed='left')]

In [7]:
monthly_volume = volumes.groupby(by=[volumes.index.year, volumes.index.month]).sum()

In [8]:
monthly_volume[['000001.SZ','000002.SZ']]

Unnamed: 0_level_0,Unnamed: 1_level_0,000001.SZ,000002.SZ
date,date,Unnamed: 2_level_1,Unnamed: 3_level_1
2008,12,219750,541250
2009,1,10456340,9653950
2009,2,15504150,38998450
2009,3,17616620,33098510
2009,4,12859220,32849550
...,...,...,...
2020,8,45163120,48345200
2020,9,32926200,44867300
2020,10,35899400,29616800
2020,11,37790100,46511800


In [9]:
industry_volume = pd.DataFrame(index=month_begin_index, columns=earnings.columns)

In [10]:
df_comp = pd.read_csv('D:\Repositories\cicc\Industry Momentum + CAPE\data\WIND_II_index_comp.csv', parse_dates=['included','excluded'], dtype={'code':str})
df_mktcap = pd.read_csv('D:\Repositories\cicc\Industry Momentum + CAPE\data\market_cap.csv', parse_dates=['date'], index_col='date')

In [11]:
df_comp['excluded'].fillna(pd.Timestamp('20991231'), inplace=True)

In [15]:
monthly_volume.columns

Index(['000001.SZ', '000002.SZ', '000003.SZ', '000004.SZ', '000005.SZ',
       '000006.SZ', '000007.SZ', '000008.SZ', '000009.SZ', '000010.SZ',
       ...
       '688668.SH', '688678.SH', '688679.SH', '688686.SH', '688698.SH',
       '688699.SH', '688777.SH', '688788.SH', '688981.SH', 'T00018.SH'],
      dtype='object', length=4270)

In [64]:
for c in tqdm(industry_volume.columns):
    # filter industry
    industry = df_comp.query(f"industry=='{c}'")
    for i in industry_volume.index:
        include = str((i-relativedelta(months=1)).date()).replace('-','')
        exclude = str(i.date()).replace('-','')
        # select cross-section
        industry = industry.query(f"included <= {include} & excluded > {exclude}")
        comp = list(industry['ticker'])
        comp = [stock for stock in comp if stock in monthly_volume.columns]
        # get cap weights...?
        # calculate volume
        vol = monthly_volume[comp]
        v = vol.loc[(i-relativedelta(months=1)).year, (i-relativedelta(months=1)).month]
        industry_volume.at[i, c] = v.sum()

100%|██████████| 24/24 [00:09<00:00,  2.48it/s]


In [30]:
industry_volume.to_csv('D:/Repositories/cicc/Industry Momentum + CAPE/data/monthly_industry_volume.csv')

In [65]:
industry_volume.tail(50)

Unnamed: 0,Energy,Materials,Capital Goods,Commercial & Professional Services,Transportation,Automobiles & Components,Consumer Durables & Apparel,Consumer Services,Media,Retailing,...,"Pharmaceuticals, Biotechnology & Life Sciences",Banks,Diversified Financials,Insurance,Real Estate,Software & Services,Technology Hardware & Equipment,Semiconductors & Semiconductor Equipment,Communication Services,Utilities
2016-11-01,109180000.0,626701000.0,580787000.0,11545100.0,130846000.0,136425000.0,129221000.0,48414900.0,48545000.0,103392000.0,...,158372000.0,78020500.0,102876000.0,21973858,226864000.0,90853500.0,270624000.0,31196800.0,22696610,119684000.0
2016-12-01,216905000.0,1366760000.0,1122490000.0,23779800.0,249907000.0,219491000.0,248521000.0,48823000.0,67739900.0,192199000.0,...,248508000.0,191337000.0,232728000.0,54286440,346378000.0,123350000.0,359871000.0,45388100.0,33991110,182085000.0
2017-01-01,133227000.0,900311000.0,900081000.0,17940400.0,185265000.0,169156000.0,280784000.0,43466800.0,36608700.0,166648000.0,...,184778000.0,179066000.0,116940000.0,46952910,275716000.0,90362600.0,247385000.0,27445200.0,73428100,149342000.0
2017-02-01,92740200.0,641280000.0,596235000.0,9014620.0,152402000.0,172898000.0,138868000.0,25881900.0,24879800.0,103214000.0,...,121504000.0,98411700.0,62854800.0,31008167,146476000.0,59593700.0,188884000.0,20296500.0,31399310,110533000.0
2017-03-01,107765000.0,873784000.0,664371000.0,20549700.0,187560000.0,164284000.0,157405000.0,25564600.0,32960500.0,100470000.0,...,155285000.0,121650000.0,94583200.0,45371778,0.0,70860200.0,180766000.0,20754900.0,21950930,113732000.0
2017-04-01,113839000.0,979337000.0,867873000.0,24123300.0,255942000.0,186978000.0,223745000.0,39699900.0,78984600.0,97651700.0,...,213285000.0,142450000.0,110392000.0,56121750,0.0,122447000.0,313707000.0,40432400.0,46469150,150338000.0
2017-05-01,100410000.0,894324000.0,816185000.0,19303500.0,278306000.0,150875000.0,166623000.0,34540000.0,26172000.0,65452700.0,...,162792000.0,124159000.0,75641600.0,54255570,0.0,78022600.0,250911000.0,25878300.0,0,252798000.0
2017-06-01,101652000.0,621492000.0,609413000.0,9893640.0,159867000.0,96986900.0,149658000.0,19032200.0,24062800.0,57724500.0,...,134835000.0,182683000.0,73889100.0,94481650,0.0,82304200.0,220624000.0,23834900.0,0,245236000.0
2017-07-01,92967200.0,638438000.0,601248000.0,10161900.0,171529000.0,137104000.0,188572000.0,23852500.0,22439600.0,76551400.0,...,179220000.0,226546000.0,87194100.0,116837730,0.0,106502000.0,263671000.0,35129400.0,0,173907000.0
2017-08-01,174028000.0,1382900000.0,704504000.0,11202600.0,207425000.0,143485000.0,178243000.0,23748900.0,24642100.0,86835300.0,...,173000000.0,287041000.0,128205000.0,118369230,0.0,150316000.0,340175000.0,39912900.0,0,124763000.0


In [63]:
df_comp.query("industry=='Real Estate'")

Unnamed: 0,ticker,code,industry,included,excluded
1,000002.SZ,624040,Real Estate,1991-01-29,2017-03-01
2,000002.SZ,626010,Real Estate,2017-03-01,2099-12-31
6,000005.SZ,624040,Real Estate,1990-12-10,2015-12-04
8,000006.SZ,624040,Real Estate,1992-04-27,2017-03-01
9,000006.SZ,626010,Real Estate,2017-03-01,2099-12-31
...,...,...,...,...,...
5277,603506.SH,626010,Real Estate,2017-03-01,2099-12-31
5372,603682.SH,624040,Real Estate,NaT,2017-03-01
5373,603682.SH,626010,Real Estate,2017-03-01,2017-10-30
6649,A20782.SH,624040,Real Estate,NaT,2017-03-01


In [62]:
df_comp['included'].replace(pd.Timestamp('20170302'),pd.Timestamp('20170301'),inplace=True)

In [43]:
fig_vol = make_subplots(rows=5, cols=5, subplot_titles=list(earnings.columns))

for i in range(0,5):
    for j in range(0,5):
        idx = i*5 + j

        if idx == 24:
            break

        industry = industry_index.columns[idx]
        industry_vol = industry_volume[industry]
        industry_prices = industry_index[industry].loc['20090201':'20201231']

        fig_vol.append_trace(go.Scatter(y=industry_vol/industry_vol.values[0], 
                                       line={"dash":"solid","color":"blue"}, 
                                       text=f"{industry}",
                                       textposition='top left'),
                                       row=i+1, col=j+1)

        # fig_vol.append_trace(go.Scatter(y=industry_prices, 
        #                                line={"dash":"solid","color":"red"}, 
        #                                text=f"{industry}",
        #                                textposition='top left'),
        #                                row=i+1, col=j+1)

fig_vol.update_layout(height=1000, width=1200, showlegend=False, title_text='Industry Price Volume')
fig_vol.show()

In [66]:
px.line(industry_volume['Energy'])

In [69]:
px.line(industry_index['Energy'].loc['20090201':'20201231'])