In [2]:
import pandas as pd
from xbbg import blp
import numpy as np

In [25]:
# Load the security universe; the first CSV column becomes the row index.
universe = pd.read_csv("impax_universe_all.csv", header=[0], index_col=[0], low_memory=False)

In [3]:
# Read back the CSV that a later cell writes with `hist_financial_data.to_csv(...)`.
# NOTE(review): the frame written to that file appears to carry two-level rows and
# two-level columns, which header=0 / index_col=0 would not restore — confirm.
financial_data = pd.read_csv("historical_financial_data.csv", header=0, index_col=0)

In [26]:
# Keep only rows whose CUR_MKT_CAP_USD exceeds 1e3.
# NOTE(review): this comment previously read "market cap > 500m", which does not match
# the 1e3 threshold below — confirm the field's units and the intended cutoff.
universe = universe.query("`CUR_MKT_CAP_USD`>1e3")
figi_list = universe.ID_BB_GLOBAL

In [27]:
# Descriptor columns carried forward for each security in the filtered universe.
training_desc = universe.filter(
    items=[
        "SECURITY_NAME",
        "PARSEKYABLE_DES_SOURCE",
        "GICS_SUB_INDUSTRY",
        "ID_ISIN",
        "UD_ECONOMIC_CORRELATION",
        "UD_ALGO_RATING",
        "ID_BB_GLOBAL",
    ]
)

In [28]:
training_desc

Unnamed: 0,SECURITY_NAME,PARSEKYABLE_DES_SOURCE,GICS_SUB_INDUSTRY,ID_ISIN,UD_ECONOMIC_CORRELATION,UD_ALGO_RATING,ID_BB_GLOBAL
1,ITOCHU Corp,8001 JP Equity,20107010,JP3143600009,Cyclical,2B,BBG000B9WJ55
5,Enerpac Tool Group Corp,ATU US Equity,20106020,US2927651040,Cyclical,3,BBG000B9WX45
8,Tatneft PJSC,ATAD LI Equity,10102020,US8766292051,Commodity,7+,BBG000B9X7K3
9,Ameren Corp,AEE US Equity,55103010,US0236081024,Defensive,2A,BBG000B9X8C0
10,Woodside Petroleum Ltd,WPL AU Equity,10102020,AU000000WPL2,Commodity,3+,BBG000B9XBS6
...,...,...,...,...,...,...,...
22510,Apogee Therapeutics Inc,APGE US Equity,35201010,US03770N1019,Defensive,,BBG01H51WYQ5
22511,BGC Group Inc,BGC US Equity,40203020,US0889291045,Cyclical,,BBG01H9FTGX5
22512,Atlanta Braves Holdings Inc,BATRA US Equity,50202010,US0477261046,Cyclical,,BBG01HCDRG86
22513,Atlanta Braves Holdings Inc,BATRK US Equity,50202010,US0477263026,Cyclical,,BBG01HCX3Y34


In [205]:
# Bloomberg field mnemonics grouped by theme. Every group except `est_fields` and
# `best_overrides` is requested as yearly history by `get_hist_financials`.
# NOTE(review): the describe() output further down shows zero observations for
# `total_debt_to_tot_eqy` and `days_accounts_payable` — verify those two mnemonics.
bbg_fields = dict(
    return_fields = {"return_com_eqy", "normalized_roe", "operating_roic", "return_on_asset"},
    margin_fields = {"ebitda_margin", "gross_margin", "ebit_margin", "eff_tax_rate",
            "fcf_margin_after_oper_lea_pymt"},
    # A comma was missing after "tot_int_exp", so Python silently concatenated it with
    # the next literal into the bogus field "tot_int_expcf_cap_expend_prpty_add".
    is_fields = {"sales_rev_turn", "net_income", "is_rd_expend",
                 "ardr_selling_general_admin_exp",
            "is_selling_expenses", "is_opex_adjusted", "tot_int_exp",
            "cf_cap_expend_prpty_add", "cf_cash_from_oper"},
    leverage_fields = {"total_debt_to_tot_eqy", "net_debt_to_shrhldr_eqty",
            "net_debt_to_ebitda", "fixed_charge_coverage_ratio"},
    bs_ratios = {"invent_days", "acct_rcv_days", "days_accounts_payable",
            "cash_conversion_cycle", },
    est_fields = {"best_sales", "best_gross_margin", "best_net_income"},
    best_overrides = [{"best_fperiod_override": "1FY"}, {"best_fperiod_override": "2FY"}, {"best_fperiod_override": "3FY"}]
    )

In [206]:
import itertools
# (group, field) pairs for every historical field group; the estimate and override
# entries of `bbg_fields` are left out.
default_columns = pd.MultiIndex.from_tuples(
    itertools.chain.from_iterable(
        itertools.product([group], fields)
        for group, fields in bbg_fields.items()
        if group not in ("est_fields", "default_override", "best_overrides")
    )
)

In [287]:
from typing import Collection, Sequence, Literal, Dict
import datetime as dt


# Local calendar date at the time this cell runs; used as the default end date below.
TODAY = dt.date.today()

def get_hist_financials(
        tickers: Collection[str], 
        start_date: dt.date=dt.date(1995, 1, 1), 
        end_date: dt.date=TODAY) -> pd.DataFrame:
    """Download yearly Bloomberg history for ``tickers`` and reshape it by field group.

    Every field collection in the module-level ``bbg_fields`` except ``est_fields``,
    ``default_override`` and ``best_overrides`` is requested through ``blp.bdh`` with
    ``Per="Y"``. Rows of the intermediate frame are labelled with the ``(group, field)``
    pairs of ``default_columns``. The ``is_fields`` rows are dropped from the returned
    frame, and ``"growth"`` rows holding the period-over-period change of
    ``sales_rev_turn`` and ``net_income`` are appended.

    Args:
        tickers: Security identifiers handed straight to ``blp.bdh``.
        start_date: First date requested.
        end_date: Last date requested. The default is the module-level ``TODAY`` as it
            was when this function was defined, not the day of the call.

    Returns:
        DataFrame with ``(group, field)`` rows; columns are presumably ``(figi, year)``
        pairs — TODO confirm against a live result.
    """
    # Union of all historical field groups (estimate/override entries are skipped).
    hist_fields = set()
    for fld_name, fld in bbg_fields.items():
        if fld_name not in ("est_fields", "default_override", "best_overrides"):
            hist_fields = hist_fields.union(fld)
    hist_financials = blp.bdh(tickers, hist_fields, start_date=start_date, 
                    end_date=end_date, 
                    Per="Y",
                    # **bbg_fields.get("default_override")
                    )
    # Replace each observation date by the yearly Period that contains it.
    hist_financials.index = hist_financials.index.astype("datetime64[ns]").to_series().apply(lambda d: pd.Period(d, freq="Y"))
    hist_financials = hist_financials.rename_axis("year", axis=0).rename_axis(["figi", "field"], axis=1)
    # Average rows that share a year, then pivot so fields are rows, ordered like default_columns.
    hist_financials = hist_financials.reset_index().groupby("year").mean().stack()\
        .unstack(0).reindex(default_columns.get_level_values(1))
    # Swap the plain field index for the matching (group, field) pairs.
    hist_financials.index = default_columns[default_columns.get_level_values(1).isin(hist_financials.index)]

    # calculated ratios fields
    # NOTE(review): temp_ratios is computed but never added to the result below —
    # confirm whether it was meant to be included in the final concat.
    temp_ratios = hist_financials.loc['is_fields'].drop(['sales_rev_turn'], axis=0) / hist_financials.loc['is_fields'].loc['sales_rev_turn']
    temp_ratios.index = pd.MultiIndex.from_product((["margins"], temp_ratios.index.to_series().apply(lambda x: f"{x}_to_sales").values))
    
    # calculated growth fields
    temp_growth = hist_financials.loc['is_fields'].loc[["sales_rev_turn", "net_income"]]
    # Pivot so pct_change runs down the rows, then pivot back to the original layout.
    # NOTE(review): the describe() output in this notebook shows +/-inf growth values —
    # presumably from zero denominators; confirm and decide how to handle them.
    temp_growth = temp_growth.stack(1).unstack(0).pct_change(periods=1).stack(1).unstack(0)
    temp_growth.index = pd.MultiIndex.from_product((["growth"], temp_growth.index.to_series().apply(lambda x: f"{x}_growth").values))

    # Raw income-statement levels are not returned; only the growth rows derived from them are.
    res = pd.concat([hist_financials.drop("is_fields", axis=0), temp_growth])
    return res

# def get_estimates(
#         tickers,
#         start_date: dt.date=dt.date(1995, 1, 1), 
#         end_date: dt.date=TODAY):


def get_price_multiples(
        tickers: Collection[str], 
        start_date: dt.date=dt.date(2000, 1, 1), 
        end_date: dt.date=TODAY):
    """Fetch weekly price and valuation-multiple history for ``tickers``.

    The fields below are requested through ``blp.bdh`` with ``Per="W"``. The index of
    the result is converted to weekly periods and missing values are forward-filled.

    Args:
        tickers: Security identifiers handed straight to ``blp.bdh``.
        start_date: First date requested.
        end_date: Last date requested (defaults to the module-level ``TODAY``).

    Returns:
        The forward-filled frame returned by ``blp.bdh``, indexed by weekly period.
    """
    multiple_fields = [
        "px_last",
        "best_cur_ev_to_ebitda",
        "fcf_yield_with_cur_entp_val",
        "best_pe_next_ear",
        "px_to_book_ratio",
        "px_to_sales_ratio",
    ]
    weekly = blp.bdh(
        tickers,
        multiple_fields,
        start_date=start_date,
        end_date=end_date,
        Per="W",
    )
    weekly.index = weekly.index.astype("datetime64[ns]").to_period(freq="W")
    return weekly.ffill()
    
def get_future_returns(ref_date: dt.date, price_df: pd.DataFrame):
    """Average forward price ratios at 3m / 6m / 1yr / 3yr after ``ref_date``.

    The base price is the first row of ``price_df`` dated on or after ``ref_date``.
    For each horizon, the rows in a window centred on the first row dated on or after
    ``ref_date + horizon`` are divided by the base price and averaged. The values are
    price ratios (1.0 means unchanged), not ratios minus one.

    Args:
        ref_date: Date the returns are measured from.
        price_df: Prices with one column per security and an index convertible to
            ``datetime64[ns]``, sorted in ascending date order.

    Returns:
        DataFrame indexed by the columns of ``price_df`` with columns
        ``"3m"``, ``"6m"``, ``"1yr"`` and ``"3yr"``. A horizon whose window holds no
        rows comes back as NaN.

    Raises:
        IndexError: if ``price_df`` has no row dated on or after ``ref_date``.
    """
    prices = price_df.copy(deep=True)
    prices.index = prices.index.astype("datetime64[ns]")
    dates = prices.index.to_series()

    def _rows_before(day: dt.date) -> int:
        # Number of rows strictly earlier than `day`, i.e. the position of the first
        # row dated on or after it.
        return int(dates.lt(np.datetime64(day)).sum())

    base_pos = _rows_before(ref_date)
    if base_pos >= len(prices):
        raise IndexError(f"price_df has no observation on or after {ref_date}")
    base_price = prices.iloc[base_pos]

    # (label, calendar days ahead, rows taken on each side of the target row)
    horizons = (
        ("3m", 90, 5),
        ("6m", 180, 10),
        ("1yr", 365, 20),
        ("3yr", 365 * 3, 60),
    )
    averaged = []
    for _label, days_ahead, half_width in horizons:
        centre = _rows_before(ref_date + dt.timedelta(days=days_ahead))
        # Clamp at 0: a negative start would make iloc count from the end of the frame
        # and return an empty or unrelated window.
        window = prices.iloc[max(centre - half_width, 0): centre + half_width]
        averaged.append((window / base_price).mean())

    res = pd.concat(averaged, axis=1)
    res.columns = [label for label, _days, _half in horizons]
    return res


In [243]:
# Pull yearly financials for every FIGI in training_desc and cache them to the CSV
# that the `financial_data = pd.read_csv(...)` cell reads.
hist_financial_data = get_hist_financials(training_desc.ID_BB_GLOBAL)
hist_financial_data.to_csv("historical_financial_data.csv")

  hist_financials = hist_financials.reset_index().groupby("year").mean().stack()\
  res = pd.concat([hist_financials.drop("is_fields", axis=0), temp_growth])


In [288]:
# Weekly prices and valuation multiples for every FIGI in training_desc.
px = get_price_multiples(training_desc.ID_BB_GLOBAL)

In [290]:
# Cache the downloaded price/multiple history on disk.
px.to_csv("price_multiples.csv")

In [282]:
# Swap the column levels so the field name is outermost, sort the columns, and select
# the "px_last" sub-frame before computing forward price ratios from 2020-01-01.
get_future_returns(ref_date=dt.date(2020, 1, 1,), price_df=px.swaplevel(0, 1, axis=1).T.sort_index().T["px_last"])

Unnamed: 0,3m,6m,1yr,3yr
BBG000B9WJ55,0.860051,0.932058,1.185313,1.710271
BBG000B9WX45,0.675251,0.706862,0.878951,0.895932
BBG000B9X7K3,0.603979,0.607167,0.547157,0.102561
BBG000B9X8C0,0.983623,0.988006,1.034759,1.132035
BBG000B9XBS6,0.610354,0.605076,0.637324,0.939246


In [247]:
hist_financial_data.T.describe()

Unnamed: 0_level_0,return_fields,return_fields,return_fields,return_fields,margin_fields,margin_fields,margin_fields,margin_fields,margin_fields,leverage_fields,leverage_fields,leverage_fields,leverage_fields,bs_ratios,bs_ratios,bs_ratios,bs_ratios,growth,growth
Unnamed: 0_level_1,operating_roic,normalized_roe,return_on_asset,return_com_eqy,ebit_margin,fcf_margin_after_oper_lea_pymt,gross_margin,eff_tax_rate,ebitda_margin,net_debt_to_shrhldr_eqty,fixed_charge_coverage_ratio,total_debt_to_tot_eqy,net_debt_to_ebitda,acct_rcv_days,cash_conversion_cycle,invent_days,days_accounts_payable,net_income_growth,sales_rev_turn_growth
count,170431.0,165938.0,172051.0,166963.0,154535.0,158876.0,125566.0,150609.0,154815.0,175617.0,115641.0,0.0,143948.0,142833.0,117540.0,123848.0,0.0,180437.0,179149.0
mean,9.530262,32.40306,4.19689,12.034905,20367.24,-501.4898,35.244851,32.785736,-242.7814,65.88647,3370.511,,2.201944,107.0416,-724.1481,227.4354,,,inf
std,102.63404,8672.205,41.079256,33.07452,8105591.0,46063.64,77.797456,404.642361,19392.79,3198.851,935098.6,,216.393965,7508.852,236442.5,12002.44,,,
min,-29974.3568,-191671.2,-9486.1953,-2463.47,-3073570.0,-8156138.0,-23750.0,0.0,-2983840.0,-1202880.0,-18125.0,,-6164.08,-51.939,-80768400.0,-288660.2,,-inf,-46.13227
25%,4.13105,6.067925,1.2294,5.73765,4.7061,-2.02835,19.9931,16.5077,8.8453,-17.5361,2.9906,,-0.5231,30.2604,23.68703,35.8873,,-0.2480225,0.0
50%,9.1584,11.92835,4.2517,11.806,10.2518,4.27,32.0237,25.2581,16.0143,21.6211,6.1977,,1.0024,51.2821,67.88165,70.02835,,0.06356746,0.08182736
75%,16.39285,19.444,8.6102,19.45345,18.70665,11.81846,48.695725,33.7721,27.76095,74.5006,16.3362,,2.8613,76.5405,127.6559,122.3219,,0.3862939,0.2187474
max,4885.8349,3526470.0,10421.8905,2409.8624,3186375000.0,98301.1,168.8357,95150.0,89208.84,263063.0,317375600.0,,80254.8174,2624120.0,470835.7,2901009.0,,inf,inf


# 2. EDA

## 2.1 Factor analysis


In [1]:
import os

import pandas as pd
import numpy as np
from azure.data.tables import TableServiceClient
from azure.core.credentials import AzureNamedKeyCredential


# The storage account key is read from the environment so it never lives in the notebook.
# SECURITY: a key was previously hard-coded in this cell; treat it as compromised and
# rotate it in the Azure portal. A missing variable raises KeyError on purpose.
creds = AzureNamedKeyCredential("impaxhkstorage", os.environ["AZURE_STORAGE_ACCOUNT_KEY"])
services = TableServiceClient(endpoint="https://impaxhkstorage.table.core.windows.net/",credential=creds)

In [5]:
from typing import List, Dict, Any, Hashable
from collections import OrderedDict

# One example record: two key fields followed by (group, field) -> value pairs.
sample_entity = OrderedDict({
    "PartitionKey": "BBG000B9WJ55",
    "RowKey": 1995,
    ('return_fields', 'operating_roic'): 1.5405,
    ('return_fields', 'normalized_roe'): 1.7776,
    ('return_fields', 'return_on_asset'): 0.1258,
    ('return_fields', 'return_com_eqy'): 1.7776,
    ('margin_fields', 'ebit_margin'): 0.5304,
    ('margin_fields', 'fcf_margin_after_oper_lea_pymt'): np.nan,
    ('margin_fields', 'gross_margin'): 3.7416,
    ('margin_fields', 'eff_tax_rate'): 78.6963,
    ('margin_fields', 'ebitda_margin'): 0.7058,
    ('leverage_fields', 'net_debt_to_shrhldr_eqty'): 784.5313,
    ('leverage_fields', 'fixed_charge_coverage_ratio'): np.nan,
    ('leverage_fields', 'total_debt_to_tot_eqy'): np.nan,
    ('leverage_fields', 'net_debt_to_ebitda'): 38.9812,
    ('bs_ratios', 'acct_rcv_days'): 48.8258,
    ('bs_ratios', 'cash_conversion_cycle'): 34.0,
    ('bs_ratios', 'invent_days'): 23.2395,
    ('bs_ratios', 'days_accounts_payable'): np.nan,
    ('growth', 'net_income_growth'): np.nan,
    ('growth', 'sales_rev_turn_growth'): np.nan,
})

# Turn the record into a Series named (RowKey, PartitionKey), with the two key
# entries removed from the values.
test_input = pd.Series(sample_entity)
key_labels = ["RowKey", "PartitionKey"]
test_input.name = tuple(test_input[key_labels])
test_input = test_input.drop(key_labels)
del key_labels

def create_entity(input: pd.Series) -> Dict[Hashable, Any]:
    """Convert a Series of field values into an entity dictionary.

    The Series values become the entity's properties. When the Series name is a
    two-item tuple it is read as ``(RowKey, PartitionKey)`` — the order in which
    ``test_input.name`` is built in this notebook — and both keys are added to the
    result.

    Args:
        input: Field values keyed by the Series index.

    Returns:
        A new dict of the Series contents, plus ``"RowKey"`` and ``"PartitionKey"``
        when the Series name supplies them. The input Series is not modified.
    """
    # NOTE(review): Azure Table entities presumably need string property names and
    # string PartitionKey/RowKey values; tuple keys and an integer RowKey may have to be
    # converted before upload — confirm against the Azure Tables data model.
    input_dict = input.to_dict()
    if isinstance(input.name, tuple) and len(input.name) == 2:
        row_key, partition_key = input.name
        input_dict.update(
            {"RowKey": row_key, "PartitionKey": partition_key}
        )
    return input_dict

In [25]:
from typing import Sequence
isinstance((0, 1), Sequence)

True

(1995, 'BBG000B9WJ55')

In [13]:
# Move the "RowKey" entry to the front of the ordered dict (last=False means the beginning).
sample_entity.move_to_end("RowKey", last=False)