In [None]:
%load_ext autoreload
%autoreload 2

import logging
import os

import pandas as pd
import vendors_lime.datastream_liquidity.universe_utils as vldlunut

import helpers.hdbg as hdbg
import helpers.hprint as hprint
import helpers.hsql as hsql

In [None]:
# Initialise logging through the project helper at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

# Logger for this notebook's own messages.
_LOG = logging.getLogger(__name__)

# Disabled: `env` is not among the modules imported in the import cell, so this
# line needs that import added before it can be re-enabled.
# _LOG.info("%s", env.get_system_signature()[0])

# Apply the project's notebook configuration (defined in helpers.hprint).
hprint.config_notebook()

# Load liquidity files

In [None]:
# Open the database connection used by the queries below.
# The password is read from the environment so that no secret is stored in the
# notebook: export REDSHIFT_PASSWORD before starting the kernel (a missing
# variable raises KeyError here rather than failing later with an opaque
# authentication error). The remaining settings default to the values this
# notebook has always used and can be overridden the same way.
# NOTE(review): the password previously committed in this cell should be
# rotated, since it remains in the version-control history.
# NOTE(review): argument order is presumably (host, dbname, port, user,
# password) -- confirm against hsql.get_connection.
connection = hsql.get_connection(
    os.environ.get("REDSHIFT_HOST", "dp-research.redshift.eglp.com"),
    os.environ.get("REDSHIFT_DBNAME", "refdata"),
    int(os.environ.get("REDSHIFT_PORT", "5439")),
    os.environ.get("REDSHIFT_USER", "cf_dev_gsaggese"),
    os.environ["REDSHIFT_PASSWORD"],
)

In [None]:
# Date handed to generate_liquidity_df(); reused by a later cell.
date = "2022-03-01"

# Flags forwarded verbatim to generate_liquidity_df(); flip either one to
# change the call.
add_rankings = False
apply_categorical_filters = True

df = vldlunut.generate_liquidity_df(
    date,
    connection,
    add_rankings=add_rankings,
    apply_categorical_filters=apply_categorical_filters,
)

# Quick look: dimensions first, then the first three rows.
print("df=", df.shape)
display(df.iloc[:3])

In [None]:
# Show the first rows of the dataframe returned by generate_liquidity_df().
df.head()

In [None]:
# Distinct values of the region / trade_date columns.
print(f"region= {df['region'].unique()}")

print(f"trade_date= {df['trade_date'].unique()}")

# Number of distinct identifiers.
print(f"num(asset_ids)= {df['asset_id'].nunique()}")

print(f"num(infocode)= {df['infocode'].nunique()}")

# Frequency tables, each printed under its own label line.
print("country=", df["country"].value_counts(), sep="\n")

print("is_major_sec=", df["is_major_sec"].value_counts(), sep="\n")

print("is_prim_qt=", df["is_prim_qt"].value_counts(), sep="\n")

print("sectype=", df["sectype"].value_counts(), sep="\n")

In [None]:
# Inspect the rows selected by a boolean mask. Alternative masks kept for
# quick switching:
# mask = df["sectype"] == "NA:P "
# mask = df["sectype"] == "NA:F "
# Element-wise comparison against False is intentional: `not` / `is False`
# do not operate per-row on a Series.
mask = df["is_prim_qt"] == False  # noqa: E712
# A bare `df[mask]` in the middle of a cell is evaluated and thrown away;
# display() actually renders the selected rows.
display(df[mask])
print(df[mask]["ticker"])

In [None]:
# Select the rows for a single ticker and show them, then their sectype.
# Swap the literal (e.g. for "AAPL") to inspect another ticker.
mask = df["ticker"].eq("SPY")
display(df.loc[mask])
display(df.loc[mask, "sectype"])

In [None]:
# Show the first row as a Series (one entry per column).
df.iloc[0]

In [None]:
# List all column labels of the dataframe.
df.columns.to_list()

In [None]:
# Histogram of the spread_usd_21d column over all rows, 101 bins.
df["spread_usd_21d"].hist(bins=101)

In [None]:
# Column to inspect; switch to "spread_bps_21d" to look at that one instead.
col = "spread_usd_21d"

# Keep only rows whose value in `col` is at most 0.1.
df_val = df.loc[df[col] <= 0.1]

# Histogram and total of the retained values.
df_val[col].hist(bins=101)

print(df_val[col].sum())

# Histogram of spread_bps_21d over all rows.
# NOTE(review): this is issued in the same cell as the histogram above --
# confirm whether the two are meant to share one set of axes.
df["spread_bps_21d"].hist(bins=101)
# Generate liquidity plots

In [None]:
# Histogram (linear scale, 101 bins) of spread_bps_63d restricted to values below 200.
df.loc[df["spread_bps_63d"] < 200, "spread_bps_63d"].hist(log=False, bins=101)

In [None]:
# Histogram (linear scale, 101 bins) of spread_usd_63d restricted to values below 0.2.
df.loc[df["spread_usd_63d"] < 0.2, "spread_usd_63d"].hist(log=False, bins=101)

In [None]:
# Histogram of mkt_cap_usd_avg_90d over all rows, 100 bins, drawn with log=True.
df["mkt_cap_usd_avg_90d"].hist(log=True, bins=100)

# Apply filters

In [None]:
# Run the project's threshold filters on the liquidity dataframe; the result
# is kept under a separate name so `df` stays available unfiltered.
filtered_df = vldlunut.apply_threshold_filters(df)

In [None]:
# Summary statistics of the filtered dataframe.
filtered_df.describe()

In [None]:
# Rows of the filtered dataframe that have no ticker.
filtered_df.loc[filtered_df["ticker"].isna()]

In [None]:
# Tickers of the filtered rows as a plain list, missing values dropped.
tickers = filtered_df["ticker"].dropna().tolist()

In [None]:
# Always fails: raises AssertionError so that "Run All" stops at this cell and
# the cells below run only when executed explicitly.
assert 0

# Get universe dataframe at datetime

In [None]:
# Request the filtered universe for the single `date` set earlier; the helper
# takes a list of dates and returns an indexable result, so take element 0.
universe = vldlunut.get_filtered_universe_dfs([date], connection)[0]

In [None]:
# First business day of every month from 2017-01-01 through 2022-03-01
# ("BMS" is the pandas alias for the business-month-begin offset).
dates = pd.date_range("2017-01-01", "2022-03-01", freq="BMS")

In [None]:
# Same helper as for the single date, now called with every entry of `dates`.
universe_dfs = vldlunut.get_filtered_universe_dfs(dates, connection)

In [None]:
# Pick the element at index 10 for inspection.
# NOTE: this rebinds `df`, which until here held the liquidity dataframe.
df = universe_dfs[10]

In [None]:
# First rows of whatever `df` currently refers to (the cell above binds it to
# universe_dfs[10]).
df.head()

In [None]:
# Combine all per-date results into one object via the project helper.
# NOTE: `df` is rebound again; the cells below operate on the combined result.
df = vldlunut.combine_universe_dfs(universe_dfs)

In [None]:
# Top-left 10x10 corner of the combined dataframe.
df.iloc[:10, :10]

In [None]:
# Column labels of the combined dataframe as a list.
# presumably one column per asset id -- verify against combine_universe_dfs.
asset_ids = df.columns.to_list()

In [None]:
# Number of columns collected above.
len(asset_ids)

In [None]:
# Per-column sum divided by the number of rows, shown as a 20-bin histogram.
df.sum(axis=0).div(df.shape[0]).hist(bins=20)

In [None]:
# Per-row sum plotted along the index; the y-axis is anchored at 0 with an
# automatic upper limit.
df.sum(axis=1).plot(ylim=(0, None), title="Universe size count")

# Save universe

In [None]:
# Write union of EGIDs to a csv
# The write used to sit behind an always-failing `assert 0`, which made it
# unreachable and left the cell raising on every run. It is now an explicit
# opt-in: set SAVE_UNIVERSE to True to (re)write the file; with the default
# the cell is a no-op, so nothing is overwritten by accident.
SAVE_UNIVERSE = False
if SAVE_UNIVERSE:
    # Only the column labels of `df` are written, one per row, without the
    # positional index.
    pd.Series(data=df.columns).to_csv("universe_20210810.csv", index=False)

# Read universe

In [None]:
import pandas as pd

In [None]:
# Load the saved universe csv from S3.
# NOTE: this rebinds `universe`, which an earlier cell assigned from
# get_filtered_universe_dfs().
universe = pd.read_csv("s3://eglp-spm-sasm/data/universe_20210810.csv")

In [None]:
# Display the dataframe read from the csv.
universe

In [None]:
# The asset_id column of the loaded universe as a plain Python list.
universe["asset_id"].to_list()