In [1]:
import pandas as pd

In [2]:
# Data provider: Refinitiv
def extract_constituent(index: str, target_date: str) -> pd.DataFrame:
    target_date = pd.to_datetime(target_date)

    adjustment = pd.read_csv(f"data/raw/Adjustment/{index}.csv")
    constituent = pd.read_csv(f"data/raw/Constituent/{index}.csv")

    adjustment["Date"] = pd.to_datetime(adjustment["Date"], format="mixed")
    adjustment = adjustment.sort_values("Date", ascending=False)

    # Populate the point-in-time constituent list
    for _, row in adjustment.iterrows():
        if row["Date"] <= target_date:
            break

        match row["Adjustment"]:
            case "+":
                constituent = constituent[constituent["ISIN"] != row["ISIN"]].reset_index(drop=True)

            case "-":
                constituent.loc[len(constituent), ["Company", "ISIN"]] = [row["Company"], row["ISIN"]]

    constituent.insert(0, "Date", target_date)

    return constituent

In [71]:
# Data provider: US Treasury Department
# Month-end risk-free rates: the last available observation of every calendar
# month, taken from the yearly raw files 2020-2024.
rf = (
    pd.concat(
        [
            pd.read_csv(f"data/raw/Risk-free rate/{year}.csv", parse_dates=["Date"])
            for year in range(2020, 2024 + 1)
        ]
    )
    .set_index("Date")
    # groupby(...).tail(1) keeps the last row of each month in *row order*, so
    # sort chronologically first instead of relying on how the raw files are
    # ordered
    .sort_index(kind="stable")
    .groupby(pd.Grouper(freq="ME"))
    .tail(1)
    .reset_index()
)

# Generate the universe by the selected representative indexes

# BME to actual trading days hotfixes: business month-end -> date to use instead
trading_day_hotfix = {
    pd.Timestamp("2021-05-31"): pd.Timestamp("2021-05-28"),  # hotfix#1
    pd.Timestamp("2024-03-29"): pd.Timestamp("2024-03-28"),  # hotfix#2
}

# 60 business month-ends starting from 2020-01-01, with the hotfixes applied
snapshot_dates = [
    trading_day_hotfix.get(month_end, month_end)
    for month_end in pd.date_range("2020-01-01", periods=60, freq="BME")
]

for index_name in ["CSI300", "HSI", "SP500", "SXXP", "TOPIX"]:
    # One point-in-time constituent list per snapshot date
    snapshots = [extract_constituent(index_name, when) for when in snapshot_dates]

    # Stack the snapshots, attach the month-end risk-free rate and save
    universe = pd.concat(snapshots).merge(rf, on="Date", how="left")
    universe.to_csv(f"data/{index_name}.csv", index=False)

In [None]:
# Map the book-to-market ratio
# ...

In [209]:
# https://docfinder.bnpparibas-am.com/api/files/2ae286f3-f2f9-4b39-b416-473360e0c9a7/512
# Per the BNP Paribas standard, the backtested time horizon is 5 years
#
# 1. Metrics
# 2. Skewness test
# 3. Monte Carlo simulation
# 4. Efficient frontier