In [1]:
# %%
import datetime

import numpy as np
import pandas as pd
import requests

# %%
etf50_df = pd.read_csv("data/ETF50.csv", dtype={"STOCK_ID": str})
etf50_id = etf50_df.loc[:, "STOCK_ID"]

for stock_index in etf50_id[:]:
    # 個股融資融劵表(TaiwanStockMarginPurchaseShortSale)
    url = "https://api.finmindtrade.com/api/v4/data"
    parameter = {
        "dataset": "TaiwanStockMarginPurchaseShortSale",
        "start_date": datetime.datetime(1990, 1, 1, 0, 0).strftime("%Y-%m-%d"),
        "end_date": datetime.datetime.today().strftime("%Y-%m-%d"),
        "data_id": stock_index,
    }

    data = requests.get(url, params=parameter)
    data = data.json()
    TSMPS_df = pd.DataFrame(data["data"])

    # TSMPS_df = TSMPS_df.drop(columns=["Note"])
    TSMPS_df.rename(
        {
            "MarginPurchaseTodayBalance": "MarginPurchaseBalance",
            "ShortSaleTodayBalance": "ShortSaleBalance",
        },
        axis=1,
        inplace=True,
    )

    TSMPS_df = TSMPS_df[["date", "MarginPurchaseBalance", "ShortSaleBalance"]]

    # 個股三大法人買賣表(TaiwanStockInstitutionalInvestorsBuySell)
    url = "https://api.finmindtrade.com/api/v4/data"
    parameter = {
        "dataset": "TaiwanStockInstitutionalInvestorsBuySell",
        "start_date": datetime.datetime(1990, 1, 1, 0, 0).strftime("%Y-%m-%d"),
        "end_date": datetime.datetime.today().strftime("%Y-%m-%d"),
        "data_id": stock_index,
    }

    data = requests.get(url, params=parameter)
    data = data.json()
    TSIBS_df = pd.DataFrame(data["data"])

    TSIBS_df = (
        TSIBS_df.pivot(
            index=["date", "stock_id"], columns="name", values=["buy", "sell"]
        )
        .stack(0)
        .unstack()
    )
    TSIBS_df.columns = TSIBS_df.columns.to_flat_index().map(
        lambda x: "_".join(x)
    )
    TSIBS_df = TSIBS_df.reset_index()

    if TSIBS_df.date[0] < "2016-01-01":
        mask = TSIBS_df.date > "2016-01-01"
        self_ratio = np.mean(
            [
                (
                    TSIBS_df[mask].Dealer_self_buy
                    / (
                        TSIBS_df[mask].Dealer_self_buy
                        + TSIBS_df[mask].Dealer_Hedging_buy
                    )
                ).mean(),
                (
                    TSIBS_df[mask].Dealer_self_sell
                    / (
                        TSIBS_df[mask].Dealer_self_sell
                        + TSIBS_df[mask].Dealer_Hedging_sell
                    )
                ).mean(),
            ]
        )

        TSIBS_df.Dealer_self_buy.fillna(
            TSIBS_df.Dealer_buy * self_ratio, inplace=True
        )
        TSIBS_df.Dealer_self_sell.fillna(
            TSIBS_df.Dealer_sell * self_ratio, inplace=True
        )
        TSIBS_df.Dealer_Hedging_buy.fillna(
            TSIBS_df.Dealer_buy * (1 - self_ratio), inplace=True
        )
        TSIBS_df.Dealer_Hedging_sell.fillna(
            TSIBS_df.Dealer_sell * (1 - self_ratio), inplace=True
        )

    # TSIBS_df = TSIBS_df.drop(
    #     columns=[
    #         "stock_id",
    #         "Dealer_buy",
    #         "Dealer_sell",
    #         "Foreign_Dealer_Self_buy",
    #         "Foreign_Dealer_Self_sell",
    #     ]
    # )

    TSIBS_df = TSIBS_df[
        [
            "date",
            "Dealer_Hedging_buy",
            "Dealer_Hedging_sell",
            "Dealer_self_buy",
            "Dealer_self_sell",
            "Foreign_Investor_buy",
            "Foreign_Investor_sell",
            "Investment_Trust_buy",
            "Investment_Trust_sell",
        ]
    ]

    # 外資持股表(TaiwanStockShareholding)
    url = "https://api.finmindtrade.com/api/v4/data"
    parameter = {
        "dataset": "TaiwanStockShareholding",
        "start_date": datetime.datetime(1990, 1, 1, 0, 0).strftime("%Y-%m-%d"),
        "end_date": datetime.datetime.today().strftime("%Y-%m-%d"),
        "data_id": stock_index,
    }

    data = requests.get(url, params=parameter)
    data = data.json()
    TSS_df = pd.DataFrame(data["data"])

    TSS_df = TSS_df[
        ["date", "ForeignInvestmentShares", "ForeignInvestmentSharesRatio"]
    ]

    A = TSMPS_df.set_index(["date"])
    B = TSIBS_df.set_index(["date"])
    C = TSS_df.set_index(["date"])

    BI_df = pd.concat([A, B, C], axis=1)
    BI_df.index.name = "Date"

    BI_df = BI_df.dropna()
    store_filename = f"./data/BI/{stock_index}.csv"

    BI_df.to_csv(
        store_filename,
        index=True,
        header=True,
    )

# %%


def load_BI(
    stock_index,
    start_year=1990,
    end_year=datetime.datetime.today().year,
):
    stock_df = pd.read_csv(f"data/BI/{stock_index}.csv", index_col="Date")

    mask = (
        stock_df.index
        >= datetime.datetime(start_year, 1, 1, 0, 0).strftime("%Y-%m-%d")
    ) & (
        stock_df.index
        <= datetime.datetime(end_year, 1, 1, 0, 0).strftime("%Y-%m-%d")
    )

    stock_df = stock_df[mask]

    return stock_df


load_BI(stock_index="0050")
# %%


OSError: Cannot save file into a non-existent directory: 'data/BI'