# 数据分析

In [4]:
import talib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
def fisher_transform(series: pd.Series, period: int = 10) -> pd.Series:
    """Compute the Fisher Transform of a series.

    Each observation is first expressed as its position inside the rolling
    high/low range of the last ``period`` observations (shorter windows are
    used at the start, since ``min_periods=1``). That position is centred
    around zero, smoothed recursively, clamped to ``[-0.999, 0.999]`` and
    passed through ``0.5 * ln((1 + v) / (1 - v))``, which is itself smoothed
    with the previous output.

    Args:
        series: Input values.
        period: Look-back window for the rolling high and low.

    Returns:
        A series with the same index as ``series``. The first element is
        always 0 because the recursion starts at the second observation.
    """
    # Pull everything into plain float arrays once; per-row ``.iloc`` access
    # inside the loop is needlessly slow.
    prices = series.to_numpy(dtype=float)
    highest = series.rolling(period, min_periods=1).max().to_numpy(dtype=float)
    lowest = series.rolling(period, min_periods=1).min().to_numpy(dtype=float)
    spans = highest - lowest

    values = np.zeros(len(series))
    fishers = np.zeros(len(series))

    for i in range(1, len(series)):
        if spans[i] == 0:
            # Flat window: the position inside a zero-width range is
            # undefined (0 / 0). Treat it as the midpoint so it contributes
            # nothing, instead of producing a NaN that would propagate
            # through the recursion to every later value.
            position = 0.5
        else:
            position = (prices[i] - lowest[i]) / spans[i]

        smoothed = 0.66 * (position - 0.5) + 0.67 * values[i - 1]
        # Keep strictly inside (-1, 1) so the logarithm below stays finite.
        values[i] = max(min(smoothed, 0.999), -0.999)
        fishers[i] = (
            0.5 * np.log((1 + values[i]) / (1 - values[i])) + 0.5 * fishers[i - 1]
        )

    return pd.Series(fishers, index=series.index)


def normalize(
    series: pd.Series, period: int = 200, method: str = "zscore"
) -> pd.Series:
    """Normalize a time series over a rolling look-back window.

    Args:
        series: Time series to normalize.
        period: Size of the look-back window.
        method: Normalization method, either ``'zscore'`` (rolling z-score)
            or ``'ft'`` (Fisher Transform).

    Returns:
        A series holding the normalized values.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method not in ("zscore", "ft"):
        raise ValueError(f"Invalid method '{method}'")

    if method == "ft":
        return fisher_transform(series, period)

    # Rolling z-score: distance from the rolling mean in units of the
    # rolling standard deviation.
    window = series.rolling(period)
    return (series - window.mean()) / window.std()


def find_trend_periods(series: pd.Series) -> list:
    """Locate every run of consecutive 1s and return its first and last label.

    Args:
        series: Flag series whose values compare equal to 1 inside a period
            and equal to 0 outside of it (a boolean series works).

    Returns:
        A list of ``(start, end)`` index labels, one tuple per run, in order
        of appearance. ``end`` is the label of the last flagged element.
    """
    labels = series.index
    spans = []
    run_start = None

    for pos, flag in enumerate(series.to_numpy()):
        if run_start is None:
            # Outside a run: only a 1 can open a new one.
            if flag == 1:
                run_start = labels[pos]
        elif flag == 0:
            # Inside a run: a 0 closes it at the previous element.
            spans.append((run_start, labels[pos - 1]))
            run_start = None

    # A run still open at the end of the data extends to the last label.
    if run_start is not None:
        spans.append((run_start, labels[-1]))

    return spans

## STH Realized Price

In [15]:
# Load the BTC/USD price data, indexed by the parsed "datetime" column.
ohlcv = pd.read_csv("./data/btcusd.csv", parse_dates=True, index_col="datetime")

# Load the STH realized price metric, indexed the same way.
metric = pd.read_csv(
    "./data/sth_realized_price.csv", parse_dates=True, index_col="datetime"
)

# Put the close price next to the metric, rename it to "price" and keep only
# the rows where every column has a value.
df = pd.concat([ohlcv["close"], metric], join="outer", axis=1)
df = df.rename(columns={"close": "price"})
df = df.dropna()
df

Unnamed: 0_level_0,price,sth_realized_price
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-09-17,457.334015,548.908069
2014-09-18,424.440002,545.791160
2014-09-19,394.795990,542.147384
2014-09-20,408.903992,539.970448
2014-09-21,398.821014,538.805193
...,...,...
2025-03-26,86900.882812,93217.144967
2025-03-27,87177.101562,93239.685609
2025-03-28,84353.148438,93125.100057
2025-03-29,82597.585938,93612.585069


In [34]:
# Parameters
period = 10  # look-back window used by the normalization
threshold = 2.0  # absolute indicator level that triggers a signal

# Distance between the price and the realized price, then its normalized
# (Fisher Transform) version.
data = df.copy()
data["diff"] = data["price"].sub(data["sth_realized_price"])
data["normalized_diff"] = normalize(data["diff"], period, method="ft")
data = data.dropna()

# Periods where the normalized deviation sits at or beyond the threshold.
peak_periods = find_trend_periods(data["normalized_diff"].ge(threshold))
valley_periods = find_trend_periods(data["normalized_diff"].le(-threshold))

data

Unnamed: 0_level_0,price,sth_realized_price,diff,normalized_diff
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-09-17,457.334015,548.908069,-91.574054,0.000000
2014-09-18,424.440002,545.791160,-121.351157,-0.342828
2014-09-19,394.795990,542.147384,-147.351394,-0.791374
2014-09-20,408.903992,539.970448,-131.066456,-0.953753
2014-09-21,398.821014,538.805193,-139.984179,-1.142673
...,...,...,...,...
2025-03-26,86900.882812,93217.144967,-6316.262154,0.948100
2025-03-27,87177.101562,93239.685609,-6062.584046,1.227384
2025-03-28,84353.148438,93125.100057,-8771.951619,0.779494
2025-03-29,82597.585938,93612.585069,-11014.999131,0.166175


In [37]:
# Build a three-row chart to inspect how the price relates to the indicator.
fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.05,
    subplot_titles=(
        "<b>Bitcoin Price(USD)</b>",
        "<b>STH Realized Price Diff</b>",
        "<b>Normalized Indicator</b>",
    ),
)

# Row 1: Bitcoin price
fig.add_trace(
    go.Scatter(x=data.index, y=data["price"], name="Bitcoin price"), row=1, col=1
)

# Row 1: shade the extreme periods (red for peaks, green for valleys).
for periods, color in ((peak_periods, "#FF6B6B"), (valley_periods, "#38A169")):
    for x0, x1 in periods:
        fig.add_vrect(
            x0=x0,
            x1=x1,
            fillcolor=color,
            opacity=0.2,
            # A period that starts and ends on the same bar has zero width and
            # would be invisible; outline it so it still shows up, the same way
            # the STH SOPR chart in this notebook does.
            line_width=0 if x0 < x1 else 1.2,
            row=1,
            col=1,
        )

# Row 1: raw metric
fig.add_trace(
    go.Scatter(x=data.index, y=data["sth_realized_price"], name="STH Realized price"),
    row=1,
    col=1,
)

# Row 2: deviation of the price from the realized price
fig.add_trace(
    go.Scatter(x=data.index, y=data["diff"], fill="tozeroy", name="Deviation"),
    row=2,
    col=1,
)

# Row 3: normalized indicator
fig.add_trace(
    go.Scatter(x=data.index, y=data["normalized_diff"], name="Normalized Deviation"),
    row=3,
    col=1,
)
# Draw the signal levels from `threshold` so the dashed lines always match
# the levels that produced the shaded periods.
for level in (-threshold, threshold):
    fig.add_hline(
        y=level, row=3, col=1, line_dash="dash", line_color="grey", line_width=0.8
    )

# Final layout
fig.update_layout(
    title="STH Realized Price",
    width=1000,
    height=1000,
    template="plotly_white",
    showlegend=False,
)

fig.show()

## STH SOPR

In [3]:
# Load the BTC/USD price data, indexed by the parsed "datetime" column.
ohlcv = pd.read_csv("./data/btcusd.csv", parse_dates=True, index_col="datetime")

# Load the STH SOPR metric, indexed the same way.
metric = pd.read_csv("./data/sth_sopr.csv", parse_dates=True, index_col="datetime")

# Put the close price next to the metric, rename it to "price" and keep only
# the rows where every column has a value.
df = pd.concat([ohlcv["close"], metric], join="outer", axis=1)
df = df.rename(columns={"close": "price"})
df = df.dropna()
df

Unnamed: 0_level_0,price,sth_sopr
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-09-17,457.334015,0.979258
2014-09-18,424.440002,0.917006
2014-09-19,394.795990,0.932153
2014-09-20,408.903992,0.918946
2014-09-21,398.821014,0.940105
...,...,...
2025-03-25,87471.703125,1.000149
2025-03-26,86900.882812,0.997364
2025-03-27,87177.101562,0.997759
2025-03-28,84353.148438,0.993669


In [21]:
# Alternative approach (disabled): smooth the SOPR, normalize it with the
# Fisher Transform and flag the periods beyond a fixed threshold.
#
# # Parameters
# period = 10  # look-back window used by the normalization
# threshold = 4.0  # absolute indicator level that triggers a signal
#
# # Smooth the SOPR and normalize it
# data = df.copy()
# data["smooth_sopr"] = data["sth_sopr"].rolling(10, min_periods=1).mean()
# data["normalized_sopr"] = normalize(data["smooth_sopr"], period, method="ft")
# data.dropna(inplace=True)
#
# peak_periods = find_trend_periods(data["normalized_sopr"] >= threshold)
# valley_periods = find_trend_periods(data["normalized_sopr"] <= -threshold)

# Parameters
period = 200  # Bollinger band window
upper_factor = 2.0  # deviation multiplier for the upper band
lower_factor = 1.5  # deviation multiplier for the lower band

# Apply Bollinger bands to the indicator itself; the middle band is not used.
data = df.copy()
bband_upper, _, bband_lower = talib.BBANDS(
    data["sth_sopr"],
    timeperiod=period,
    nbdevup=upper_factor,
    nbdevdn=lower_factor,
)
data["upper_band"] = bband_upper
data["lower_band"] = bband_lower
data = data.dropna()

# Periods where the indicator breaks out of the band channel.
peak_periods = find_trend_periods(data["sth_sopr"].gt(data["upper_band"]))
valley_periods = find_trend_periods(data["sth_sopr"].lt(data["lower_band"]))

data

Unnamed: 0_level_0,price,sth_sopr,upper_band,lower_band
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-04-04,253.697006,1.001629,1.051142,0.897621
2015-04-05,260.597992,1.003521,1.051418,0.897626
2015-04-06,255.492004,1.000321,1.051738,0.898115
2015-04-07,253.179993,1.009242,1.052238,0.898414
2015-04-08,245.022003,0.995501,1.052494,0.898893
...,...,...,...,...
2025-03-25,87471.703125,1.000149,1.030789,0.983107
2025-03-26,86900.882812,0.997364,1.030571,0.983489
2025-03-27,87177.101562,0.997759,1.030090,0.984169
2025-03-28,84353.148438,0.993669,1.029647,0.984766


In [31]:
# Build a two-row chart to inspect how the price relates to the indicator.
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.05,
    subplot_titles=(
        "<b>Bitcoin Price(USD)</b>",
        "<b>STH SOPR</b>",
    ),
)

# Row 1: Bitcoin price
fig.add_trace(
    go.Scatter(x=data.index, y=data["price"], name="Bitcoin price"), row=1, col=1
)

# Row 1: shade the extreme periods (red for peaks, green for valleys).
for periods, color in ((peak_periods, "#FF6B6B"), (valley_periods, "#38A169")):
    for x0, x1 in periods:
        fig.add_vrect(
            x0=x0,
            x1=x1,
            fillcolor=color,
            opacity=0.4,
            # A period that starts and ends on the same bar gets an outline,
            # every other period is drawn without a border.
            line_width=0 if x0 < x1 else 1.2,
            row=1,
            col=1,
        )

# Row 2: the indicator followed by its upper and lower Bollinger bands.
for column, label in (
    ("sth_sopr", "STH SOPR"),
    ("upper_band", "Upper band"),
    ("lower_band", "Lower band"),
):
    fig.add_trace(
        go.Scatter(x=data.index, y=data[column], name=label),
        row=2,
        col=1,
    )

# Final layout
fig.update_layout(
    title="STH SOPR",
    width=1000,
    height=800,
    template="plotly_white",
    showlegend=False,
)

fig.show()