In [8]:
import pandas as pd
import numpy as np
import esig  # 簽名特徵計算
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.preprocessing import StandardScaler

# 讀取 CSV
file_path = "../1.資料前處理/merged_stocks_sorted.csv"
df = pd.read_csv(file_path)

# 轉換日期格式
df['日期'] = pd.to_datetime(df['日期'])

# 處理數據類型（去除千分位並轉為 float）
for col in ['成交股數', '成交金額', '成交筆數']:
    df[col] = df[col].astype(str).str.replace(',', '').astype(float)

# 選擇數值特徵
features = ['開盤價', '收盤價', '最高價', '最低價', '成交股數', 'MACD', 'RSI']
df_features = df[features]

# **計算簽名特徵**
path = df_features.diff().dropna().values  # 計算變化量
sig_transform = esig.stream2sig(path, 2)  # 計算簽名特徵，order=2
print("Signature features computed.")

# ---------------------
# **計算 MMD**
# ---------------------

# 分割兩個不同時間段
df1 = df[df['日期'] < '2016-01-01']
df2 = df[df['日期'] >= '2016-01-01']

# 確保兩組數據長度一致
min_len = min(len(df1), len(df2))
df1 = df1.iloc[:min_len]
df2 = df2.iloc[:min_len]

# 取出數值特徵
X1 = df1[features].dropna().values
X2 = df2[features].dropna().values

# **標準化數據**
scaler = StandardScaler()
X1 = scaler.fit_transform(X1)
X2 = scaler.transform(X2)

# 計算 RBF Kernel
gamma = 1.0 / X1.shape[1]  # 調整 gamma 參數
K_XX = rbf_kernel(X1, X1, gamma=gamma)
K_YY = rbf_kernel(X2, X2, gamma=gamma)
K_XY = rbf_kernel(X1, X2, gamma=gamma)

# 計算 MMD
mmd = np.mean(K_XX) + np.mean(K_YY) - 2 * np.mean(K_XY)
print(f"MMD between 2010-2015 and 2016-2023: {mmd:.6f}")


Signature features computed.
MMD between 2010-2015 and 2016-2023: 0.051860


In [6]:
import esig

sig_transform = esig.stream2sig(path, 2)
print(sig_transform)


[ 1.00000000e+00  3.82700000e+02  3.92000000e+02  3.93100000e+02
  3.76700000e+02 -4.54076950e+07  1.98694344e+01  1.51532080e+00
  7.32296450e+04 -6.68333705e+05  5.30555701e+06 -5.50129112e+06
 -2.92308799e+13 -1.83342951e+06 -4.49765520e+06  8.18352105e+05
  7.68320000e+04  6.04867046e+06 -4.76602350e+06 -2.92176331e+13
 -1.82591665e+06 -4.51358076e+06 -5.15511765e+06 -5.89457527e+06
  7.72638050e+04 -1.07259676e+07 -2.93107525e+13 -1.90216801e+06
 -4.58462284e+06  5.64545421e+06  4.91368990e+06  1.08740484e+07
  7.09514450e+04 -2.91085524e+13 -1.75968212e+06 -4.42191969e+06
  2.92135024e+13  2.91998333e+13  2.92929027e+13  2.90914473e+13
  1.03092938e+15  1.94789186e+11  5.43044297e+11  1.84103354e+06
  1.83370547e+06  1.90997868e+06  1.76716694e+06 -1.95691411e+11
  1.97397212e+02 -1.05820234e+05  4.49823512e+06  4.51417476e+06
  4.58521851e+06  4.42249051e+06 -5.43113104e+11  1.05850342e+05
  1.14809856e+00]


In [5]:
import esig
print(dir(esig))




In [3]:
!pip show esig

Name: esig
Version: 1.0.0
Summary: This package provides "rough path" tools for analysing vector time series.
Home-page: 
Author: 
Author-email: Terry Lyons <software@lyonstech.net>
License: 
Location: /home/r11011101/anaconda3/envs/myenv/lib/python3.10/site-packages
Requires: numpy, pyrecombine, roughpy
Required-by: 
