In [None]:
import numpy as np
import pandas as pd
import ast
from dtaidistance import dtw
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

# Load the variable-length slices; each cell of column '0' is parsed as a
# Python literal and turned into a NumPy array.
df = pd.read_csv(
    'SIPR/res/sipr_hs300_k12_l4-8_dba_kmpp_subsequences.csv',
    index_col=0,
)
var_length_sequences = [
    np.array(parsed) for parsed in map(ast.literal_eval, df['0'])
]

# Concatenate all slices in file order; the result is treated as the
# reconstructed original time series.
original_series = np.concatenate(var_length_sequences)

# Extract fixed-length slices (the call below uses length 5 as an example).
def extract_fixed_length_subsequences(series, length=8):
    """Return every contiguous window of ``length`` items from ``series``.

    Windows are taken with a stride of one, so consecutive windows overlap
    by ``length - 1`` items.

    Args:
        series: A sliceable sequence that supports ``len()`` (for example a
            list or a one-dimensional NumPy array).
        length: Window size; must be at least 1. Defaults to 8.

    Returns:
        A list of ``len(series) - length + 1`` slices of ``series``, or an
        empty list when ``series`` is shorter than ``length``.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    # A zero or negative window would silently yield empty or meaningless
    # slices, so reject it explicitly.
    if length < 1:
        raise ValueError(f"length must be a positive integer, got {length!r}")
    return [series[i:i + length] for i in range(len(series) - length + 1)]

# Slide a 5-sample window over the concatenated series (overrides the
# function's default length of 8).
fixed_length_sequences = extract_fixed_length_subsequences(
    original_series,
    length=5,
)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the CSV without a header row, skipping its first line, and keep only
# the first 10 subsequences.
csv_file = 'SIPR/res/sipr_hs300_k12_l8-16_dba_kmpp_subsequences.csv'
data = pd.read_csv(csv_file, header=None, skiprows=1).iloc[:10]

# Flatten the subsequences stored in column 1 into one list of floats.
full_sequence = []
for seq in data[1]:
    # Each cell is parsed as a bracketed, whitespace-separated run of numbers.
    numbers = [float(token) for token in seq.strip('[]').split()]
    full_sequence += numbers

# Treat full_sequence as the original series and cut it into consecutive
# chunks of step_size samples. Only complete chunks are kept; a shorter
# trailing remainder is dropped.
step_size = 8
split_sequences = [
    full_sequence[lo:lo + step_size]
    for lo in range(0, len(full_sequence) - step_size + 1, step_size)
]

# Wide canvas so the two panels fit side by side.
plt.figure(figsize=(24, 6))

# Global font defaults: size 14, bold.
plt.rcParams.update({'font.size': 14, 'font.weight': 'bold'})

# Left panel: the subsequences as stored in the CSV (varying lengths), drawn
# one after another along a running index.
ax_varying = plt.subplot(1, 2, 1)
start = 0
for seq in data[1]:
    # Parse the bracketed, whitespace-separated cell into floats.
    numbers = [float(token) for token in seq.strip('[]').split()]
    end = start + len(numbers)
    ax_varying.plot(range(start, end), numbers, label=f'Part {start}-{end}')
    start = end  # next segment begins where this one stopped
ax_varying.set_title('Split Sequences with Varying Step', fontsize=16, fontweight='bold')
ax_varying.set_xlabel('Index', fontsize=14, fontweight='bold')
ax_varying.set_ylabel('Value', fontsize=14, fontweight='bold')
ax_varying.legend()

# Right panel: the same data re-cut into fixed-size chunks.
ax_fixed = plt.subplot(1, 2, 2)
start = 0
for seq in split_sequences:
    end = start + len(seq)
    ax_fixed.plot(range(start, end), seq, label=f'Part {start}-{end}')
    start = end  # next segment begins where this one stopped
ax_fixed.set_title('Split Sequences with Fixed Step Size of 8', fontsize=16, fontweight='bold')
ax_fixed.set_xlabel('Index', fontsize=14, fontweight='bold')
ax_fixed.set_ylabel('Value', fontsize=14, fontweight='bold')
ax_fixed.legend()

plt.tight_layout()
plt.show()
