# 数据导入与方法定义

In [None]:
import os
import pandas as pd
from datetime import datetime
import panel as pn
import hvplot.pandas

# 初始化 Panel
pn.extension()

In [2]:

# Directory that holds the raw CSV files
dir_path = 'data/raw/sat3'

# Names of all entries in that directory
file_list = os.listdir(dir_path)

# Keep only the CSV files. os.listdir returns entries in arbitrary order, so
# sort them to make the "first"/"last" file picked by later cells deterministic.
csv_files = sorted(f for f in file_list if f.endswith('.csv'))
dataframes = {}

for file in csv_files:
    file_path = os.path.join(dir_path, file)

    # The raw files carry no header row: read every line as data. Without
    # header=None the first sample of each file would be consumed as column
    # labels and silently lost.
    df = pd.read_csv(file_path, header=None)

    # Name the two columns (fails loudly if a file does not have exactly two)
    df.columns = ['timestamp', 'value']

    # Parse the timestamps and use them as the index
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.set_index('timestamp')

    # Keep only the first row for every repeated timestamp
    df = df.loc[~df.index.duplicated(keep='first')]

    # Store the cleaned frame under its file name
    dataframes[file] = df


# 原始数据分析

In [None]:
# Line chart of the raw series
# Drop-down that picks which loaded file is displayed
available_files = list(dataframes.keys())
file_selector = pn.widgets.Select(
    name='选择数据文件',
    options=available_files,
    value=available_files[0],
)


# Re-rendered by Panel every time the selected file changes
@pn.depends(file=file_selector)
def plot_data(file):
    """Return a column holding the line plot and the table for ``file``."""
    selected = dataframes[file]
    line_options = {'downsample': True, 'height': 500}
    table_buttons = {'Print': "<i class='fa fa-print'></i>"}
    return pn.Column(
        selected.hvplot.line(**line_options),
        pn.widgets.Tabulator(selected, buttons=table_buttons),
    )


# Heading on top, selector and reactive output underneath
dashboard = pn.Column(
    pn.pane.Markdown("## 数据可视化"),
    pn.Column(file_selector, plot_data),
)

# Display the dashboard as the cell output
dashboard

In [None]:
# Distribution of the sampling interval for every loaded file
time_diffs = {}
for file, df in dataframes.items():
    # Gap between consecutive timestamps, expressed in seconds
    time_diffs[file] = df.index.to_series().diff().dt.total_seconds().dropna().to_frame()

# Drop-down that picks which file's intervals are displayed
file_selector = pn.widgets.Select(
    name='选择数据文件',
    options=list(time_diffs.keys()),
    value=list(time_diffs.keys())[0]
)


# Re-rendered by Panel every time the selected file changes
@pn.depends(file=file_selector)
def plot_data(file):
    """Return a column holding the interval histogram and table for ``file``."""
    df = time_diffs[file]
    plot = df.hvplot.hist(
        bins=1000,
        logy=True,
        height=500,
    )
    # The table gets a copy whose index is unnamed - presumably because the
    # index and the single column share the same label otherwise. Using
    # rename_axis leaves the cached frame in time_diffs untouched.
    tabulator = pn.widgets.Tabulator(df.rename_axis(None))
    return pn.Column(plot, tabulator)


# Heading on top, selector and reactive output underneath
dashboard = pn.Column(
    pn.pane.Markdown("## 数据可视化"),
    pn.Column(
        file_selector,
        plot_data
    )
)

# Display the dashboard as the cell output
dashboard

In [None]:
   
# Distribution of the step between consecutive values for every loaded file
value_diffs = {}
for file, df in dataframes.items():
    # First difference of the 'value' column, kept as a one-column DataFrame
    # because the Tabulator widget below displays DataFrames
    value_diffs[file] = df['value'].diff().dropna().to_frame()

# Drop-down that picks which file's differences are displayed
file_selector = pn.widgets.Select(
    name='选择数据文件',
    options=list(value_diffs.keys()),
    value=list(value_diffs.keys())[0]
)


# Re-rendered by Panel every time the selected file changes
@pn.depends(file=file_selector)
def plot_data(file):
    """Return a column holding the difference histogram and table for ``file``."""
    df = value_diffs[file]
    plot = df.hvplot.hist(
        bins=1000,
        height=500,
    )
    tabulator = pn.widgets.Tabulator(df, buttons={'Print': "<i class='fa fa-print'></i>"})
    return pn.Column(plot, tabulator)


# Heading on top, selector and reactive output underneath
dashboard = pn.Column(
    pn.pane.Markdown("## 数据可视化"),
    pn.Column(
        file_selector,
        plot_data
    )
)

# Display the dashboard as the cell output
dashboard

# 数据清洗

# 数据增强

In [None]:
from pykalman import KalmanFilter
import numpy as np
import matplotlib.pyplot as plt

# Work on a copy of the first loaded DataFrame so the stored one stays untouched
first_file = list(dataframes.keys())[0]
df_copy = dataframes[first_file].copy()

# The 'value' column is the observation sequence handed to the filter
observations = df_copy['value'].values

# Scalar model: identity transition and observation, started at the first sample
kalman_params = {
    'initial_state_mean': observations[0],
    'transition_matrices': [1],
    'observation_matrices': [1],
    'transition_covariance': 0.01,
    'observation_covariance': 1.0,
    'initial_state_covariance': 1.0,
}
kf = KalmanFilter(**kalman_params)

# smooth() yields state means and covariances; only the means are kept as a column
state_means, state_covs = kf.smooth(observations)
df_copy['kalman_filtered'] = state_means

# Raw series in blue, smoothed series in red
plt.figure(figsize=(12, 6))
for column, line_style, label in (
    ('value', 'b-', '原始数据'),
    ('kalman_filtered', 'r-', '卡尔曼滤波后数据'),
):
    plt.plot(df_copy.index, df_copy[column], line_style, label=label)
plt.title(f"文件 {first_file} 的卡尔曼滤波结果")
plt.xlabel("时间")
plt.ylabel("数值")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Show the first rows of the raw and smoothed columns side by side
print("原始数据和滤波后数据的前5行：")
print(df_copy.head())

In [None]:
from scipy.signal import savgol_filter
import matplotlib.pyplot as plt

# Work on a copy of the LAST loaded DataFrame (dict insertion order)
file = list(dataframes.keys())[-1]
df_copy = dataframes[file].copy()

# Savitzky-Golay smoothing with window length 20 and polynomial order 3
# NOTE(review): older SciPy releases require an odd window_length - confirm
# against the installed version
df_copy['sg_filtered'] = savgol_filter(df_copy['value'], window_length=20, polyorder=3)

print(df_copy.head(100))

# Raw series in blue, smoothed series in red
plt.figure(figsize=(12, 6))
plt.plot(df_copy.index, df_copy['value'], 'b-', label='原始数据')
plt.plot(df_copy.index, df_copy['sg_filtered'], 'r-', label='滤波后数据')
# The title names the file that is actually plotted in this cell
plt.title(f"{file} 数据的Savitzky-Golay滤波")
plt.xlabel('时间')
plt.ylabel('数值')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# 多维数据

In [5]:
import pandas as pd
from collections import defaultdict

# 按时间戳模式对数据框进行分组
def group_by_timestamp_pattern(dataframes):
    """Group data frames by the text form of their first index entry.

    Args:
        dataframes: Mapping of file name to DataFrame. The first index entry
            of every non-empty frame must provide ``strftime``.

    Returns:
        A ``defaultdict(list)`` mapping the first timestamp, formatted as
        ``'%Y-%m-%d %H:%M:%S'``, to the ``(file, df)`` pairs that start at
        it, in iteration order. Empty frames are left out.
    """
    buckets = defaultdict(list)

    for name, frame in dataframes.items():
        if frame.empty:
            continue
        # Frames share a bucket when their first timestamps agree to the second
        start_label = frame.index[0].strftime('%Y-%m-%d %H:%M:%S')
        buckets[start_label].append((name, frame))

    return buckets

# 合并具有相同时间戳模式的数据框
def merge_dataframes_by_timestamp(dataframes):
    """Outer-join the frames that share the same first timestamp.

    Frames are grouped with ``group_by_timestamp_pattern``. Within every
    group of two or more frames, each frame's ``'value'`` column is renamed
    to its file name and the frames are joined on their index.

    Args:
        dataframes: Mapping of file name to DataFrame.

    Returns:
        dict mapping the group's timestamp label to the merged DataFrame.
        Groups containing a single frame are skipped.
    """
    merged_results = {}

    for pattern, members in group_by_timestamp_pattern(dataframes).items():
        # A lone frame has nothing to be merged with
        if len(members) < 2:
            continue

        combined = pd.DataFrame()
        for name, frame in members:
            labelled = frame.rename(columns={'value': name})
            # The first frame seeds the result; later ones are outer-joined so
            # that every timestamp of every frame is kept
            combined = labelled if combined.empty else combined.join(labelled, how='outer')

        merged_results[pattern] = combined

    return merged_results

# Build the merged datasets that the dashboard cells further down consume
# (this call runs whenever the cell is executed)
merged_datasets = merge_dataframes_by_timestamp(dataframes)

In [None]:
import pandas as pd
import numpy as np
import panel as pn
import hvplot.pandas

# 初始化Panel
pn.extension()

def align_and_merge_datasets(dataframes):
    """Align every dataset to the first dataset's index and merge them.

    The first entry of ``dataframes`` is the reference. Every other frame
    whose index differs from the reference index is re-indexed onto it with
    nearest-neighbour lookup, so the result always has exactly the reference
    index.

    Args:
        dataframes: Mapping of name to DataFrame. Frames after the first are
            expected to hold a ``'value'`` column.

    Returns:
        A DataFrame indexed like the first dataset. The reference columns are
        renamed to ``"<first name>_<column>"``; every other dataset
        contributes its ``'value'`` column renamed to the dataset name. An
        empty DataFrame is returned when ``dataframes`` is empty.
    """
    if not dataframes:
        return pd.DataFrame()

    keys = list(dataframes)
    first_key = keys[0]
    reference_df = dataframes[first_key]
    target_index = reference_df.index

    # Prefix the reference columns so they cannot clash with later columns;
    # rename returns a new frame, the input is not modified
    aligned_df = reference_df.rename(
        columns={col: f"{first_key}_{col}" for col in reference_df.columns}
    )

    for key in keys[1:]:
        df = dataframes[key]

        # Comparing only the first timestamp is not enough: two frames can
        # start together and still drift apart later, which would make the
        # join below add extra rows. Align whenever the indexes differ.
        if not df.index.equals(target_index):
            # Nearest-neighbour re-indexing needs a monotonic index
            if not df.index.is_monotonic_increasing:
                df = df.sort_index()
            df = df.reindex(target_index, method='nearest')

        aligned_df = aligned_df.join(df.rename(columns={'value': key}), how='outer')

    return aligned_df

# Enable the 'tabulator' Panel extension before any Tabulator widget is built
pn.extension('tabulator')
# Example usage: align every loaded dataset to the first one and merge them
alignment_data = align_and_merge_datasets(dataframes)


In [None]:
# Show the aligned and merged data as an interactive table (cell output)
pn.Row(pn.widgets.Tabulator(alignment_data))

In [None]:
import pandas as pd
import numpy as np

def slice_time_series(series, method, value):
    """Cut a time series into consecutive segments.

    Args:
        series: pd.Series or pd.DataFrame to cut. The 'period' method
            requires a DatetimeIndex.
        method: 'length' for fixed-size segments or 'period' for calendar
            periods.
        value: For 'length', a positive int giving the rows per segment (the
            last segment may be shorter). For 'period', a pandas offset alias
            string such as 'D' or 'W'.

    Returns:
        list of the non-empty segments, in order.

    Raises:
        ValueError: Unknown ``method``, or a ``value`` of the wrong kind for
            the chosen method.
        TypeError: 'period' was requested but the index is not a
            DatetimeIndex.
    """
    if method == 'length':
        if not (isinstance(value, int) and value > 0):
            raise ValueError("对于 'length' 方法，value 必须是正整数。")
        # Stepping by the segment length never produces an empty slice
        return [
            series.iloc[start:start + value]
            for start in range(0, len(series), value)
        ]

    if method == 'period':
        if not isinstance(series.index, pd.DatetimeIndex):
            raise TypeError("对于 'period' 方法，序列必须具有 DatetimeIndex。")
        if not isinstance(value, str):
            raise ValueError("对于 'period' 方法，value 必须是表示频率的字符串（例如 'D', 'W'）。")
        # Periods without any rows show up as empty groups and are dropped
        by_period = series.groupby(pd.Grouper(freq=value))
        return [chunk for _, chunk in by_period if not chunk.empty]

    raise ValueError("无效的方法。请选择 'length' 或 'period'。")



# 3

In [None]:
import panel as pn
import hvplot.pandas
import numpy as np

# 初始化 Panel
pn.extension()

# 创建可视化界面
def create_visualization(merged_datasets):
    """Build a Panel dashboard for browsing the merged datasets.

    The dashboard offers a dataset selector, a multi-select for the columns
    of the chosen dataset, a line plot of the selected columns and a table
    with descriptive statistics plus non-null and null counts.

    Args:
        merged_datasets: Mapping of dataset label to DataFrame.

    Returns:
        The assembled ``pn.Column``.
    """
    def default_selection(column_names):
        # Pre-select the first two columns when at least two are available
        return column_names[:2] if len(column_names) >= 2 else column_names

    # Dataset picker; nothing is selected when no dataset is available
    dataset_selector = pn.widgets.Select(
        name='选择数据集',
        options=list(merged_datasets.keys()),
        value=list(merged_datasets.keys())[0] if merged_datasets else None,
    )

    # Column picker, seeded from the dataset that is selected initially
    has_initial = merged_datasets and dataset_selector.value in merged_datasets
    initial_columns = (
        list(merged_datasets[dataset_selector.value].columns) if has_initial else []
    )
    column_selector = pn.widgets.MultiSelect(
        name='选择列',
        options=initial_columns,
        value=default_selection(initial_columns),
        size=min(10, len(initial_columns)),
    )

    # Output areas for the plot and for the statistics table
    plot_pane = pn.pane.HoloViews(height=500)
    stats_pane = pn.pane.DataFrame(width=800)

    def update_column_options(event):
        # Offer the columns of the newly selected dataset
        chosen = event.new
        if chosen not in merged_datasets:
            return
        new_columns = list(merged_datasets[chosen].columns)
        column_selector.options = new_columns
        column_selector.value = default_selection(new_columns)

    def update_plot_and_stats(event):
        chosen = dataset_selector.value
        picked = column_selector.value

        # Clear both outputs when there is nothing valid to show
        if chosen is None or not picked or chosen not in merged_datasets:
            plot_pane.object = None
            stats_pane.object = None
            return

        # Restrict the dataset to the selected columns
        subset = merged_datasets[chosen][picked]

        plot_pane.object = subset.hvplot.line(
            responsive=True,
            height=500,
            title=f'数据集: {chosen}',
            xlabel='时间',
            ylabel='数值',
            legend='top',
            downsample=True,
        )

        # describe() transposed to one row per column, extended with counts
        summary = subset.describe().T
        summary['非空值数'] = subset.count()
        summary['空值数'] = subset.isna().sum()
        stats_pane.object = summary

    # Callbacks are registered in this order: column options first, then the
    # two plot/statistics refresh hooks
    dataset_selector.param.watch(update_column_options, 'value')
    column_selector.param.watch(update_plot_and_stats, 'value')
    dataset_selector.param.watch(update_plot_and_stats, 'value')

    # Render once for the initial selection
    update_plot_and_stats(None)

    controls_row = pn.Row(
        dataset_selector,
        column_selector,
        pn.pane.Markdown("### 统计信息"),
        stats_pane,
    )
    dashboard = pn.Column(
        pn.pane.Markdown("## 多维时间序列可视化"),
        controls_row,
        plot_pane,
    )

    return dashboard

# Example usage: build the dashboard from the merged datasets and display it
viz_dashboard = create_visualization(merged_datasets)
viz_dashboard

In [None]:
import pandas as pd
import numpy as np
import panel as pn
import hvplot.pandas
import sys
import os

from nodes.cluster.TICC.TICC_solver import TICC

# 初始化Panel
pn.extension()

def apply_ticc_to_dataframe(merged_datasets):
    """Build a Panel dashboard that runs TICC clustering on a chosen dataset.

    The user picks a dataset, a subset of its columns and the TICC
    parameters; pressing the button fits TICC on the selected columns and
    shows one line plot per cluster plus a table with cluster sizes.

    Args:
        merged_datasets: Mapping of dataset label to DataFrame.

    Returns:
        The assembled ``pn.Column``.
    """
    # Dataset picker; nothing is selected when no dataset is available
    dataset_selector = pn.widgets.Select(
        name='选择数据集',
        options=list(merged_datasets.keys()),
        value=list(merged_datasets.keys())[0] if merged_datasets else None
    )

    # Column picker, seeded with up to five columns of the initial dataset
    initial_columns = []
    if merged_datasets and dataset_selector.value in merged_datasets:
        initial_columns = list(merged_datasets[dataset_selector.value].columns)

    column_selector = pn.widgets.MultiSelect(
        name='选择列',
        options=initial_columns,
        value=initial_columns[:min(5, len(initial_columns))] if initial_columns else [],
        size=min(10, len(initial_columns))
    )

    # TICC parameters
    window_size = pn.widgets.IntSlider(name='窗口大小', start=1, end=10, value=5)
    number_of_clusters = pn.widgets.IntSlider(name='聚类数', start=2, end=10, value=3)
    lambda_param = pn.widgets.FloatSlider(name='Lambda参数', start=0, end=1, value=0.1, step=0.01)
    beta = pn.widgets.FloatSlider(name='Beta参数', start=0, end=10, value=5, step=0.1)
    maxIters = pn.widgets.IntSlider(name='最大迭代次数', start=10, end=100, value=30, step=5)

    def update_column_options(event):
        # Offer the columns of the newly selected dataset
        dataset = event.new
        if dataset in merged_datasets:
            new_columns = list(merged_datasets[dataset].columns)
            column_selector.options = new_columns
            column_selector.value = new_columns[:min(5, len(new_columns))] if new_columns else []

    dataset_selector.param.watch(update_column_options, 'value')

    # Result area. A plain Column is used because the results are a list of
    # plots or a Markdown message; a HoloViews pane is meant for HoloViews
    # objects and cannot hold a Panel layout.
    result_pane = pn.Column(sizing_mode='stretch_width')
    cluster_info_pane = pn.pane.DataFrame(width=800)

    run_button = pn.widgets.Button(name='运行TICC聚类', button_type='primary')

    # Status line shown under the button
    status = pn.pane.Markdown("准备就绪，请选择数据和参数")

    def run_ticc(event):
        dataset = dataset_selector.value
        columns = column_selector.value

        if dataset is None or not columns or dataset not in merged_datasets:
            status.object = "请选择数据集和至少一列数据"
            return

        try:
            status.object = "正在运行TICC聚类..."

            # Selected columns only, on a private copy
            df = merged_datasets[dataset][columns].copy()

            # Fill gaps forwards, then backwards for leading gaps
            # (the index is left unchanged)
            df = df.ffill().bfill()

            ticc = TICC(
                window_size=window_size.value,
                number_of_clusters=number_of_clusters.value,
                lambda_parameter=lambda_param.value,
                beta=beta.value,
                maxIters=maxIters.value,
                threshold=2e-5
            )

            # The second return value of fit() is not used here
            result_df, _ = ticc.fit(df)

            # NOTE(review): both branches assume fit() yields one label per
            # input row; if the solver returns fewer labels, the assignments
            # below fail with a length mismatch - confirm against TICC_solver.
            if isinstance(result_df, pd.DataFrame):
                # A DataFrame result carries the labels in its 'cluster' column
                cluster_assignments = result_df['cluster'].values
                result_df.index = df.index
            else:
                # Otherwise treat the result as the label array itself
                cluster_assignments = result_df
                result_df = df.copy()
                result_df['cluster'] = cluster_assignments

            # One line plot per non-empty cluster
            cluster_plots = []
            for i in range(number_of_clusters.value):
                cluster_data = result_df[result_df['cluster'] == i]
                if len(cluster_data) > 0:
                    plot = cluster_data.hvplot.line(
                        y=columns,
                        responsive=True,
                        height=300,
                        title=f'聚类 {i}',
                        xlabel='时间',
                        ylabel='数值',
                        legend='top'
                    )
                    cluster_plots.append(plot)

            if cluster_plots:
                # Stack the per-cluster plots vertically
                result_pane.objects = cluster_plots
            else:
                result_pane.objects = [pn.pane.Markdown("没有找到有效的聚类结果")]

            # Size and share of every cluster label that actually occurs
            unique_clusters, counts = np.unique(cluster_assignments, return_counts=True)
            cluster_stats = pd.DataFrame({
                '聚类标签': unique_clusters,
                '点数量': counts,
                '占比(%)': counts / len(cluster_assignments) * 100
            })

            cluster_info_pane.object = cluster_stats

            status.object = "TICC聚类完成！"

        except Exception as e:
            # Notebook boundary: report the failure in the UI and keep the
            # traceback in the cell output instead of killing the dashboard
            status.object = f"错误: {str(e)}"
            import traceback
            traceback.print_exc()

    run_button.on_click(run_ticc)

    # Controls on the left, results on the right
    dashboard = pn.Column(
        pn.pane.Markdown("## TICC时间序列聚类分析"),
        pn.Row(
            pn.Column(
                dataset_selector,
                column_selector,
                pn.pane.Markdown("### TICC参数设置"),
                window_size,
                number_of_clusters,
                lambda_param,
                beta,
                maxIters,
                run_button,
                status
            ),
            pn.Column(
                pn.pane.Markdown("### 聚类结果"),
                result_pane,
                pn.pane.Markdown("### 聚类统计"),
                cluster_info_pane
            )
        )
    )

    return dashboard

# Example usage: build the TICC dashboard from the merged datasets and display it
ticc_dashboard = apply_ticc_to_dataframe(merged_datasets)
ticc_dashboard

# 3

In [None]:
import pandas as pd
import numpy as np
import panel as pn
import hvplot.pandas
import sys
import os

# 导入MC2PCA算法
sys.path.append(os.path.join(os.getcwd(), 'nodes/cluster'))
from MC2PCA import mc2pca_clustering

# 初始化Panel
pn.extension()

def apply_mc2pca_to_dataframe(merged_datasets):
    """Build a Panel dashboard that runs MC2PCA clustering on a chosen dataset.

    The user picks a dataset, a subset of its columns and the MC2PCA
    parameters; pressing the button clusters the selected columns and shows
    one line plot per cluster plus a table with cluster sizes.

    Args:
        merged_datasets: Mapping of dataset label to DataFrame.

    Returns:
        The assembled ``pn.Column``.
    """
    # Dataset picker; nothing is selected when no dataset is available
    dataset_selector = pn.widgets.Select(
        name='选择数据集',
        options=list(merged_datasets.keys()),
        value=list(merged_datasets.keys())[0] if merged_datasets else None
    )

    # Column picker, seeded with up to five columns of the initial dataset
    initial_columns = []
    if merged_datasets and dataset_selector.value in merged_datasets:
        initial_columns = list(merged_datasets[dataset_selector.value].columns)

    column_selector = pn.widgets.MultiSelect(
        name='选择列',
        options=initial_columns,
        value=initial_columns[:min(5, len(initial_columns))] if initial_columns else [],
        size=min(10, len(initial_columns))
    )

    # MC2PCA parameters
    k_clusters = pn.widgets.IntInput(name='聚类数量(K)', value=2)
    ncp = pn.widgets.IntInput(name='主成分数量(ncp)', value=2)
    itermax = pn.widgets.IntInput(name='最大迭代次数', value=100)
    conv_crit = pn.widgets.FloatInput(name='收敛阈值', value=1e-5)

    def update_column_options(event):
        # Offer the columns of the newly selected dataset
        dataset = event.new
        if dataset in merged_datasets:
            new_columns = list(merged_datasets[dataset].columns)
            column_selector.options = new_columns
            column_selector.value = new_columns[:min(5, len(new_columns))] if new_columns else []

    dataset_selector.param.watch(update_column_options, 'value')

    # Result area. A plain Column is used because the results are a list of
    # plots or a Markdown message; a HoloViews pane is meant for HoloViews
    # objects and cannot hold a Panel layout.
    result_pane = pn.Column(sizing_mode='stretch_width')
    cluster_info_pane = pn.pane.DataFrame(width=800)

    run_button = pn.widgets.Button(name='运行MC2PCA聚类', button_type='primary')

    # Status line shown under the button
    status = pn.pane.Markdown("准备就绪，请选择数据和参数")

    def run_mc2pca(event):
        dataset = dataset_selector.value
        # Read the column selection: it drives both the clustering input and
        # the y-axis of the plots (previously `columns` was never defined
        # here, so plotting raised NameError)
        columns = column_selector.value

        if dataset is None or not columns or dataset not in merged_datasets:
            status.object = "请选择数据集和至少一列数据"
            return

        try:
            status.object = "正在运行MC2PCA聚类..."

            # Selected columns only, on a private copy
            df = merged_datasets[dataset][columns].copy()

            # Fill gaps forwards, then backwards for leading gaps
            df = df.ffill().bfill()

            # NOTE(review): the code below assumes the returned frame holds a
            # 'cluster' column next to the input columns - confirm against
            # MC2PCA.mc2pca_clustering
            result_df = mc2pca_clustering(df, k_clusters.value, ncp.value, itermax.value, conv_crit.value)

            # One line plot per non-empty cluster
            cluster_plots = []
            for i in range(k_clusters.value):
                cluster_data = result_df[result_df['cluster'] == i]
                if len(cluster_data) > 0:
                    plot = cluster_data.hvplot.line(
                        y=columns,
                        responsive=True,
                        height=300,
                        title=f'聚类 {i}',
                        xlabel='时间',
                        ylabel='数值',
                        legend='top'
                    )
                    cluster_plots.append(plot)

            if cluster_plots:
                # Stack the per-cluster plots vertically
                result_pane.objects = cluster_plots
            else:
                result_pane.objects = [pn.pane.Markdown("没有找到有效的聚类结果")]

            # Size and share of every cluster label that actually occurs
            labels = result_df['cluster'].values
            unique_clusters, counts = np.unique(labels, return_counts=True)
            cluster_stats = pd.DataFrame({
                '聚类标签': unique_clusters,
                '点数量': counts,
                '占比(%)': counts / len(labels) * 100
            })

            cluster_info_pane.object = cluster_stats

            status.object = "MC2PCA聚类完成！"

        except Exception as e:
            # Notebook boundary: report the failure in the UI and keep the
            # traceback in the cell output instead of killing the dashboard
            status.object = f"错误: {str(e)}"
            import traceback
            traceback.print_exc()

    run_button.on_click(run_mc2pca)

    # Controls on the left, results on the right
    dashboard = pn.Column(
        pn.pane.Markdown("## MC2PCA时间序列聚类分析"),
        pn.Row(
            pn.Column(
                dataset_selector,
                column_selector,
                pn.pane.Markdown("### MC2PCA参数设置"),
                k_clusters,
                ncp,
                itermax,
                conv_crit,
                run_button,
                status
            ),
            pn.Column(
                pn.pane.Markdown("### 聚类结果"),
                result_pane,
                pn.pane.Markdown("### 聚类统计"),
                cluster_info_pane
            )
        )
    )

    return dashboard

# Example usage: build the MC2PCA dashboard from the merged datasets and display it
mc2pca_dashboard = apply_mc2pca_to_dataframe(merged_datasets)
mc2pca_dashboard

In [None]:
import pandas as pd

def slice_dataframe(df, by='length', value=100):
    """Split a DataFrame into consecutive slices by row count or by duration.

    Args:
        df: DataFrame to split. Slicing by duration requires a DatetimeIndex
            and assumes it is sorted ascending (the first and last index
            entries are taken as the time range).
        by: 'length' to slice by number of rows, 'timedelta' to slice by
            elapsed time.
        value: For 'length', the number of rows per slice (positive int; the
            last slice may be shorter). For 'timedelta', anything
            ``pd.to_timedelta`` accepts, e.g. '10min' or '1h'.

    Returns:
        list of DataFrames. Duration windows are half-open
        ``[start, start + value)``, anchored at the first index entry;
        windows without rows are skipped. An empty ``df`` gives ``[]``.

    Raises:
        ValueError: Unknown ``by``, a non-positive ``value``, or duration
            slicing on an index that is not a DatetimeIndex.
    """
    df_list = []
    if by == 'length':
        n = int(value)
        if n <= 0:
            raise ValueError("按行数切片时value必须为正整数")
        for i in range(0, len(df), n):
            df_list.append(df.iloc[i:i + n])
    elif by == 'timedelta':
        if not isinstance(df.index, pd.DatetimeIndex):
            raise ValueError("索引必须为DatetimeIndex才能按时长切片")
        step = pd.to_timedelta(value)
        # A zero or negative step would never advance the window below
        if pd.isna(step) or step <= pd.Timedelta(0):
            raise ValueError("按时长切片时value必须为正的时间长度")
        if df.empty:
            return df_list
        end = df.index[-1]
        current = df.index[0]
        # '<=' (not '<') so that a last row lying exactly on a window
        # boundary, or the only row of a single-row frame, is not dropped
        while current <= end:
            next_time = current + step
            in_window = (df.index >= current) & (df.index < next_time)
            if in_window.any():
                df_list.append(df[in_window])
            current = next_time
    else:
        raise ValueError("by参数必须为'length'或'timedelta'")
    return df_list

# # 示例用法
# df_slices = slice_dataframe(df, by='length', value=200)
# df_slices = slice_dataframe(df, by='timedelta', value='30min')