In [4]:
import os
import pandas as pd
import fnmatch

def read_csv_with_encoding(file, encodings):
    """Read a CSV file, trying each candidate encoding in order.

    Args:
        file: Path (or any object accepted by ``pd.read_csv``) of the CSV.
        encodings: Iterable of encoding names, tried first to last.

    Returns:
        The DataFrame produced by the first encoding that decodes the file.

    Raises:
        ValueError: If every encoding fails with ``UnicodeDecodeError`` (or
            ``encodings`` is empty). The last decode error, if any, is
            attached as ``__cause__`` so the offending byte is not lost.
    """
    last_error = None
    for encoding in encodings:
        try:
            return pd.read_csv(file, encoding=encoding)
        except UnicodeDecodeError as exc:
            # Remember the failure and fall through to the next candidate.
            last_error = exc
    raise ValueError(f"None of the encodings worked for file: {file}") from last_error


def combine_csv_in_folder(folder_path, file_pattern):
    """Concatenate every CSV under ``folder_path`` whose name matches a pattern.

    The tree is walked recursively. Directories and file names are visited in
    sorted order so the row order of the result does not depend on the order
    in which the filesystem happens to list entries.

    Args:
        folder_path: Root directory to search.
        file_pattern: ``fnmatch``-style pattern applied to the bare file name.

    Returns:
        One DataFrame with all matching files stacked and a fresh 0..n-1
        index, or an empty DataFrame when nothing matches.
    """
    dataframes = []  # collected per-file frames, concatenated once at the end
    for root, dirs, files in os.walk(folder_path):
        # Sorting ``dirs`` in place fixes the order in which os.walk descends.
        dirs.sort()
        for file in sorted(fnmatch.filter(files, file_pattern)):
            file_path = os.path.join(root, file)
            df = read_csv_with_encoding(file_path, ['utf-8', 'gb2312', 'gbk'])
            dataframes.append(df)
    return pd.concat(dataframes, ignore_index=True) if dataframes else pd.DataFrame()


root_folders = ['/Users/Wezhang/Downloads/data']
file_pattern = 'ss2405.csv'

# Collect one combined frame per root folder (skipping folders that produced
# no rows) and concatenate once at the end, instead of growing a DataFrame
# with pd.concat inside the loop starting from an empty frame.
folder_frames = []
for folder in root_folders:
    folder_csv = combine_csv_in_folder(folder, file_pattern)
    if not folder_csv.empty:
        folder_frames.append(folder_csv)

# Fail loudly when nothing matched rather than writing an empty CSV and
# reporting success.
if not folder_frames:
    raise FileNotFoundError(
        f"No non-empty files matching {file_pattern!r} found under {root_folders}"
    )

combined_csv = pd.concat(folder_frames, ignore_index=True)

# Save the combined data to a new CSV file.
# errors='ignore' is kept from the original call: characters that cannot be
# encoded as UTF-8 are dropped rather than raising.
combined_csv.to_csv("/Users/Wezhang/workspace/laplace/taobao_combined.csv", index=False, encoding='utf-8', errors='ignore')

print("All files combined successfully!")


All files combined successfully!


In [5]:
import pandas as pd
data = pd.read_csv("/Users/Wezhang/workspace/laplace/taobao_combined.csv")
code = "SHFE.ss2405."
# Alternate prefix kept from the original cell: "KQ.m@SHFE.ss."

# Remove the contract-code prefix from every column name that starts with it.
# Slicing strips only the leading prefix; str.replace would also delete any
# later occurrence of the same text inside the column name.
new_columns = [
    col[len(code):] if col.startswith(code) else col
    for col in data.columns
]
data.columns = new_columns

# mergesort is stable, so rows sharing the same datetime keep the order they
# had in the combined file (the default quicksort gives no such guarantee).
data = data.sort_values(by='datetime', kind='mergesort')
data.to_csv("future_taobao_ss2405.csv",index=False)


In [6]:
import pandas as pd
data = pd.read_csv("future_taobao_ss2405.csv")
# This cell overwrites its own input file, so on a second run the 'average'
# column is already gone; errors='ignore' makes the drop a no-op in that case
# instead of raising KeyError.
data = data.drop(columns=['average'], errors='ignore')
data.to_csv("future_taobao_ss2405.csv", index=False)

In [None]:
#合并主力合约
import pandas as pd

# File path of each contract's CSV
files = {
    "ss2401": "future_taobao_ss2401.csv",
    "ss2402": "future_taobao_ss2402.csv",
    "ss2403": "future_taobao_ss2403.csv",
    "ss2405": "future_taobao_ss2405.csv"
}


def _slice_window(frame, after=None, until=None):
    """Return the rows of ``frame`` with ``after < datetime <= until``.

    Either bound may be ``None`` to leave that side open. Rows whose
    ``datetime`` is NaT fail every comparison and are therefore dropped
    whenever at least one bound is given.
    """
    keep = pd.Series(True, index=frame.index)
    if after is not None:
        keep &= frame['datetime'] > after
    if until is not None:
        keep &= frame['datetime'] <= until
    return frame[keep]


# Load every contract file
dataframes = {key: pd.read_csv(value) for key, value in files.items()}

# Convert the datetime column to a real datetime dtype
for df in dataframes.values():
    df['datetime'] = pd.to_datetime(df['datetime'])

# Keep each contract only inside its own date window.
# NOTE(review): a date-only bound such as '2023-12-11' compares as midnight of
# that day, so rows later on the boundary date fall into the NEXT contract's
# window — confirm this is the intended switch time.
ss2401_data = _slice_window(dataframes['ss2401'], until='2023-12-11')
ss2402_data = _slice_window(dataframes['ss2402'], after='2023-12-11', until='2024-01-12')
ss2403_data = _slice_window(dataframes['ss2403'], after='2024-01-12', until='2024-02-08')
ss2405_data = _slice_window(dataframes['ss2405'], after='2024-02-08')

# Stack the per-contract slices
concatenated_data = pd.concat([ss2401_data, ss2402_data, ss2403_data, ss2405_data])

# Sort by time; mergesort is stable, so rows with identical timestamps keep
# their original relative order.
concatenated_data = concatenated_data.sort_values(by='datetime', kind='mergesort')

# Export to a new CSV file
concatenated_data.to_csv("future_taobao_ssMain.csv", index=False)
