In [1]:
import pandas as pd
import numpy as np
import chardet

import os
# Set KMP_DUPLICATE_LIB_OK for this process before any later cell runs.
# NOTE(review): presumably the usual workaround for the OpenMP
# "duplicate runtime library" abort (OMP: Error #15); nothing in the visible
# cells obviously needs it -- confirm it is still required.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [2]:
# Load the interpolated source data from the Excel workbook into a DataFrame
# and show it as the cell output.
interpolated_xlsx_path = 'OutputData/interpolated_linear_original_data.xlsx'
df1 = pd.read_excel(interpolated_xlsx_path)
df1

Unnamed: 0,日期,横坐标,纵坐标,高程,涌水量
0,2022/06/10,400093.253600,3.044617e+06,340,40.0
1,2022/06/11,400093.253600,3.044617e+06,340,40.0
2,2022/06/12,400093.253600,3.044617e+06,340,40.0
3,2022/06/13,400093.253600,3.044617e+06,340,40.0
4,2022/06/14,400093.253600,3.044617e+06,340,40.0
...,...,...,...,...,...
6520,2024/05/07,401223.552021,3.044156e+06,670,17.6
6521,2024/05/08,401223.552021,3.044156e+06,670,17.2
6522,2024/05/09,401223.552021,3.044156e+06,670,16.8
6523,2024/05/10,401223.552021,3.044156e+06,670,16.4


In [3]:
file_path = 'OutputData/降雨量总体数据.csv'

# The file's encoding is not known up front, so sniff it from the raw bytes.
with open(file_path, 'rb') as f:
    result = chardet.detect(f.read())

# chardet reports None when it cannot decide; fall back to UTF-8, which is
# what read_csv would use for encoding=None anyway.
encoding = result['encoding'] or 'utf-8'

# chardet can label GBK/GB18030 text as 'GB2312', and decoding then fails on
# any character outside that smaller set. GB18030 is a superset of both, so
# text that decodes as GB2312/GBK decodes identically with it.
if encoding.lower() in ('gb2312', 'gbk'):
    encoding = 'gb18030'

df2 = pd.read_csv(file_path, encoding=encoding)
df2

Unnamed: 0,日期,日降雨量
0,2023-01-01,1.747288
1,2023-01-02,3.160397
2,2023-01-03,0.857112
3,2023-01-04,1.711757
4,2023-01-05,1.941543
...,...,...
641,2024-10-03,0.172667
642,2024-10-04,0.345333
643,2024-10-05,0.518000
644,2024-10-06,1.082000


In [4]:
# Parse the date column of both frames to datetime so that the differently
# formatted values shown above (e.g. 2022/06/10 vs 2023-01-01) compare equal.
df1['日期'] = pd.to_datetime(df1['日期'])
df2['日期'] = pd.to_datetime(df2['日期'])

# Inner-join on the date: only dates present in both frames are kept.
# validate='many_to_one' makes pandas raise MergeError if the rainfall frame
# ever contains the same date twice, instead of silently duplicating the
# matching rows of df1.
merged_df = pd.merge(
    df1,
    df2[['日期', '日降雨量']],
    on='日期',
    how='inner',
    validate='many_to_one',
)

# Reorder the columns so that '涌水量' comes last.
columns = [col for col in merged_df.columns if col != '涌水量'] + ['涌水量']

# Round every numeric column to 3 decimal places; round() returns a new frame,
# so merged_df itself is left untouched.
df = merged_df[columns].round(3)

# Persist the merged result as both an Excel workbook and a CSV file.
df.to_excel('OutputData/merged_data.xlsx', index=False)
df.to_csv('OutputData/merged_data.csv', index=False)

In [5]:
# Show the merged, rounded frame as the cell output.
df

Unnamed: 0,日期,横坐标,纵坐标,高程,日降雨量,涌水量
0,2023-01-01,400093.254,3044617.466,340,1.747,201.500
1,2023-01-02,400093.254,3044617.466,340,3.160,201.125
2,2023-01-03,400093.254,3044617.466,340,0.857,200.750
3,2023-01-04,400093.254,3044617.466,340,1.712,200.375
4,2023-01-05,400093.254,3044617.466,340,1.942,200.000
...,...,...,...,...,...,...
4149,2024-05-07,401223.552,3044155.756,670,10.717,17.600
4150,2024-05-08,401223.552,3044155.756,670,5.388,17.200
4151,2024-05-09,401223.552,3044155.756,670,3.532,16.800
4152,2024-05-10,401223.552,3044155.756,670,0.221,16.400


# 制作web导入格式的数据集

In [6]:
# Add a leading "序号" column that holds each row's index label.
# Any existing "序号" column is dropped first so the cell can be re-run:
# DataFrame.insert raises ValueError when the column is already present.
# drop() returns a new frame, so the previously displayed object is not mutated.
df = df.drop(columns="序号", errors="ignore")

# Insert the index values directly rather than a None placeholder, so the
# column takes the index's own dtype instead of object.
df.insert(0, "序号", df.index)

df

Unnamed: 0,序号,日期,横坐标,纵坐标,高程,日降雨量,涌水量
0,0,2023-01-01,400093.254,3044617.466,340,1.747,201.500
1,1,2023-01-02,400093.254,3044617.466,340,3.160,201.125
2,2,2023-01-03,400093.254,3044617.466,340,0.857,200.750
3,3,2023-01-04,400093.254,3044617.466,340,1.712,200.375
4,4,2023-01-05,400093.254,3044617.466,340,1.942,200.000
...,...,...,...,...,...,...,...
4149,4149,2024-05-07,401223.552,3044155.756,670,10.717,17.600
4150,4150,2024-05-08,401223.552,3044155.756,670,5.388,17.200
4151,4151,2024-05-09,401223.552,3044155.756,670,3.532,16.800
4152,4152,2024-05-10,401223.552,3044155.756,670,0.221,16.400


In [7]:
# Write the current frame, without its index, to the workbook used for the
# web site import.
waterinfo_xlsx_path = 'OutputData/waterinfo_data.xlsx'
df.to_excel(waterinfo_xlsx_path, index=False)