In [1]:
import pandas as pd
# Read the tab-delimited usage bill into a DataFrame and display it
# (pandas abbreviates the middle rows of a long frame).
data = pd.read_csv("zhangdan.csv", delimiter="\t")
data

Unnamed: 0,编号,部门名,用户名,日期,CPU(核时),加速卡(卡时),最大存储,应用,费用(元)
0,1,高通量编辑与筛选平台实验室,赵募强,2024-12-02,4.67,0.00,,1,0.46
1,2,高通量编辑与筛选平台实验室,赵募强,2024-12-04,0.43,0.00,,1,0.04
2,3,高通量编辑与筛选平台实验室,赵募强,2024-12-05,0.10,0.00,,1,0.01
3,4,高通量编辑与筛选平台实验室,赵募强,2024-12-06,11.14,0.00,,1,1.09
4,5,高通量编辑与筛选平台实验室,赵募强,2024-12-09,2.14,0.00,,1,0.19
...,...,...,...,...,...,...,...,...,...
3141,3142,生物设计中心平台实验室,王若宇,2025-02-13,0.15,0.01,,1,0.01
3142,3143,生物设计中心平台实验室,杨春贺,2025-02-13,0.00,12.53,,1,62.63
3143,3144,生物设计中心平台实验室,李洋,2025-02-14,0.04,0.02,,1,0.02
3144,3145,生物设计中心平台实验室,王德行,2025-02-14,49.09,4.09,,1,8.99


In [9]:
# Convert the date strings to datetimes; any value that is not in
# YYYY-MM-DD form is coerced to NaT instead of raising.
data["日期"] = pd.to_datetime(data["日期"], errors="coerce", format="%Y-%m-%d")

# Number the weeks in 7-day steps counted from 2024-12-01, so that
# 2024-12-01 .. 2024-12-07 is week 1, the next seven days week 2, and so on.
elapsed_days = (data["日期"] - pd.to_datetime("2024-12-01")).dt.days
data["周序号"] = elapsed_days // 7 + 1
data.head()

Unnamed: 0,编号,部门名,用户名,日期,CPU(核时),加速卡(卡时),最大存储,应用,费用(元),周序号
0,1,高通量编辑与筛选平台实验室,赵募强,2024-12-02,4.67,0.0,,1,0.46,1
1,2,高通量编辑与筛选平台实验室,赵募强,2024-12-04,0.43,0.0,,1,0.04,1
2,3,高通量编辑与筛选平台实验室,赵募强,2024-12-05,0.1,0.0,,1,0.01,1
3,4,高通量编辑与筛选平台实验室,赵募强,2024-12-06,11.14,0.0,,1,1.09,1
4,5,高通量编辑与筛选平台实验室,赵募强,2024-12-09,2.14,0.0,,1,0.19,2


In [10]:
# Weekly totals per department and user: within each
# (部门名, 用户名, 周序号) group, sum the CPU core-hour, accelerator card-hour
# and cost columns.
# Columns that need a different reduction (for example "最大存储" or "应用"
# reduced with "last") can be added to the mapping passed to .agg().
result = data.groupby(
    by=["部门名", "用户名", "周序号"],
    as_index=False,
).agg(dict.fromkeys(["CPU(核时)", "加速卡(卡时)", "费用(元)"], "sum"))

In [15]:
import pandas as pd

# Make sure the "日期" column is datetime. Values that do not match
# YYYY-MM-DD become NaT (errors="coerce") instead of raising.
data["日期"] = pd.to_datetime(data["日期"], format="%Y-%m-%d", errors="coerce")

# Week 1 is anchored at the LATER of 2024-12-01 and the earliest date present
# in the data (note: max(), not min()). Rows dated before the anchor, if any,
# receive week numbers <= 0.
start_date = max(pd.to_datetime("2024-12-01"), data["日期"].min())
data["周序号"] = ((data["日期"] - start_date).dt.days // 7) + 1

# First and last calendar day of the 7-day week each row falls into.
data["开始日期"] = start_date + (data["周序号"] - 1) * pd.Timedelta(days=7)
data["结束日期"] = data["开始日期"] + pd.Timedelta(days=6)

# groupby() discards rows whose key is missing (default dropna=True), so a row
# with an unparseable date, or without a department/user, would vanish from
# the totals without any trace. Report how many rows are affected; the result
# itself is computed exactly as before.
group_keys = ["部门名", "用户名", "周序号", "开始日期", "结束日期"]
missing_key_rows = int(data[group_keys].isna().any(axis=1).sum())
if missing_key_rows:
    print(
        f"WARNING: {missing_key_rows} row(s) have a missing or unparseable "
        "department, user or date and are excluded from the weekly summary"
    )

# One row per department / user / week.
result = (
    data.groupby(group_keys, as_index=False)
    .agg({
        "CPU(核时)": "sum",
        "加速卡(卡时)": "sum",
        "费用(元)": "sum",
        # "last" keeps the last non-null value in row order; this is the most
        # recent value only if the rows are in chronological order -- TODO confirm.
        "最大存储": "last",
        "应用": "max",
    })
)

# "检测日期" is the first day of the week, i.e. the same value as "开始日期".
result["检测日期"] = result["开始日期"]

# Order by department, then user, then week number.
result = result.sort_values(["部门名", "用户名", "周序号"], ignore_index=True)

# Show the result.
result

Unnamed: 0,部门名,用户名,周序号,开始日期,结束日期,CPU(核时),加速卡(卡时),费用(元),最大存储,应用,检测日期
0,合成生物学技术研究组,刘雅静,7,2025-01-18,2025-01-24,4.61,0.00,0.45,,1,2025-01-18
1,合成生物学技术研究组,董莎莎,1,2024-12-07,2024-12-13,0.90,3.30,16.42,1.4TB,1,2024-12-07
2,合成生物学技术研究组,董莎莎,2,2024-12-14,2024-12-20,0.07,2.39,16.62,1.4TB,1,2024-12-14
3,合成生物学技术研究组,董莎莎,3,2024-12-21,2024-12-27,3.97,4.93,19.17,1.4TB,1,2024-12-21
4,合成生物学技术研究组,董莎莎,4,2024-12-28,2025-01-03,0.00,2.41,15.68,1.4TB,1,2024-12-28
...,...,...,...,...,...,...,...,...,...,...,...
520,高通量编辑与筛选平台实验室,赵募强,3,2024-12-21,2024-12-27,183.96,0.00,9.16,,1,2024-12-21
521,高通量编辑与筛选平台实验室,赵募强,4,2024-12-28,2025-01-03,8.14,0.00,0.40,,1,2024-12-28
522,高通量编辑与筛选平台实验室,赵募强,5,2025-01-04,2025-01-10,9.15,0.00,0.45,,1,2025-01-04
523,高通量编辑与筛选平台实验室,赵募强,6,2025-01-11,2025-01-17,52.80,0.00,2.53,,1,2025-01-11


In [18]:
# Write the weekly summary as a tab-separated file without the row index.
# `index` is documented as a bool, so pass False explicitly rather than
# relying on None being treated as falsy.
result.to_csv('jiance.csv', sep='\t', index=False)