In [None]:
import pandas as pd
import numpy as np
import re

In [None]:

def group_count_save(file_path, output_file_path):
    """
    Count the records per '学号/卡号/工号' in a GBK-encoded csv and save the counts.

    The first line of the input file is skipped, so its second line serves as the
    header. After grouping, the count columns for '所在组织', 'Unnamed: 2',
    'Unnamed: 3', 'Unnamed: 4' and '学习时间' are discarded and the '课程' count
    column is renamed to '0'. The group key '学号/卡号/工号' is written as the
    first column of the output file.

    Parameters:
    file_path (str): File path of the input csv file.
    output_file_path (str): File path the per-id count table is written to (GBK-encoded).
    """
    id_column = "学号/卡号/工号"
    discarded_columns = ['所在组织', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', '学习时间']

    # skiprows=1 drops the line that precedes the real header row.
    records = pd.read_csv(file_path, encoding='GBK', skiprows=1)

    # count() yields the number of non-null cells per column for every id;
    # only the '课程' count is kept, under the column name '0'.
    counts_per_id = (
        records.groupby(id_column)
        .count()
        .drop(columns=discarded_columns)
        .rename(columns={'课程': '0'})
    )

    # The index (the id) is written out as the first column.
    counts_per_id.to_csv(output_file_path, encoding='GBK')


In [None]:

def clean_data(file_path, output_file_path='re.csv'):
    """
    Reduce the '学号/卡号/工号' column to its Chinese characters and save it as '姓名'.

    Every value of '学号/卡号/工号' is stripped of all characters outside the
    range U+4E00..U+9FA5. Rows left with an empty name (including rows whose id
    is missing or purely numeric) are dropped. The cleaned column replaces the
    original one under the name '姓名'; all other columns are kept unchanged.
    The result is written as a GBK-encoded csv without the index.

    Parameters:
    file_path (str): File path of the csv file to be cleaned (GBK-encoded).
    output_file_path (str): File path the cleaned csv is written to.
        Defaults to 're.csv'.

    Returns:
    None

    Raises:
    KeyError: If the input file has no '学号/卡号/工号' column.
    """
    id_column = '学号/卡号/工号'
    non_chinese = re.compile(r'[^\u4e00-\u9fa5]')

    records = pd.read_csv(file_path, encoding='GBK')

    # Cast to str first: an all-numeric id column is parsed as integers, and a
    # plain re.sub over such values would raise TypeError.
    names = records[id_column].astype(str).str.replace(non_chinese, '', regex=True)

    # A row carries a name only if at least one Chinese character survived.
    # notna() covers pandas versions where astype(str) keeps missing values.
    has_name = names.notna() & names.ne('')

    # Overwrite the id column and rename it, instead of adding a second column
    # that would also end up being called '姓名'.
    records[id_column] = names
    cleaned = records.loc[has_name].rename(columns={id_column: '姓名'})

    cleaned.to_csv(output_file_path, encoding='GBK', index=False)


In [None]:

def merge_and_save(file_path1, file_path2, output_file_path):
    """
    Attach the '0' counts from one csv to the '姓名' list of another and save the result.

    Every '姓名' row of the second file is kept, in its original order (left
    join). Names without a match in the first file get a count of 0. The '0'
    column is renamed to '计数' and stored as integers. The output is a
    GBK-encoded csv without the index, with the columns '姓名' and '计数'.

    Parameters:
    file_path1 (str): File path of the csv providing the '姓名' and '0' columns.
    file_path2 (str): File path of the csv providing the '姓名' rows to report on.
    output_file_path (str): File path where the merged dataframe should be saved.

    Raises:
    KeyError: If a required column is missing from either input file.
    """
    counts = pd.read_csv(file_path1, encoding='GBK')
    names = pd.read_csv(file_path2, encoding='GBK')

    # Left join: keep every name of the second file, matched or not.
    merged = names[['姓名']].merge(counts[['姓名', '0']], on='姓名', how='left')

    # Assign the filled column back: a chained `merged['0'].replace(..., inplace=True)`
    # does not reliably modify `merged` under pandas Copy-on-Write.
    # The join turns the counts into floats whenever a name is unmatched, so
    # cast back to int to avoid values such as "3.0" in the report.
    merged['0'] = merged['0'].fillna(0).astype(int)

    merged = merged.rename(columns={'0': '计数'})
    merged.to_csv(output_file_path, encoding='GBK', index=False)


# TODO: a file-renaming step is still missing; the raw export must be renamed to input.csv before the pipeline is run.

In [None]:
# Step 1: count the records per '学号/卡号/工号' in the raw export.
group_count_save(file_path='input.csv', output_file_path='temp.csv')
# Step 2: derive the '姓名' column from the ids; the result is written to 're.csv'.
clean_data(file_path='temp.csv')
# Step 3: look up the '0' counts of 'result.csv' for every '姓名' in 're.csv'.
# NOTE(review): 'result.csv' is not written by either step above, so it must
# already exist on disk with '姓名' and '0' columns - confirm that this is the
# intended input and that the two input files are not swapped.
merge_and_save(file_path1='result.csv', file_path2='re.csv', output_file_path='大学习统计.csv')