In [34]:
import pandas as pd

def process_filenaming_seq_num(data):
    """Renumber the sequence token of table-of-contents file names in row order.

    Rows are considered only when ``file_name_post_processing_label`` is one of
    "Chapter", "Conclusion", "Appendix", "Bibliography" or "About the Author"
    and ``upload_to_lv2_toc`` is truthy. For such rows the second
    underscore-separated token of ``file_name_toc`` is overwritten with a
    zero-padded two-digit sequence number, e.g. ``AP_07_EIBR_C010`` becomes
    ``AP_01_EIBR_C010`` for the first chapter.

    Chapters are numbered from 1. Every other section starts, on its first row,
    from the current counter of the nearest earlier section (in the order
    listed above) that has already been numbered, and then counts up on its
    own. A book without a conclusion or appendix is therefore still numbered
    contiguously.

    Args:
        data (pd.DataFrame): dataframe after filenaming predictions. Must
            provide the columns ``file_name_toc``,
            ``file_name_post_processing_label`` and ``upload_to_lv2_toc``.

    Returns:
        pd.DataFrame: the same ``data`` object, modified in place.

    Notes:
        The function never raises. A row whose file name is not a string, or
        has no second token to overwrite, is reported with ``print`` and left
        untouched without consuming a sequence number. Any other error is
        printed and the frame is returned as processed so far.
    """
    # Sections that receive a sequence number, in the order their numbering chains.
    section_order = ("Chapter", "Conclusion", "Appendix", "Bibliography", "About the Author")
    # Chapters start at 1; every other section is seeded lazily on its first row.
    counters = dict.fromkeys(section_order)
    counters["Chapter"] = 1
    try:
        for index, row in data.iterrows():
            label = row["file_name_post_processing_label"]
            if label not in counters or not row["upload_to_lv2_toc"]:
                continue

            file_name = row["file_name_toc"]
            tokens = file_name.split("_") if isinstance(file_name, str) else []
            if len(tokens) < 2:
                # Skip only this row instead of aborting the whole pass.
                print("Error found: ", "cannot renumber file name {!r} at index {!r}".format(file_name, index))
                continue

            if counters[label] is None:
                # Continue from the nearest earlier section that has been
                # numbered; "Chapter" is always set, so a seed always exists.
                position = section_order.index(label)
                counters[label] = next(
                    counters[earlier]
                    for earlier in reversed(section_order[:position])
                    if counters[earlier] is not None
                )

            # Overwrite the token by position rather than by substring search,
            # so a book code that starts like a section marker is not touched.
            tokens[1] = str(counters[label]).zfill(2)
            data.at[index, "file_name_toc"] = "_".join(tokens)
            counters[label] += 1
    except Exception as error:
        # Last-resort guard that keeps the original best-effort contract.
        print("Error found: ", str(error))
    return data


if __name__ == "__main__":
    # NOTE(review): machine-specific absolute path; this cell only runs where
    # that file exists. Consider a path relative to a configurable data directory.
    input_file_path = "/Users/senthil/Desktop/Senthil/my_testing/filenaming_seq/filenaming_df.csv"
    data = pd.read_csv(input_file_path)

    # Kept inside the guard: `data` is only bound when the guard body runs, so
    # a module-level call would raise NameError whenever this file is imported.
    rev_filename = process_filenaming_seq_num(data)


In [35]:
# Display the frame returned by process_filenaming_seq_num.
rev_filename

Unnamed: 0.1,Unnamed: 0,file_path,file_name_extension,file_type,file_name_toc,file_name_post_processing_label,upload_to_lv2_toc
0,6,/Users/surenderkumar/Documents/DST_Hub/ai-prod...,.docx,manuscript,AP_01_EIBR_C010,Chapter,True


In [90]:
import pandas as pd


df = pd.read_csv("filenames_list.csv")

# Compare as text: read_csv infers an all-digit column as integers, and an
# integer column never equals the string "251374", so the unnormalised
# comparison selects no rows in that case.
# NOTE(review): assumes project_id holds whole-number IDs; confirm against the CSV.
filtered_df = df[df["project_id"].astype(str) == "251374"]

filtered_df


Unnamed: 0,client,project_id,zip_package_name,file_name
