In [1]:
import pandas as pd
import re



# identifier of the dataset on OSF
data_id = "hupb2"

# build the OSF download URL, then read the CSV it serves into a DataFrame
data_url = f"https://osf.io/{data_id}/download"
data_df = pd.read_csv(data_url)

# create a column that removes the slice notation
def strip_slice_notation(step_name):
    """Return ``step_name`` with slice markers such as '(1/2)' removed.

    Every parenthesized ``<digits>/<digits>`` substring is deleted, after
    which leading and trailing whitespace is stripped from the result.
    Whitespace left in the middle of the string is not collapsed.
    """
    slice_marker = re.compile(r'\(\d+/\d+\)')
    without_markers = slice_marker.sub('', step_name)
    return without_markers.strip()

# derive a slice-free step name for every row
data_df["what_base"] = data_df["what"].apply(strip_slice_notation)

# columns that together identify one 'group' whose slices are combined
# (all of them for now)
group_cols = [
    "replicate",
    "what_base",
    "phylo_source_path",
    "revision",
    "dstream_S",
    "dstream_value_bitwidth",
    "num_tips",
    "SLURM_JOB_ID",
    "date",
    "hostname",
]

# total the duration within each group (other columns could be aggregated
# here in the future), then expose the slice-free name as 'what' again
# NOTE(review): groupby skips rows whose key columns contain NaN by default;
# confirm none of the grouping columns have missing values
summed_df = (
    data_df.groupby(group_cols, as_index=False)
    .agg({"duration (s)": "sum"})
    .rename(columns={"what_base": "what"})
)

summed_df


Unnamed: 0,replicate,what,phylo_source_path,revision,dstream_S,dstream_value_bitwidth,num_tips,SLURM_JOB_ID,date,hostname,duration (s)
0,0,".sort_by(""dstream_Tbar"").over(partition_by=""ds...",/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,127.662951
1,0,_construct_result_dataframe,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,0.143548
2,0,_join_user_defined_columns,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,4.418865
3,0,alifestd_assign_contiguous_ids,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,6.244194
4,0,alifestd_collapse_unifurcations,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,13.043239
5,0,alifestd_delete_trunk_asexual,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,38.236595
6,0,collapse_dropped_unifurcations,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,27.489405
7,0,dstream.dataframe.explode_lookup_unpacked,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,26.973438
8,0,dstream.dataframe.unpack_data_packed,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,2.170286
9,0,extend_tree_searchtable_cpp_from_exploded,/mnt/gs21/scratch/mmore500/2024-12-25/lex12+as...,f9c054a01fd961b22731cde6fb22de84e23871d9,64,1,10000000,48009066,2025-01-03T09:03:19.002531,vim-000,1058.474615
