In [52]:
import os
import tqdm

import pandas as pd

from utils import DiskCachedDataset
from sklearn.preprocessing import LabelEncoder

In [53]:
DATASET_PATH = "experiments/datasets/256-256-32-frames-clips"

In [54]:
dataset = DiskCachedDataset(DATASET_PATH)

In [55]:
print(len(dataset))

2967


In [56]:
classes = ["nothing", "chrono", "grimpe", "lecture", "brossage"]

number_of_classes = len(classes)

encoder = LabelEncoder()

encoder = encoder.fit(classes)

In [57]:
dataset_summary = {classes[i]: i for i in range(number_of_classes)}

In [58]:
# for sample in tqdm.tqdm(dataset, desc="[counting-classes]:"):
#     dataset_summary[classes[sample[1]]] += 1

In [59]:
fps = 25
clip_size = 32

In [60]:
dataset_summary_in_seconds = {k: (v * clip_size) / fps for k, v in dataset_summary.items()} 
dataset_summary_in_minutes = {k: (v * clip_size) / fps / 60 for k, v in dataset_summary.items()} 

In [61]:
dataset_summary_in_seconds

{'nothing': 0.0,
 'chrono': 1.28,
 'grimpe': 2.56,
 'lecture': 3.84,
 'brossage': 5.12}

In [62]:
dataset_summary_in_minutes

{'nothing': 0.0,
 'chrono': 0.021333333333333333,
 'grimpe': 0.042666666666666665,
 'lecture': 0.064,
 'brossage': 0.08533333333333333}

In [63]:
# NOTE: create summary dataframe
df_summary = pd.DataFrame({
    'Class Name': pd.Series(dataset_summary.keys(), dtype='str'),
    'Number of Clips (32)': pd.Series(dataset_summary.values()).astype('int'),
    'Number of Frames': pd.Series([v * clip_size for v in dataset_summary.values()]).astype('int'),
    'Duration in Seconds': pd.Series(dataset_summary_in_seconds.values()).astype('int'),
    'Duration in Minutes': pd.Series(dataset_summary_in_minutes.values()).astype('int'),
})

# NOTE: add percentage column
total_clips = df_summary['Number of Clips (32)'].sum()
df_summary['percentage'] = (df_summary['Number of Clips (32)'] / total_clips * 100).round(2)

# NOTE: add total row
df_summary = pd.concat([df_summary, df_summary.sum(numeric_only=True).to_frame().T.assign(**{'Class Name': 'Total'})])

df_summary['percentage'] = df_summary['percentage'].apply(lambda x: f"{x}%")

In [64]:
df_summary

Unnamed: 0,Class Name,Number of Clips (32),Number of Frames,Duration in Seconds,Duration in Minutes,percentage
0,nothing,0.0,0.0,0.0,0.0,0.0%
1,chrono,1.0,32.0,1.0,0.0,10.0%
2,grimpe,2.0,64.0,2.0,0.0,20.0%
3,lecture,3.0,96.0,3.0,0.0,30.0%
4,brossage,4.0,128.0,5.0,0.0,40.0%
0,Total,10.0,320.0,11.0,0.0,100.0%


In [65]:
df_summary_2  = pd.DataFrame({
    'Class Name': pd.Series(dataset_summary.keys(), dtype='str'),
    'Number of Segments': pd.Series(dataset_summary.values()).astype('int'),
    'Total Number of Frames': pd.Series([v * clip_size for v in dataset_summary.values()]).astype('int'),
    'Average Segment Duration in Seconds': pd.Series(dataset_summary_in_seconds.values()).astype('int'),
    'Total in Seconds': pd.Series(dataset_summary_in_minutes.values()).astype('int'),
    'Proportion': '',
})

In [66]:
BASE_ANNOTATIONS_PATH = "/Volumes/secondary-disk/extracted-version-6/annotations"

In [67]:
full_videos_annotations_paths = list(map(lambda path: os.path.join(BASE_ANNOTATIONS_PATH, path), filter(lambda file_name: '.DS_Store' not in file_name, os.listdir(BASE_ANNOTATIONS_PATH))))

In [68]:
full_videos_annotations_paths

['/Volumes/secondary-disk/extracted-version-6/annotations/climb_11-climber_MoubeAdrian-bloc_2-angle_face.csv',
 '/Volumes/secondary-disk/extracted-version-6/annotations/climb_11-climber_MoubeAdrian-bloc_2-angle_profile.csv',
 '/Volumes/secondary-disk/extracted-version-6/annotations/climb_12-climber_MrideEsteban-bloc_2-angle_face.csv',
 '/Volumes/secondary-disk/extracted-version-6/annotations/climb_12-climber_MrideEsteban-bloc_2-angle_profile.csv',
 '/Volumes/secondary-disk/extracted-version-6/annotations/climb_13-climber_FonneLana-bloc_2-angle_face.csv',
 '/Volumes/secondary-disk/extracted-version-6/annotations/climb_13-climber_FonneLana-bloc_2-angle_profile.csv',
 '/Volumes/secondary-disk/extracted-version-6/annotations/climb_14-climber_PlancheLeo-bloc_2-angle_face.csv',
 '/Volumes/secondary-disk/extracted-version-6/annotations/climb_14-climber_PlancheLeo-bloc_2-angle_profile.csv',
 '/Volumes/secondary-disk/extracted-version-6/annotations/climb_15-climber_ChatagonMael-bloc_2-angle_fac

In [69]:
full_videos_annotations_paths

annotation = pd.read_csv(full_videos_annotations_paths[0])

print(annotation)

      action  starting-timestamp  ending-timestamp
0    lecture                 0.0            9317.0
1   brossage              9317.0           24432.0
2   brossage             25392.0           35629.0
3    lecture             35630.0           54344.0
4     grimpe             54344.0          101770.0
5    lecture            111289.0          115807.0
6     chrono            115807.0          116566.0
7   brossage            116567.0          158475.0
8     chrono            158276.0          160315.0
9   brossage            162234.0          178429.0
10    chrono            178070.0          178829.0
11    chrono            195945.0          196904.0
12   lecture            197065.0          209341.0
13    grimpe            209341.0          247210.0


In [70]:
def analyze_annotations(file_paths):
   all_annotations = pd.concat([pd.read_csv(path) for path in file_paths])
   all_annotations['duration'] = all_annotations['ending-timestamp'] - all_annotations['starting-timestamp']
   all_annotations['frames'] = all_annotations['duration'] * 25 / 1000  # Convert ms to frames
   
   all_annotations['action'] = all_annotations['action'].str.capitalize()
   
   summary = all_annotations.groupby('action').agg({
       'action': 'count',
       'duration': ['mean', 'sum'],
       'frames': 'sum'
   }).reset_index()
   
   summary.columns = ['Class Name', 'Number of Segments', 'Average Segment Duration (s)', 'Total Duration (m)', 'Number of Frames']
   summary['Average Segment Duration (s)'] = (summary['Average Segment Duration (s)'] / 1000).round(2)
   summary['Total Duration (m)'] = (summary['Total Duration (m)'] / 60000).round(2)
   summary['Number of Frames'] = summary['Number of Frames'].round(0).astype(int)
   
   total_time = summary['Total Duration (m)'].sum()
   summary['Proportion'] = summary['Total Duration (m)'].apply(lambda x: f"{(x / total_time * 100).round(2)}\\%")
   
   total_row = pd.DataFrame({
       'Class Name': ['Total'],
       'Number of Segments': [summary['Number of Segments'].sum()],
       'Average Segment Duration (s)': [summary['Average Segment Duration (s)'].mean().round(2)],
       'Total Duration (m)': [total_time.round(2)],
       'Number of Frames': [summary['Number of Frames'].sum()],
       'Proportion': ['100.00\\%']
   })
   
   return pd.concat([summary, total_row], ignore_index=True)

In [71]:
summary_df = analyze_annotations(full_videos_annotations_paths)

summary_df

Unnamed: 0,Class Name,Number of Segments,Average Segment Duration (s),Total Duration (m),Number of Frames,Proportion
0,Brossage,34,18.84,10.68,16017,18.81\%
1,Chrono,74,1.73,2.13,3199,3.75\%
2,Grimpe,70,16.52,19.27,28903,33.94\%
3,Lecture,96,15.44,24.7,37054,43.5\%
4,Total,274,13.13,56.78,85173,100.00\%


In [72]:
summary_df_without_total = summary_df[:-1]  # Remove total row
new_row = pd.DataFrame([{
    'Class Name': '-',
    'Number of Segments': '-',
    'Average Segment Duration (s)': '-',
    'Total Duration (m)': '-',
    'Number of Frames': '-',
    'Proportion': '-'
}])
summary_df = pd.concat([summary_df_without_total, new_row, summary_df.tail(1)], ignore_index=True)

In [73]:
summary_df

Unnamed: 0,Class Name,Number of Segments,Average Segment Duration (s),Total Duration (m),Number of Frames,Proportion
0,Brossage,34,18.84,10.68,16017,18.81\%
1,Chrono,74,1.73,2.13,3199,3.75\%
2,Grimpe,70,16.52,19.27,28903,33.94\%
3,Lecture,96,15.44,24.7,37054,43.5\%
4,-,-,-,-,-,-
5,Total,274,13.13,56.78,85173,100.00\%


In [74]:
def latex_with_resize(df: pd.DataFrame, path):
    # numeric_cols = ['Average Segment Duration (s)', 'Total Duration (m)']
    # df[numeric_cols] = df[numeric_cols].round(2).apply(lambda x: x.map('{:05.2f}'.format))
    latex_content = df.to_latex(
        column_format='|l|l|l|l|l|l|',
        # float_format="%.2f",
        float_format='{:05.2f}'.format,
        escape=False,
        index=False,
    )
    
    # lines = latex_content.split('\n')
    # lines.insert(-4, '\\hline') 
    # latex_content = '\n'.join(lines)
    
    with open(path, 'w') as f:
        f.write('\\resizebox{\\textwidth}{!}{\n')
        f.write(latex_content)
        f.write('}')

In [75]:
latex_with_resize(summary_df, "report/assets/tables/dataset.summary.tex")

In [76]:
# summary_df.to_latex(
#     "report/assets/tables/dataset.summary.tex",
#     column_format='|l|l|l|l|l|',
#     # caption='Dataset Summary Statistics',
#     bold_rows=False,
#     escape=False,
#     index=False,
#     header=["\\multicolumn{1}{|c|}{\\textbf{Class Name}}", 
#             "\\multicolumn{1}{c|}{\\textbf{Number of Segments}}", 
#             "\\multicolumn{1}{c|}{\\textbf{Average Segment Duration (s)}}", 
#             "\\multicolumn{1}{c|}{\\textbf{Total Duration (m)}}", 
#             "\\multicolumn{1}{c|}{\\textbf{Proportion}}"]
# )

# # summary_df.to_csv("experiments/results/dataset.summary.csv", index=False)
# # summary_df.to_latex(
# #     "report/assets/tables/dataset.summary.tex",
# #     index=False,
# #     column_format='|l|l|l|l|l|',
# #     escape=False,
# #     header=[
# #         '\\textbf{Class Name}',
# #         '\\textbf{Number of Segments}', 
# #         '\\textbf{Average Segment Duration (s)}',
# #         '\\textbf{Total Duration (m)}',
# #         '\\textbf{Proportion}'
# #     ],
# #     columns=[
# #         'Class Name',
# #         'Number of Segments',
# #         'Average Segment Duration (s)',
# #         'Total Duration (m)',
# #         'Proportion'
# #     ]
# # )