In [1]:
from pathlib import Path
import pandas as pd
import re

In [2]:
# VERSION SETTING
# Known experiment variants, grouped by codec type.
version_dict = dict(
    original=["kd_tree", "angular"],
    modified=["01_ring", "02_xyz", "03_group8", "03_group4", "03_group2"],
)

# Which group to use, and which configuration label to append.
selected_type = "modified"
config_label = "test"

# Index 2 of the selected group is the variant under study; the final
# version string is "<variant>_<config_label>".
version = "_".join([version_dict[selected_type][2], config_label])
print(f"Selected version: {version}")

Selected version: 03_group8_test


In [3]:
def parse(file_path):
    """Extract per-slice metrics from one log file.

    The scene name and frame index are read from the file stem, which is
    expected to look like ``scene-<digits>_<digits>`` (e.g. ``scene-0000_00``).
    When the stem does not match, they fall back to ``"unknown"`` and ``-1``.

    The file content is cut into blocks: each block starts at an occurrence
    of ``"Slice number:"`` and runs up to the next occurrence (or the end of
    the file). Every metric pattern is searched independently inside each
    block; a metric that is not found is simply absent from that row.

    Args:
        file_path: Path of the log file (``str`` or ``pathlib.Path``).

    Returns:
        A list with one dict per slice block. Each dict always holds
        ``scene_name`` and ``frame_index`` plus every metric that matched;
        size/count metrics are ``int``, time and bpp metrics are ``float``.
        An empty list is returned when the file does not exist (a message is
        printed) or when it contains no ``"Slice number:"`` marker.
    """
    path_obj = Path(file_path)
    filename = path_obj.stem  # Without suffix

    # 1. Parse the scene name and frame index from the file name.
    # scene-0000_00.log
    name_match = re.match(r"scene-(\d+)_(\d+)", filename)
    if name_match:
        scene_name = f"scene-{name_match.group(1)}"
        frame_index = int(name_match.group(2))
    else:
        scene_name = "unknown"
        frame_index = -1

    # 2. Patterns inside the log file, each paired with its converter.
    # The fractional part and exponent are optional so that a value written
    # without a decimal point ("1 s") or in scientific notation ("1e-05 s")
    # is still captured instead of being silently dropped from the row.
    number = r"(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)"
    patterns = {
        'slice_number': (r"Slice number:\s*(\d+)", int),
        'positions_bitstream_size_bytes': (r"positions bitstream size\s*(\d+)\s*B", int),
        'positions_bitstream_size_bpp': (r"positions bitstream size.*?\(" + number + r"\s*bpp\)", float),
        'positions_processing_time_user': (r"positions processing time \(user\):\s*" + number + r"\s*s", float),
        'total_frame_size_bytes': (r"Total frame size\s*(\d+)\s*B", int),
        'total_bitstream_size_bytes': (r"Total bitstream size\s*(\d+)\s*B", int),
        'processing_time_wall': (r"Processing time \(wall\):\s*" + number + r"\s*s", float),
        'processing_time_user': (r"Processing time \(user\):\s*" + number + r"\s*s", float),
    }

    try:
        with open(file_path, 'r') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return []

    # 3. Find the offset of every "Slice number:" marker.
    slice_starts = [m.start() for m in re.finditer(r"Slice number:", content)]
    # Sentinel so that the last block runs to the end of the file.
    block_bounds = slice_starts + [len(content)]

    # 4. Build one row per slice block.
    rows = []
    for start_idx, end_idx in zip(block_bounds, block_bounds[1:]):
        block = content[start_idx:end_idx]

        row = {
            'scene_name': scene_name,
            'frame_index': frame_index
        }

        for key, (pattern, convert) in patterns.items():
            match = re.search(pattern, block)
            if match:
                # The converter is fixed per metric, so a column never mixes
                # int and float depending on how the value was printed.
                row[key] = convert(match.group(1))

        rows.append(row)

    return rows

In [4]:
# Root folder of the experiments.
experiment_dir = Path("/home/noh/pgc/experiments")

# CSV summaries are written to <experiment_dir>/csv. exist_ok=True makes the
# call a no-op when the folder is already there; parent folders are not
# created, so experiment_dir itself must already exist.
csv_dir = experiment_dir.joinpath("csv")
csv_dir.mkdir(exist_ok=True)

# Folder of the selected version; its entries are iterated as scene folders.
scene_dirs = experiment_dir.joinpath(version)

print(f"Search logs for each scenes in: {scene_dirs}")
print(f"Save result to csv in: {csv_dir}")

Search logs for each scenes in: /home/noh/pgc/experiments/03_group8_test
Save result to csv in: /home/noh/pgc/experiments/csv


In [5]:
# Accumulators: one dict per parsed slice, and the number of log files visited.
all_data = []
parse_count = 0

for scene_dir in scene_dirs.iterdir():
    print(f"[Start searching {scene_dir}]")
    # Only directories are scanned for logs; any other entry is skipped
    # right after the "Start searching" line has been printed.
    if not scene_dir.is_dir():
        continue

    # Sort the logs so they are processed in file-name order.
    log_files = sorted(scene_dir.glob("*.log"))
    for log_file in log_files:
        file_data = parse(log_file)
        parse_count += 1
        print(f"    >> Finished parsing {log_file}")
        # Extending with an empty list is a no-op, so no emptiness check is needed.
        all_data.extend(file_data)
    print("\n")

# Build the frame once from the collected records.
df_all_data = pd.DataFrame(all_data)

# Sorting needs the key columns, which an empty frame does not have.
if not df_all_data.empty:
    df_all_data = (
        df_all_data
        .sort_values(by=['scene_name', 'frame_index'])
        .reset_index(drop=True)
        .round(3)
    )

print(f"[Finished parsing {parse_count} files]")



[Start searching /home/noh/pgc/experiments/03_group8_test/scene-0916]
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_00.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_01.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_02.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_03.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_04.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_05.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_06.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_07.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-0916_08.log
    >> Finished parsing /home/noh/pgc/experiments/03_group8_test/scene-0916/scene-09

In [7]:
# Output file is named after the selected version and stored in csv_dir.
file_name = f"all_data_{version}.csv"
csv_path = csv_dir.joinpath(file_name)

# index=False: the positional index is not written as a column.
df_all_data.to_csv(path_or_buf=csv_path, index=False)

print(f"Successfully saved to: {csv_path}")

Successfully saved to: /home/noh/pgc/experiments/csv/all_data_03_group8_test.csv


In [6]:
import matplotlib.pyplot as plt

# Mean of every numeric metric per scene.
scene_means = df_all_data.groupby(['scene_name']).mean(numeric_only=True)

# frame_index and slice_number are identifiers, so their means are dropped.
scene_means = scene_means.drop(columns=['frame_index', 'slice_number'])

# Turn scene_name back into a regular column and round for display.
df_scene_avg = scene_means.reset_index().round(3)
df_scene_avg

Unnamed: 0,scene_name,positions_bitstream_size_bytes,positions_bitstream_size_bpp,positions_processing_time_user,total_frame_size_bytes,total_bitstream_size_bytes,processing_time_wall,processing_time_user
0,scene-0061,46764.462,10.775,0.221,46845.462,46845.462,0.327,0.239
1,scene-0103,41822.5,9.636,0.208,41903.5,41903.5,0.306,0.226
2,scene-0553,37835.878,8.72,0.18,37916.878,37916.878,0.274,0.197
3,scene-0655,42294.805,9.748,0.2,42375.805,42375.805,0.297,0.218
4,scene-0757,42307.122,9.748,0.216,42388.122,42388.122,0.321,0.235
5,scene-0796,51041.2,11.758,0.226,51122.2,51122.2,0.331,0.245
6,scene-0916,49618.854,11.433,0.197,49699.805,49699.805,0.287,0.215
7,scene-1077,47785.634,11.009,0.223,47866.634,47866.634,0.331,0.244
8,scene-1094,49467.9,11.4,0.226,49548.9,49548.9,0.329,0.246
9,scene-1100,52122.25,12.012,0.402,52203.25,52203.25,0.569,0.436
