In [2]:
import os
import polars as pl
import numpy as np

from glob import glob
from tqdm import tqdm

In [3]:
# Directories that are searched for analysis CSV files by the cells below.
# Both are hard-coded absolute paths; adjust them when running on another machine.
mesh_500_analysis_dir = '/media/koi/Expansion/data/mesh_500_analysis'
# NOTE(review): "objarverse" looks like a misspelling of "Objaverse", but it is
# the literal directory name used on disk — confirm before renaming anything.
objarverse_analysis_dir = '/media/koi/Expansion/data/objarverse_analysis'

In [5]:
# Collect the CSV files that sit directly inside the mesh_500 analysis directory.
# glob() returns matches in arbitrary (filesystem-dependent) order, so sort them
# to make the downstream concatenation order reproducible across runs/machines.
mesh_500_csvs = sorted(glob(os.path.join(mesh_500_analysis_dir, '*.csv')))
print(f'Found {len(mesh_500_csvs)} csv files in mesh_500_analysis_dir')

Found 2 csv files in mesh_500_analysis_dir


In [7]:
# Collect the CSV files located exactly two directory levels below the
# objarverse analysis directory (pattern: <dir>/*/*/*.csv).
# glob() returns matches in arbitrary (filesystem-dependent) order, so sort them
# to make the downstream concatenation order reproducible across runs/machines.
objarverse_csvs = sorted(glob(os.path.join(objarverse_analysis_dir, '*/*/*.csv')))
print(f'Found {len(objarverse_csvs)} csv files in objarverse_analysis_dir')

Found 220 csv files in objarverse_analysis_dir


In [8]:
# Single work list: mesh_500 files first, followed by the objarverse files.
csvs = [*mesh_500_csvs, *objarverse_csvs]
print(f'Total csv files to process: {len(csvs)}')

Total csv files to process: 222


In [9]:
# Load every CSV into its own polars DataFrame (tqdm shows progress), then
# stack all of them into one frame with pl.concat.
dataframes = [pl.read_csv(csv_file) for csv_file in tqdm(csvs)]
merged_df = pl.concat(dataframes)

print(f'Merged DataFrame has {merged_df.height} rows and {merged_df.width} columns.')

Merged DataFrame has 259380 rows and 16 columns.


In [10]:
# Preview the first five rows of the merged frame to sanity-check columns and dtypes.
merged_df.head(5)

filename,num_vertices,num_faces,num_internal_edges,num_boundary_edges,min_connectivity,max_connectivity,avg_connectivity,num_components,smallest_component_vertices,smallest_component_faces,largest_component_vertices,largest_component_faces,mesh_volume,mesh_area,file_path
str,i64,i64,i64,i64,i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,str
"""076_256f61e812734f0b8ba77ccd2a…",46,88,132,0,3,9,5.73913,1,46,88,46,88,0.028892,0.636182,"""/mnt/clean/mesh_500_clean/trai…"
"""054_0602da67f463429289caa9147b…",20,34,49,4,3,10,5.3,1,20,36,20,36,0.0,0.618794,"""/mnt/clean/mesh_500_clean/trai…"
"""003_eca381f3d9dc45daa92b043ae2…",80,142,207,12,4,14,5.475,3,12,20,34,64,0.0,1.142529,"""/mnt/clean/mesh_500_clean/trai…"
"""094_53b7928e7739438c85c0f692b7…",123,242,363,0,3,10,5.902439,1,123,242,123,242,0.30359,2.426761,"""/mnt/clean/mesh_500_clean/trai…"
"""015_5f690cdf8e0743b7bddbd87eab…",20,36,54,0,4,6,5.4,1,20,36,20,36,0.071738,2.225983,"""/mnt/clean/mesh_500_clean/trai…"


In [16]:
# Keep only rows whose mesh_volume AND mesh_area both exceed 0.001.
volume_ok = pl.col('mesh_volume') > 0.001
area_ok = pl.col('mesh_area') > 0.001
vf_filtered_df = merged_df.filter(volume_ok & area_ok)
print(f'Filtered DataFrame has {vf_filtered_df.height} rows.')

Filtered DataFrame has 85399 rows.


In [24]:
# Keep only rows with 100 < num_vertices < 1024 (both bounds exclusive).
# The previous comment stated an upper bound of 512, but the filter itself
# uses 1024; the comment now matches the code.
vertex_count = pl.col('num_vertices')
vertex_filtered_df = vf_filtered_df.filter((vertex_count > 100) & (vertex_count < 1024))
print(f'Vertex Filtered DataFrame has {vertex_filtered_df.height} rows.')

Vertex Filtered DataFrame has 26735 rows.
