In [1]:
import os
import polars as pl
import numpy as np

from glob import glob
from tqdm import tqdm

In [2]:
# Common parent folder of the per-dataset analysis directories.
data_root = '/media/koi/Expansion/data'

# One analysis directory per dataset; the cells below search these for CSV files.
mesh_500_analysis_dir = f'{data_root}/mesh_500_analysis'
objarverse_analysis_dir = f'{data_root}/objarverse_analysis'
shapenet_analysis_dir = f'{data_root}/shapenet_analysis'

In [3]:
# Collect the CSV files that sit directly inside the mesh_500 analysis directory.
mesh_500_pattern = os.path.join(mesh_500_analysis_dir, '*.csv')
mesh_500_csvs = glob(mesh_500_pattern)
print(f'Found {len(mesh_500_csvs)} csv files in mesh_500_analysis_dir')

Found 2 csv files in mesh_500_analysis_dir


In [4]:
# Collect the CSV files located exactly two sub-directory levels below the
# objarverse analysis directory (pattern '*/*/*.csv').
objarverse_pattern = os.path.join(objarverse_analysis_dir, '*/*/*.csv')
objarverse_csvs = glob(objarverse_pattern)
print(f'Found {len(objarverse_csvs)} csv files in objarverse_analysis_dir')

Found 1353 csv files in objarverse_analysis_dir


In [5]:
# Collect the CSV files that sit directly inside the shapenet analysis directory.
shapenet_pattern = os.path.join(shapenet_analysis_dir, '*.csv')
shapenet_csvs = glob(shapenet_pattern)
print(f'Found {len(shapenet_csvs)} csv files in shapenet_analysis_dir')

Found 1 csv files in shapenet_analysis_dir


In [6]:
# Build one flat list of CSV paths, keeping the dataset order:
# mesh_500 first, then objarverse, then shapenet.
csvs = [*mesh_500_csvs, *objarverse_csvs, *shapenet_csvs]
print(f'Total csv files to process: {len(csvs)}')

Total csv files to process: 1356


In [7]:
# Load every CSV into its own polars frame (tqdm shows progress), then
# combine all of the frames into a single table with pl.concat.
dataframes = [pl.read_csv(csv_path) for csv_path in tqdm(csvs)]
merged_df = pl.concat(dataframes)

# Report the size of the combined table.
print(f'Merged DataFrame has {merged_df.height} rows and {merged_df.width} columns.')

100%|███████████████████████████████████████████████████████████████████████████| 1356/1356 [00:00<00:00, 3312.51it/s]

Merged DataFrame has 969798 rows and 16 columns.





In [8]:
# Display the column names of the merged frame.
merged_df.columns

['filename',
 'num_vertices',
 'num_faces',
 'num_internal_edges',
 'num_boundary_edges',
 'min_connectivity',
 'max_connectivity',
 'avg_connectivity',
 'num_components',
 'smallest_component_vertices',
 'smallest_component_faces',
 'largest_component_vertices',
 'largest_component_faces',
 'mesh_volume',
 'mesh_area',
 'file_path']

In [9]:
# Keep only the rows whose 'mesh_area' is greater than 0.001.
# NOTE(review): no threshold is applied to 'mesh_volume' in this step —
# confirm that leaving the volume unfiltered is intentional.
vf_filtered_df = merged_df.filter(pl.col('mesh_area') > 0.001)
print(f'Filtered DataFrame has {vf_filtered_df.height} rows.')

Filtered DataFrame has 969319 rows.


In [10]:
# Keep only the rows whose 'num_vertices' is below 512.
# NOTE(review): there is no lower bound on 'num_vertices' in this step —
# confirm that small meshes are meant to be kept.
vertex_filtered_df = vf_filtered_df.filter(pl.col('num_vertices') < 512)
print(f'Vertex Filtered DataFrame has {vertex_filtered_df.height} rows.')

Vertex Filtered DataFrame has 586462 rows.


In [11]:
# Keep only the rows whose 'max_connectivity' is at most 8.
max_connectivity_ok = pl.col('max_connectivity') <= 8
mac_filtered_df = vertex_filtered_df.filter(max_connectivity_ok)
print(f'Connectiivity Filtered DataFrame has {mac_filtered_df.height} rows.')

Connectiivity Filtered DataFrame has 210169 rows.


In [12]:
# Keep only the rows whose 'min_connectivity' is at least 2.
min_connectivity_ok = pl.col('min_connectivity') >= 2
mic_filtered_df = mac_filtered_df.filter(min_connectivity_ok)
print(f'Connectivity Filtered DataFrame has {mic_filtered_df.height} rows.')

Connectivity Filtered DataFrame has 210168 rows.


In [13]:
# Keep only the rows whose 'num_components' is at most 10.
component_count_ok = pl.col('num_components') <= 10
com_filtered_df = mic_filtered_df.filter(component_count_ok)
print(f'Component Filtered DataFrame has {com_filtered_df.height} rows.')

Component Filtered DataFrame has 195775 rows.
