In [18]:
%load_ext autoreload
%autoreload 2
import os
from pathlib import Path
import numpy as np
import pandas as pd
import pyvista as pv
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KDTree
from tqdm.auto import tqdm
from downsample import random_downsample_points

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Dataset locations. The root can be overridden with the SIMJEB_ROOT
# environment variable so the notebook also runs on machines other than the
# original author's; when the variable is unset the original path is used.
root_dir = Path(
    os.environ.get("SIMJEB_ROOT", "/home/max/Desktop/python_projects/SimJeb/")
).expanduser()
metadata_dir = root_dir / "SimJEB_metadata"  # CSV / text metadata files
meshes_dir = root_dir / "SimJEB_volmesh"  # one <id>.vtk mesh file per row id

In [3]:
# Load the metadata table for all brackets.
df = pd.read_csv(metadata_dir / "all_bracket_metadata.csv")
# Read the ids flagged as outliers, one integer per line, into a set.
# Blank lines (e.g. a trailing newline at the end of the file) are skipped so
# they cannot crash int(); int() itself tolerates surrounding whitespace.
with open(metadata_dir / "outliers_interfaces.txt") as f:
    outliers = {int(line) for line in f if line.strip()}

In [4]:
# Preview the first rows of the metadata table as a sanity check of the load.
df.head()

Unnamed: 0,id,num_vertices,num_faces,volume,surface_area,average_edge_length,genus,max_ver_xdisp,max_ver_ydisp,max_ver_zdisp,...,category,num_tets,mass,download_file,link_name,author,author_id,test_split_0,test_split_1,test_split_2
0,0,42360,84764,307642.703591,73622.298369,1.347077,12.0,0.176923,0.073638,0.288827,...,block,570111,1.375163,FINAL BRACKET AARON WEISSBART 8.9.2013 1930 pa...,ge-bracket-001-1,Aaron Weissbart,aaron.weissbart,False,True,False
1,4,35622,71368,104015.598723,39149.667547,1.048514,32.0,0.687567,0.426113,1.059176,...,beam,431759,0.46495,bracket.STEP,ripple-bracket-1,simon,simon-240,False,False,False
2,6,35186,70532,114996.439753,58499.675375,1.30674,41.0,1.180213,2.789576,3.713256,...,block,314815,0.514034,10813 new GE bracket.IGS,ge-bracket-26,Amartesh Sehgal,amartesh.sehgal-1,False,False,False
3,8,24577,49174,89326.268569,30458.408764,1.130023,6.0,1.044753,0.299482,1.285331,...,beam,293971,0.399288,bracket_15.igs,ge-engine-bracket-15-1,Mandli Peter,mandli.peter,False,False,False
4,9,38322,76720,78486.321312,38243.142004,0.989507,20.0,1.391756,0.360266,1.927059,...,beam,481030,0.350834,EngineBracketModified_PTJ.igs,modified-engine-bracket-2,Penn,penn-1,False,False,False


In [5]:
# Drop every row whose id appears in the outlier set.
is_outlier = df["id"].isin(outliers)
df = df.loc[~is_outlier]

In [6]:
# Stratify on "category" so both splits keep the same category proportions.
# The seed is fixed so that the split, the CSV files written from it and the
# point cloud derived from it are identical on every Restart & Run All;
# without it each run silently produces a different train/test partition.
SPLIT_SEED = 42
df_train, df_test = train_test_split(
    df,
    stratify=df["category"],
    random_state=SPLIT_SEED,
)

In [7]:
# Write each split next to the original metadata file (index column included,
# as before).
for split_frame, split_filename in (
    (df_train, "train_bracket_metadata.csv"),
    (df_test, "test_bracket_metadata.csv"),
):
    split_frame.to_csv(metadata_dir / split_filename)

In [8]:
# Distinct categories present in the training split (in order of appearance).
df_train["category"].unique()

array(['flat', 'block', 'butterfly', 'arch', 'beam', 'other'],
      dtype=object)

In [9]:
# Distinct categories present in the test split; compare with the training
# split to confirm that no category is missing from either side.
df_test["category"].unique()

array(['flat', 'arch', 'butterfly', 'block', 'beam', 'other'],
      dtype=object)

In [10]:
# Number of training rows per category (Series indexed by category name).
cat_counts = df_train["category"].value_counts()

In [11]:
# Row count of the rarest training category, as a plain Python int.
min_cat_count = int(cat_counts.min())

In [12]:
# Show the size of the rarest training category.
min_cat_count

4

In [13]:
# Gather the point coordinates of every training mesh, grouped by category.
cat_to_points = {c: [] for c in df_train["category"].unique()}
# zip() has no len(), so the row count is passed explicitly; without it tqdm
# can only show a bare iteration counter with no percentage or ETA.
for id_, cat in tqdm(
    zip(df_train["id"], df_train["category"]),
    total=len(df_train),
    desc="Loading meshes",
):
    # Each mesh is stored as "<id>.vtk" inside meshes_dir.
    mesh = pv.read(meshes_dir / f"{id_}.vtk")
    cat_to_points[cat].append(mesh.points)
# Stack the per-mesh arrays into one array of points per category.
cat_to_points = {c: np.vstack(p) for c, p in cat_to_points.items()}

0it [00:00, ?it/s]

In [14]:
# Balance the categories: each one is reduced to
# min_cat_count * (average number of points per mesh in that category) points,
# so that a category's share no longer scales with how many meshes it has.
# NOTE(review): random_downsample_points is assumed to return that many
# randomly chosen rows of its input - confirm against downsample.py.
balanced_points = {}
for category, category_points in cat_to_points.items():
    n_keep = int(min_cat_count * len(category_points) / cat_counts[category])
    balanced_points[category] = random_downsample_points(category_points, n_keep)
cat_to_points = balanced_points

In [15]:
# Merge the per-category arrays (in dict order) into one point cloud.
all_points = np.vstack(list(cat_to_points.values()))
pts = pv.PolyData(all_points)

In [17]:
# Total number of points in the merged cloud.
pts.points.shape[0]

2106278

In [20]:
# Local density proxy: for every point, the mean distance to its nearest
# neighbours inside the same cloud.
kdt = KDTree(pts.points)
# 100 neighbours are requested per point. The tree is queried with its own
# points, so the closest hit (column 0) is presumably the query point itself
# at distance 0; it is left out of the average below.
dd, ii = kdt.query(pts.points, k=100)
nd = dd[:, 1:].mean(axis=1)
# Attach the result to the cloud as a per-point array.
pts["neighbours_distance"] = nd

In [21]:
# Persist the point cloud, including its "neighbours_distance" array.
density_path = meshes_dir / "train_volmesh_points_density.vtk"
pts.save(density_path)