In [27]:
import numpy as np
import os

In [28]:
# Function to read an obj file and return its vertices as an (N, 3) array
def read_obj_file(file_path):
    """Read the geometric vertices (``v`` records) of a Wavefront OBJ file.

    Only the first three coordinates (x, y, z) of each vertex record are
    kept. An optional fourth ``w`` value or trailing per-vertex colour values
    are ignored so that every row has the same width. All other record types
    (``vn``, ``vt``, ``f``, comments, ...) are skipped.

    Parameters
    ----------
    file_path : str or path-like
        Location of the ``.obj`` file.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_vertices, 3)``; the shape is ``(0, 3)``
        when the file contains no vertex records.

    Raises
    ------
    ValueError
        If a vertex record has fewer than three coordinates, or a coordinate
        cannot be parsed as a number.
    """
    vertices = []
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            # Compare the whole first token: this rejects 'vn'/'vt'/'vp'
            # records while still accepting 'v' followed by a tab or
            # preceded by indentation.
            if not fields or fields[0] != 'v':
                continue
            coords = fields[1:4]
            if len(coords) < 3:
                raise ValueError(
                    f"{file_path}: line {line_number}: vertex record has "
                    f"{len(coords)} coordinate(s), expected at least 3"
                )
            vertices.append([float(comp) for comp in coords])

    # The reshape keeps the (N, 3) contract even when no vertex was found.
    return np.array(vertices, dtype=float).reshape(-1, 3)

# Sample: parse a single model, then show its vertex array followed by its shape
sample_cad_model = read_obj_file(
    "/home/hrishi/Programming/Python/CAD-DR/abc-dataset/00030037_0ef34aa1b15748a5b4ad7c0e_trimesh_027.obj"
)
print(sample_cad_model, sample_cad_model.shape, sep="\n")

[[  8.6       -0.        -6.25    ]
 [ -8.6       -0.        -6.25    ]
 [  8.6      -10.        -6.25    ]
 ...
 [ -2.526908  -5.088639   6.25    ]
 [  3.918603  -4.431913   6.25    ]
 [ -3.283615  -5.150998   6.25    ]]
(22323, 3)


In [29]:
import os

directory_path = '/home/hrishi/Programming/Python/CAD-DR/abc-dataset/'

# Full paths of the regular files directly inside the directory
# (entries that are not files, e.g. sub-directories, are dropped).
file_paths = list(
    filter(
        os.path.isfile,
        (os.path.join(directory_path, entry) for entry in os.listdir(directory_path)),
    )
)

# Number of files found, then the first path as a sanity check
print(len(file_paths))
print(file_paths[0])

60
/home/hrishi/Programming/Python/CAD-DR/abc-dataset/00030095_8ff08438f6684d8697a877b5_trimesh_000.obj


In [30]:
import pandas as pd

# Parse every listed file and wrap its vertex array in a DataFrame,
# keeping the same order as file_paths.
dataset = [pd.DataFrame(read_obj_file(str(path))) for path in file_paths]

print(dataset)
print(len(dataset))

[               0          1          2
0      10.673704  -7.620000 -31.138974
1      11.713963  -6.350000 -30.410440
2      20.479232  -6.350000 -25.349810
3      21.630290  -7.620000 -24.813186
4      20.274409  -6.350000 -24.956307
...          ...        ...        ...
10373  -0.442543 -18.123491 -12.644925
10374  -9.110233 -18.231166  -8.729054
10375  -3.033986 -18.221689 -12.250611
10376  -1.450289 -18.113988 -12.571921
10377 -10.164951 -18.115557  -7.537868

[10378 rows x 3 columns],                 0         1       2
0        8.950581 -6.249566  0.0000
1        8.565653 -6.489366  0.0000
2        8.950581 -6.249566  0.1000
3        8.565653 -6.489366  0.1000
4        9.211812 -7.493056  0.0000
...           ...       ...     ...
426744  25.196964 -2.248878  0.0357
426745  26.411565 -3.674364  0.0357
426746  24.639435 -3.674364  0.0357
426747  26.411565 -2.216136  0.0357
426748  24.639435 -2.216136  0.0357

[426749 rows x 3 columns],                0          1          2
0    

In [31]:
# Finding the largest and smallest vertex count over the models in the dataset.
# Every count is collected first and the extremes are taken with max()/min().
# The previous hard-coded starting minimum (1000000) would have been reported
# as the minimum if every model had more vertices than that.
vertex_counts = []
for filename in os.listdir("abc-dataset"):
    model_path = os.path.join("abc-dataset", filename)
    # Skip anything that is not a regular file (e.g. a sub-directory),
    # which open() could not read.
    if not os.path.isfile(model_path):
        continue
    vertex_counts.append(len(read_obj_file(model_path)))
# print(vertex_counts) - can be used if we wish to see number of vertices of each model

# default=0 keeps both values defined when the directory holds no models.
max_vertices = max(vertex_counts, default=0)
min_vertices = min(vertex_counts, default=0)

In [32]:
# Largest and smallest vertex counts, followed by a preview of the first model
print(max_vertices, min_vertices, sep="\n")
print(dataset[0])

635732
1950
               0          1          2
0      10.673704  -7.620000 -31.138974
1      11.713963  -6.350000 -30.410440
2      20.479232  -6.350000 -25.349810
3      21.630290  -7.620000 -24.813186
4      20.274409  -6.350000 -24.956307
...          ...        ...        ...
10373  -0.442543 -18.123491 -12.644925
10374  -9.110233 -18.231166  -8.729054
10375  -3.033986 -18.221689 -12.250611
10376  -1.450289 -18.113988 -12.571921
10377 -10.164951 -18.115557  -7.537868

[10378 rows x 3 columns]


In [34]:
import tensorflow as tf

# tf.test.is_gpu_available() is deprecated; TensorFlow's own deprecation
# notice recommends tf.config.list_physical_devices('GPU') instead.
# The returned list is empty (falsy) when no GPU is visible.
if tf.config.list_physical_devices('GPU'):
    print("GPU is available and being used by TensorFlow.")
else:
    # Make the CPU-only case explicit instead of printing nothing.
    print("No GPU detected; TensorFlow will run on the CPU.")

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU is available and being used by TensorFlow.


2023-10-08 12:35:59.905701: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-08 12:35:59.906657: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-08 12:35:59.907356: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [40]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Input, Dense
from keras.models import Model
import matplotlib.pyplot as plt

# Build and train a dense autoencoder on the vertices of every loaded model.
# `dataset` is the list of per-model Pandas DataFrames built in an earlier cell;
# it is aliased here and then converted to a single NumPy array.
dataframes = dataset  # alias of the same list object (no copy is made)

# Combine the DataFrames into a single NumPy array by stacking them vertically.
# Model boundaries are lost here: every row of every model becomes one
# independent training sample.
data = np.vstack([df.values for df in dataframes])

# Width of the latent ("bottleneck") layer.
# NOTE(review): the DataFrames printed earlier have 3 columns, so
# data.shape[1] is presumably 3. A 1900-unit latent layer would then be wider
# than the input and would not reduce dimensionality — confirm the intent
# (the value may have been meant as a target number of vertices per model).
desired_dimensions = 1900  # Adjust this as needed

# Number of 512-unit hidden layers on each side of the latent layer.
# NOTE(review): 50 + 50 stacked Dense layers is very deep for a plain
# fully-connected network; confirm this is intentional.
num_encoding_layers = 50  # Adjust this as needed
num_decoding_layers = 50  # Adjust this as needed

# Build the autoencoder model with the Keras functional API.
# The input width equals the number of columns in `data`.
input_layer = Input(shape=(data.shape[1],))
encoded = input_layer

# Encoding layers: each iteration stacks one more Dense(512, relu) layer
# on top of the previous tensor.
for _ in range(num_encoding_layers):
    encoded = Dense(512, activation='relu')(encoded)  # Adjust the number of units as needed

# Latent layer; its output is the encoded representation.
encoded = Dense(desired_dimensions, activation='relu')(encoded)

# Decoding layers: mirror of the encoder, starting from the latent tensor.
decoded = encoded
for _ in range(num_decoding_layers):
    decoded = Dense(512, activation='relu')(decoded)  # Adjust the number of units as needed

# Output layer: same width as the input so it can be compared against it.
decoded = Dense(data.shape[1], activation='linear')(decoded)  # Linear activation for reconstruction

# Create the autoencoder model mapping the input to its reconstruction.
autoencoder = Model(input_layer, decoded)

# Compile the model with the Adam optimizer.
autoencoder.compile(optimizer='adam', loss='mse')  # Mean Squared Error (MSE) loss

# Train the autoencoder; `data` is passed as both input and target, so the
# network learns to reproduce its own input.
# NOTE(review): no random seed is set, so runs are presumably not reproducible;
# with batch_size=15 the number of steps per epoch grows with len(data) —
# confirm this is acceptable for the full stacked dataset.
autoencoder.fit(data, data, epochs=10, batch_size=15)  # Adjust epochs and batch size as needed


Epoch 1/10
