In [55]:
import tensorflow as tf
import numpy as np
import math
import sys
import os
import keras
import tf_util
import pandas as pd
import random

In [56]:
def placeholder_inputs(batch_size, num_point):
    pointclouds_pl = tf.compat.v1.placeholder(tf.float32, shape=(batch_size, num_point, 3))
    labels_pl = tf.compat.v1.placeholder(tf.int32, shape=(batch_size))
    return pointclouds_pl, labels_pl


In [57]:
def get_model(point_cloud, is_training, bn_decay=None):
    """ Classification PointNet, input is BxNx3, output Bx40 """
    batch_size = point_cloud.get_shape()[0]
    num_point = point_cloud.get_shape()[1]
    end_points = {}
    input_image = tf.expand_dims(point_cloud, -1)
    
    # Point functions (MLP implemented as conv2d)
    net = tf_util.conv2d(input_image, 64, [1,3],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv2', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv3', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv4', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 1024, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv5', bn_decay=bn_decay)

    # Symmetric function: max pooling
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='maxpool')
    
    # MLP on global point cloud vector
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='fc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='fc2', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp1')
    net = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')

    return net, end_points

In [58]:
def get_loss(pred, label, end_points):
    """ pred: B*NUM_CLASSES,
        label: B, """
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(loss)
    tf.summary.scalar('classify loss', classify_loss)
    return classify_loss

In [59]:
# if __name__=='__main__':
#     with tf.Graph().as_default():
#         inputs = tf.zeros((32,1024,3))
#         outputs = get_model(inputs, tf.constant(True))
#         print(outputs)

### Data Dealing:

In [60]:
dataset_directory = '/Users/anishayyagari/Documents/tester/pointnet implementation/data'

In [61]:
train_data = []
test_data = []
train_labels = []
test_labels = []

In [62]:
test_split_percentage = 20

In [63]:
for folder in os.listdir(dataset_directory):
    folder_path = os.path.join(dataset_directory, folder)
    
    if os.path.isdir(folder_path):
        # List all CSV files in the folder
        csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
        
        # Shuffle the list of CSV files
        random.shuffle(csv_files)
        
        # Calculate the number of files to use for testing
        num_test_samples = int(len(csv_files) * (test_split_percentage / 100.0))
        
        # Ensure that we have enough data for both train and test sets
        if num_test_samples >= len(csv_files):
            continue  # Skip this class if there's not enough data
        
        # Split the files into train and test
        train_files = csv_files[num_test_samples:]
        test_files = csv_files[:num_test_samples]
        
        # Load and append the data from each CSV file to the respective lists
        for file_name in train_files:
            file_path = os.path.join(folder_path, file_name)
            data = pd.read_csv(file_path)
            train_data.append(data)
            train_labels.append(folder)  # Append the folder name as the class label
        
        for file_name in test_files:
            file_path = os.path.join(folder_path, file_name)
            data = pd.read_csv(file_path)
            test_data.append(data)
            test_labels.append(folder) 

In [64]:
if len(train_data) == 0 or len(test_data) == 0:
    print("Not enough data for training or testing.")
else:
    # Concatenate the data into train and test DataFrames
    train_df = pd.concat(train_data, ignore_index=True)
    test_df = pd.concat(test_data, ignore_index=True)

    # Extract unique class labels
    class_labels = list(set(train_labels + test_labels))
    
    # Convert class labels to numerical values
    class_to_id = {label: i for i, label in enumerate(class_labels)}
    train_labels = [class_to_id[label] for label in train_labels]
    test_labels = [class_to_id[label] for label in test_labels]
    train_df.to_csv('train_data.csv', index=False)
    test_df.to_csv('test_data.csv', index=False)
    X_train = train_df.to_numpy(dtype=np.float32)
    Y_train = np.array(train_labels, dtype=np.int32)
    X_test = test_df.to_numpy(dtype=np.float32)
    Y_test = np.array(test_labels, dtype=np.int32)

    # Convert NumPy arrays to TensorFlow tensors
    X_train_tensor = tf.constant(X_train)
    Y_train_tensor = tf.constant(Y_train)
    X_test_tensor = tf.constant(X_test)
    Y_test_tensor = tf.constant(Y_test)
    print("X_train tensor shape:", X_train_tensor.shape)
    print("Y_train tensor shape:", Y_train_tensor.shape)
    print("X_test tensor shape:", X_test_tensor.shape)
    print("Y_test tensor shape:", Y_test_tensor.shape)

X_train tensor shape: (171183, 4)
Y_train tensor shape: (293,)
X_test tensor shape: (42109, 4)
Y_test tensor shape: (71,)


In [65]:
batch_size = 32
num_points = 1024

In [66]:
tf.compat.v1.disable_eager_execution()

In [67]:
pointclouds_pl, labels_pl = placeholder_inputs(batch_size, num_points)

In [68]:
is_training = tf.constant(True)  # Set to True for training, False for inference
pred, end_points = get_model(pointclouds_pl, is_training)

In [69]:
learning_rate = 0.001  # Adjust this as needed
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(get_loss(pred, labels_pl, end_points))

In [70]:
sess = tf.compat.v1.Session()
sess.run(tf.compat.v1.global_variables_initializer())

In [71]:
num_epochs = 10  # Adjust the number of epochs as needed
num_batches = len(X_train) // batch_size

In [72]:
for epoch in range(num_epochs):
    # Shuffle the training data at the start of each epoch
#     shuffled_indices = np.arange(len(X_train))
#     np.random.shuffle(shuffled_indices)
#     X_train_shuffled = X_train[shuffled_indices]
#     Y_train_shuffled = Y_train[shuffled_indices]

    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx + 1) * batch_size

        print("Batch:", batch_idx)
        print("Start index:", start_idx)
        print("End index:", end_idx)

        # Ensure that end_idx does not exceed the size of your data
        if end_idx > len(X_train):
            end_idx = len(X_train)

        batch_data = X_train[start_idx:end_idx]
        batch_labels = Y_train[start_idx:end_idx]

        print("Batch data shape:", batch_data.shape)
        print("Batch labels shape:", batch_labels.shape)

        # Feed batch data into the model and run the training operation
        feed_dict = {
            pointclouds_pl: batch_data,
            labels_pl: batch_labels,
            is_training: True  # Set to True for training
        }
        _, loss_val = sess.run([train_op, get_loss(pred, labels_pl, end_points)], feed_dict=feed_dict)

        # Print loss for monitoring
        print(f"Epoch {epoch+1}/{num_epochs}, Batch {batch_idx+1}/{num_batches}, Loss: {loss_val:.4f}")

# Training complete
print("Training finished.")

# Optionally, save the trained model
saver = tf.compat.v1.train.Saver()
saver.save(sess, 'pointnet_model')

# Don't forget to close the session when done
sess.close()







Batch: 0
Start index: 0
End index: 32
Batch data shape: (32, 4)
Batch labels shape: (32,)


ValueError: Cannot feed value of shape (32, 4) for Tensor Placeholder_6:0, which has shape (32, 1024, 4)