In [1]:
import open3d as o3d
import os
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import re
import os
import random
import csv

from mpl_toolkits.mplot3d import Axes3D


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
# Folder containing PCD files
RAW_PCD_FOLDER = 'label_cloud_project/datastore/pointclouds/raw_pcds'
CROPPED_PCD_FOLDER = 'label_cloud_project/datastore/pointclouds/cropped_pcds'  
LABEL_CLOUD_LABEL_FOLDER = 'label_cloud_project/datastore/labels/label_cloud_labels'
YOLO_LABEL_FOLDER = 'label_cloud_project/datastore/labels/yolo_labels'  
BEV_IMAGE_FOLDER = 'label_cloud_project/datastore/images/birds_eye_view_images'

### Create Train/Test .csv's for YOLO model

In [7]:
def create_train_test_split(image_folder, label_folder, split_ratio, csv_dir):
    # Get the list of image and label filenames
    image_filenames = os.listdir(image_folder)
    label_filenames = os.listdir(label_folder)
    
    # Create a dictionary to map image base names to label files
    label_file_map = {}
    for label_file in label_filenames:
        base_name = label_file.split('.')[0]  # Assumes the common part is before the first dot
        label_file_map[base_name] = label_file

    # Shuffle the image filenames
    random.shuffle(image_filenames)
    
    # Calculate the split index
    split_index = int(len(image_filenames) * split_ratio)
    
    # Split the image filenames into train and test sets
    train_image_filenames = image_filenames[:split_index]
    test_image_filenames = image_filenames[split_index:]
    
    # Function to find the corresponding label file
    def find_label_file(image_file):
        base_name = image_file.split('.')[0]  # Assumes the common part is before the first dot
        return label_file_map.get(base_name, None)

    # Create the train.csv file
    train_csv_path = os.path.join(csv_dir, 'train.csv')
    with open(train_csv_path, 'w', newline='') as train_file:
        writer = csv.writer(train_file)
        for image_filename in train_image_filenames:
            label_filename = find_label_file(image_filename)
            if label_filename:
                writer.writerow([image_filename, label_filename])
    
    # Create the test.csv file
    test_csv_path = os.path.join(csv_dir, 'test.csv')
    with open(test_csv_path, 'w', newline='') as test_file:
        writer = csv.writer(test_file)
        for image_filename in test_image_filenames:
            label_filename = find_label_file(image_filename)
            if label_filename:
                writer.writerow([image_filename, label_filename])
# Example usage:
image_folder = 'label_cloud_project/processed_data_batches/batch_1/images'
label_folder = 'label_cloud_project/processed_data_batches/batch_1/labels'
split_ratio = 0.8
csv_directory = 'label_cloud_project/processed_data_batches/batch_1/splits'

image_folder = 'model_training/PASCAL_VOC/images/'
label_folder = 'model_training/PASCAL_VOC/labels/'
split_ratio = 0.8
csv_directory = 'model_training/PASCAL_VOC/'
create_train_test_split(image_folder, label_folder, split_ratio, csv_directory)

### Specific renaming / refactoring functions

In [None]:
def remove_one_zero(label_folder):
    for filename in os.listdir(label_folder):
        # Extract the number part from the filename
        number_part = filename.split('.')[0]

        # Remove the first '0' if it exists
        #if number_part.startswith('0'):
        #    number_part = number_part[1:]

        # Construct the new filename
        new_filename = f"{number_part}.json"

        # Full paths to the files
        original_file = os.path.join(label_folder, filename)
        new_file = os.path.join(label_folder, new_filename)
        print(new_file)
        
        # Rename the file
        os.rename(original_file, new_file)
        print(f"Renamed '{filename}' to '{new_filename}'")

remove_one_zero(LABEL_CLOUD_LABEL_FOLDER)
#clean_filenames(label_folder)

## Crop Point Clouds

In [None]:
def crop_point_cloud(pcd_path, distance_threshold):
    # Load PCD file
    point_cloud = o3d.io.read_point_cloud(pcd_path)
    
    # Convert point cloud to NumPy array
    points = np.asarray(point_cloud.points)
    
    # Calculate Euclidean distance from the origin
    distances = np.linalg.norm(points, axis=1)
    
    # Filter points within the distance threshold
    cropped_points = points[distances <= distance_threshold]
    
    # Create a new Open3D point cloud
    cropped_pcd = o3d.geometry.PointCloud()
    cropped_pcd.points = o3d.utility.Vector3dVector(cropped_points)
    
    return cropped_pcd



# Euclidean distance threshold 'd'
distance_threshold = 5.0  # Adjust this value as needed

# Ensure the output folder exists, if not, create it
os.makedirs(CROPPED_PCD_FOLDER, exist_ok=True)

# Crop all PCD files in the folder
for filename in os.listdir(RAW_PCD_FOLDER):
    print(filename)
    if filename.endswith(".pcd"):
        pcd_path = os.path.join(RAW_PCD_FOLDER, filename)
        cropped_pcd = crop_point_cloud(pcd_path, distance_threshold)
            
        # Save cropped PCD file to the specified output folder
        output_filename = os.path.join(CROPPED_PCD_FOLDER, f"{filename}")
        print(output_filename)
        o3d.io.write_point_cloud(output_filename, cropped_pcd)


## Get BEV images | Inference: leave output folder empty | Training: secifiy output folder to store images

In [None]:

from joblib import Parallel, delayed

# Function to generate bird's-eye view images and save to disk if output folder is provided
def generate_and_save_birds_eye_view(points, output_folder, filename, x_bins, y_bins, x_range, y_range, dpi=200):
    # Filter points
    mask = ((points[:, 0] >= x_range[0]) & (points[:, 0] <= x_range[1]) & 
            (points[:, 1] >= y_range[0]) & (points[:, 1] <= y_range[1]))
    points = points[mask]
    
    # Compute histogram
    hist, x_edges, y_edges = np.histogram2d(points[:, 0], points[:, 1], bins=(x_bins, y_bins))
    hist_normalized = np.log(hist + 1)
    
    # Generate a plot
    fig, ax = plt.subplots(figsize=(7, 7), dpi=dpi)
    ax.imshow(hist_normalized.T, cmap='viridis', origin='lower', extent=[x_range[0], x_range[1], y_range[0], y_range[1]])
    ax.axis('off')
    
    # Save the image to disk if output folder is provided
    if output_folder:
        # Ensure the output folder exists
        os.makedirs(output_folder, exist_ok=True)
        
        # Save the image to disk
        output_path = os.path.join(output_folder, os.path.splitext(filename)[0] + '.png')
        plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
        result = f"Image saved at {output_path}"
    else:
        result = "Output folder not provided, image not saved."
    
    plt.close(fig)
    
    return result

# Function to handle each PCD file
def process_and_save_file(filename, folder_path, output_folder, x_bins, y_bins, x_range, y_range, dpi=200):
    pcd_path = os.path.join(folder_path, filename)
    point_cloud = o3d.io.read_point_cloud(pcd_path)
    points = np.asarray(point_cloud.points)
    
    # Generate and save the bird's-eye view image
    result = generate_and_save_birds_eye_view(points, output_folder, filename, x_bins, y_bins, x_range, y_range, dpi)
    return result

# Define bins and ranges
x_bins = np.linspace(-5, 5, 250)
y_bins = np.linspace(-5, 5, 250)
x_range = [-5, 5]
y_range = [-5, 5]

# Process and save images for all PCD files in the folder if output folder is provided
for filename in os.listdir(CROPPED_PCD_FOLDER):
    if filename.endswith(".pcd"):
        result = process_and_save_file(filename, CROPPED_PCD_FOLDER, BEV_IMAGE_FOLDER, x_bins, y_bins, x_range, y_range)
        print(result)


## Transform labelCloud labels into YOLO Format and store

In [None]:
import json

def lidar_to_yolo(label_file, image_width, image_height, x_range, y_range, x_bins, y_bins):
    # Read the JSON label file
    with open(label_file, 'r') as f:
        labels = json.load(f)

    # Calculate the size of each bin
    x_bin_size = (x_range[1] - x_range[0]) / x_bins
    y_bin_size = (y_range[1] - y_range[0]) / y_bins

    # Initialize an empty list to hold YOLO-formatted labels
    yolo_labels = []

    # Loop over each object in the label file
    for obj in labels['objects']:
        # Get the centroid coordinates
        centroid_x = obj['centroid']['x']
        centroid_y = -obj['centroid']['y']

        # Calculate the pixel position of the centroid
        x_pixel = int((centroid_x - x_range[0]) / x_bin_size)
        y_pixel = int((centroid_y - y_range[0]) / y_bin_size)

        # Convert dimensions to pixels
        width_pixel = int(obj['dimensions']['length'] / x_bin_size)
        height_pixel = int(obj['dimensions']['width'] / y_bin_size)

        # Normalize the values by the image size
        class_id = 0
        x_center_normalized = (x_pixel / image_width)
        y_center_normalized = (y_pixel / image_height)
        width_normalized = width_pixel / image_width
        height_normalized = height_pixel / image_height

        # Append to list in YOLO format
        yolo_labels.append((class_id, x_center_normalized, y_center_normalized, width_normalized, height_normalized))

    return yolo_labels

# Example usage

image_width = 250  # Replace with your actual image width
image_height = 250  # Replace with your actual image height
x_range = [-5, 5]  # Must match the range used when creating the histogram
y_range = [-5, 5]
x_bins = 250  # Must match the number of bins used when creating the histogram
y_bins = 250

for filename in os.listdir(LABEL_CLOUD_LABEL_FOLDER):
    if not '_classes' in filename:
        label_file = os.path.join(LABEL_CLOUD_LABEL_FOLDER, filename)
        yolo_labels_output_folder = YOLO_LABEL_FOLDER
        os.makedirs(yolo_labels_output_folder, exist_ok=True)
        try:
            yolo_labels = lidar_to_yolo(label_file, image_width, image_height, x_range, y_range, x_bins, y_bins)
        except Exception as e:
            print(f"Error processing {label_file}: {e}")
            continue
        output_file = os.path.join(yolo_labels_output_folder, os.path.splitext(filename)[0] + ".txt")
        
        # Write YOLO labels to the output .txt file
        with open(output_file, 'w') as f:
            for label in yolo_labels:
                f.write(' '.join(map(str, label)) + '\n')


## Plotting an example | BEV + YOLO Labels

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

# Load your BEV image
bev_image_path = "label_cloud_project/datastore/images/birds_eye_view_images/000001.png"
bev_image = Image.open(bev_image_path)

# Create a Matplotlib figure and axis for plotting
fig, ax = plt.subplots(1)

# Display the BEV image
ax.imshow(bev_image)

# Define a function to convert YOLO format to box coordinates (x, y, width, height)
def yolo_to_bbox(yolo_label, image_width, image_height):
    class_id, x_center, y_center, width, height = map(float, yolo_label.split())
    x1 = (x_center - width / 2) * image_width
    y1 = (y_center - height / 2) * image_height
    x2 = (x_center + width / 2) * image_width
    y2 = (y_center + height / 2) * image_height
    return x1, y1, x2, y2

# Load and process your YOLO labels
yolo_labels_path = "label_cloud_project/datastore/labels/yolo_labels/000001.txt"
with open(yolo_labels_path, 'r') as f:
    yolo_labels = f.readlines()

# Iterate through YOLO labels and plot the bounding boxes
for yolo_label in yolo_labels:
    x1, y1, x2, y2 = yolo_to_bbox(yolo_label, bev_image.width, bev_image.height)
    
    # Create a rectangle patch for the bounding box
    bbox = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='r', facecolor='none')
    
    # Add the rectangle to the plot
    ax.add_patch(bbox)

# Show the plot
plt.axis('off')  # Turn off axis labels and ticks
plt.show()

In [None]:
def check_label_files(label_folder):
    for filename in os.listdir(label_folder):
        if filename.endswith(".txt"):
            with open(os.path.join(label_folder, filename), 'r') as file:
                lines = file.readlines()
                for line in lines:
                    parts = line.split()
                    # Skip the label part and check the coordinates
                    for part in parts[1:]:
                        value = float(part)
                        if value <= 0 or value > 1:
                            print(f"Invalid value {value} in file {filename}")
                            return False
    return True

check_label_files(YOLO_LABEL_FOLDER)

### Check if CUDA is installed before training

In [3]:
import torch
print(torch.cuda.is_available())

True


In [4]:
import os

def find_files_with_invalid_values(directory):
    """
    Find .txt files in the specified directory containing values outside the [0, 1] range.

    Args:
        directory (str): The directory to search for .txt files.

    Returns:
        None
    """
    # Check if the directory exists
    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    # Iterate through files in the directory
    for filename in os.listdir(directory):
        if filename.endswith(".txt"):
            file_path = os.path.join(directory, filename)

            # Open and read the file
            with open(file_path, 'r') as file:
                lines = file.readlines()

            # Check for invalid values in each line
            for line_number, line in enumerate(lines, start=1):
                values = line.strip().split()
                for value in values:
                    try:
                        float_value = float(value)
                        print(float_value)
                        if float_value > 1.0 or float_value < 0.0:
                            print(f"Invalid value ({float_value}) found in file: {filename}, line {line_number}")
                            break  # Stop checking the line once an invalid value is found
                    except ValueError:
                        pass  # Ignore non-float values
# Example usage:
directory_to_search = "model_training_data\datasets\BEV_DATASET_MORE_CONES\labels"
find_files_with_invalid_values(directory_to_search)

0.0
0.424
0.432
0.024
0.024
0.0
0.612
0.436
0.024
0.024
0.0
0.62
0.34
0.024
0.024
0.0
0.432
0.372
0.024
0.024
0.0
0.42
0.284
0.024
0.024
0.0
0.58
0.2
0.024
0.024
0.0
0.292
0.156
0.024
0.024
0.0
0.228
0.072
0.024
0.024
0.0
0.504
0.108
0.024
0.024
0.0
0.428
0.432
0.024
0.024
0.0
0.432
0.372
0.024
0.024
0.0
0.424
0.292
0.024
0.024
0.0
0.616
0.44
0.024
0.024
0.0
0.62
0.34
0.024
0.024
0.0
0.576
0.212
0.024
0.024
0.0
0.284
0.152
0.024
0.024
0.0
0.228
0.076
0.024
0.024
0.0
0.504
0.108
0.024
0.024
0.0
0.424
0.428
0.024
0.024
0.0
0.428
0.376
0.024
0.024
0.0
0.612
0.436
0.024
0.024
0.0
0.62
0.336
0.024
0.024
0.0
0.428
0.288
0.024
0.024
0.0
0.576
0.208
0.024
0.024
0.0
0.288
0.156
0.024
0.024
0.0
0.224
0.08
0.024
0.024
0.0
0.492
0.124
0.024
0.024
0.0
0.42
0.024
0.024
0.024
0.0
0.432
0.428
0.024
0.024
0.0
0.432
0.376
0.024
0.024
0.0
0.428
0.292
0.024
0.024
0.0
0.616
0.432
0.024
0.024
0.0
0.62
0.336
0.024
0.024
0.0
0.576
0.212
0.024
0.024
0.0
0.292
0.156
0.024
0.024
0.0
0.496
0.116
0.024
0.024
0.0
0

In [3]:
import json
import os

def process_json_files(directory):
    for filename in os.listdir(directory):
        if filename.endswith(".json"):
            filepath = os.path.join(directory, filename)
            with open(filepath, 'r') as file:
                data = json.load(file)
            
            # Count the number of cone objects
            cone_count = sum(1 for obj in data['objects'] if obj['name'] == 'cone')
            
            # Remove cone objects from the end until only 6 cone objects remain
            while cone_count > 6:
                last_object = data['objects'][-1]
                if last_object['name'] == 'cone':
                    data['objects'].pop()
                    cone_count -= 1
                else:
                    break
            
            # Write the modified JSON back to the file
            with open(filepath, 'w') as file:
                json.dump(data, file, indent=4)

# Example usage:
directory = "label_cloud_project\datastore\labels\label_cloud_labels_testing"
process_json_files(directory)
