# Filter each stationary frame

In [185]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import os
import itertools
from mpl_toolkits.mplot3d import Axes3D
from pathlib import Path

## Constants and configuration

In [187]:
# Set background map resolutions
# Width of one azimuth bin in degrees (the grid spans -180..180 degrees)
azimuth_resolution = 1
# Width of one elevation-angle ("height") bin in degrees (the grid spans -30..10 degrees)
height_resolution = 0.5
# Threshold on the standard deviation of the distances collected in one grid
# cell: cells below it are treated as tightly packed when the lookup table is built
std_dev_cutoff = 1.5

In [188]:
# Root folder of the stationary recordings: every sub-directory is processed as
# one sequence and is read through its 'velodyne_points' sub-folder
DATA_DIR_ROOT = '../stationary_data'

## Functions for building the background map

In [190]:
# Create mappings for azimuth and height using integers
def create_mappings(azimuth_step=azimuth_resolution, height_step=height_resolution):
    """Build the integer-keyed lookup dictionaries for the background grid.

    Angles are keyed in tenths of a degree so that the grid can be addressed
    with exact integers instead of floats.

    Args:
        azimuth_step: Spacing of the azimuth bins in degrees.
        height_step: Spacing of the elevation ("height") bins in degrees.

    Returns:
        (azimuth_map, height_map): two dicts mapping ``angle * 10`` (as int) to
        the position of that angle in its range. Azimuth runs from -180 to 180
        degrees and height from -30 to 10 degrees, both ends generated by
        ``np.arange`` with the stop set one step past the upper bound.
    """
    azimuth_range = np.arange(-180, 180 + azimuth_step, azimuth_step)
    height_range = np.arange(-30, 10 + height_step, height_step)

    def to_key(angle):
        # Round rather than truncate: arange values such as -29.9 may be stored
        # as -29.899999..., and int() alone would turn that into key -298.
        return int(round(float(angle) * 10))

    azimuth_map = {to_key(az): idx for idx, az in enumerate(azimuth_range)}
    height_map = {to_key(ht): idx for idx, ht in enumerate(height_range)}
    return azimuth_map, height_map

In [191]:
# Create the grid DataFrames
def create_grid_dataframes():
    """Create the empty distance and intensity grids.

    Both grids are DataFrames with one row per height key and one column per
    azimuth key (keys come from ``create_mappings``); every cell starts as its
    own empty list so that samples can be appended to it later.

    Returns:
        (df_distances, df_intensities, azimuth_map, height_map)
    """
    azimuth_map, height_map = create_mappings()

    def empty_grid():
        # A fresh list is built for every cell; reusing one list object would
        # make all cells share the same samples.
        return pd.DataFrame(
            {azimuth_key: [[] for _ in height_map] for azimuth_key in azimuth_map},
            index=list(height_map),
        )

    return empty_grid(), empty_grid(), azimuth_map, height_map

In [192]:
# Process file into grid
def process_files_to_grid(data_dir):
    # Create empty grid
    df_distances, df_intensities, azimuth_map, height_map = create_grid_dataframes()

    lidar_dir = Path(data_dir, 'velodyne_points')
    # For each file in the directory
    for file_path in lidar_dir.iterdir():
    # for file_path in itertools.islice(lidar_dir.iterdir(), 3):
        data = np.fromfile(file_path, dtype=np.float32).reshape(-1, 4)
        for x, y, z, intensity in data:
            # Convert to azimuth, height, distance format
            distance = np.sqrt(x**2 + y**2 + z**2)
            azimuth = np.degrees(np.arctan2(y, x))
            height = np.degrees(np.arctan2(z, np.sqrt(x**2 + y**2)))
            # Convert and scale
            azimuth_idx = int(np.floor((azimuth + 180) / \
                                       azimuth_resolution) * azimuth_resolution * 10) - 1800
            height_idx = int(np.floor((height + 30) / height_resolution)) - 300
            # Update DataFrames directly using indices
            if azimuth_idx in azimuth_map and height_idx in height_map:
                df_distances.at[height_idx, azimuth_idx].append(distance)
                df_intensities.at[height_idx, azimuth_idx].append(intensity)
    return df_distances, df_intensities

In [193]:
def create_background_lookup_table(dir):
    # Get the distances of the background map from the lidar files
    df_distances, df_intensities = process_files_to_grid(dir)

    # Create a new DataFrame with the same index and columns as df_distances
    lookup_table = pd.DataFrame(index=df_distances.index, columns=df_distances.columns)
    # Iterate through each cell in df_distances
    for (height, azimuth), distances in df_distances.stack().items():
        if distances:  # If the list is not empty
            # Calculate the value as the largest distance minus the standard deviation
            # value = np.max(distances) - (2 * np.std(distances))
            cutoff = np.max(distances)
            standard_deviation = np.std(distances)
            adjustment = standard_deviation / 2
            
            # If the points are tightly packed, this is probably a background area, set really close
            if standard_deviation < std_dev_cutoff:
                cutoff = 1
            # If they are not, then be stricter (farther away) with the background
            else:
                cutoff = cutoff - adjustment
                
            value = cutoff
        else:
            value = np.nan  # If the list is empty, set the cell to NaN

        # Set the value in the new DataFrame
        lookup_table.at[height, azimuth] = value

    return lookup_table

## Filtering and saving functions

In [195]:
def convert_to_dataframe(bin_path):
    pre_filtered_data = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) 
    columns = ['x', 'y', 'z', 'intensity']
    df = pd.DataFrame(pre_filtered_data, columns=columns)
    return df

In [196]:
def add_lookup_coords_to_xyz(points_df):
    # Calculate the distance, azimuth, and height using vectorized operations
    x, y, z, intensity = points_df['x'], points_df['y'], points_df['z'], points_df['intensity']
    distance = np.sqrt(x**2 + y**2 + z**2)
    azimuth = np.degrees(np.arctan2(y, x))
    height = np.degrees(np.arctan2(z, np.sqrt(x**2 + y**2)))
    
    # Convert and scale
    azimuth_idx = np.floor((azimuth + 180) / azimuth_resolution).astype(int) \
        * azimuth_resolution * 10 - 1800
    height_idx = np.floor((height + 30) / height_resolution).astype(int) - 300
    
    # Add new columns to dataframe
    points_df['distance'] = distance
    points_df['azimuth_idx'] = azimuth_idx
    points_df['height_idx'] = height_idx
    
    return points_df

In [197]:
def filter_points(input_file, lookup_table):
    """Keep only the points of a frame that lie closer than the background cutoff.

    A point is kept when its grid cell exists in ``lookup_table``, the cutoff
    stored there is not NaN, and the point's distance is strictly smaller than
    that cutoff.

    Args:
        input_file: Path of the ``.bin`` frame to filter.
        lookup_table: DataFrame of distance cutoffs indexed by height key with
            azimuth keys as columns (labels must be unique).

    Returns:
        DataFrame with float64 columns ``x``, ``y``, ``z`` and ``intensity`` and
        a fresh 0..n-1 index. The columns are present even when no point is
        kept, so the result can always be passed to ``save_as_binary``.
    """
    output_columns = ['x', 'y', 'z', 'intensity']

    # Load the frame and attach each point's distance and grid-cell keys
    points = add_lookup_coords_to_xyz(convert_to_dataframe(input_file))

    # Position of every point's cell inside the table; -1 means "not in the table"
    row_pos = lookup_table.index.get_indexer(points['height_idx'].astype(int))
    col_pos = lookup_table.columns.get_indexer(points['azimuth_idx'].astype(int))
    in_table = (row_pos >= 0) & (col_pos >= 0)

    # Cutoff per point; stays NaN for points outside the table or in empty cells
    table_values = lookup_table.to_numpy(dtype=float)
    cutoffs = np.full(len(points), np.nan)
    cutoffs[in_table] = table_values[row_pos[in_table], col_pos[in_table]]

    # Comparisons against NaN are False, so points without a cutoff are dropped
    keep = in_table & (points['distance'].to_numpy() < cutoffs)

    filtered_df = (points.loc[keep, output_columns]
                   .astype(np.float64)
                   .reset_index(drop=True))
    return filtered_df

In [198]:
def save_as_binary(df, bin_path):
    """Write the points of ``df`` to ``bin_path`` as raw float32 values.

    Each point is stored as four consecutive values in the order x, y, z,
    intensity, regardless of the column order of ``df``.
    """
    # Select the columns in the on-disk order, then force 32-bit floats
    ordered = df[['x', 'y', 'z', 'intensity']]
    as_float32 = ordered.astype(np.float32).to_numpy()

    # Dump the raw array contents to the file
    as_float32.tofile(bin_path)

In [199]:
def filter_frames(dir, background_lookup_table):
    """Filter every frame of a sequence and save the results next to the input.

    Each ``.bin`` file in ``<dir>/velodyne_points`` is passed through
    ``filter_points`` and written under the same file name to
    ``<dir>/filtered_points`` (created if it does not exist). The name of each
    file is printed as it is processed.
    """
    source_dir = Path(dir, 'velodyne_points')

    # Output folder for the filtered frames, inside the sequence directory
    target_dir = Path(dir, 'filtered_points')
    target_dir.mkdir(exist_ok=True)

    for frame_name in os.listdir(source_dir):
        # Only .bin files are frames
        if not frame_name.endswith('.bin'):
            continue
        print(frame_name)

        # Filter the frame against the background map
        kept_points = filter_points(Path(source_dir, frame_name), background_lookup_table)

        # Store the result in binary form under the same file name
        save_as_binary(kept_points, Path(target_dir, frame_name))


In [200]:
# Walk the stationary data root; each sub-directory is one sequence
p = Path(DATA_DIR_ROOT)

for dir in p.iterdir():
    # Anything that is not a folder is skipped
    if not dir.is_dir():
        continue
    print(dir)
    # Build the background map for this sequence ...
    background_distance_lookup_table = create_background_lookup_table(dir)
    # ... then filter and save each of its frames
    filter_frames(dir, background_distance_lookup_table)

..\stationary_data\2011_09_26_drive_0017_sync_0_to_113
0000000000.bin
0000000001.bin
0000000002.bin
0000000003.bin
0000000004.bin
0000000005.bin
0000000006.bin
0000000007.bin
0000000008.bin
0000000009.bin
0000000010.bin
0000000011.bin
0000000012.bin
0000000013.bin
0000000014.bin
0000000015.bin
0000000016.bin
0000000017.bin
0000000018.bin
0000000019.bin
0000000020.bin
0000000021.bin
0000000022.bin
0000000023.bin
0000000024.bin
0000000025.bin
0000000026.bin
0000000027.bin
0000000028.bin
0000000029.bin
0000000030.bin
0000000031.bin
0000000032.bin
0000000033.bin
0000000034.bin
0000000035.bin
0000000036.bin
0000000037.bin
0000000038.bin
0000000039.bin
0000000040.bin
0000000041.bin
0000000042.bin
0000000043.bin
0000000044.bin
0000000045.bin
0000000046.bin
0000000047.bin
0000000048.bin
0000000049.bin
0000000050.bin
0000000051.bin
0000000052.bin
0000000053.bin
0000000054.bin
0000000055.bin
0000000056.bin
0000000057.bin
0000000058.bin
0000000059.bin
0000000060.bin
0000000061.bin
0000000062.bin
