In [1]:
import argparse
import logging
import sys
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torch.nn.functional as F

from torch.utils.tensorboard import SummaryWriter

import lidar_data_processing

import pandas as pd
from laspy.file import File
from pickle import dump, load


import torch.nn as nn
import torch.optim as optim
import torch.utils.data as udata
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline

In [2]:
# ---- Training data parameters ----

# Scan-line segmentation and filtering
scan_line_gap_break = 7000  # a scan_gap above this threshold marks the start of a new scan line
min_pt_count = 1730         # scan lines with fewer points are not used
max_pt_count = 2000         # scan lines with more points are not used

# Which scan lines feed the training set, and how they are split
starting_line = 1000        # first scan line used
num_scan_lines = 2000       # number of scan lines used as the training set
val_split = 0.2             # fraction of those lines held out for validation
seq_len = 64                # side length of a sample window

# Angle range of considered points (deg / 0.006)
starting_angle = 4500
ending_angle = -4500

# Spacing between consecutive sample windows: points within a line, then across scan lines
stride_inline = 5
stride_across_lines = 3

# Note: x_scaled, y_scaled, and z_scaled MUST be the first 3 features
# and miss_pts_before MUST be the last feature
feature_list = [
    'x_scaled', 'y_scaled', 'z_scaled',
    'scan_line_idx',
    'scan_angle_deg', 'abs_scan_angle_deg',
    'miss_pts_before',
]

In [3]:
# Build the scan-line tensor, dimension [num_scan_lines, pts_per_line, num_features],
# and drop its final scan line.
# NOTE(review): presumably the last line is dropped because it is the incomplete one
# reported in the cell output - confirm.
scan_line_tensor = lidar_data_processing.create_scan_line_tensor()[:-1, :, :]



Not enough points in line 3983


In [21]:
# Inspect the tensor dimensions ([num_scan_lines, pts_per_line, num_features])
scan_line_tensor.shape

1

Note: `stride_inline` and `stride_across_lines` only affect where sample squares are placed. The squares themselves do not skip lines in either dimension.

In [5]:
def generate_sample_idx_2d(data,
                       starting_line,
                       num_scan_lines,
                       seq_len,
                       stride_inline,
                       stride_across_lines,
                       sc):
    '''
    Lists the top-left corners [line, point] of the seq_len x seq_len patches
    that qualify as samples.

    Inputs:
        data: 3-Tensor indexed as [scan line, point in line, feature]; the last
              feature is the one tested to decide whether a patch qualifies.
        starting_line: first scan line at which a patch may start.
        num_scan_lines: number of scan lines, counted from starting_line, that
              the patches may cover.
        seq_len: side length of a patch, in both dimensions.
        stride_inline: step between patch starts along a scan line.
        stride_across_lines: step between patch starts across scan lines.
        sc: not used by this function.
    Returns:
        List of [i, j] pairs. A patch is listed when the minimum of its last
        feature is exactly 0.
        NOTE(review): if that feature is miss_pts_before with -1 marking infilled
        points, this selects patches WITHOUT infilled points - confirm intent.
    '''
    # How many patch starts fit along a line / across lines for the given strides
    starts_per_line = int((data.shape[1]-seq_len)/stride_inline+1)
    starts_across_lines = int((num_scan_lines - seq_len)/stride_across_lines+1)

    line_starts = range(starting_line,
                        starting_line + stride_across_lines*starts_across_lines,
                        stride_across_lines)
    point_starts = range(0, starts_per_line*stride_inline, stride_inline)

    # Row-major scan over candidate corners, keeping those whose patch passes the test
    return [[row, col]
            for row in line_starts
            for col in point_starts
            if data[row:row+seq_len, col:col+seq_len, -1].min() == 0.]


def sliding_windows2d(data, seq_len, seq_per_line, line_num, stride_inline, sc, x):
    '''Slides a seq_len x seq_len window along a band of scan lines and collects the
    windows that qualify as samples.

    Inputs:
        data: 3-Tensor indexed as [scan line, point in line, feature]; only the
              first seq_len scan lines are used.
        seq_len: side length of each window.
        seq_per_line: number of window positions along the line.
        line_num: not used; kept so existing callers keep working.
        stride_inline: number of points between consecutive window starts.
        sc: not used; kept so existing callers keep working.
        x: list that qualifying windows are appended to (modified in place).
    Returns:
        The same list x. Each appended window has its last feature column
        (miss_pts_before) removed to conserve memory.
    '''
    for start_idx in range(0, seq_per_line*stride_inline, stride_inline):
        # Slice at the strided start position. The previous code sliced at the
        # enumerate counter, which ignored stride_inline and moved one point at a time.
        _x = data[:seq_len, start_idx:start_idx+seq_len, :]
        # Keep the window only when the minimum of its last feature is exactly 0
        if _x[:, :, -1].min() == 0.:
            x.append(_x[:, :, :-1])  # Remove miss_pts_before
    return x

def min_max_tensor(tensor):
    ''' Fits a MinMaxScaler on the rows of a 3-D tensor that are not flagged as infilled.

    Inputs:
        tensor: 3-D tensor indexed as [scan line, point, feature]. Rows whose last
                feature equals -1 are excluded from the fit.
    Returns:
        The fitted MinMaxScaler. It is fitted on every feature except the last one.
        The tensor itself is not scaled here.
    '''
    print("tensor shape: ",tensor.shape)
    # Boolean mask over [line, point]: True where the row is not an infilled point
    not_infilled = tensor[:,:,-1] !=-1.
    observed = tensor[not_infilled]
    print("min miss_pts: ",observed[:,-1].min())
    observed = observed.contiguous()
    # fit() returns the scaler itself
    return MinMaxScaler().fit(observed[:,:-1])

In [6]:
# Split the scan lines into train and test, then generate x_train and x_val
num_scan_lines_train = int(num_scan_lines*(1-val_split))
num_scan_lines_val = num_scan_lines - num_scan_lines_train

# Fit the scaler on the training scan lines only (validation lines are not seen by the fit)
sc = min_max_tensor(scan_line_tensor[starting_line:starting_line+num_scan_lines_train,:,:])

# Patch corners for training: lines [starting_line, starting_line + num_scan_lines_train)
train_idx_list = generate_sample_idx_2d(scan_line_tensor,starting_line,
                                        num_scan_lines_train,seq_len,stride_inline,
                                        stride_across_lines,sc)

# Patch corners for validation: the num_scan_lines_val lines that follow the training lines
valid_idx_list = generate_sample_idx_2d(scan_line_tensor,starting_line+num_scan_lines_train,
                                        num_scan_lines_val,seq_len,stride_inline,
                                        stride_across_lines,sc)

# sc.transform of scan_line_tensor, after identifying valid indices
# Remove miss_pts_before
# NOTE(review): this rebinds scan_line_tensor without its last feature, so re-running this
# cell strips one more feature each time. Restart the kernel before re-running.
scan_line_tensor = scan_line_tensor[:,:,:-1]
# Flatten to [rows, features] for the scaler, then restore the original 3-D shape
slt_out = torch.Tensor(sc.transform(scan_line_tensor.reshape(-1,scan_line_tensor.shape[-1])).reshape(scan_line_tensor.shape))

tensor shape:  torch.Size([1600, 1730, 7])
min miss_pts:  tensor(0.)


In [17]:
# Compare the minimum of the scaled tensor with the minimum of the unscaled one.
# NOTE(review): the recorded output shows a scaled minimum of about -890, far outside [0, 1].
# The scaler was fitted only on non-infilled rows of the training lines but is applied to
# every row, so values outside the fitted range are expected - confirm this is acceptable.
print(slt_out.min())
print(scan_line_tensor.min())

tensor(-890.1019)
tensor(-26.8140)


In [None]:
# Save the scaled tensor, both index lists and the fitted scaler.
# All four files go into the same directory, named after the patch size (seq_len x seq_len).
_out_dir = '../lidar_data/{0}_{0}/'.format(seq_len)
torch.save(slt_out, _out_dir + 'scan_line_tensor.pts')
torch.save(train_idx_list, _out_dir + 'train_idx_list.pts')
torch.save(valid_idx_list, _out_dir + 'valid_idx_list.pts')
torch.save(sc, _out_dir + 'sc.pts')

## Load the data

In [None]:
def add_missing_pts(first_return_df):
    '''Inserts one placeholder row for every missing point and interpolates its scan angle.

    Inputs:
        first_return_df: DataFrame with at least the columns 'miss_pts_before',
            'scan_angle' and 'scan_angle_deg'. A row with miss_pts_before = n > 0 gets
            n placeholder rows inserted directly before it. The input is not modified.
            Assumes consecutive integer index labels, so that the fractional labels
            used for the placeholders sort between a row and its predecessor.
    Returns:
        New DataFrame with a fresh 0..N-1 index. Placeholder rows have mask = 0,
        miss_pts_before = -1 (so infilled points can be identified), linearly
        interpolated 'scan_angle' / 'scan_angle_deg', and 0 in every other column.
    '''
    # Rows that follow a gap, mapped to the number of points missing before them (no max)
    miss_pt_ser = first_return_df[first_return_df['miss_pts_before']>0]['miss_pts_before']
    total_missing = int(miss_pt_ser.sum())
    # One all-zero row per missing point, with the same columns as the input
    miss_pts_arr = np.zeros([total_missing, first_return_df.shape[1]])
    # Index labels for the placeholder rows, filled in below
    indices = np.ones(total_missing)

    # Labels index-1+.01, index-1+.01+1/n, ... fall strictly between the previous row
    # and the row after the gap, so sorting by index slots the placeholders in order
    i = 0
    for index, row in zip(miss_pt_ser.index, miss_pt_ser):
        n_missing = int(row)
        indices[i:i+n_missing] = index + np.arange(row)/row-1+.01
        i += n_missing

    miss_pts_df = pd.DataFrame(miss_pts_arr, index=indices, columns=first_return_df.columns)
    miss_pts_df['mask'] = [0]*miss_pts_df.shape[0]
    # -1 marks infilled points so they can be identified later
    miss_pts_df['miss_pts_before'] = -1
    # Fill scan fields with NaN so we can interpolate them
    for col in ['scan_angle','scan_angle_deg']:
        miss_pts_df[col] = [np.nan]*miss_pts_df.shape[0]

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    full_df = pd.concat([first_return_df, miss_pts_df], ignore_index=False)
    # Resort so that the missing points are interspersed, and then reset the index
    full_df = full_df.sort_index().reset_index(drop=True)
    # Interpolate the scan angles
    full_df[['scan_angle','scan_angle_deg']] = full_df[['scan_angle','scan_angle_deg']].interpolate()

    return full_df

In [None]:
# Load the first-return points from a pickled DataFrame.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
first_return_df = pd.read_pickle("../../lidar/Data/parking_lot/first_returns_modified_164239.pkl")

In [None]:
# miss_pts_before is the count of missing points before the point in question.
# It is computed as round(scan_gap / -5 - 1); note the divisor is -5, not 5.
# NOTE(review): presumably scan_gap is negative within a scan line - confirm.
first_return_df['miss_pts_before'] = round((first_return_df['scan_gap']/-5)-1)
# Clamp negative counts to zero. max(0, pt) also turns NaN into 0, because NaN > 0 is False.
first_return_df['miss_pts_before'] = [max(0,pt) for pt in first_return_df['miss_pts_before']]

# Add 'mask' column, set to one by default
first_return_df['mask'] = [1]*first_return_df.shape[0]

In [None]:
# Feature: magnitude of the scan angle in degrees, regardless of sign
first_return_df['abs_scan_angle_deg'] = first_return_df['scan_angle_deg'].abs()

#### Extract tensor of scan lines

In [None]:
# Points per scan line (count of non-null gps_time values for each scan_line_idx)
scan_line_pt_count = first_return_df.groupby('scan_line_idx')['gps_time'].count()

# Scan lines whose point count lies strictly between min_pt_count and max_pt_count
valid_scan_line_idx = scan_line_pt_count[
    (scan_line_pt_count > min_pt_count) & (scan_line_pt_count < max_pt_count)
].index

# Keep only the points that belong to those scan lines
first_return_df_valid = first_return_df.loc[
    first_return_df['scan_line_idx'].isin(valid_scan_line_idx)
]

# # Minimum and maximum scan_angle_deg per scan line
# min_scan_angle_deg = first_return_df.groupby('scan_line_idx').min()['scan_angle_deg']
# max_scan_angle_deg = first_return_df.groupby('scan_line_idx').max()['scan_angle_deg']

# Identify the indices for points at end of scan lines
# scan_break_idx = first_return_df[(first_return_df['scan_gap']>scan_line_gap_break)].index

In [None]:
# Fill in missing points: insert placeholder rows wherever miss_pts_before > 0
first_return_df_valid = add_missing_pts(first_return_df_valid)

In [None]:
# Per column: does any NaN remain after infilling and interpolation?
first_return_df_valid.isna().any()

In [None]:
# After infilling, recount the points per scan line and drop the lines that still have
# min_pt_count points or fewer. Only the lower bound is applied this time.
scan_line_pt_count = first_return_df_valid.groupby('scan_line_idx')['gps_time'].count()

# Scan lines with more than min_pt_count points
valid_scan_line_idx = scan_line_pt_count.index[scan_line_pt_count > min_pt_count]

# Keep only the points that belong to those scan lines
first_return_df_valid = first_return_df_valid.loc[
    first_return_df_valid['scan_line_idx'].isin(valid_scan_line_idx)
]

In [None]:
# Indices for the point closest to starting_angle in each scan line.
# For every scan line, the frame is filtered to that line and argmin is taken over
# |scan_angle - starting_angle|.
# NOTE(review): in current pandas, Series.argmin returns the integer POSITION within the
# filtered per-line Series, whereas older releases returned the index label. The result is
# later passed to first_return_df_valid.iloc[...] on the full frame, so verify that the
# values really are positions in the full frame for the pandas version in use.
starting_idx = [abs(first_return_df_valid[first_return_df_valid['scan_line_idx']==line_idx] \
     ['scan_angle']-starting_angle).argmin() for line_idx in first_return_df_valid['scan_line_idx'].unique()]

# Remove the nan idx corresponding to zero scan line
starting_idx = [x for x in starting_idx if str(x) != 'nan']

In [None]:
# Create Tensor of shape [number of scan lines, min_pt_count, number of features].
# NOTE(review): it is initialised with random normal values, so any line that fails
# below keeps random noise instead of real data - confirm such lines are dropped later.
scan_line_tensor = torch.randn([len(starting_idx),min_pt_count,len(feature_list)])

# Loop thru scan lines
for line,line_idx in enumerate(starting_idx):
        # Fill the appropriate line in scan_line_tensor
        # Index label of the row at position line_idx
        name = first_return_df_valid.iloc[line_idx].name
        try:
            # Label-based slice of min_pt_count consecutive labels starting at name
            # (.loc slicing includes both end points)
            scan_line_tensor[line,:,:] = torch.Tensor(first_return_df_valid.loc\
                                      [name:name+min_pt_count-1][feature_list].values)
        except RuntimeError:
            # Raised when the slice does not hold exactly min_pt_count rows
            print("Not enough points in line {}".format(line))


In [None]:
# Drop both DataFrame names; only scan_line_tensor is needed from here on
del first_return_df, first_return_df_valid

In [None]:
# Estimate how many files generate_samples2d would write for the training lines.

# Function inputs
samples_per_file = 1000
# Split the scan lines into train and test, then generate x_train and x_val
# NOTE: this overrides the num_scan_lines value set in the parameter cell
num_scan_lines = 100
num_scan_lines_train = int(num_scan_lines*(1-val_split))
num_scan_lines_val = num_scan_lines - num_scan_lines_train


# Function is called on just the training scan_lines
data = scan_line_tensor[starting_line:starting_line+num_scan_lines_train]

# IN THE FUNCTION
# Number of samples per scan line, accounting for stride
seq_per_line = int((data.shape[1]-seq_len)/stride_inline+1)

# Number of samples across scan lines, accounting for stride.
# Uses the number of lines actually in `data` (the training lines), as generate_samples2d
# does. The previous code used num_scan_lines, which also counts the validation lines.
seq_across_lines = int((data.shape[0] - seq_len)/stride_across_lines+1)

sample_count = seq_per_line*seq_across_lines

# Number of files needed at samples_per_file samples each
print(sample_count/samples_per_file)

In [None]:
# Scale the first training sample: flatten to [rows, features], transform, restore the shape.
# NOTE(review): x_train_list is not defined in any cell shown in this notebook, so this cell
# fails on a fresh kernel.
x_trans = sc.transform(x_train_list[0].reshape(-1,x_train_list[0].shape[2])).reshape(x_train_list[0].shape)

In [None]:
# Drop the name scan_line_tensor; later cells can no longer reference it
del scan_line_tensor

In [None]:
# Load the raw validation samples together with the scaler (stored as a two-element object).
# NOTE(review): torch.load unpickles the file; only load files from a trusted source.
x_val_list,sc = torch.load('x_val_raw.pt')
# torch.save(x_val_list_norm,'x_val.pt')
# torch.save([x_val_list,sc],'x_val_raw.pt')

In [None]:
# Scale every validation sample: flatten to [rows, features], transform, restore the shape
x_val_list_norm = [sc.transform(x.reshape(-1,x.shape[2])).reshape(x.shape) for x in x_val_list]

In [None]:
# Number of scaled validation samples
len(x_val_list_norm)

In [None]:
# NOTE(review): `blocks` is not referenced by any other cell shown in this notebook
blocks = 10


### Old version of functions

In [None]:
# Original version
def generate_samples2d(data,seq_len,
                       stride_inline,
                       stride_across_lines,
                       sc,
                       samples_per_file = 2000,
                       file_dir = '../lidar_data/train/', filename = 'x_train_'):
    '''
    Extracts seq_len x seq_len sample windows from the scan-line tensor, scales them
    with sc, and writes each sample to its own file.

    Inputs:
        data: 3-Tensor indexed as [scan line, point in line, feature]; the last
              feature is used by sliding_windows2d to decide which windows qualify.
        seq_len: side length of a sample window.
        stride_inline: step between window starts along a scan line.
        stride_across_lines: step between window starts across scan lines.
        sc: fitted scaler exposing transform() on a [rows, features] array.
        samples_per_file: not used; kept so existing callers keep working.
        file_dir, filename: each sample is saved to file_dir + filename + <counter> + '.pts'.
    Returns:
        None. The files written are the only result.
    '''
    # Number of samples per scan line, accounting for stride
    seq_per_line = int((data.shape[1]-seq_len)/stride_inline+1)
    # Number of samples across scan lines, accounting for stride
    seq_across_lines = int((data.shape[0] - seq_len)/stride_across_lines+1)

    file_count = 0
    # Cycle through the starting scan lines, striding by stride_across_lines
    for j,line_idx in enumerate(range(0,(stride_across_lines*seq_across_lines),stride_across_lines)):
        # Start from a fresh list for every band. The previous code kept appending to one
        # list, so each iteration re-scaled and re-wrote the samples of all earlier bands.
        x_list = sliding_windows2d(data[line_idx:line_idx+seq_len,:,:], \
                                   seq_len,seq_per_line,j, stride_inline, sc, [])
        # Scale each sample: flatten to [rows, features], transform, restore the shape
        x_out = [sc.transform(x.reshape(-1,x.shape[2])).reshape(x.shape) for x in x_list]
        print("x_out length: ",len(x_out))
        # Write each sample to its own file (last sample first, as before)
        while len(x_out)>0:
            sample = x_out.pop()
            torch.save(sample,file_dir+filename+str(file_count)+'.pts')
            file_count+=1
        print("Wrote files for line {}".format(j))
    
def make_one_tensor(x_list,train=True,sc=None):    
    '''Combines a list of 3-D sample tensors into one 4-D tensor, drops the last feature,
    scales it, and moves the feature axis to position 1.

    NOTE(review): this function does not match the min_max_tensor definitions in this
    notebook. Both of them take a single argument and return only the scaler, whereas the
    calls below unpack two return values and, when train is False, pass a second argument.
    As written, both branches raise a TypeError. Kept for reference only.
    '''
    x = tensor_list_combine(x_list)  

    # Remove the 'miss_pts_before' column
    x = x[:,:,:,:-1]

    # Different for train and val
    # Standardize the data 
    if train:
        x_norm_dim, sc = min_max_tensor(x)
    else:
        x_norm_dim, sc = min_max_tensor(x,sc)
    
    # Move the feature axis from last to second: [sample, feature, dim1, dim2]
    x_norm = x_norm_dim.permute([0,3,1,2])

    return x_norm, sc

def sliding_windows2d(data, seq_len, seq_per_line, line_num, stride_inline, sc, x):
    '''Slides a seq_len x seq_len window along a band of scan lines and collects the
    windows that qualify as samples.

    Inputs:
        data: 3-Tensor indexed as [scan line, point in line, feature]; only the
              first seq_len scan lines are used.
        seq_len: side length of each window.
        seq_per_line: number of window positions along the line.
        line_num: not used; kept so existing callers keep working.
        stride_inline: number of points between consecutive window starts.
        sc: not used; kept so existing callers keep working.
        x: list that qualifying windows are appended to (modified in place).
    Returns:
        The same list x. Each appended window has its last feature column
        (miss_pts_before) removed to conserve memory.
    '''
    for start_idx in range(0, seq_per_line*stride_inline, stride_inline):
        # Slice at the strided start position. The previous code sliced at the
        # enumerate counter, which ignored stride_inline and moved one point at a time.
        _x = data[:seq_len, start_idx:start_idx+seq_len, :]
        # Keep the window only when the minimum of its last feature is exactly 0
        if _x[:, :, -1].min() == 0.:
            x.append(_x[:, :, :-1])  # Remove miss_pts_before
    return x

def tensor_list_combine(tens_list):
    ''' Stacks a list of equally sized 3-D tensors into one 4-D tensor.

    Element k of the list becomes slice [k, :, :, :] of the result. The result is
    sized from the first element of the list.
    '''
    template = tens_list[0]
    dim0, dim1, dim2 = template.shape[0], template.shape[1], template.shape[2]
    # Uninitialised buffer; every slice is overwritten below
    new_tens = torch.Tensor(len(tens_list), dim0, dim1, dim2)
    for position in range(len(tens_list)):
        new_tens[position].copy_(tens_list[position])
    return new_tens

def min_max_tensor(tensor):
    ''' Fits a MinMaxScaler on the rows of the tensor that are not flagged as infilled.

    Rows whose last feature equals -1 are excluded. The scaler is fitted on ALL
    remaining columns, including the last one, and is returned. The tensor itself
    is not scaled, and there is no separate train / validation mode.
    NOTE(review): the indexing assumes a 3-D [line, point, feature] tensor - confirm.
    '''
    # Boolean mask over the first two axes: True where the row is not an infilled point
    not_infilled = tensor[:,:,-1] !=-1.
    observed = tensor[not_infilled].contiguous()

    # fit() returns the scaler itself
    return MinMaxScaler().fit(observed)