In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from laspy.file import File
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

%matplotlib inline

In [None]:
# Load the data using laspy
# Opens the full-waveform LAS file for scan 172201 read-only. The handle is left
# open on purpose: later cells still read inFile_fwf.header.
# (Disabled: the point-cloud file for scan 164239.)
#inFile_cloud = File('../../Data/164239/10552_NYU_M2 - Scanner 1 - 190511_164239_1 - originalpoints.las', mode='r')
inFile_fwf = File('../../Data/172201/10552_NYU_M2 - Scanner 1 - 190511_172201_1 - originalpoints.las', mode='r')

# raw is a 1-D numpy array, each entry is a point
# (raw_to_df below unwraps each entry with entry[0].tolist()).
#raw_cloud = inFile_cloud.get_points()
raw_fwf = inFile_fwf.get_points()
#print("Number of points, point cloud: ",len(raw_cloud))
print("Number of points, full waveform: ",len(raw_fwf))

In [None]:
# Minimum coordinates stored in the LAS header. Elsewhere in this notebook
# index 0 is read as the east-west value and index 1 as the north-south value.
inFile_fwf.header.min

In [2]:
# Column names for full-waveform point records, in record order.
# Corresponds to LAS 1.4 Point Data Record Format 9
# Bit-level notes below follow the LAS 1.4 layout for formats 6-10.
columns_fwf =[ 
    'X', # Raw integer X coordinate of the point (must be adjusted by offset and scale factor)
    'Y', # Raw integer Y coordinate (same adjustment)
    'Z', # Raw integer Z coordinate (same adjustment)
    'intensity', #Intensity of the return, scaled to a 0-65,535 scale
    'flag_byte', #Return number (low 4 bits) and # of returns (high 4 bits)
    'classification_flags', #Various flags about the point: classification flags, scanner channel, scan direction flag (+ or -), edge of flight line flag
    'classification_byte', #Class code, only if the point has been classified: vegetation, etc.
    'user_data', 
    'scan_angle', # Down is 0.0, each unit is 0.006 degrees,
    #so value between -30,000 and 30,000, or (180deg/0.006deg)
    #Spec says this incorporates the roll of the aircraft
    'pt_src_id', #Unique ID for the source of the point
    'gps_time', #GPS Week Time (seconds, reset each week) or Adj Std GPS Time
    'wave_packet_desc_index', # Indicates which waveform packet descriptor is applied.
    'byte_offset_to_waveform_data', # Locates the WF data in EVLR or .wdp file.
    'wavefm_pkt_size', # Size of the waveform packet
    'return_point_waveform_loc', # Time in picoseconds 
    'x_t', # Form parametric location: X = X_0 + x_t*t
    'y_t', # (X_0,Y_0,Z_0) is the anchor point associated with the waveform
    'z_t' ]

# Column names for plain point-cloud records, in record order.
# Corresponds to LAS 1.4 Point Data Record Format 6
# These are the same first eleven names as columns_fwf, without the
# wave-packet fields that follow gps_time there.
columns_point_cloud = [
    'X',
    'Y',
    'Z',
    'intensity',
    'flag_byte',
    'classification_flags',
    'classification_byte',
    'user_data',
    'scan_angle',
    'pt_src_id',
    'gps_time']

In [3]:
def raw_to_df(raw,column_names):
    '''Convert the raw output of laspy.File.get_points() into a pandas DataFrame.

    Each entry of raw is unwrapped with entry[0].tolist() to give one row of values;
    column_names labels the resulting columns, in order.'''
    rows = []
    for record in raw:
        # record[0] holds the point's fields; tolist() turns them into plain Python values
        rows.append(record[0].tolist())
    return pd.DataFrame(rows, columns=column_names)

def scale_and_offset(df,header,append_to_df=False):
    '''Apply the LAS header scale and offset to the raw X, Y, Z columns.

       df is the dataframe produced by raw_to_df; header is the laspy header, whose
       .scale and .offset are applied column-wise: X_adj = X*X_scale + X_offset.

       With append_to_df=False (default) an nx3 dataframe with columns X, Y, Z holding
       the adjusted values is returned and df is left alone.
       With append_to_df=True the adjusted values are written into df itself as
       x_scaled, y_scaled, z_scaled and that same df object is returned.

       Original note: Brooklyn LiDAR readings appear to be in feet, and use NAVD 88 in
       the vertical and New York Long Island State Plane Coordinate System in the
       horizontal (the note said "NAD 33"; presumably NAD 83 -- TODO confirm).'''
    adjusted = df[['X','Y','Z']]*header.scale + header.offset
    if not append_to_df:
        return adjusted
    # Mutates the caller's dataframe on purpose: adds one *_scaled column per axis.
    for axis in ('X','Y','Z'):
        df[axis.lower() + '_scaled'] = adjusted[axis]
    return df

#INACTIVE FUNCTIONS
def split_and_strip(raw):
    '''Turn each point of a laspy File.get_points() ndarray into a list of strings.

    Every row is rendered with str(), cut on commas, and each piece has any leading or
    trailing "(", ")", "," and space characters removed.
    Returns a list of lists of strings, one inner list per row.'''
    return [
        [piece.strip("( , )") for piece in str(row).split(',')]
        for row in raw
    ]

def row_clean(string_list):
    '''Convert the list of lists produced by split_and_strip() into a numeric array.

    The last entry of every row is dropped and the remaining entries are parsed with
    float() (which tolerates surrounding spaces). Returns a numpy array where each
    row is a point.'''
    return np.array([
        [float(token) for token in row[:-1]]
        for row in string_list
    ])


In [None]:
# Create the dataframe for the full-waveform file.
# This line used to be commented out (twice), which left df_fwf undefined and made
# the call below fail with NameError on a fresh kernel.
# It is slow on large files: raw_to_df visits every point in a Python loop.
df_fwf = raw_to_df(raw_fwf,columns_fwf)

# Generate adjusted coordinates for point cloud.  FWF is identical.
# append_to_df=True adds x_scaled / y_scaled / z_scaled to df_fwf and returns it.
las_points = scale_and_offset(df_fwf,inFile_fwf.header,append_to_df=True)

In [None]:
# Load a cached point table for scan 164239 instead of recomputing it.
# NOTE: this rebinds las_points, replacing the table built in the previous cell
# (which comes from the 172201 file) -- the two are different scans.
# NOTE(review): read_pickle can execute arbitrary code; only load pickles you wrote yourself.
# (Disabled: the line that wrote the cache.)
#las_points.to_pickle("../../Data/pickles/las_points_164239.pkl")
las_points = pd.read_pickle("../../Data/pickles/las_points_164239.pkl")

In [None]:
# Largest value in the raw 'X' column (presumably still the unscaled integer
# coordinate; the adjusted value lives in 'x_scaled').
las_points['X'].max()

## Nearby Points
Looking at points around the end of the scan angle (nearby_df), as well as points near the middle of the scan angle, directly down (down_df).  There seems to be some noise in the points at the end of the scan angle.

In [None]:
# A handful of points at the end of the scan angle of file las_points_164239.pkl
# Three runs of consecutive row positions, flattened into one list.
nearby_points = [
    position
    for first, stop in ((32645,32653),(33760,33767),(34870,34877))
    for position in range(first, stop)
]
# Positional lookup (iloc), so these are row positions, not index labels.
nearby_df = las_points.iloc[nearby_points]
# Time range
print("Time range: ",nearby_df['gps_time'].max() - nearby_df['gps_time'].min())

In [None]:
# Filter for points looking (almost) straight down: scan_angle strictly between
# -60 and 60 raw units.
# NOTE(review): at the 0.006 degrees per unit noted next to 'scan_angle' in
# columns_fwf, 60 units is about 0.36 degrees, not the "-1 to 1 degrees" this comment
# used to state (1 degree would be ~167 units) -- confirm which one was intended.
down_df = las_points[(las_points['scan_angle']<60)&(las_points['scan_angle']>-60)]

In [None]:
#Helicopter scan lines
# y_scaled against gps_time for the slice [10000:10100] of down_df,
# drawn as blue "x" markers with no connecting line.
plt.plot(down_df['gps_time'][10000:10100],down_df['y_scaled'][10000:10100],'xb')

In [None]:
# Adjusted coordinates of the hand-picked points in nearby_df.
nearby_df[['x_scaled','y_scaled','z_scaled']]

### Looking for specific Lat/Lon

In [None]:
def convert_lat_lon_to_dec(degrees,minutes,seconds):
    '''Convert an angle given as degrees / minutes / seconds to decimal degrees.

    minutes and seconds are magnitudes (expected to be non-negative).
    A negative degrees value marks a south latitude or west longitude, so its sign is
    applied to the whole angle: (-74, 1, 33.0) gives about -74.0258. Adding the
    minutes and seconds to a negative degrees value directly would move the result
    toward zero (about -73.9742).
    For non-negative degrees the result is unchanged: degrees + minutes/60 + seconds/3600.
    An angle of zero degrees cannot carry a sign this way; negate the result instead.'''
    sign = -1 if degrees < 0 else 1
    return sign * (abs(degrees) + (minutes/60) + (seconds/3600))
# Location of interest: 40 deg 38' 38.8" latitude, 74 deg 1' 33.0" longitude.
lat = convert_lat_lon_to_dec(40,38,38.8)
# The longitude is converted as a positive angle and then negated (*-1).
lon = convert_lat_lon_to_dec(74,1,33.0)*-1
print("Latitude: {:2.4f}".format(lat))
print("Longitude: {:2.4f}".format(lon))

## Find max North, South, East, West points
- Max North: 164039
- Max South: 163206
- Max East: 172201
- Max West: 194702

In [4]:
def max_cardinal_direction(filename,direction):
    '''Return the extreme header coordinate of a LAS file in one compass direction.

    filename  : path handed to laspy's File (opened read-only).
    direction : one of "North", "South", "East", "West" (case-sensitive).

    Returns header.max[1] for North, header.min[1] for South,
    header.max[0] for East and header.min[0] for West.

    Raises ValueError for any other direction (this used to return None silently).
    The direction is checked before the file is opened, and the file is always
    closed again before returning.'''
    # direction -> (header attribute, coordinate index)
    extent_lookup = {
        "North": ("max", 1),
        "South": ("min", 1),
        "East": ("max", 0),
        "West": ("min", 0),
    }
    if direction not in extent_lookup:
        raise ValueError(
            "direction must be one of 'North', 'South', 'East', 'West'; got {!r}".format(direction))
    bound, axis = extent_lookup[direction]

    # Load the file
    inFile = File(filename, mode='r')
    try:
        return getattr(inFile.header, bound)[axis]
    finally:
        # Release the handle; the original left every opened file open.
        inFile.close()

In [5]:
# File names of the scans listed above as holding the northern-, southern-,
# eastern- and western-most points. They are joined to
# '../../Data/max_coordinate_point_clouds/' where they are used.
max_north_file = '10552_NYU_M2 - Scanner 1 - 190511_164039_1 - originalpoints.laz'
max_south_file = '10552_NYU_M2 - Scanner 1 - 190511_163206_1 - originalpoints.laz'
max_east_file = '10552_NYU_M2 - Scanner 1 - 190511_172201_1 - originalpoints.laz'
# NOTE(review): this name says "NYU_M3" while the other three say "NYU_M2" -- confirm it is not a typo.
max_west_file = '10552_NYU_M3 - Scanner 1 - 190511_194702_1 - originalpoints.laz'

# Stays empty for now: the loop that would fill it is disabled below.
maxes=[]

# for card in zip([max_north_file,max_south_file,max_east_file,max_west_file],['North','South','East','West']):
#     maxes.append(max_cardinal_direction('../../Data/max_coordinate_point_clouds/'+card[0],card[1]))

In [7]:
# Load the northern-most scan and build its point table with the
# point-cloud (format 6) column names.
inFile = File('../../Data/max_coordinate_point_clouds/'+max_north_file, mode='r')
raw = inFile.get_points()
df = raw_to_df(raw,columns_point_cloud)
# NOTE: rebinds las_points (earlier cells used that name for other scans).
# With append_to_df=True the *_scaled columns are added to df itself and df is
# returned, so df and las_points are the same object from here on.
las_points = scale_and_offset(df,inFile.header,append_to_df=True)

In [None]:
# Span of gps_time covered by this file (seconds, going by the gps_time column note).
df['gps_time'].max() - df['gps_time'].min()

In [None]:
# Notes recorded from earlier runs. These were bare text in a code cell, which is
# a SyntaxError when the notebook is run top to bottom.
# 57 seconds
# 74 seconds, 19M points, ~2200 pts/scan


In [None]:
# Scan angle over the slice [1124780:1144780] (20,000 points), plotted against the
# Series index.
plt.plot(df['scan_angle'][1124780:1144780])

In [16]:
def points_per_scan(scan_angle, jump_threshold=4000):
    '''Estimate the number of points in one scan line.

    A new scan is counted wherever the scan angle rises by more than jump_threshold
    from one point to the next; the number of points is then divided by that count.

    scan_angle     : Series (or any 1-D sequence) of scan angles in acquisition order.
                     Values are read by position, so a slice of a larger Series whose
                     index does not start at 0 works too.
    jump_threshold : minimum upward step, in scan_angle units, that marks the start
                     of a new scan. Defaults to 4000, the value previously hard-coded.

    Prints both figures and returns (points_per_scan, scan_count).
    points_per_scan is nan when no jump is found, because the ratio is undefined.'''
    # float64 so that differences cannot wrap around if the input is a small integer dtype
    angles = np.asarray(scan_angle, dtype=np.float64)
    # One vectorised pass instead of a Python-level loop over every point.
    scan_count = int(np.count_nonzero(np.diff(angles) > jump_threshold))
    points_per = len(angles)/scan_count if scan_count else float('nan')
    print("Number of scans: {}".format(scan_count))
    print("Points per scan: {:2.2f}".format(points_per))
    return points_per, scan_count

def scans_per_second(gps_time_series,scan_count):
    '''Print the scan rate and point rate of a run and return the scan rate.

    gps_time_series : Series of GPS timestamps; the run time is its max minus its min.
    scan_count      : number of scans in the run (e.g. from points_per_scan).

    Returns scan_count divided by the run time.'''
    elapsed = gps_time_series.max() - gps_time_series.min()
    report = (
        ("Scans per second: {:2.2f}", scan_count),
        ("Points per second: {:2.2f}", len(gps_time_series)),
    )
    for template, quantity in report:
        print(template.format(quantity/elapsed))
    return scan_count/elapsed

In [31]:
def distance_between_points(df):
    '''Print the average horizontal distance between consecutive points.

    df must have 'x_scaled' and 'y_scaled' columns. Rows are paired by position
    (row k with row k+1), so any slice of a larger frame can be passed regardless of
    its index labels. With fewer than two rows the average is printed as nan.

    Returns None; the result is only printed.

    Still to do (original note): Need to remove the jumps -- presumably the large
    steps between the end of one scan line and the start of the next, which are
    currently included in the average.'''
    x_scaled = np.asarray(df['x_scaled'], dtype=float)
    y_scaled = np.asarray(df['y_scaled'], dtype=float)
    # np.diff gives next-minus-current; the sign does not matter once squared.
    x_dist_to_next = np.diff(x_scaled)
    y_dist_to_next = np.diff(y_scaled)
    eucl_dist_to_next = np.sqrt(x_dist_to_next**2+y_dist_to_next**2)
    mean_dist = eucl_dist_to_next.mean() if eucl_dist_to_next.size else float('nan')
    print("Avg Euclidean distance between points: {:2.2f}".format(mean_dist))

In [22]:
# Same call as in the cell that loaded this file: it rewrites the x_scaled /
# y_scaled / z_scaled columns of df in place and returns df. Then preview the first rows.
las_points = scale_and_offset(df,inFile.header,append_to_df=True)
las_points.head()

Unnamed: 0,X,Y,Z,intensity,flag_byte,classification_flags,classification_byte,user_data,scan_angle,pt_src_id,gps_time,x_scaled,y_scaled,z_scaled
0,983139,-2293289,106763,2038,17,128,0,0,5129,53,60059.241431,977983.139,170706.711,106.763
1,983093,-2293299,105625,1932,17,0,0,0,5124,53,60059.241434,977983.093,170706.701,105.625
2,983129,-2293307,104404,1961,17,0,0,0,5119,53,60059.241436,977983.129,170706.693,104.404
3,975857,-2293632,115330,1109,33,0,0,0,5114,53,60059.241439,977975.857,170706.368,115.33
4,983093,-2293319,103126,477,34,0,0,0,5114,53,60059.241439,977983.093,170706.681,103.126


In [30]:
# Average spacing over the first 100,000 rows only (selected by position with iloc).
distance_between_points(las_points.iloc[:100000])

Avg Euclidean distance between points: 1.66
