In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from laspy.file import File

import sys
sys.path.insert(0,'..')
from point_density_functions import *

In [3]:
# Load nyc dataframe
nyc_file_dir = '../../Data/NYC_topo/'
nyc_pt_file = ['las_points_NYC_975172.lz']
nyc_df = pd.read_hdf(nyc_file_dir+nyc_pt_file[0])

#Sort by gps_time
nyc_df.sort_values(by=['adj_gps_time'],inplace=True)

In [9]:
def label_returns(las_df):
    '''
    Parses the flag_byte into number of returns and return number, adds these fields to las_df.
    Input - las_df - dataframe from .laz or .lz file
    Output - first_return_df - only the first return points from las_df.
           - las_df - input dataframe with num_returns and return_num fields added 
    '''
    
    las_df['num_returns'] = np.floor(las_df['flag_byte']/16).astype(int)
    las_df['return_num'] = las_df['flag_byte']%16
    first_return_df = las_df[las_df['return_num']==1]
    first_return_df = first_return_df.reset_index(drop=True)
    return first_return_df, las_df
# Note: this anlaysis doesn't really need this
_,nyc_df = label_returns(nyc_df)

In [29]:
gap_times = np.array(nyc_df.iloc[1:]['gps_time']) - np.array(nyc_df.iloc[:-1]['gps_time'])

In [40]:
# Largest time gap in days
# Topographic vs. Bathymetric

gap_times.max()/(60*60*24)

53.93011736111111

In [54]:
# Gaps between flights
gap_times[gap_times>30]

array([7.09614133e+02, 1.79690707e+02, 6.69055595e+02, 1.87644041e+02,
       6.62764546e+02, 4.65956214e+06, 3.60791444e+02, 3.35617147e+02,
       3.43717758e+02, 3.26562820e+02, 3.60826608e+02, 4.38196903e+02,
       3.61490675e+02, 2.08452254e+02, 3.89018295e+02, 1.90233535e+03])

In [52]:
nyc_df['scan_type'] = ['topographic']*nyc_df.shape[0]
nyc_df.loc[5818507:]['scan_type'] = ['bathymetric']*nyc_df.loc[5818507:].shape[0]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [66]:
# Add flight_id for nyc flights based on time gaps
nyc_df['flight_id'] = [0]*nyc_df.shape[0]
flight_id = 0
old_i = 0
for i,gap in enumerate(gap_times):
    if gap > 30:
        nyc_df.loc[old_i:i+1,'flight_id'] = flight_id
        flight_id+=1
        old_i = i
nyc_df.loc[old_i:,'flight_id'] = flight_id

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16


In [70]:
# Create hdf file
nyc_df.to_hdf(nyc_file_dir+'las_points_nyc_flight_id',key='df',complevel=1,complib='lzo')