# Number of Flight IDs - NYC 2017
This notebook identifies and appends flight IDs for the NYC 2017 dataset based on time gaps between points.

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
from laspy.file import File

import sys
sys.path.insert(0,'..')
from point_density_functions import *

In [None]:
# Load the NYC point dataframe from its HDF file
nyc_file_dir = '../../Data/NYC_topo/'
nyc_pt_file = ['las_points_NYC_975172.lz']
nyc_pt_path = nyc_file_dir + nyc_pt_file[0]
nyc_df = pd.read_hdf(nyc_pt_path)

# Order the points chronologically, in place.  The index labels are kept
# as loaded (no reset), so after this row labels and row positions may differ.
nyc_df.sort_values(by='gps_time', inplace=True)

In [None]:
# Histogram of scan_angle over the first 10,000,000 rows in the current
# (gps_time-sorted) order.
# NOTE(review): `plt` is not imported in the visible cells — presumably it is
# provided by the star import from point_density_functions; confirm.
plt.hist(nyc_df['scan_angle'][:10000000])

In [None]:
def label_returns(las_df):
    '''
    Split flag_byte into its two 4-bit halves and store them on las_df.

    The high half (flag_byte // 16) becomes num_returns and the low half
    (flag_byte % 16) becomes return_num.  las_df is modified in place.

    Input - las_df - dataframe with a flag_byte column (from .laz or .lz file)
    Output - first_return_df - re-indexed copy of the rows with return_num == 1
           - las_df - the same input dataframe, now carrying num_returns and return_num
    '''
    flag_bytes = las_df['flag_byte']

    # Floor division gives the high half; the cast keeps the column a plain int.
    las_df['num_returns'] = (flag_bytes // 16).astype(int)
    las_df['return_num'] = flag_bytes % 16

    is_first_return = las_df['return_num'] == 1
    first_return_df = las_df[is_first_return].reset_index(drop=True)
    return first_return_df, las_df
# Note: this analysis doesn't really need the return labels.
# The first-return subset is discarded; only the dataframe with the added
# num_returns / return_num columns is kept.
_,nyc_df = label_returns(nyc_df)

In [None]:
# Difference in gps_time between each row and the one before it, in the
# current row order: gap_times[i] = gps_time[i+1] - gps_time[i].
gap_times = np.diff(np.array(nyc_df['gps_time']))

In [None]:
# Largest time gap in days
# Topographic vs. Bathymetric
# (the division treats the gaps as seconds)

seconds_per_day = 24*60*60
gap_times.max()/seconds_per_day

In [None]:
# Display the column labels of the dataframe
nyc_df.columns

In [None]:
# Gaps between flights: every consecutive-point gap larger than 30
flight_gap_mask = gap_times > 30
gap_times[flight_gap_mask]

In [None]:
# Label every point as topographic by default, then mark the rows from index
# label 5818507 onward as bathymetric.
# The row slice and the column are selected in ONE .loc call: the previous
# chained form (`.loc[rows]['scan_type'] = ...`) assigned to a temporary
# object and was not guaranteed to modify nyc_df at all.
# NOTE(review): .loc slices by index LABEL, as the original did; if 5818507
# was meant as a row position in the sorted order, use .iloc instead — confirm.
nyc_df['scan_type'] = 'topographic'
nyc_df.loc[5818507:, 'scan_type'] = 'bathymetric'

In [None]:
# Add flight_id for nyc flights based on time gaps
# gap_times[i] is the gap between rows i and i+1 (by position, in sorted
# order), so a gap above 30 means row i+1 is the first point of a new flight.
# The flight id of a row is therefore the number of such gaps before it:
# row 0 is flight 0 and every later row gets the running count of large gaps.
#
# The ids are assigned as a plain array, i.e. by row position.  The earlier
# loop sliced with .loc (label-based, end-inclusive) using positional
# counters, which put the last point of each flight into the next flight and
# selects the wrong rows whenever index labels differ from row positions
# (the index is not reset after sorting).
new_flight_starts = gap_times > 30
flight_ids = np.concatenate(([0], np.cumsum(new_flight_starts)))
# Trim so an empty dataframe (no gaps, no rows) still gets a matching length.
nyc_df['flight_id'] = flight_ids[:nyc_df.shape[0]]
# Id of the last flight, kept under the same name the loop version left behind.
flight_id = int(new_flight_starts.sum())

In [None]:
# Write the dataframe (now including flight_id) to an lzo-compressed hdf file
flight_id_path = nyc_file_dir + 'las_points_nyc_flight_id'
nyc_df.to_hdf(flight_id_path, key='df', complib='lzo', complevel=1)

In [None]:
# Re-sort chronologically and renumber the rows 0..n-1 so that index labels
# match row positions.
# The second statement used to be the truncated `nyc_df.reset`, which raises
# AttributeError (DataFrame has no such attribute); it is completed here as a
# reset_index call.  drop=True discards the old labels instead of adding them
# as a column.
nyc_df.sort_values(by=['gps_time'],inplace=True)
nyc_df.reset_index(drop=True, inplace=True)

In [None]:
# Display the scan_type column to inspect the labels assigned above
nyc_df['scan_type']

In [None]:
# Open the raw .las tile read-only with laspy's File reader.
# NOTE(review): `nn` is neither used nor closed in the visible cells —
# presumably kept open for interactive inspection; confirm and close when done.
nn = File('../../Data/NYC_topo/975172.las',mode='r')

In [None]:
# Renumber the rows 0..n-1 (the old index is kept as a column) so the integer
# lookups below address consecutive rows.
nyc_df.reset_index(inplace=True)

# Print every row in the inspected range whose scan_angle differs from the
# next row's by more than 3000.
scan_angles = nyc_df['scan_angle']
for row in range(105068, 1000000):
    angle_jump = scan_angles[row+1] - scan_angles[row]
    if abs(angle_jump) > 3000:
        print(nyc_df.iloc[row])

In [None]:
# Line plot of scan_angle over the first 1,000,000 rows in the current order.
# NOTE(review): `plt` is not imported in the visible cells — presumably it is
# provided by the star import from point_density_functions; confirm.
plt.plot(nyc_df['scan_angle'][:1000000])

In [None]:
# Horizontal (x/y) distance between two consecutive rows, a and a+1.
# NOTE(review): `a` is not defined in the visible cells — it must be set
# interactively before running this cell.
pt1, pt2 = nyc_df.iloc[a], nyc_df.iloc[a+1]
dx = pt1['x_scaled'] - pt2['x_scaled']
dy = pt1['y_scaled'] - pt2['y_scaled']
dist = np.sqrt(dx**2 + dy**2)
# presumably converts feet to meters (3.28 ft per m) — confirm the units
dist/3.28