In [1]:
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from laspy.file import File
# from pickle import dump, load

# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# import torch.utils.data as udata
# from torch.autograd import Variable
# from sklearn.preprocessing import MinMaxScaler

# %matplotlib inline

import argparse
import logging
import sys
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torch.nn.functional as F

from torch.utils.tensorboard import SummaryWriter

from utils import data
import models, utils

import pandas as pd
from laspy.file import File
from pickle import dump, load

import torch.nn as nn
import torch.optim as optim
import torch.utils.data as udata
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline

ModuleNotFoundError: No module named 'utils'

### Inputs

In [2]:
# Training Data parameters
scan_line_gap_break = 7000 # threshold over which scan_gap indicates a new scan line
min_pt_count = 1700 # in a scan line, otherwise line not used
max_pt_count = 2000 # in a scan line, otherwise line not used
seq_len = 100
num_scan_lines = 150 # to use as training set
val_split = 0.2

# LSTM Model parameters
hidden_size = 100 # hidden features
num_layers = 2 # Default is 1, 2 is a stacked LSTM
output_dim = 3 # x,y,z

# Training parameters
num_epochs = 500
learning_rate = 0.01

In [3]:
# gpu or cpu
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Load the data

first_return_df has been processed in the following ways:  
* Removed outliers outside of [0.01,0.99] percentile range
* Normalized xyz values to [0,1]
* Mapped each point to a scan line index


In [4]:
first_return_df = pd.read_pickle("../../Data/parking_lot/first_returns_modified_164239.pkl")

In [None]:
# Note: x_scaled, y_scaled, and z_scaled MUST be the first 3 features
feature_list = [
    'x_scaled',
    'y_scaled',
    'z_scaled',
#     'scan_line_idx',
#     'scan_angle_deg',
#     'abs_scan_angle_deg'
]

## Mask the Data

In [None]:
# miss_pts_before is the count of missing points before the point in question (scan gap / 5 -1)
first_return_df['miss_pts_before'] = round((first_return_df['scan_gap']/-5)-1)
first_return_df['miss_pts_before'] = [max(0,pt) for pt in first_return_df['miss_pts_before']]

# Add 'mask' column, set to one by default
first_return_df['mask'] = [1]*first_return_df.shape[0]

In [7]:
def add_missing_pts(first_return_df):
    # Create a series with the indices of points after gaps and the number of missing points (max of 5)
    miss_pt_ser = first_return_df[(first_return_df['miss_pts_before']>0)&\
                                      (first_return_df['miss_pts_before']<6)]['miss_pts_before']
    # miss_pts_arr is an array of zeros that is the dimensions [num_missing_pts,cols_in_df]
    miss_pts_arr = np.zeros([int(miss_pt_ser.sum()),first_return_df.shape[1]])
    # Create empty series to collect the indices of the missing points
    indices = np.ones(int(miss_pt_ser.sum()))

    # Fill in the indices, such that they all slot in in order before the index
    i=0
    for index, row in zip(miss_pt_ser.index,miss_pt_ser):
        new_indices = index + np.arange(row)/row-1+.01
        indices[i:i+int(row)] = new_indices
        i+=int(row)
    # Create a Dataframe of the indices and miss_pts_arr
    miss_pts_df = pd.DataFrame(miss_pts_arr,index=indices,columns = first_return_df.columns)
    miss_pts_df['mask'] = [0]*miss_pts_df.shape[0]
    # Fill scan fields with NaN so we can interpolate them
    for col in ['scan_angle','scan_angle_deg']:
        miss_pts_df[col] = [np.NaN]*miss_pts_df.shape[0]
    # Concatenate first_return_df and new df
    full_df = first_return_df.append(miss_pts_df, ignore_index=False)
    # Resort so that the missing points are interspersed, and then reset the index
    full_df = full_df.sort_index().reset_index(drop=True)
    return full_df

In [8]:
first_return_df = add_missing_pts(first_return_df)
first_return_df[['scan_angle','scan_angle_deg']] = first_return_df[['scan_angle','scan_angle_deg']].interpolate()

In [9]:
first_return_df['abs_scan_angle_deg'] = abs(first_return_df['scan_angle_deg'])

In [10]:
first_return_df.iloc[9780:9790]

Unnamed: 0,index,X,Y,Z,intensity,flag_byte,classification_flags,classification_byte,user_data,scan_angle,...,z_scaled,adj_gps_time,num_returns,return_num,scan_gap,scan_angle_deg,scan_line_idx,miss_pts_before,mask,abs_scan_angle_deg
9780,51161.0,465708.0,3034892.0,32216.0,1753.0,17.0,0.0,0.0,0.0,-3072.0,...,32.216,0.348554,1.0,1.0,-6.0,-18.432,0.0,0.0,1,18.432
9781,51162.0,466242.0,3034757.0,32226.0,1490.0,17.0,0.0,0.0,0.0,-3077.0,...,32.226,0.348557,1.0,1.0,-5.0,-18.462,0.0,0.0,1,18.462
9782,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3082.0,...,0.0,0.0,0.0,0.0,0.0,-18.492,0.0,0.0,0,18.492
9783,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3087.0,...,0.0,0.0,0.0,0.0,0.0,-18.522,0.0,0.0,0,18.522
9784,51163.0,467975.0,3034317.0,32197.0,1357.0,17.0,0.0,0.0,0.0,-3092.0,...,32.197,0.348565,1.0,1.0,-15.0,-18.552,0.0,2.0,1,18.552
9785,51164.0,468571.0,3034166.0,32215.0,347.0,17.0,0.0,0.0,0.0,-3097.0,...,32.215,0.348567,1.0,1.0,-5.0,-18.582,0.0,0.0,1,18.582
9786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3102.5,...,0.0,0.0,0.0,0.0,0.0,-18.615,0.0,0.0,0,18.615
9787,51165.0,469711.0,3033876.0,32248.0,1812.0,17.0,0.0,0.0,0.0,-3108.0,...,32.248,0.348572,1.0,1.0,-11.0,-18.648,0.0,1.0,1,18.648
9788,51166.0,470284.0,3033731.0,32278.0,1979.0,17.0,0.0,0.0,0.0,-3113.0,...,32.278,0.348575,1.0,1.0,-5.0,-18.678,0.0,0.0,1,18.678
9789,51167.0,470838.0,3033591.0,32221.0,2047.0,17.0,0.0,0.0,0.0,-3118.0,...,32.221,0.348577,1.0,1.0,-5.0,-18.708,0.0,0.0,1,18.708


#### 2) Extract tensor of scan lines

In [11]:
# Number of points per scan line
scan_line_pt_count = first_return_df.groupby('scan_line_idx').count()['gps_time']

# Identify the indices for points at end of scan lines
scan_break_idx = first_return_df[(first_return_df['scan_gap']>scan_line_gap_break)].index

In [None]:
# Create Tensor
line_count = ((scan_line_pt_count>min_pt_count)&(scan_line_pt_count<max_pt_count)).sum()
scan_line_tensor = torch.randn([line_count,min_pt_count,len(feature_list)])

# Collect the scan lines longer than min_pt_count
# For each, collect the first min_pt_count points
i=0
for line,count in enumerate(scan_line_pt_count):
    if (count>min_pt_count)&(count<max_pt_count):
        try:
            line_idx = scan_break_idx[line-1]
            scan_line_tensor[i,:,:] = torch.Tensor(first_return_df.iloc\
                                      [line_idx:line_idx+min_pt_count][feature_list].values)
            i+=1
        except RuntimeError:
            print("line: ",line)
            print("line_idx: ",line_idx)

Note: Setting all features to [0,1] overvalues the z coordinate in MSE Loss.

In [13]:
def min_max_tensor(tensor):
    # Function takes a 3-D tensor, performs minmax scaling to [0,1] along the third dimension.
    # First 2 dimensions are flattened
    a,b,c = tensor.shape
    # Flatten first two dimensions
    flat_tensor = tensor.view(-1,c)
    sc =  MinMaxScaler()
    flat_norm_tensor = sc.fit_transform(flat_tensor)
    # Reshape to original
    output = flat_norm_tensor.reshape([a,b,c])
    return torch.Tensor(output), sc

In [14]:
scan_line_tensor_norm, sc = min_max_tensor(scan_line_tensor)

#### 3) Generate the data

In [None]:
def sliding_windows(data, seq_length, line_num, x, y):
    for i in range(len(data)-seq_length):
        # Index considers previous lines
        idx = i+line_num*(min_pt_count-seq_length)
        _x = data[i:(i+seq_length)]
        _y = data[i+seq_length,:3] # Assumes xyz are the first 3 features in scan_line_tensor
        x[idx,:,:] = _x
        y[idx,:,:] = _y

    return x,y

def generate_samples(data,min_pt_count,seq_len,num_scan_lines,val_split,starting_line=1000):
    '''
    Function generates training and validation samples for predicting the next point in the sequence.
    Inputs:
        data: 3-Tensor with dimensions: i) the number of viable scan lines in the flight pass, 
                                        ii) the minimum number of points in the scan line,
                                        iii) 3 (xyz, or feature count)
    
    '''
    # Create generic x and y tensors
    x = torch.ones([(min_pt_count-seq_len)*num_scan_lines,seq_len,len(feature_list)]) 
    y = torch.ones([(min_pt_count-seq_len)*num_scan_lines,1,3])
    i=0
    # Cycle through the number of scan lines requested, starting somewhere in the middle
    for line_idx in range(starting_line,starting_line+num_scan_lines):
        x,y = sliding_windows(data[line_idx,:,:],seq_len,line_idx-starting_line, x, y)
    x_train,y_train,x_val,y_val = train_val_split(x,y,val_split)
    return x_train,y_train,x_val,y_val

def train_val_split(x,y,val_split):   
    # Training/Validation split
    # For now, we'll do the last part of the dataset as validation...shouldn't matter?
    train_val_split_idx = int(x.shape[0]*(1-val_split))
    x_train = x[:train_val_split_idx,:,:]
    x_val = x[train_val_split_idx:,:,:]
    y_train = y[:train_val_split_idx,:,:]
    y_val = y[train_val_split_idx:,:,:]
    
    return x_train,y_train,x_val,y_val

In [16]:
x_train,y_train,x_val,y_val = generate_samples(scan_line_tensor_norm,min_pt_count,seq_len,num_scan_lines,val_split)

### 2: Train the model  
Borrowing a lot of code from here: https://github.com/spdin/time-series-prediction-lstm-pytorch/blob/master/Time_Series_Prediction_with_LSTM_Using_PyTorch.ipynb

#### 1) Define the model

In [17]:
class LSTM(nn.Module):

    def __init__(self, output_dim, input_size, hidden_size, num_layers, seq_len):
        super(LSTM, self).__init__()
        # output_dim = 3: X,Y,Z
        self.output_dim = output_dim
        self.num_layers = num_layers
        
        # inputs_size = 3: X,Y,Z (could be larger in the future if we add features here)
        self.input_size = input_size
        
        # Not sure what to do here, larger than input size?
        self.hidden_size = hidden_size
        # Passes from above
        self.seq_len = seq_len
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, output_dim)

    def forward(self, x):
        self.lstm.flatten_parameters()
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size)).to(device)
        
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size)).to(device)
        
        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        
        # In case multiple LSTM layers are used, this predicts using only the last layer
        h_out = h_out.view(num_layers,-1, self.hidden_size)
        out = self.fc(h_out[-1,:,:])
        
        return out

In [18]:
# Define a loss function that weights the loss according to coordinate ranges (xmax-xmin, ymax-ymin, zmax-zmin)
def weighted_MSELoss(pred,true,sc):
    '''Assumes that x,y,z are the first 3 features in sc scaler'''
    ranges = torch.Tensor(sc.data_max_[:3]-sc.data_min_[:3])
    raw_loss = torch.zeros(3,dtype=float)
    crit = torch.nn.MSELoss()
    for i in range(3):
        raw_loss[i] = crit(pred[:,:,i], true[:,:,i])
    return (ranges * raw_loss).sum()

In [19]:
def calculate_loss(lstm,x,y,ltype='Training'):
    # Training loss
    y_pred = lstm(x).detach().to('cpu')
    loss = weighted_MSELoss(y_pred.unsqueeze(1), y,sc)
    print("{} Loss: {:.4f}".format(ltype,loss))
    return loss

In [20]:
class LidarLstmDataset(udata.Dataset):
    def __init__(self, x, y):
        super(LidarLstmDataset, self).__init__()
        self.x = x
        self.y = y

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self,index):
        return self.x[index],self.y[index]

#### 2) Train the model

In [22]:
batch_loss,vl = [],[]
lstm_local = LSTM(output_dim, len(feature_list), hidden_size, num_layers, seq_len)
lstm = lstm_local.to(device)

# Create the dataloader
train_dataset = LidarLstmDataset(x_train,y_train)
val_dataset = LidarLstmDataset(x_val,y_val)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1024, num_workers=4, shuffle=True)
valid_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=4, shuffle=False)

# criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
# Scheduler to reduce the learning rate
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 50, gamma=0.5)

# Train the model
for epoch in range(num_epochs):
    for x,y in train_loader:
        print(x.shape)
        outputs = lstm(x.to(device))
        optimizer.zero_grad()
        # obtain the loss function
        loss = weighted_MSELoss(outputs.unsqueeze(1), y.to(device),sc)

        loss.backward()

        optimizer.step()
        batch_loss.append(loss)
    print("Epoch: %d, Training batch loss: %1.5f\n" % (epoch, loss.item()))
    scheduler.step()
    if epoch % 5 == 0:
        print("*"*30)
        val = calculate_loss(lstm,x_val.to(device),y_val,'Validation')
        print("*"*30)
        vl.append(val)

torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])
torch.Size([1024, 100, 6])


KeyboardInterrupt: 

In [21]:
for x,y in train_loader:
    print(x.shape)

NameError: name 'train_loader' is not defined

In [None]:
# Print loss plot
plt.subplot(2,1,1)
plt.plot(20*np.arange(len(batch_loss)-10),tl[10:],label='Training')
plt.xlabel("Batch")
plt.ylabel("Weighted MSE")

plt.subplot(2,1,2)
plt.plot(20*np.arange(len(vl)-10),vl[10:],'+',label='Validation')
plt.xlabel("Epoch")
plt.ylabel("Weighted MSE")
plt.legend()

#### 3) Evaluate the model

In [None]:
#Load the model
dir_name = '8_31_20/'
run_descriptor = 'seq_len_100_hidden_size_100'
scaler = load(open("models/"+dir_name+"SCALER_"+run_descriptor+".pkl",'rb'))
lstm = load(open("models/"+dir_name+run_descriptor+".pkl",'rb'))

In [None]:
def print_results(x,y,lstm,sc,sample_num,transform=False):
    markersize,fontsize=12,14
    if transform:
        in_seq = sc.inverse_transform(x[sample_num])
        pred_norm = (lstm(x[sample_num].unsqueeze(0).to(device)).view(-1,3).detach())
        pred_point =     pred_norm.to('cpu')*(sc.data_max_[:3]-sc.data_min_[:3])+sc.data_min_[:3]
        true_point = y[sample_num]*(sc.data_max_[:3]-sc.data_min_[:3])+sc.data_min_[:3]
    else:
        in_seq = x[sample_num]
        pred_point = (lstm(x[sample_num].unsqueeze(0).to(device)).view(-1,3).detach()).to('cpu')
        true_point = y[sample_num]
        
    plt.figure(figsize=[12,12])
    plt.subplot(2,1,1)
    plt.plot(in_seq[:,0],in_seq[:,1],'x',label='sequence')
    plt.plot(pred_point[0,0],pred_point[0,1],'ro',markersize=markersize,label='Prediction')
    plt.plot(true_point[0,0],true_point[0,1],'go',markersize=markersize,label='True')
    plt.xlabel("X",fontsize=fontsize)
    plt.ylabel("Y",fontsize=fontsize)
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)
    plt.legend(fontsize=fontsize)
    
    plt.subplot(2,1,2)
    plt.plot(in_seq[:,0],in_seq[:,2],'x',label='sequence')
    plt.plot(pred_point[0,0],pred_point[0,2],'ro',markersize=markersize,label='Prediction')
    plt.plot(true_point[0,0],true_point[0,2],'go',markersize=markersize,label='True')
    plt.xlabel("X",fontsize=fontsize)
    plt.ylabel("Z",fontsize=fontsize)
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)
    plt.legend(fontsize=fontsize)
    plt.show()

In [None]:
for i in range(4120,4125):
    print_results(x_train,y_train,lstm,scaler,i)

## Save the model

In [None]:
import os
import json

dir_name = '8_31_20/'
run_descriptor = 'seq_len_100_hidden_size_100'
os.mkdir('models/'+dir_name)
class Args(object):
    def __init__(self):
        self.scan_line_gap_break = scan_line_gap_break
        self.min_pt_count = min_pt_count
        self.max_pt_count = max_pt_count
        self.seq_len = seq_len
        self.num_scan_lines = num_scan_lines
        self.val_split = val_split
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_dim = output_dim
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
args=Args()

# Save the scaler
dump(lstm, open('models/'+dir_name+run_descriptor+'.pkl','wb'))
dump(sc, open('models/'+dir_name+'SCALER_'+run_descriptor+'.pkl', 'wb'))
dump(args, open('models/'+dir_name+'args_'+run_descriptor+'.pkl', 'wb'))
with open('models/'+dir_name+'args_'+run_descriptor+'.json', 'w') as json_file:
    json.dump(json.dumps(args.__dict__), json_file)

### Create .pts file of predictions
Include the actual and the predicted, indicated with a binary flag

In [None]:
def create_pts_file(y_val,x_val,lstm,sc):
    y_val_reinflate = np.concatenate((y_val[:,0,:]*(sc.data_max_[:3]-sc.data_min_[:3]) \
                                      +sc.data_min_[:3],np.zeros((y_val.shape[0],1))),axis=1)
    out_df = pd.DataFrame(np.array(y_val_reinflate[:,:]),columns=['x','y','z','class'])
    pred_norm = (lstm(x_val).view(-1,3).detach())
    pred_reinflate = pred_norm*(sc.data_max_[:3]-sc.data_min_[:3])+sc.data_min_[:3]
    pred_arr = np.concatenate((pred_reinflate,np.ones((pred_reinflate.shape[0],1))),axis=1)
    out_df = out_df.append(pd.DataFrame(pred_arr,columns = out_df.columns)).reset_index(drop=True)
    return out_df

In [None]:
out_df = create_pts_file(y_val,x_val,lstm,sc)
out_df.to_csv("output_test.pts")

### Data Prep
Already done, but this removes outliers and adds scan_line_idx to the first_return_df

In [None]:
# Adj GPS Time: Set both timestamps to zero for the first record
def adjust_time(df,time_field):
    # Function adds adj_gps_time to points or pulse dataframe, set to zero at the minimum timestamp.
    df['adj_gps_time'] = df[time_field] - df[time_field].min()
    return df

def label_returns(las_df):
    '''
    Parses the flag_byte into number of returns and return number, adds these fields to las_df.
    Input - las_df - dataframe from .laz or .lz file
    Output - first_return_df - only the first return points from las_df.
           - las_df - input dataframe with num_returns and return_num fields added 
    '''
    
    las_df['num_returns'] = np.floor(las_df['flag_byte']/16).astype(int)
    las_df['return_num'] = las_df['flag_byte']%16
    first_return_df = las_df[las_df['return_num']==1]
    first_return_df = first_return_df.reset_index(drop=True)
    return first_return_df, las_df


def pull_first_scan_gap(df):
    # Separate return num, only keep the first returns, add scan_gap, sort
    df['num_returns'] = np.floor(df['flag_byte']/16).astype(int)
    df['return_num'] = df['flag_byte']%16
    
    first_return_wall = df[df['return_num']==1]
    
    # Outliers
    # Remove outliers outside of [.01,.99] percentiles
    a = first_return_wall[['x_scaled','y_scaled','z_scaled']].quantile([.01,.99])
    first_return_wall = first_return_wall[(first_return_wall['x_scaled']>a.iloc[0]['x_scaled'])&\
                                         (first_return_wall['x_scaled']<a.iloc[1]['x_scaled'])&\
                                         (first_return_wall['y_scaled']>a.iloc[0]['y_scaled'])&\
                                         (first_return_wall['y_scaled']<a.iloc[1]['y_scaled'])&\
                                         (first_return_wall['z_scaled']>a.iloc[0]['z_scaled'])&\
                                         (first_return_wall['z_scaled']<a.iloc[1]['z_scaled'])]
    
    first_return_wall.sort_values(by=['gps_time'],inplace=True)
    first_return_wall.reset_index(inplace=True)
    first_return_wall.loc[1:,'scan_gap'] = [first_return_wall.loc[i+1,'scan_angle'] - first_return_wall.loc[i,'scan_angle'] for i in range(first_return_wall.shape[0]-1)]
    first_return_wall.loc[0,'scan_gap'] = 0
    first_return_wall['scan_angle_deg'] = first_return_wall['scan_angle']*.006
    return first_return_wall

# Load LAS points
las_df = pd.read_hdf("../../Data/parking_lot/las_points_164239.lz")
# Separate out the first returns only
las_df = adjust_time(las_df,'gps_time')
# Sort records by timestamp
las_df.sort_values(by=['adj_gps_time'],inplace=True)
# TO DO: consider only last returns?
# First returns only
first_return_df = pull_first_scan_gap(las_df)

In [None]:
# # Identify the indices for points at end of scan lines
scan_break_idx = first_return_df[(first_return_df['scan_gap']>scan_line_gap_break)].index

# # Concat adds index 0 as 0th scan line
_right = pd.DataFrame(data=range(1,len(scan_break_idx)+1),index=scan_break_idx,columns=['scan_line_idx'])
right = pd.concat([pd.DataFrame(data=[0],index=[0],columns=['scan_line_idx']),_right])
first_return_df = pd.merge_asof(first_return_df,right,left_index=True,right_index=True,direction='backward')