Given an unshifted ATL and lidar segments, this notebook shifts the ATL to the best fit.

In [1]:
!pip install ipynb

Collecting ipynb
  Downloading ipynb-0.5.1-py3-none-any.whl.metadata (303 bytes)
Downloading ipynb-0.5.1-py3-none-any.whl (6.9 kB)
Installing collected packages: ipynb
Successfully installed ipynb-0.5.1


In [69]:
from cmath import isnan
from ctypes import ArgumentError
from re import I
from statistics import mean
import pandas as pd
import datetime
import os
import argparse
import io
from csv import writer as csvWriter
import math
import numpy as np
from numpy import power as nppower
import datetime
from scipy import stats
import multiprocessing
import traceback

ATL07_SPOT_SIZE = 6

def rsquared_from_atl_Chirop(atl07Zs, chiropteraZs):
    slope, intercept, r_value, p_value, std_err = stats.linregress(atl07Zs, chiropteraZs)
    rsquared = r_value ** 2
    return rsquared

def meanChiropteraForATL(chiropteraDataFrame, atlX, atlY, spotSize):
    """
    Determines the elevation to be used as the corresponding "chiroptera elevation" for the given ATLAS sensor point.
    This works by first querying the dataframe for any points falling within the spot size radius of the given X and Y, which theoretically represent the center of the area illuminated by the ATLAS sensor's laser.
    """

    relevantPoints = chiropteraDataFrame.query("sqrt(@nppower((`x` - @atlX), 2) + @nppower((`y` - @atlY), 2)) <= @spotSize")
    maxZ = relevantPoints["z"].mean()

    return maxZ

def compareToATL07(chiropteraDataFrame, atl07DataFrame, sample_rate):
    """
    Performs the comparison of the given set of chiroptera data to the given set of ATLAS sensor data.
    """

    cMaxX = chiropteraDataFrame["x"].max()
    cMinX = chiropteraDataFrame["x"].min()
    cMaxY = chiropteraDataFrame["y"].max()
    cMinY = chiropteraDataFrame["y"].min()
   
    atl07InBoundsDataFrame = atl07DataFrame[(atl07DataFrame['X'] >= cMinX) & 
                               (atl07DataFrame['X'] <= cMaxX) & 
                               (atl07DataFrame['Y'] >= cMinY) & 
                               (atl07DataFrame['Y'] <= cMaxY)]
    
    #print(f"atl07InBounds has length {len(atl03InBoundsDataFrame)} ")

    chiropteraElevs = []
    atl07Elevs = []

    #this can take a while... calcChiroptera performs a mean over a spot for each corresponding z value in bounds.
    #one approach is to take every Nth Z value...
    for ind, row in atl07InBoundsDataFrame.iterrows():
        if ind % sample_rate == 0:
            x = row["X"]
            y = row["Y"]
            z = row["Z"]
            chiropteraElev = meanChiropteraForATL(chiropteraDataFrame, x, y, ATL07_SPOT_SIZE)
            #print(f"chiropteraElev: {chiropteraElev}")

            if not isnan(chiropteraElev) and not isnan(z):
                chiropteraElevs.append(chiropteraElev)
                atl07Elevs.append(z)

    dfZs = pd.DataFrame(columns=["c","a"])
    dfZs["c"] = chiropteraElevs
    dfZs["a"] = atl07Elevs
    return rsquared_from_atl_Chirop(dfZs['a'], dfZs['c'])

In [41]:
def shift_df(df, dx,dy):
    df['x'] = df['x'] + dx
    df['y'] = df['y'] + dy
    return df

#Splits a dataframe using an x value. 
def split_df(df, x):
    df1 = df[df['x'] < x]
    df2 = df[df['x'] > x]
    return df1, df2

In [49]:
#concat and subsample
df0_4 = pd.read_pickle('data/100 1B/0m-4m.pkl')
df4_8 = pd.read_pickle('data/100 1B/4m-8m.pkl')
df8_12 = pd.read_pickle('data/100 1B/8m-12m.pkl')
df12_16 = pd.read_pickle('data/100 1B/12m-16m.pkl')
df16_20 = pd.read_pickle('data/100 1B/16m-20m.pkl')
df20_24 = pd.read_pickle('data/100 1B/20m-24m.pkl')
df24_28 = pd.read_pickle('data/100 1B/24m-28m.pkl')
df28_32 = pd.read_pickle('data/100 1B/28m-32m.pkl')
chirop_df = pd.concat([df0_4, df4_8, df8_12, df12_16, df16_20, df20_24, df24_28, df28_32], ignore_index=True)
chirop_df.to_pickle('data/100 1B/1_100_1B.pkl')

def full_atl_to_xyz(adf):
    adf = adf.rename(columns={'x': 'X'})
    adf = adf.rename(columns={'y': 'Y'})
    adf = adf.rename(columns={'heights': 'Z'})
    adf = adf.drop(columns=['dt', 'conf', 'stype', ])
    adf = adf.iloc[:, :3]
    return adf

In [85]:
max(chirop_df['y']) - min(chirop_df['y'])

32216.889999998733

### Scoring

In [82]:
#unshifted score: 0.2298153738854924
#with sample rate 100: 0.15159101730401991
#with sample rate 10: 0.185965617068619
#with sample rate 11: 0.27755104668163544
#mean sample rate 100-110: 0.22412478377097417
#mean sample rate 90-100: 0.3058223440270319
#mean sample rate 80-90: 0.24780283276690201
#mean sample rate 120-140: 0.24867469323495867

#shifted score: 0.4535434638599172
#with sample rate 10: 0.5126277471027988

adf = pd.read_csv('data/gt3r_indexed_xyz.csv', index_col=0)
dfZs = []
for i in range(120,140):
    dfZs.append(compareToATL07(chirop_df, adf, i))
mean(dfZs)

0.24867469323495867

In [65]:
shifted_adf = pd.read_csv('Manual Solution/shifted_xyz.csv')
dfz = compareToATL07(chirop_df, shifted_adf)
dfz

0.185965617068619