In [1]:
import numpy as np
from pathlib import Path
from laspy.file import File
from src.dataset.tools.apply_rf import ApplyResponseFunction
import matplotlib.pyplot as plt
from multiprocessing import Pool, Array
from tqdm import tqdm
from pptk import viewer
from src.evaluation.histogram_matching import hist_match

In [2]:
base_scans = Path("../dublin/npy")
base_train_dataset = Path("../synth_crptn/150/neighborhoods")
base_eval_tile = Path("../synth_crptn/big_tile_no_overlap/neighborhoods")
gis_train_dataset = Path("../synth_crptn+shift/150/neighborhoods")
gis_eval_tile = Path("../synth_crptn+shift/big_tile_no_overlap/neighborhoods")

gis_train_dataset.mkdir(parents=True, exist_ok=True)
gis_eval_tile.mkdir(parents=True, exist_ok=True)

def sigmoid(x, h=0, v=0, s=1, l=1):
    return (s/(1 + np.exp(-l*(x-h)))) + v

In [3]:
min_x = min_y = min_z = 99999999
max_x = max_y = max_z = 1

min_intensity = 0
max_intensity = 512

for pc in base_scans.glob("*.npy"):
    f = np.load(pc)
    if f[:, 0].min() < min_x:
        min_x = f[:, 0].min()
    if f[:, 0].max() > max_x:
        max_x = f[:, 0].max()
    if f[:, 1].max() < min_y:
        min_y = f[:, 1].min()
    if f[:, 1].max() > max_y:
        max_y = f[:, 1].max()
    if f[:, 2].min() < min_z:
        min_z = f[:, 2].min()
    if f[:, 2].max() > max_z:
        max_z = f[:, 2].max()

print("X:", min_x, max_x)
print("Y:", min_y, max_y)
print("Z:", min_z, max_z)

X: 314307.449 317707.233
Y: 233031.721 235884.832
Z: -144.24200000000002 385.369


In [4]:
def proc(patch_path):
    f = np.loadtxt(patch_path)
    x = f[:, 0]
    x = (x - min_x)/(max_x - min_x)

    floor = .3  # lower bound of sigmoid
    center = .5 # where the middle is
    #  the first point is the source gt center, 
    # the second point is the source alt center
    # the 150 neighborhood comes from the target
    f[:, 3] = f[:, 3] * sigmoid(x, h=center, v=floor, l=100, s=1-floor)
    np.savetxt(gis_train_dataset/f"{patch_path.stem + patch_path.suffix}", f)

def proc2(patch_path):
    f = np.loadtxt(patch_path)
    x = f[:, 0]
    x = (x - min_x)/(max_x - min_x)

    floor = .3  # lower bound of sigmoid
    center = .5 # where the middle is
    f[:, 3] = f[:, 3] * sigmoid(x, h=center, v=floor, l=100, s=1-floor)
    np.savetxt(gis_eval_tile/f"{patch_path.stem + patch_path.suffix}", f)
    
def proc3(tile_path):
    f = np.load(tile_path)
    x = f[:, 0]
    x = (x - min_x)/(max_x - min_x)

    floor = .3  # lower bound of sigmoid
    center = .5 # where the middle is
    f[:, 3] = f[:, 3] * sigmoid(x, h=center, v=floor, l=100, s=1-floor)
    np.savetxt(gis_eval_tile.parents[0]/f"{tile_path.stem + tile_path.suffix}", f)
    
pool = Pool(processes=8)
# convert training patches
patches = [f for f in base_train_dataset.glob("*.txt.gz")]
for _ in tqdm(pool.imap_unordered(proc, patches), total=len(patches)):
    pass 

# convert eval patches
patches = [f for f in base_eval_tile.glob("*.txt.gz")]
for _ in tqdm(pool.imap_unordered(proc2, patches), total=len(patches)):
    pass 

# convert eval gt and alt tiles
tile_path = base_eval_tile.parents[0]
print(tile_path)
for tile in ["gt.npy", "alt.npy"]:
    proc3(tile_path / tile)  


100%|██████████| 1993653/1993653 [27:10<00:00, 1222.85it/s]
100%|██████████| 1000000/1000000 [14:12<00:00, 1173.28it/s]


../synth_crptn/big_tile_no_overlap


Process ForkPoolWorker-6:
Process ForkPoolWorker-1:
Process ForkPoolWorker-2:
Process ForkPoolWorker-7:
Process ForkPoolWorker-4:
Traceback (most recent call last):
  File "/home/david/.conda/envs/lidar/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
Process ForkPoolWorker-8:
Process ForkPoolWorker-3:
Process ForkPoolWorker-5:
Traceback (most recent call last):
  File "/home/david/.conda/envs/lidar/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/david/.conda/envs/lidar/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/david/.conda/envs/lidar/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self

In [28]:
# copy over csv files, update paths -- target intensities will be wrong :P
import pandas as pd

# training
for csv in ["master.csv", "train.csv", "val.csv", "test.csv"]:
    base_csv = pd.read_csv(str(base_train_dataset.parents[0] / csv))
    new_csv_path = gis_train_dataset.parents[0] / csv
    
    gis_examples = [None] * len(base_csv)
    for i in tqdm(range(len(gis_examples))):
        file = Path(base_csv.examples[i])
        name = file.stem + ".gz"
        new_file = file.parents[3] / "synth_crptn+shift/150/neighborhoods" / name 
        if not new_file.exists():
            exit(f"file not found: {new_file}")
        gis_examples[i] = str(new_file)

    base_csv.examples = gis_examples
    base_csv.to_csv(f"../synth_crptn+shift/150/{csv}")



100%|██████████| 1993653/1993653 [01:49<00:00, 18254.04it/s]
100%|██████████| 81782/81782 [00:03<00:00, 23407.73it/s]
100%|██████████| 20299/20299 [00:00<00:00, 23442.63it/s]
100%|██████████| 25374/25374 [00:01<00:00, 23515.77it/s]


In [27]:
# eval
base_csv = pd.read_csv(str(base_eval_tile.parents[0] / "big_tile_dataset.csv"))
new_csv_path = gis_eval_tile.parents[0] / "big_tile_dataset.csv"

gis_examples = [None] * len(base_csv)
for i in tqdm(range(len(gis_examples))):
    file = Path(base_csv.examples[i])
    name = file.stem + ".gz"
    new_file = file.parents[3] / "synth_crptn+shift/big_tile_no_overlap/neighborhoods" / name 
    if not new_file.exists():
        exit(f"file not found: {new_file}")
    gis_examples[i] = str(new_file)

base_csv.examples = gis_examples
base_csv.to_csv(f"../synth_crptn+shift/big_tile_no_overlap/big_tile_dataset.csv")



  0%|          | 1699/1000000 [00:00<00:58, 16985.60it/s]

Index(['Unnamed: 0', 'examples', 'source_scan', 'target_intensity'], dtype='object')


100%|██████████| 1000000/1000000 [00:56<00:00, 17748.21it/s]


In [None]:
# see the ref scan
# from pptk import viewer
# ref = 1
# gt_pc = np.load(flight_paths / f"{ref}.npy")[:, :4]
# shift_pc = np.load(gis_flight_paths / f"{ref}.npy")[:, 3]


# v = viewer(gt_pc[:, :3])
# attr1 = gt_pc[:, 3]
# attr2 = shift_pc
# v.attributes(attr1, attr2)