In [1]:
# Run these following 2 installs to if you don't have shapely, geopands, etc
# if you are using sagemaker, you should use the "conda_python3" kernel
!pip install laspy
!pip install geopandas

[33mYou are using pip version 10.0.1, however version 19.3.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
[33mYou are using pip version 10.0.1, however version 19.3.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
!pip install boto3

[33mYou are using pip version 10.0.1, however version 19.3.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [3]:
import numpy as np
from laspy.file import File
from pandas import DataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Point
import requests
import os
import copy # not sure if this is needed

In [4]:
import boto3 # like, are we not going to use boto3 in python on AWS?
from botocore.exceptions import ClientError

In [5]:
import pdb

In [6]:
import lasutility

In [7]:
# Names of the LAS files to process, as returned by lasutility.get_las_file_names().
# The processing loop at the bottom of the notebook iterates over this list and
# hands each entry to the helper functions as `keyname`.
# NOTE(review): entries are presumably of the form "####.las" (the helpers split
# on "." and int() the first part) -- confirm against lasutility.
lasKeynames = lasutility.get_las_file_names()

In [8]:
def download_las_data(keyname):
    """Download one LAS file over HTTPS into ``./lasdata/<keyname>``.

    The URL is obtained from ``lasutility.https_path_from_keyname(keyname)``.

    Parameters
    ----------
    keyname : str
        Name of the LAS file; also used as the local file name.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Without this check the
        body of an error page would be saved as if it were LAS data.
    """
    url = lasutility.https_path_from_keyname(keyname)
    lasdatafile = f"./lasdata/{keyname}"

    # Fetch and validate first so a failed request never leaves a bogus file behind.
    lidar_data = requests.get(url)
    lidar_data.raise_for_status()

    # The target directory is not created anywhere else in the notebook.
    os.makedirs(os.path.dirname(lasdatafile), exist_ok=True)
    with open(lasdatafile, "wb") as f:
        f.write(lidar_data.content)

In [9]:
def cleanup_las_data(keyname):
    """Delete the local LAS file ``./lasdata/<keyname>``.

    Raises whatever ``os`` raises when the file cannot be removed
    (e.g. ``FileNotFoundError`` if it does not exist).
    """
    os.unlink("./lasdata/{}".format(keyname))

In [10]:
def convert_las_data_to_sql_statement(keyname, max_rows_per_iteration = 100000):
    """Convert ``./lasdata/<keyname>`` into one multi-row SQL INSERT script.

    Every point of the LAS file is reprojected from EPSG:26985 to EPSG:4326
    and written as one value tuple of an
    ``insert into lidar_values (...) VALUES (...), (...);`` statement. The
    script is written to ``./lasdata/<seq>.sql`` where ``<seq>`` is the part
    of ``keyname`` before the first ``.``.

    Rows are streamed to the output file as they are produced instead of
    being collected in a list and joined at the end, so peak memory is bounded
    by the point table plus one chunk.

    Parameters
    ----------
    keyname : str
        LAS file name inside ``./lasdata``. The part before the first "."
        must parse as an integer; it is written to the ``dcoctocode`` column.
    max_rows_per_iteration : int, optional
        Number of points reprojected per chunk (the reprojection is chunked
        because doing the whole file at once ran into memory errors).

    Returns
    -------
    int
        Number of value tuples written. If the LAS file contains no points
        the output file is created empty (no dangling ``VALUES;``).

    Raises
    ------
    ValueError
        If the sequence-number part of ``keyname`` is not an integer.
    """
    #we need to partition the GeoDataFrame.to_crs function to 100,000 blocks, otherwise jupyter seem to have memory errors
    #might need to come back to this and recheck
    lasdatafile = f"./lasdata/{keyname}"
    keysequencenum = keyname.split('.')[0] #get the #### of the file "####.las"
    lassqlfile = f"./lasdata/{keysequencenum}.sql"
    dcoctocode = int(keysequencenum)  # loop-invariant, so computed once up front

    inFile = File(lasdatafile)
    try:
        # np.array() copies the point records, so the reader can be closed
        # as soon as the table has been built.
        lidar_points = np.array((inFile.X,inFile.Y,inFile.Z,inFile.intensity,
                              inFile.classification, inFile.gps_time,
                              inFile.overlap, inFile.scan_angle,
                              inFile.x, inFile.y, inFile.z,
                              inFile.Synthetic, inFile.Withheld)).transpose()
    finally:
        inFile.close()

    lidar_df=DataFrame(lidar_points, columns = ["X","Y","Z","intensity",
                                            "classification","gps_time",
                                            "overlap","scan_angle",
                                            "x", "y", "z",
                                            "synthetic", "withheld"])

    lidar_df_total_length = lidar_df.shape[0]
    sql_preamble = f"insert into lidar_values (ground_coord, z, intensity, classification, gps_time, overlap, scan_angle, synthetic, withheld, dcoctocode) VALUES"

    row_count = 0
    with open(lassqlfile, "w") as sql_file:
        for lidar_idx_start in range(0, lidar_df_total_length, max_rows_per_iteration):
            lidar_idx_end = min(lidar_idx_start + max_rows_per_iteration, lidar_df_total_length)
            # Explicit copy: the GeoDataFrame constructor adds a geometry column.
            lidar_df_sampled = lidar_df.iloc[lidar_idx_start:lidar_idx_end].copy()

            geometry_sampled = [Point(xyz) for xyz in zip(lidar_df_sampled.x,lidar_df_sampled.y,lidar_df_sampled.z)]

            # Source CRS is kept in the same form the notebook has always used.
            lidar_geodf_sampled = GeoDataFrame(lidar_df_sampled, crs={'init': 'epsg:26985'}, geometry=geometry_sampled)
            lidar_geodf_sampled = lidar_geodf_sampled.to_crs(epsg=4326)

            # Column order: the 13 LAS columns above followed by the geometry.
            for row in lidar_geodf_sampled.itertuples(index=False, name=None):
                (_X, _Y, _Z, intensity, classification, gps_time, overlap,
                 scan_angle, _x, _y, _z, synthetic, withheld, geom) = row
                # After reprojection geom.x is the longitude and geom.y the
                # latitude; WKT POINT expects them in that (x y z) order.
                lon, lat, height = geom.x, geom.y, geom.z
                items = [f"'POINT({lon} {lat} {height})'::geometry", height, intensity, int(classification), gps_time, overlap, scan_angle, int(synthetic), int(withheld), dcoctocode]
                sql_line = '(' + ', '.join(map(str, items)) + ')'

                # Preamble before the first tuple, ",\n" between tuples.
                sql_file.write(sql_preamble + "\n" if row_count == 0 else ",\n")
                sql_file.write(sql_line)
                row_count += 1

        if row_count:
            sql_file.write(';')

    return row_count # return the number of entries created
    

In [11]:
def convert_las_data_to_geodataframe(keyname):
    """Load ``./lasdata/<keyname>`` into a GeoDataFrame in EPSG:4326.

    The returned frame has one row per LAS point with the columns
    ``X, Y, Z, intensity, classification, gps_time, overlap, scan_angle,
    x, y, z, synthentic, withheld`` plus a ``geometry`` column of 3-D points
    reprojected from EPSG:26985 to EPSG:4326.

    Parameters
    ----------
    keyname : str
        LAS file name inside ``./lasdata``.

    Returns
    -------
    geopandas.GeoDataFrame
        Point table whose geometry *and* CRS metadata are EPSG:4326.
    """
    lasdatafile = f"./lasdata/{keyname}"
    inFile = File(lasdatafile)
    try:
        # np.array() copies the point records, so the reader can be closed
        # as soon as the table has been built.
        lidar_points = np.array((inFile.X,inFile.Y,inFile.Z,inFile.intensity,
                              inFile.classification, inFile.gps_time,
                              inFile.overlap, inFile.scan_angle,
                              inFile.x, inFile.y, inFile.z,
                              inFile.Synthetic, inFile.Withheld)).transpose()
    finally:
        inFile.close()

    # NOTE: "synthentic" is misspelled, but it is part of the returned frame's
    # schema, so it is kept as-is for existing consumers.
    lidar_df=DataFrame(lidar_points, columns = ["X","Y","Z","intensity",
                                            "classification","gps_time",
                                            "overlap","scan_angle",
                                            "x", "y", "z",
                                            "synthentic", "withheld"])

    geometry = [Point(xyz) for xyz in zip(lidar_df.x,lidar_df.y,lidar_df.z)]

    lidar_geodf = GeoDataFrame(lidar_df, crs={'init': 'epsg:26985'}, geometry=geometry)

    # Reproject the whole frame rather than only the geometry column so the
    # frame's CRS metadata is updated together with the coordinates.
    return lidar_geodf.to_crs(epsg=4326)

In [12]:
def upload_las_data_to_s3_bucket(keyname, s3bucket, s3prefix):
    """Upload ``./lasdata/<keyname>`` to ``s3://<s3bucket>/<s3prefix>/<keyname>``.

    Parameters
    ----------
    keyname : str
        File name inside ``./lasdata``; also the last component of the S3 key.
    s3bucket : str
        Destination bucket name.
    s3prefix : str
        Key prefix (without trailing slash) prepended to ``keyname``.

    Returns
    -------
    bool
        True if the upload succeeded, False if boto3 reported a failure.
        The failure reason is printed so it is not lost.
    """
    # Imported here so the cell does not depend on an extra top-level import.
    from boto3.exceptions import S3UploadFailedError

    s3_client = boto3.client('s3')
    local_path = f"./lasdata/{keyname}"
    object_name = f"{s3prefix}/{keyname}"
    try:
        s3_client.upload_file(local_path, s3bucket, object_name)
    except (ClientError, S3UploadFailedError) as e:
        # upload_file reports failed transfers as S3UploadFailedError, which
        # is not a ClientError subclass, so both must be caught.
        print(f"Upload of {local_path} to s3://{s3bucket}/{object_name} failed: {e}")
        return False
    return True

In [13]:
def cleanup_sql_file(sqlfilename):
    """Delete the generated SQL script ``./lasdata/<sqlfilename>``.

    Raises whatever ``os`` raises when the file cannot be removed
    (e.g. ``FileNotFoundError`` if it does not exist).
    """
    os.unlink("./lasdata/{}".format(sqlfilename))

In [None]:
# Process every LAS file into a SQL insert script and upload the script to S3.
# For each key: download -> convert to SQL -> delete the LAS file -> upload the
# SQL file -> delete the SQL file (only once the upload is confirmed).

#uncomment this line to only process a single las file
# lasKeynames = ['1121.las']
for laskeyname in lasKeynames:
    download_las_data(laskeyname)
    try:
        row_count = convert_las_data_to_sql_statement(laskeyname)
    finally:
        # Always drop the (large) LAS download, even if the conversion failed.
        cleanup_las_data(laskeyname)
    print(row_count)

    keysequencenum = laskeyname.split('.')[0] #get the #### of the file "####.las"
    sqlfilename = f"{keysequencenum}.sql"
    uploaded = upload_las_data_to_s3_bucket(sqlfilename, "additional-test-datasets", "bldg-height/las-sql-statements")
    if uploaded:
        cleanup_sql_file(sqlfilename)
    else:
        # Keep the only copy of the generated SQL so the upload can be retried.
        print(f"Upload failed for {sqlfilename}; keeping ./lasdata/{sqlfilename}")
    


  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
