In [1]:
# imports
import pdal
import json
import os
import time
from math import sqrt


def run_pipe_with_time(pipeline, streaming=False, chunk_size=10000):
    """
    Run a PDAL pipeline and report how long it took.

    Parameters
    ----------
    pipeline
        Object exposing ``streamable``, ``execute()`` and
        ``execute_streaming(chunk_size)`` (e.g. a ``pdal.Pipeline``).
    streaming : bool, default False
        Request streaming execution. It is only honoured when
        ``pipeline.streamable`` is truthy; otherwise the pipeline is run
        with ``execute()`` and a fallback notice is printed.
    chunk_size : int, default 10000
        Forwarded to ``execute_streaming`` when streaming is used.

    Returns
    -------
    None
        The point count and elapsed time are printed, not returned.
    """
    # Decide the real execution mode up front so the messages match what runs.
    # ``pipeline.streamable`` is only consulted when streaming was requested.
    use_streaming = bool(streaming and pipeline.streamable)

    if use_streaming:
        print("Starting PDAL pipeline execution in streaming mode...")
    else:
        if streaming:
            print("Pipeline is not streamable; falling back to standard execution.")
        print("Starting PDAL pipeline execution...")

    # perf_counter is monotonic, so the measurement is unaffected by system
    # clock adjustments during long runs.
    start_time = time.perf_counter()
    if use_streaming:
        num_points = pipeline.execute_streaming(chunk_size)
    else:
        num_points = pipeline.execute()
    elapsed = time.perf_counter() - start_time

    # print(pipeline.log)
    print(f"Pipeline execution complete. Processed {num_points} points in {elapsed:.2f} seconds.")

# Point cloud used as input by the cells in this notebook; derived rasters are
# written to os.path.dirname(input_file).
# Built with os.path.join so the directory part is recognised on every platform
# (a literal "\\" is only a path separator on Windows, where the resulting
# string is unchanged).
input_file = os.path.join("output", "processed", "2024-10-04", "lidar_combined.laz")
# input_file = os.path.join("output", "processed", "2025-03-31", "lidar_combined.laz")
# input_file = os.path.join("output", "output_small.laz")

In [None]:
# Write a per-pixel point-count raster at the chosen resolution.

resolution = 0.2
output_file = os.path.join(os.path.dirname(input_file), f"count_small_{resolution}.tif")

# Two stages: read the point cloud, then write the count raster.
reader_stage = {"type": "readers.las", "filename": input_file}
count_writer_stage = {
    "type": "writers.gdal",
    "filename": output_file,
    "resolution": resolution,
    "output_type": "count",
}
pipeline = {"pipeline": [reader_stage, count_writer_stage]}

# Show the pipeline definition (pretty-printed) before running it.
print(json.dumps(pipeline, indent=2))
run_pipe_with_time(pdal.Pipeline(json.dumps(pipeline)), streaming=True)

# Notes from earlier runs.
# file sizes for [0.1,0.2,0.25]
# files = 11
# tiles_per_file = 4
# size_per_file = [31,49,195]
#
# size = [s * tiles_per_file * files for s in size_per_file]
#
# size
# [1364, 2156, 8580]
#
# outcomes:
# all OK on the histograms - pick 0.2 to get a 2GB output raster file
#
# Check the histogram in QGIS

{
  "pipeline": [
    {
      "type": "readers.las",
      "filename": "output\\output_small.laz"
    },
    {
      "type": "writers.gdal",
      "filename": "output\\count_small_0.25.tif",
      "resolution": 0.25,
      "output_type": "count"
    }
  ]
}
Starting PDAL pipeline execution in streaming mode...
Pipeline execution complete. Processed 16707192 points in 43.31 seconds.


[1364, 2156, 8580]

In [None]:
# Estimate the average point spacing of input_file from its point count and
# bounding-box area.
pipeline_json = {
    "pipeline": [
        input_file,
        {
            "type": "filters.stats",
            "dimensions": "X,Y"
        }
    ]
}

# NOTE(review): execute() is the non-streaming path; for very large inputs this
# may be memory-hungry - confirm whether a streaming run exposes the same metadata.
p = pdal.Pipeline(json.dumps(pipeline_json))
p.execute()

metadata = p.metadata
stats = metadata["metadata"]["filters.stats"]["statistic"]
las_bounds = metadata["metadata"]["readers.las"]

# Take the count from the first reported dimension (X or Y, both the same).
count = stats[0]["count"]

# Axis-aligned bounding box from the reader metadata. If the points do not fill
# the whole box, the area is too large and the spacing below is overestimated.
minx, maxx = las_bounds["minx"], las_bounds["maxx"]
miny, maxy = las_bounds["miny"], las_bounds["maxy"]
area_m2 = (maxx - minx) * (maxy - miny)  # assumes coordinates are in metres - TODO confirm CRS units

# Fail with a descriptive message instead of a bare ZeroDivisionError when the
# file is empty or its extent is degenerate (e.g. a single point or one line).
if count <= 0 or area_m2 <= 0:
    raise ValueError(
        f"Cannot estimate point spacing for {input_file}: "
        f"point count={count}, bounding-box area={area_m2}"
    )

# Points per square metre, and the spacing of an equivalent regular grid.
points_per_m2 = count / area_m2
avg_spacing_m = sqrt(1 / points_per_m2)

print(f"Input file: {input_file}")
print(f"Point count: {count:,}")
print(f"Area: {area_m2:.2f} m²")
print(f"Avg point spacing: {avg_spacing_m:.2f} m")

| Avg Point Spacing | Suggested Resolution |
| ----------------- | -------------------- |
| < 0.1 m           | 0.1 m                |
| \~0.2–0.5 m       | 0.25–0.5 m           |
| \~1.0 m           | 1.0 m                |
| \~2.0 m           | 2.0 m                |


In [2]:
# Produce the DSM and DTM rasters from one pipeline run over input_file.
resolution = 0.2

raster_dir = os.path.dirname(input_file)

pipeline = {
    "pipeline": [
        {"type": "readers.las", "filename": input_file},
        # DSM: maximum value per pixel, written before any filtering is applied
        {
            "type": "writers.gdal",
            "filename": os.path.join(raster_dir, f"dsm_{resolution}.tif"),
            "resolution": resolution,
            "output_type": "max",
        },
        # from here on keep only ground returns (Class 2)
        {
            "type": "filters.range",
            "limits": "Classification[2:2]",
        },
        # DTM: minimum value per pixel of the filtered points
        {
            "type": "writers.gdal",
            "filename": os.path.join(raster_dir, f"dtm_{resolution}.tif"),
            "resolution": resolution,
            "output_type": "min",
        },
    ],
    # NOTE(review): a top-level "num_threads" key may not be a recognised PDAL
    # pipeline option and could be silently ignored - confirm against PDAL docs.
    "num_threads": 8,
}

print(json.dumps(pipeline, indent=2))
run_pipe_with_time(pdal.Pipeline(json.dumps(pipeline)), streaming=True)

{
  "pipeline": [
    {
      "type": "readers.las",
      "filename": "output\\processed\\2024-10-04\\lidar_combined.laz"
    },
    {
      "type": "writers.gdal",
      "filename": "output\\processed\\2024-10-04\\dsm_0.2.tif",
      "resolution": 0.2,
      "output_type": "max"
    },
    {
      "type": "filters.range",
      "limits": "Classification[2:2]"
    },
    {
      "type": "writers.gdal",
      "filename": "output\\processed\\2024-10-04\\dtm_0.2.tif",
      "resolution": 0.2,
      "output_type": "min"
    }
  ],
  "num_threads": 8
}
Starting PDAL pipeline execution in streaming mode...
Pipeline execution complete. Processed 226308134 points in 16702.46 seconds.


In [3]:
# Build the gdal_fillnodata command that fills nodata cells in the DTM raster.
# Example: gdal_fillnodata.bat output/dtm_small_fill_20_0_0.2.tif output/dtm_small_fill_20_0_0.2.tif -md 20 -b 1 -of GTiff
raster_dir = os.path.dirname(input_file)
dtm = os.path.join(raster_dir, f"dtm_{resolution}.tif")
outfile = os.path.join(raster_dir, f"dtm_filled_{resolution}.tif")

# The command is only printed; the os.system call below is left disabled.
# NOTE(review): the paths are not quoted, so a directory containing spaces
# would break the command if it is pasted into a shell.
command = " ".join(["gdal_fillnodata", dtm, outfile, "-md 20 -b 1 -of GTiff"])
print(command)
# os.system(command)

gdal_fillnodata output\dtm_0.2.tif output\dtm_filled_0.2.tif -md 20 -b 1 -of GTiff


In [4]:

# Assemble the gdal_calc command line that writes the chm raster as
# DSM minus filled DTM. The command is only printed, not executed.
raster_dir = os.path.dirname(input_file)
dsm, dtm, outfile = (
    os.path.join(raster_dir, f"{stem}_{resolution}.tif")
    for stem in ("dsm", "dtm_filled", "chm")
)
# command = f"gdal_calc.py -A {dsm} -B {dtm} --outfile={outfile} --calc=\"A-B\" --NoDataValue=-9999 "
print(dsm)
# The trailing space after -9999 is kept as in the original command string.
command = "gdal_calc -A {a} -B {b} --outfile={out} --calc=\"A-B\" --NoDataValue=-9999 ".format(
    a=dsm, b=dtm, out=outfile
)
print(command)
# os.system(command)


output\dsm_0.2.tif
gdal_calc -A output\dsm_0.2.tif -B output\dtm_filled_0.2.tif --outfile=output\chm_0.2.tif --calc="A-B" --NoDataValue=-9999 
