In [None]:
import time
import boto3

# Resolve the AWS credentials of the default boto3 session and package them,
# together with the region, into the `s3_config` dict that is later handed to
# CloudObject.from_s3(..., s3_config=s3_config).
session = boto3.Session()
credentials = session.get_credentials()
if credentials is None:
    # Without this guard the failure is an opaque
    # "'NoneType' object has no attribute 'get_frozen_credentials'".
    raise RuntimeError(
        "No AWS credentials were found for the default boto3 session. "
        "Configure them (environment variables, ~/.aws/credentials or an IAM role) "
        "and re-run this cell."
    )

# Take a snapshot so access key, secret key and token are read consistently.
creds = credentials.get_frozen_credentials()

s3_config = {
    "credentials": {
        "AccessKeyId": creds.access_key,
        "SecretAccessKey": creds.secret_key,
        # NOTE(review): presumably None for long-lived (non-temporary) keys -- confirm
        # the consumer of s3_config accepts that.
        "SessionToken": creds.token,
    },
    "region_name": "us-east-1",
}

In [None]:
# Type your personal ID when prompted; it is appended to "scipy-tutorial-" to
# form the name of the bucket used by every cell below.
my_id = input("Enter your ID: ").strip()  # strip stray spaces/newlines from copy-paste
if not my_id:
    # An empty ID would silently yield the bucket name "scipy-tutorial-" and
    # only fail later inside an S3 call.
    raise ValueError("An ID is required to build the bucket name; re-run this cell.")
print("Your ID is", my_id)
bucket = "scipy-tutorial-" + my_id

In [None]:
# Upload data: copy the tutorial point clouds from the shared bucket into the
# personal bucket (S3-to-S3 copy, nothing is downloaded locally).
from lithops.storage import Storage

st = Storage()
s3 = boto3.Session().client('s3')

source_bucket = "scipy-tutorial-data"

# Order matters: the loop variable `filename` stays bound to the last entry
# ("IN2020_26550965_12.las") after the loop, exactly as before this cell was
# rewritten as a loop.
for filename in ("IN2020_26550965_12.copc.las", "IN2020_26550965_12.las"):
    s3.copy_object(
        CopySource={'Bucket': source_bucket, 'Key': filename},
        Bucket=bucket,  # same value as f"scipy-tutorial-{my_id}", defined with my_id
        Key=filename,
    )

In [None]:
# First method: Static partitioning
# Download file from S3 -> Split it into chunks -> Re-upload the chunks to S3

import laspy

# Pin the input explicitly instead of relying on whatever `filename` was last
# set to by another cell (the later cells rebind it, e.g. to the COPC file).
filename = "IN2020_26550965_12.las"
local_path = "/work/data/" + filename

output_paths = ["part1.las", "part2.las", "part3.las", "part4.las"]
num_parts = len(output_paths)  # number of partitions follows the output list

# The timed region covers download, split and upload.
start_m1 = time.time()

st.download_file(bucket=bucket, key=filename, file_name=local_path)

# Load file
las = laspy.read(local_path)
points = las.points
total_points = len(points)
chunk_size = total_points // num_parts

# Generate partitions of the file: contiguous, equally sized runs of points
for i, output_path in enumerate(output_paths):
    start = i * chunk_size
    # The last partition also takes the remainder left by the integer division.
    end = total_points if i == num_parts - 1 else start + chunk_size
    new_las = laspy.create(point_format=las.header.point_format, file_version=las.header.version)
    new_las.points = points[start:end]
    new_las.write(output_path)

# Upload the partitioned LAS files back to the storage bucket
for output_path in output_paths:
    st.upload_file(file_name=output_path, bucket=bucket, key="static_las/" + output_path)

end_m1 = time.time()

# Storage footprint of this method: the original object plus every partition.
size_m1 = int(st.head_object(bucket, filename)["content-length"])
size_m1 += sum(
    int(st.head_object(bucket, "static_las/" + output_path)["content-length"])
    for output_path in output_paths
)

print("M1 preprocessing time is", end_m1-start_m1, "s")
print("M1 size is", f"{size_m1 / (1024**2):.2f}", "MB")


In [None]:
# Second method: Dataplug on the plain LAS file
# Gives cloud-optimized access to a format that is not cloud-optimized itself
from dataplug import CloudObject
from dataplug.formats.geospatial.laspc import LiDARPointCloud, square_split_strategy

filename = "IN2020_26550965_12.las"
las_uri = "s3://" + bucket + "/" + filename
co = CloudObject.from_s3(LiDARPointCloud, las_uri, s3_config=s3_config)

# Only the preprocessing call is timed.
start_m2 = time.time()
co.preprocess(force=True)
end_m2 = time.time()

# Footprint of this method: the original object plus two objects read from the
# "<bucket>.meta" bucket (presumably written by preprocess -- confirm).
meta_bucket = bucket + ".meta"
measured_objects = (
    (bucket, filename),
    (meta_bucket, filename),
    (meta_bucket, filename + ".attrs"),
)
size_m2 = sum(
    int(st.head_object(obj_bucket, obj_key)["content-length"])
    for obj_bucket, obj_key in measured_objects
)

elapsed_m2 = end_m2 - start_m2
size_m2_mb = size_m2 / (1024**2)
print("M2 preprocessing time is", elapsed_m2, "s")
print("M2 size is", format(size_m2_mb, ".2f"), "MB")

# Ask for 4 partitions using the strategy imported from the laspc module.
data_slices = co.partition(square_split_strategy, num_chunks=4)

In [None]:
# Third method: Dataplug on the COPC file
# Uses the Dataplug API to get partitions of the cloud-optimized data seamlessly
import time
import boto3
import laspy

from dataplug import CloudObject
from dataplug.formats.geospatial.copc import CloudOptimizedPointCloud, square_split_strategy

filename = "IN2020_26550965_12.copc.las"
copc_uri = "s3://" + bucket + "/" + filename
co = CloudObject.from_s3(CloudOptimizedPointCloud, copc_uri, s3_config=s3_config)

# Only the preprocessing call is timed.
start_m3 = time.time()
co.preprocess(force=True)
end_m3 = time.time()

# Footprint of this method: only the COPC object itself is measured.
copc_head = st.head_object(bucket, filename)
size_m3 = int(copc_head["content-length"])

elapsed_m3 = end_m3 - start_m3
size_m3_mb = size_m3 / (1024**2)
print("M3 preprocessing time is", elapsed_m3, "s")
print("M3 size is", format(size_m3_mb, ".2f"), "MB")

# Ask for 9 partitions using the strategy imported from the copc module.
slices = co.partition(square_split_strategy, num_chunks=9)