# Doing Point cloud data extraction and token attribution (Circum tutorial) for the point cloud dataset of noto region of Japan.

This are tutorials for analysing the pointcloud dataset released by the geospatial organisation of Japan for the region of Noto Peninsula before the earthquake.

This will demonstrate our E2E pipeline for the users providing how they can
- Parse the various tiles from the dataset and get the initial metadata of the pointcloud.
- Perform the preprocessing steps to get the pointcloud in a format that can be used for further steps processing.
- After cropping and generating the new lidar file for doing the tiling process (optional).
- And finally attributing the tokens in for data providers for their contribution to the protocol.



In [20]:
# installing all the requisite dependencies of the data_preparation project.
%pip install  -r ../requirements.txt
## installing the dependencies of the protocol.
!pip install flytekit bacalhau-sdk flytekitplugins-bacalhau 

## for storage clients, we provide the intigeration with IPFS storage using lighthouse or the web3.storage
!pip install lighthouseweb3

## for the web3 storage:
!npm install -g @web3-storage/w3-cli 
## then storing the necessary env variables to the bashrc in order to run the w3cli without the need of creating alias.
#!cat WEB3_STORAGE_ENV >> ~/.bashrc
!w3 space create circum_user_database

Collecting protobuf!=3.20.0,<=3.20.3
  Using cached protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
Collecting botocore==1.34.34
  Using cached botocore-1.34.34-py3-none-any.whl (11.9 MB)
Collecting colorama<0.4.5,>=0.2.5
  Using cached colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting urllib3<2.1,>=1.25.4
  Downloading urllib3-2.0.7-py3-none-any.whl (124 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.2/124.2 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: urllib3, protobuf, colorama, botocore
  Attempting uninstall: urllib3
    Found existing installation: urllib3 2.2.0
    Uninstalling urllib3-2.2.0:
      Successfully uninstalled urllib3-2.2.0
  Attempting uninstall: protobuf
    Found existing installation: protobuf 4.25.2
    Uninstalling protobuf-4.25.2:
      Successfully uninstalled protobuf-4.25.2
  Attempting uninstall: colorama
    Found existing installation: colorama 0.4.6
    Uni

In [21]:
import laspy
import numpy as np
import pandas as pd
import os
from subprocess import call
params_list = []

## downloading the 

def read_data_header(base_dir = '../datas/'):
    ## downloading the dataset
    if not os.path.exists(base_dir):
        call(["wget", "https://gic-ishikawa.s3.ap-northeast-1.amazonaws.com/2024/noto/las/notoeast1.zip", "-O", "japan_ne.zip"])
        call(["7z",  "-x" , "japan_ne.zip"])
        call(["rm", "japan_ne.zip"])
    
    for it in os.listdir(base_dir):
        try:
            file_path = os.path.join(base_dir, it)
            read_file = laspy.read(file_path)
            dimensions = (str(read_file.header.x_max), str(read_file.header.x_min) ,str(read_file.header.y_max) ,str(read_file.header.y_min), str(read_file.header.z_max) ,str(read_file.header.z_min) )
            point_amount = (str(read_file.header.point_count))
            maxs_mins = (str(read_file.header.maxs), str(read_file.header.mins))
            scale = (str(read_file.header.scale[0]))
            point_dimensions = (read_file.point_format.dimensions)  
            #user_data = read_file['user_data']          
            
            params_list.append({
            "filename": it,
            "dimensions": dimensions,
            "point_amount": point_amount,
            "maxs_mins": maxs_mins,
            "scale": scale,
            "point_dimensions": point_dimensions,
            })    
            
            # print('dimensions' + ' ' + str(read_file.header.x_max) + ' ' + str(read_file.header.x_min) + ' ' + str(read_file.header.y_max) + ' ' + str(read_file.header.y_min))
            # print('number of points' + ' ' + str(read_file.header.point_count))
            # print('header maxs and mins ' + '...' + str(read_file.header.maxs) + '...' + str(read_file.header.mins))
            # print('scale:' + str(read_file.header.scale[0]))
            # print('offset:' + str(read_file.header.offset[0]))
            # print('crs code:' + str(read_file))
            # print ('dimensions:' + str(read_file.point_format.dimensions))
        except Exception as e:
            print( f"error in getting values for file {it}: " +str(e))
    df = pd.DataFrame(params_list)
    return df
    
print(read_data_header())

error in getting values for file 07ED5834.las: buffer size must be a multiple of element size
error in getting values for file japan_ne.zip: Invalid file signature "b'PK\x03\x04'"
error in getting values for file bunny.pcd: Invalid file signature "b'# .P'"
        filename                                         dimensions  \
0   07ED4942.las  (-0.01, -1000.0, 166499.99, 165750.0, 242.1400...   
1   07ED5814.las  (-6000.01, -7000.0, 164249.99, 163500.0, 197.6...   
2   07ED4933.las  (-3000.01, -4000.0, 165749.99, 165000.0, 183.0...   
3   07ED5821.las  (-5000.01, -6000.0, 164999.99, 164250.0, 123.2...   
4   07ED4844.las  (-4000.01, -5000.0, 165749.99, 165000.0, 91.42...   
5   07ED5824.las  (-4000.01, -5000.0, 164249.99, 163500.0, 329.9...   
6   07ED5822.las  (-4000.01, -5000.0, 164999.99, 164250.0, 254.1...   
7   07ED4934.las  (-2000.01, -3000.0, 165749.99, 165000.0, 274.5...   
8   07ED5832.las  (-6000.01, -7000.0, 163499.99, 162750.0, 262.3...   
9   07ED5831.las  (-7000.01, -800

In [28]:
## functions for operating on las file.
 
def parameters_header(lidar_file_path = "../datas/07ED4844.las"):
    """
    This function reads the las file header and returns the parameters as a dictionary
    """
    params = []
    with laspy.open(lidar_file_path) as fh:
        print(f"file details: major-version: {fh.header.major_version}")
        params = {
            
                "x_min" : fh.header.x_min,
                "x_max" : fh.header.x_max,
                "x_scale" : fh.header.x_scale,
                "x_offset" : fh.header.x_offset,
                "y_min" : fh.header.y_min,
                "y_max" : fh.header.y_max,
                "y_scale" : fh.header.y_scale,
                "y_offset" : fh.header.y_offset,
                "z_min" : fh.header.z_min,
                "z_max" : fh.header.z_max,
                "z_scale" : fh.header.z_scale,
                "z_offset" : fh.header.z_offset,
            }
        
        return pd.DataFrame(params, index=[0])

df_init = parameters_header()

print("-----------------------------------------------------")
print(f"scaling values:  {df_init['x_scale'][0], df_init['y_scale'][0], df_init['z_scale'][0]}")
def scaling_parameters(x,y,z, df_init):
    """
    fetches all the sides of the X,Y,Z axis and returns them as a dictionary
    """    
     
    #if (x + df_init['x_scale'][0]) > df_init['']     
    X_coord = (x * df_init['x_scale'][0]) + df_init['x_offset'][0],
    Z_coord = (z * df_init["z_scale"][0]) + df_init["z_offset"][0],
    Y_coord= (y * df_init["y_scale"][0]) + df_init["y_offset"][0],
    return X_coord,Y_coord,Z_coord


def determine_correct_indices(lidar_file_path):
    """
    defines the indices that are not valid for the point cloud.
    """
    with laspy.open(lidar_file_path) as read_file:
        ## define the current in-valid values for the parameters:
        X_invalid = (read_file.header.mins[0] > abs(df_init['x_max'][0])) | (read_file.header.maxs[0] < df_init['x_min'][0])
        Y_invalid = (read_file.header.mins[1] > abs(df_init['y_max'][0])) | (read_file.header.maxs[1] < df_init['y_min'][0])
        Z_invalid = (read_file.header.mins[2] > abs(df_init['z_max'][0])) | (read_file.header.maxs[2] < df_init['z_min'][0])
        bad_indices = np.where(X_invalid | Y_invalid | Z_invalid)

    print(bad_indices)


print("example of scaling of the coordinates: " + str(scaling_parameters(35.447227, 136.756165,100, df_init)))

file details: major-version: 1
-----------------------------------------------------
scaling values:  (0.01, 0.01, 0.01)
example of scaling of the coordinates: ((0.35447227,), (1.36756165,), (1.0,))


## Now running the cropping step:

In [30]:
import open3d as o3d
import geopandas as gpd
from shapely.geometry import Point  
from pyproj import Transformer
import sys
from geopandas import GeoDataFrame, points_from_xy
sys.path.append("../../../")
from data_preparation.llm.pipeline_generation import  PDAL_template_manual
from subprocess import  check_call

Points1 = Point(float(37.124), float(136.54), float(1.0))
Points2 = Point(float(38.124), float(137.54), float(1.0))
transformer = Transformer.from_crs('EPSG:4326','EPSG:6685') ## for GPS coordiantes to japan coordinates
lidar_file_path = "../datas/07ED4844.las"

## taking exampl of the points that are considered as boundation points 
X1,Y1,Z1 = transformer.transform(Points1.x, Points1.y, Points1.z)
X2,Y2,Z2 = transformer.transform(Points2.x, Points2.y, Points2.z)

## scaling to thelocal coordinates
X1_scaled, Y1_scaled, Z1_scaled = scaling_parameters(X1,Y1,Z1,df_init)
X2_scaled, Y2_scaled, Z2_scaled = scaling_parameters(X2,Y2,Z2,df_init)


buffer_radius = 0.3

print(f"finally scaled params: X_Scaled: {X1_scaled} to {X2_scaled}, Y_scaled:  {Y1_scaled} to {Y2_scaled} , Z_scaled: {Z1_scaled} to {Z2_scaled} ")
print("so now applying the cropping technique  using PDAL")
fileInfo = laspy.read(lidar_file_path)


# async def pdal_pipeline_processing(filepath, point_x, point_y, point_z, buffer_radius, final_laz_file):
#     try:
#         open_ai = PDAL_json_generation_template()
#         open_ai.define_assistant_parameter("pdal_generator", "transform_cropping.json")
#         await open_ai.creating_message_thread(f"i want you to create a json file (named transform_cropping.json) that converts the given laz file stored in {filepath} by cropping the section with center as {Point(point_x,point_y,point_z)} and radius of {buffer_radius}, having the final transformed filename as {final_laz_file}")
#         print("the application is finally generated: " + os.path.join('.', "transform_cropping.json"))
#         assert os.path.isfile("transform_cropping.json") is True
#         check_call(["pdal", "pipeline", "transform_cropping.json"])
#         assert os.path.isfile("cropped.laz") is True
#     except Exception as e:
#         print("under the pdal_pipeline_processing, the following error:  " + str(e))
    

def generate_crop_pipeline():
    manual_template = PDAL_template_manual()
    lidar_file_path = "../datas/07ED4844.las"
    points = "POINT({}{}{})".format(X1_scaled,Y1_scaled,Z1_scaled)
    manual_template.generate_cropping_template(lidar_file_path,points,10,"demo_crop.json")
    
        




if __name__ == "__main__":
    #await pdal_pipeline_processing(lidar_file_path, X1_scaled, Y1_scaled, Z1_scaled, buffer_radius, "cropped.laz")
    generate_crop_pipeline()

ImportError: cannot import name 'PDAL_template_manual' from 'data_preparation.llm.pipeline_generation' (/home/ubuntu/app/version-2-geospatial-pipelines/geospatial-pipelines/packages/data_preparation/test/../../data_preparation/llm/pipeline_generation.py)

## Now comes the reconstruction step(optional) : 

In the gepspatial pipeline, we rarely get a well defined pointcloud from the start. thus the pointcloud needs to be reconstructed using the reconstruction algorithms. below we will be giving an example of the how we run the decentralised compute over data platform in order to schedule the reconstruction of the pointcloud. this process at this instance will be simulated on the top of bacalhau public network where the nodes are managed on no cost by the parent company expanso, but in the circum protocol we will be providing the custom network of nodes based on the requirements of the user regarding the nodes, they will be able too bid for the compute job along with the description of the  





In [4]:
from flytekit import workflow, task, dynamic, kwtypes
from bacalhau_apiclient.models import Spec, JobSpecDocker, publisher_spec
from flytekitplugins.bacalhau import BacalhauTask
from subprocess import check_call
from dataclasses import dataclass
from pathlib import Path
from lighthouseweb3 import Lighthouse
bacalhau_reconstruction_job = BacalhauTask(
    name='running surface_reconstruction_job',
    inputs=kwtypes(
        spec=dict,
        api_version=str
    ),
)


store_result = BacalhauTask(
    name='store_ipfs_result',
    inputs=kwtypes(
        spec=dict,
        api_version=str
    ),
)

@dataclass
class reconstructionDatasetFile():
    def __init__(self, toy_example, group, filename, scene_type):
        self.group = group
        self.filename = filename
        self.scene_type = scene_type

@task
def surface_reconstruction_job(input_params_job: reconstructionDatasetFile) -> str:
    
    
    try:
        task_1 = bacalhau_reconstruction_job(
            api_version = "v0.1",
            spec = dict(
                engine="Docker",
                verifier="Noop",
                publisher_spec= {"type": "IPFS"},
                docker= JobSpecDocker(
                    image="devextralabs/neuralangelo:0.1",
                    entrypoint=[input_params_job.filename, input_params_job.scene_type, input_params_job.group],
                ),
                language= {
                    "job_context": None,
                },
                wasm=None,
                resources=None,
                outputs=[
                    {
                        "storage_source": "IPFS",
                        "name": "outputs",
                        "path": "/outputs",
                    }
                ],
                deal={"concurrrrency":1},
            )
        )   
    except Exception as e:
        print("under the surface_reconstruction_job task, the error is: ", e)

    
    cid = task_1
    path = Path(os.path.join(os.getcwd(), "/ipfs/", cid))
    lh = Lighthouse(token=os.environ.get("WEB3STORAGE_TOKEN"))
    print("uploading the file to the ipfs storage")
    print(lh.upload(path))    
    ## here we use the lighthouse storage sdk , but for the web3storage we can also integrate following the tutorials as defined here: https://web3.storage/docs/quickstart/



executing BacalhauTask with name: running surface_reconstruction_job
executing BacalhauTask with name: store_ipfs_result


AssertionError: Dataclass <class '__main__.reconstructionDatasetFile'> should be decorated with @dataclass_json to be serialized correctly

## Now comes the stage for onchain disbursement of rewards:
- we will be deploying and issuing reward token to the users based on the amount of stake that is provided to the other participants.
- once we get the CID of the rendered dataset, then the token holders can select the amount of the tokens they can stake to the given dataset



In [6]:
!pip install eth-brownie



Collecting eth-brownie
  Downloading eth_brownie-1.20.0-py3-none-any.whl (218 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m218.7/218.7 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting pytest==6.2.5
  Downloading pytest-6.2.5-py3-none-any.whl (280 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.7/280.7 kB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
Collecting vvm==0.1.0
  Downloading vvm-0.1.0-py3-none-any.whl (11 kB)
Collecting web3==6.15.0
  Downloading web3-6.15.0-py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting pygments==2.17.2
  Using cached pygments-2.17.2-py3-none-any.whl (1.2 MB)
Collecting multidict==6.0.5
  Downloading multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (124 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.3/124.3 kB[0m [31m14.6 MB/s[0m eta [36m0:

In [None]:
## first setting up the contracts folder from @geospatial-pipeline/web3 and then compiling the packages

from brownie import accounts
from subprocess import check_call


## compiling the contracts 







 