# Generic Functions

In [None]:
def split_path_and_fileName(strPathName):
    """
    Split a '/'-separated string into its directory part and its file name.

    The split happens at the last '/' found in the string.
    :INPUT:
        strPathName: str with the path and name of a file. ex: /data/pointcloud.txt
    :OUTPUT:
        tuple (directory, fileName), ex: ("/data", "pointcloud.txt")
        - The directory carries no trailing '/', except for the root directory
          which is kept as "/" (ex: "/pointcloud.txt" -> ("/", "pointcloud.txt"))
        - When the string holds no '/', the directory is the empty string
          (ex: "pointcloud.txt" -> ("", "pointcloud.txt")), so callers can always
          index or unpack the result
    """
    directory, separator, fileName = strPathName.rpartition('/')
    if(separator and not directory):
        # The only '/' is the leading one: keep the root instead of dropping it
        directory = separator
    return directory, fileName

In [None]:
from sklearn.model_selection import train_test_split
from randlanet.utils.data_prepare_apple_tree import * 
def dataSet(path2files, path2output, model, verbose=False, protocol="field"):
    """
    Prepare every "*.txt" file found in a folder for the selected model.

    The train/test split is currently disabled, so all the files found are
    handled as the "test" set.
    :INPUT:
        path2files : str of the path to the folder of input files
        path2output: str of the path to the output folder (created if missing)
        model      : str, "rdf" runs the external feature extractor; any other
                     value (ex: "rdnet") goes through the RandLA-NET converters
        verbose    : If true, print a few messages about the steps of the code
        protocol   : Type of protocol to handle ; synthetic/field/field_only_xyz
                     (forwarded to the RandLA-NET converters)
    :OUTPUT:
        None. The processed files are written into the output folder
    """
    # Local imports: the function does not depend on which notebook cells ran before
    import glob
    import os
    import shlex

    # NOTE: This segment will be only executed from the notebook
    lstOfFiles = glob.glob(os.path.join(path2files, "*.txt"))
    if(verbose):
        print("Found files: %i " %(len(lstOfFiles)))
    # The split is disabled: nothing goes to the train set, everything goes to the test set
    #X_train, X_test, _,_ = train_test_split(lstOfFiles, range(len(lstOfFiles)), test_size=0.20, random_state=42)
    X_train, X_test = [], lstOfFiles
    if(verbose):
        print(" -> Train set: %i" %len(X_train))
        print(" -> Test set : %i" %len(X_test))
    # Create the directory to keep the test and train sets
    path2initialSplit = path2output
    if(not os.path.isdir(path2initialSplit)):
        os.makedirs(path2initialSplit)  # makedirs: the output folder may be nested
    path2saveData = path2initialSplit
    # Only the "test" set exists while the split is disabled; a ("train", X_train)
    # pair can be appended here if the split is enabled again
    for folderName, fileList in [("test", X_test)]:
        output2wrt = os.path.join(path2saveData, folderName)
        if(fileList and not os.path.isdir(output2wrt)):
            os.makedirs(output2wrt)
            print("Folder was created: %s" %output2wrt)
        for file2feature in fileList:
            fileName = os.path.basename(file2feature)
            print("-> Loading: %s" %fileName)
            if(model == "rdf"):
                file2wrt = os.path.join(output2wrt, fileName)
                # NOTE: If you change the position or the name of the feature generator change the
                # next string "cmd2feature" [execution command]
                # The paths are quoted so that spaces or shell characters do not break the command
                cmd2features = "./pcl/build/my_feature %s %.3f %s %s" %("fpfh",                      # Feature extractor
                                                                        0.025,                       # Grid size
                                                                        shlex.quote(file2feature),   # Input File
                                                                        shlex.quote(file2wrt))       # Output File
                print(" -> Running feature extractor")
                os.system(cmd2features)
            else: # RandLA-NET
                if(folderName=="test"):
                    convert_for_test(file2feature, path2saveData, grid_size=0.001, protocol=protocol)
                else:
                    convert_for_training(file2feature, None, path2saveData, grid_size=0.001, protocol=protocol)

# Predictions

## RandLA-NET

In [None]:
import sys 
import os
from randlanet.main_apple_tree import *

In [None]:
# Folder with the data to predict
path2data_rnet = "data/annotated_apples_noRadiometric/"
# Trained model used for the prediction
path2model_rnet = "data/trained_model_randlanet/snapshots_only_xyz/snap-9001"
# Set to True when the data is already prepared (the dataSet conversion step is then skipped)
dataReady = False

In [None]:
# Report whether the data path and the model path exist as directories
print("-> Input path: %s" %("OK" if os.path.isdir(path2data_rnet) else "Not found"))
print("-> Model path: %s" %("OK" if os.path.isdir(path2model_rnet) else "Not found"))

In [None]:
# Convert the input files in place (input and output folder are the same) unless
# the data was flagged as ready
if not dataReady:
    dataSet(path2files=path2data_rnet,
            path2output=path2data_rnet,
            model="rnet",
            verbose=False,
            protocol="field_only_xyz")

In [None]:
# Arguments for the model
# NOTE: Ensure that the subsampling points in the training are the same for the prediction[test]
param = dict(
    gpu=0,                        # -1 no GPU
    model_path=path2model_rnet,
    path2data=path2data_rnet,
    path2output="./",             # This arg only works to save the training
    protocol="field_only_xyz",
    trainFromCHK=False,
)

In [None]:
# Call randlanet_predict with the arguments gathered in the `param` dictionary
randlanet_predict(param)

In [None]:
# Merge labels 
# NOTE: RandLA-NET writes the probabilities and the labels of each point cloud to different files.
# To visualize the classification, the predicted classes and the point cloud are merged.
from randlanet.utils.merge_label_apple import *

In [None]:
path2prediction = "test/Log_XXXXX/predictions/" # Placeholder: the name of the "Log_XXXXX" folder
                                                # changes with the date
# NOTE(review): path2prediction is not used below; the merge call receives the literal
# "./test/" as its second argument instead. Confirm which folder merge_pointCloudAndLabels expects
path2data = os.path.join(param["path2data"],"test/")   # "test/" folder inside the data path
OutputPath = os.path.join(path2data, "merged/")         # "merged/" folder inside that "test/" folder

merge_pointCloudAndLabels(path2data, "./test/", OutputPath)

## Random Forest 

In [None]:
from machine_learning.predict import predict 
from machine_learning.RFClassifier import RFClassifier

In [None]:
# Classifier class and the file with its saved weights
model, model_weights = RFClassifier, "data/example2notebook_realdata/model_rf.sav"
# Folder with the data to predict; the "prediction/" folder is placed inside it
path2data = "data/example2notebook_realdata/dataToRDF/test/"
OutputPath = os.path.join(path2data, "prediction/")

In [None]:
# The last argument is OutputPath (the "prediction/" folder inside path2data);
# the name `path2output` is not defined at the notebook level
predict(model, model_weights, path2data, OutputPath)

# Clustering 

In [None]:
import os 
import glob
import numpy as np 
import sklearn.cluster
from post_processing.algorithm import clustering 

In [None]:
# Cluster every "*.txt" file found in OutputPath and write the results in "clusters/"
files_annApples = glob.glob(os.path.join(OutputPath,"*.txt"))
path2wrt = os.path.join(OutputPath,"clusters/")

# Create the folder of the clusters when it is missing
if(not os.path.isdir(path2wrt)):
    os.makedirs(path2wrt)

eps, minSamples = 0.1, 20 # 0.4, 20 works but consumes a lot of memory

print("Found annotated files: %i" %(len(files_annApples)))

for idx, file2clustering in enumerate(files_annApples, start=1):
    actualFileName = os.path.basename(file2clustering)
    # Progress is reported as [current file/total of files]
    print("-> Loading[%i/%i]: %s" %(idx, len(files_annApples), actualFileName))
    pointCloud2cluster = np.loadtxt(file2clustering)
    cluster = clustering(pointCloud2cluster, minSamples, eps)
    print(" -> The file will be written in: %s" %path2wrt)
    np.savetxt(os.path.join(path2wrt, actualFileName), cluster)# The cluster is in the last column of the file