### Convert a RayCloudTools tree file into a TreeQSM-like table: each segment gets a start point, an end point and a direction vector.

In [46]:
import pandas as pd
import math

In [47]:
# Define the input files
# NOTE(review): absolute, machine-specific path; point this at your own treeinfo output
# before running. Kept as a plain str because a later cell derives the output CSV path
# from this string.
treefile = "/home/capheus/projects/forest_info.txt"

In [48]:
##
# Load the treeinfo file to a Pandas DataFrame
##


def _is_number(text):
    """Return True when ``text`` parses as a float (accepts signs, decimals and exponents)."""
    try:
        float(text)
    except ValueError:
        return False
    return True


def treeinfo_attributes_segment(tree_file):
    """
    Extracts per-segment attributes of a tree file generated using treeinfo and returns a DataFrame.
    Can be run on a 'forest' or single treefile.

    Every line is split on ", " into sections and every section on "," into fields:
      * a section with exactly 7 numeric fields is treated as the start of a new tree
        and increments the running tree ID;
      * a section with more than 7 fields is treated as segment data. The first such
        section seen supplies the column names; all later ones are data rows.

    Parameters:
    tree_file (str): The path to the treefile created using treeinfo.

    Returns:
    pandas.DataFrame: A DataFrame containing segment attributes (as floats) with a leading
    'tree_id' column. Rows whose parent_id is -1 are dropped; the index is NOT reset, so
    index labels still reflect each row's position before that filtering.

    Raises:
    ValueError: If no segment header is found, or if segment rows appear before any
    tree-attribute section (so they cannot be given a tree ID).
    """
    line_list = []
    tree_ids = []
    tree_id = 0

    with open(tree_file, "r") as file:
        # Stream the file instead of materialising every line up front.
        for line in file:
            for section in line.split(", "):
                cell_data = section.strip().split(",")
                # float() parsing (rather than str.isdigit) keeps negative or
                # exponent-formatted tree attributes from being missed, which would
                # otherwise merge this tree's segments into the previous tree.
                if len(cell_data) == 7 and all(_is_number(x) for x in cell_data):
                    tree_id += 1
                if len(cell_data) > 7:
                    # tree_id is still 0 only for the header section of column names.
                    if tree_id != 0:
                        tree_ids.append(tree_id)
                    line_list.append(cell_data)

    if not line_list:
        raise ValueError(f"No segment header found in tree file: {tree_file}")

    header, rows = line_list[0], line_list[1:]
    if len(tree_ids) != len(rows):
        raise ValueError(
            f"Found {len(rows)} segment rows but {len(tree_ids)} tree IDs in {tree_file}; "
            "segment data appeared before any tree-attribute section."
        )

    df = pd.DataFrame(rows, columns=header).astype(float)
    df.insert(0, "tree_id", tree_ids)
    # remove row where parent_id is -1.0; copy so callers can add columns without
    # pandas raising SettingWithCopyWarning
    df = df[df["parent_id"] != -1.0].copy()
    return df


# Parse the tree file into one row per segment and display the resulting frame.
df = treeinfo_attributes_segment(treefile)
df

Unnamed: 0,tree_id,x,y,z,radius,parent_id,volume,diameter,length,strength,min_strength,dominance,angle,children,branch,branch_order,extension,pos_in_branch,segment_length
1,1,7.1251,4.5507,2.2095,0.1216,0.0,0.1026,0.2432,12.7800,0.0271,0.0271,0.3370,43.2023,2.0,1.0,0.0,2.0,1.0,2.2095
2,1,7.1943,4.2205,4.4881,0.0994,1.0,0.0715,0.1988,10.5705,0.0282,0.0271,0.2275,44.8770,2.0,1.0,0.0,3.0,2.0,2.3034
3,1,7.5282,4.1823,6.0500,0.0779,2.0,0.0305,0.1558,8.2671,0.0300,0.0271,0.3207,44.9834,2.0,1.0,0.0,4.0,3.0,1.5976
4,1,7.9612,4.1033,7.1951,0.0633,3.0,0.0154,0.1266,6.6694,0.0318,0.0271,0.2440,44.8813,2.0,1.0,0.0,5.0,4.0,1.2268
5,1,8.3399,4.2320,8.0717,0.0499,4.0,0.0075,0.0998,5.4427,0.0326,0.0271,0.2085,52.5887,2.0,1.0,0.0,6.0,5.0,0.9635
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5073,20,-7.2599,-10.8551,2.9597,0.0100,145.0,0.0001,0.0200,1.3454,0.0395,0.0329,0.0000,0.0000,1.0,48.0,3.0,147.0,2.0,0.2206
5074,20,-7.2953,-10.9369,3.0470,0.0080,146.0,0.0000,0.0160,1.1248,0.0400,0.0329,0.0000,0.0000,0.0,48.0,3.0,0.0,3.0,0.1248
5075,20,-7.2984,-10.7000,2.9313,0.0077,145.0,0.0000,0.0154,1.1899,0.0367,0.0329,0.0000,0.0000,0.0,54.0,4.0,0.0,0.0,0.1899
5076,20,-7.0008,-10.8257,2.6902,0.0091,144.0,0.0001,0.0182,1.3784,0.0359,0.0329,0.0000,0.0000,1.0,53.0,4.0,150.0,0.0,0.2240


In [49]:
##
# Adjust the start and end coordinates of each segment
##
# Each row's (x, y, z) is used as the END of its segment; the START is taken from the
# (x, y, z) of the parent segment's row.
# The start columns are created as float NaN rather than None so they stay float64;
# any segment whose parent cannot be resolved then carries NaN into later arithmetic
# instead of raising a TypeError on None.
df["start_x"] = float("nan")
df["start_y"] = float("nan")
df["start_z"] = float("nan")

trees = df.groupby("tree_id")

# Iterate over each tree and adjust the start coordinates
for tree_id, tree in trees:
    # The parent with id `pid` is looked up at index label start_idx + (pid - 1),
    # i.e. this assumes each tree's rows are consecutive and ordered by segment id.
    start_idx = tree.index[0]
    for idx, row in tree.iterrows():
        pid = row["parent_id"]
        if pid == 0 or pd.isnull(pid):
            # Root node: start coordinates are the same as current node, but z is adjusted by segment length
            df.at[idx, "start_x"] = row["x"]
            df.at[idx, "start_y"] = row["y"]
            df.at[idx, "start_z"] = row["z"] - row["segment_length"]
            continue

        # Ensure parent_id is an integer
        pid = int(pid)
        parent_idx = start_idx + (pid - 1)
        # Check membership in THIS tree's rows: a label that only exists elsewhere in
        # df would otherwise silently borrow coordinates from a different tree.
        if parent_idx not in tree.index:
            print(
                f"PID: {pid} invalid for tree: {row['tree_id']}, check row {idx} and {parent_idx}"
            )
            continue

        # Retrieve parent's coordinates (x, y, z are not modified by this cell)
        df.at[idx, "start_x"] = tree.at[parent_idx, "x"]
        df.at[idx, "start_y"] = tree.at[parent_idx, "y"]
        df.at[idx, "start_z"] = tree.at[parent_idx, "z"]

# Keep original x, y, z columns and add end_x, end_y, end_z as copies
df["end_x"] = df["x"]
df["end_y"] = df["y"]
df["end_z"] = df["z"]

# Display the updated DataFrame
df

Unnamed: 0,tree_id,x,y,z,radius,parent_id,volume,diameter,length,strength,...,branch_order,extension,pos_in_branch,segment_length,start_x,start_y,start_z,end_x,end_y,end_z
1,1,7.1251,4.5507,2.2095,0.1216,0.0,0.1026,0.2432,12.7800,0.0271,...,0.0,2.0,1.0,2.2095,7.1251,4.5507,0.0,7.1251,4.5507,2.2095
2,1,7.1943,4.2205,4.4881,0.0994,1.0,0.0715,0.1988,10.5705,0.0282,...,0.0,3.0,2.0,2.3034,7.1251,4.5507,2.2095,7.1943,4.2205,4.4881
3,1,7.5282,4.1823,6.0500,0.0779,2.0,0.0305,0.1558,8.2671,0.0300,...,0.0,4.0,3.0,1.5976,7.1943,4.2205,4.4881,7.5282,4.1823,6.0500
4,1,7.9612,4.1033,7.1951,0.0633,3.0,0.0154,0.1266,6.6694,0.0318,...,0.0,5.0,4.0,1.2268,7.5282,4.1823,6.05,7.9612,4.1033,7.1951
5,1,8.3399,4.2320,8.0717,0.0499,4.0,0.0075,0.0998,5.4427,0.0326,...,0.0,6.0,5.0,0.9635,7.9612,4.1033,7.1951,8.3399,4.2320,8.0717
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5073,20,-7.2599,-10.8551,2.9597,0.0100,145.0,0.0001,0.0200,1.3454,0.0395,...,3.0,147.0,2.0,0.2206,-7.1783,-10.7404,2.7898,-7.2599,-10.8551,2.9597
5074,20,-7.2953,-10.9369,3.0470,0.0080,146.0,0.0000,0.0160,1.1248,0.0400,...,3.0,0.0,3.0,0.1248,-7.2599,-10.8551,2.9597,-7.2953,-10.9369,3.0470
5075,20,-7.2984,-10.7000,2.9313,0.0077,145.0,0.0000,0.0154,1.1899,0.0367,...,4.0,0.0,0.0,0.1899,-7.1783,-10.7404,2.7898,-7.2984,-10.7000,2.9313
5076,20,-7.0008,-10.8257,2.6902,0.0091,144.0,0.0001,0.0182,1.3784,0.0359,...,4.0,150.0,0.0,0.2240,-7.0531,-10.6439,2.5703,-7.0008,-10.8257,2.6902


In [50]:
##
# Calculate the direction vector for each segment
##
def calculate_direction_vector(start_point, end_point):
    """
    Build the unit vector pointing from start_point towards end_point in 3D.

    A zero-length difference is returned unchanged (all zeros) instead of being
    divided by its magnitude.

    :param start_point: Indexable (x, y, z) coordinates of the segment start
    :param end_point: Indexable (x, y, z) coordinates of the segment end
    :return: Tuple (dx, dy, dz) of the normalized components, each rounded to 5 decimals
    """
    # Component-wise difference, coerced to plain floats
    delta = [float(end_point[axis] - start_point[axis]) for axis in range(3)]

    # Euclidean length of the difference
    norm = math.sqrt(sum(component**2 for component in delta))
    if norm != 0:
        delta = [component / norm for component in delta]

    # Fixed 5-decimal precision keeps the output consistent
    return tuple(round(component, 5) for component in delta)


# Compute one direction vector per segment row, then spread the (dx, dy, dz) tuples
# over three new columns aligned on the existing index
def _row_direction(segment):
    """Direction vector of a single segment row, from its start point to its end point."""
    start = (segment["start_x"], segment["start_y"], segment["start_z"])
    end = (segment["end_x"], segment["end_y"], segment["end_z"])
    return calculate_direction_vector(start, end)


direction_rows = df.apply(_row_direction, axis=1).tolist()
df[["dx", "dy", "dz"]] = pd.DataFrame(direction_rows, index=df.index)
df

Unnamed: 0,tree_id,x,y,z,radius,parent_id,volume,diameter,length,strength,...,segment_length,start_x,start_y,start_z,end_x,end_y,end_z,dx,dy,dz
1,1,7.1251,4.5507,2.2095,0.1216,0.0,0.1026,0.2432,12.7800,0.0271,...,2.2095,7.1251,4.5507,0.0,7.1251,4.5507,2.2095,0.00000,0.00000,1.00000
2,1,7.1943,4.2205,4.4881,0.0994,1.0,0.0715,0.1988,10.5705,0.0282,...,2.3034,7.1251,4.5507,2.2095,7.1943,4.2205,4.4881,0.03004,-0.14335,0.98922
3,1,7.5282,4.1823,6.0500,0.0779,2.0,0.0305,0.1558,8.2671,0.0300,...,1.5976,7.1943,4.2205,4.4881,7.5282,4.1823,6.0500,0.20899,-0.02391,0.97762
4,1,7.9612,4.1033,7.1951,0.0633,3.0,0.0154,0.1266,6.6694,0.0318,...,1.2268,7.5282,4.1823,6.05,7.9612,4.1033,7.1951,0.35296,-0.06440,0.93342
5,1,8.3399,4.2320,8.0717,0.0499,4.0,0.0075,0.0998,5.4427,0.0326,...,0.9635,7.9612,4.1033,7.1951,8.3399,4.2320,8.0717,0.39303,0.13357,0.90977
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5073,20,-7.2599,-10.8551,2.9597,0.0100,145.0,0.0001,0.0200,1.3454,0.0395,...,0.2206,-7.1783,-10.7404,2.7898,-7.2599,-10.8551,2.9597,-0.36984,-0.51986,0.77004
5074,20,-7.2953,-10.9369,3.0470,0.0080,146.0,0.0000,0.0160,1.1248,0.0400,...,0.1248,-7.2599,-10.8551,2.9597,-7.2953,-10.9369,3.0470,-0.28374,-0.65565,0.69973
5075,20,-7.2984,-10.7000,2.9313,0.0077,145.0,0.0000,0.0154,1.1899,0.0367,...,0.1899,-7.1783,-10.7404,2.7898,-7.2984,-10.7000,2.9313,-0.63229,0.21270,0.74496
5076,20,-7.0008,-10.8257,2.6902,0.0091,144.0,0.0001,0.0182,1.3784,0.0359,...,0.2240,-7.0531,-10.6439,2.5703,-7.0008,-10.8257,2.6902,0.23351,-0.81172,0.53534


In [51]:
# Write the DataFrame to a CSV file with these columns
fields_to_keep = [
    "tree_id",
    "parent_id",
    "radius",
    "segment_length",
    "start_x",
    "start_y",
    "start_z",
    "end_x",
    "end_y",
    "end_z",
    "dx",
    "dy",
    "dz",
    "extension",
    "branch",
    "branch_order",
    "pos_in_branch",
]

# Derive the output path from the input path. Only a trailing ".txt" is stripped, so a
# ".txt" elsewhere in the path is left alone, and an input without that suffix still
# gets a distinct "<input>_parsed.csv" name instead of overwriting the input file.
input_suffix = ".txt"
if treefile.endswith(input_suffix):
    output_stem = treefile[: -len(input_suffix)]
else:
    output_stem = treefile
output_file = output_stem + "_parsed.csv"
df[fields_to_keep].to_csv(output_file, index=False)