### Convert a RayCloudTools treefile into a TreeQSM-like layout, giving each segment a start point, an end point and a direction vector.

In [None]:
import math
from pathlib import Path

import pandas as pd

In [None]:
# Path of the input treefile. It is passed to treeinfo_attributes_segment()
# below, and the name of the output CSV is derived from it.
treefile = "example_data/forest_trees.txt"

In [None]:
##
# Load the treeinfo file to a Pandas DataFrame
##


def _is_number(token):
    """Return True when *token* parses as a float (signs and exponents included)."""
    try:
        float(token)
    except ValueError:
        return False
    return True


def treeinfo_attributes_segment(tree_file):
    """
    Extracts per-segment attributes of a tree file generated using treeinfo and returns a DataFrame.
    Can be run on a 'forest' or single treefile.

    Every line is split into chunks on ", " and every chunk into fields on ",".
    A chunk of exactly seven numeric fields is treated as the start of a new
    tree and increments the running tree ID. Every chunk with more than seven
    fields is collected as a row; the first such chunk supplies the column
    names and the rest are segment data.

    Parameters:
    tree_file (str): The path to the treefile created using treeinfo.

    Returns:
    pandas.DataFrame: A DataFrame of float segment attributes with a leading
    'tree_id' column. Rows whose 'parent_id' is -1 are dropped, and the
    remaining rows keep their original index labels (the index is not reset).
    """
    line_list = []
    tree_ids = []
    tree_id = 0

    with open(tree_file, "r") as file:
        for line in file:
            for chunk in line.split(", "):
                cell_data = chunk.strip().split(",")
                # float() based check so that negative or scientific-notation
                # tree attributes still mark the start of a new tree.
                if len(cell_data) == 7 and all(_is_number(x) for x in cell_data):
                    tree_id += 1
                if len(cell_data) > 7:
                    # The header chunk is seen before any tree (tree_id == 0)
                    # and therefore gets no tree ID.
                    if tree_id != 0:
                        tree_ids.append(tree_id)
                    line_list.append(cell_data)

    df = pd.DataFrame(line_list[1:], columns=line_list[0]).astype(float)
    df.insert(0, "tree_id", tree_ids)
    # Remove rows where parent_id is -1.0. Copy so callers can add columns
    # without writing into a slice of the unfiltered frame.
    return df[df["parent_id"] != -1.0].copy()


# Parse the configured treefile into one DataFrame row per segment.
df = treeinfo_attributes_segment(treefile)
# Bare expression: presumably shown as the cell output when run in a notebook.
df

In [None]:
##
# Adjust the start and end coordinates of each segment
##
# Add new columns for start coordinates. They are initialised with NaN rather
# than None so the columns stay numeric and a segment whose parent cannot be
# found ends up with NaN instead of a None that breaks later arithmetic.
for start_column in ("start_x", "start_y", "start_z"):
    df[start_column] = float("nan")

trees = df.groupby("tree_id")

# Iterate over each tree and adjust the start coordinates
for tree_id, tree in trees:
    # Index label of the first row of this tree; parent lookups are computed
    # relative to it (parent_id 1 maps to this row).
    start_idx = tree.index[0]
    for idx, row in tree.iterrows():
        pid = row["parent_id"]
        if pid == 0 or pd.isnull(pid):
            # Root node: start coordinates are the same as current node, but z is adjusted by segment length
            df.at[idx, "start_x"] = row["x"]
            df.at[idx, "start_y"] = row["y"]
            df.at[idx, "start_z"] = row["z"] - row["segment_length"]
            continue

        # Ensure parent_id is an integer
        pid = int(pid)
        parent_idx = start_idx + (pid - 1)
        # Retrieve parent's coordinates; keep the try body to the lookup only
        try:
            parent_row = df.loc[parent_idx]
        except KeyError:
            # Best effort: report the problem and leave this segment's start as NaN
            print(
                f"PID: {pid} invalid for tree: {tree_id}, check row {idx} and {parent_idx}"
            )
            continue
        # A segment starts where its parent ends
        df.at[idx, "start_x"] = parent_row["x"]
        df.at[idx, "start_y"] = parent_row["y"]
        df.at[idx, "start_z"] = parent_row["z"]

# Keep original x, y, z columns and add end_x, end_y, end_z as copies
df["end_x"] = df["x"]
df["end_y"] = df["y"]
df["end_z"] = df["z"]

# Display the updated DataFrame
df

In [None]:
##
# Calculate the direction vector for each segment
##
def calculate_direction_vector(start_point, end_point):
    """
    Compute the unit vector pointing from start_point towards end_point in 3D.

    Only the first three items of each point are used. When both points
    coincide the zero vector is returned instead of dividing by zero.

    :param start_point: A tuple or list containing (x, y, z) coordinates of the start point
    :param end_point: A tuple or list containing (x, y, z) coordinates of the end point
    :return: A tuple (dx, dy, dz) of the normalized components, each rounded to 5 decimal places
    """
    # Raw displacement along each of the three axes
    components = [float(end_point[axis] - start_point[axis]) for axis in range(3)]

    # Scale to unit length unless the displacement is exactly zero
    length = math.sqrt(sum(component**2 for component in components))
    if length != 0:
        components = [component / length for component in components]

    # Fixed precision keeps the output consistent
    return tuple(round(component, 5) for component in components)


# Compute one direction vector per row, then spread the three components
# over the dx, dy and dz columns.
def _row_direction(row):
    """Return the normalized start-to-end direction of a single segment row."""
    segment_start = (row["start_x"], row["start_y"], row["start_z"])
    segment_end = (row["end_x"], row["end_y"], row["end_z"])
    return calculate_direction_vector(segment_start, segment_end)


direction_tuples = df.apply(_row_direction, axis=1).tolist()
# Reuse df's index so the assignment lines up row by row
direction_frame = pd.DataFrame(direction_tuples, index=df.index)
df[["dx", "dy", "dz"]] = direction_frame
df

In [None]:
# Write the DataFrame to a CSV file with these columns
fields_to_keep = [
    "tree_id",
    "parent_id",
    "radius",
    "segment_length",
    "start_x",
    "start_y",
    "start_z",
    "end_x",
    "end_y",
    "end_z",
    "dx",
    "dy",
    "dz",
    "extension",
    "branch",
    "branch_order",
    "pos_in_branch",
]

# Build "<input stem>_parsed.csv" next to the input file. Only the final
# extension is swapped, so a ".txt" elsewhere in the path is left alone and an
# input without a ".txt" suffix can no longer map onto (and overwrite) itself.
treefile_path = Path(treefile)
output_file = str(treefile_path.with_name(f"{treefile_path.stem}_parsed.csv"))
df[fields_to_keep].to_csv(output_file, index=False)