In [1]:
import os
import pandas as pd

# Define the root folder
root_folder = 'data/external/drones'

# Collect every Parquet file below the root folder. os.walk yields entries in
# an arbitrary, filesystem-dependent order, so the paths are sorted to keep
# the row order of the combined frame reproducible across re-runs.
parquet_paths = sorted(
    os.path.join(subdir, name)
    for subdir, _, files in os.walk(root_folder)
    for name in files
    if name.endswith('.parquet')
)

# Fail early with an actionable message: os.walk silently yields nothing for a
# missing folder, and pd.concat on an empty list would otherwise raise an
# opaque "No objects to concatenate" error.
if not parquet_paths:
    raise FileNotFoundError(f"No .parquet files found under {root_folder!r}")

# List to hold individual DataFrames (one per Parquet file)
dataframes = [pd.read_parquet(path) for path in parquet_paths]

# Concatenate all DataFrames into one
df = pd.concat(dataframes, ignore_index=True)

# Ensure columns are numeric; values that cannot be parsed become NaN
for col in ['altitude', 'elevation', 'home_height']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Fill missing values from the other two columns:
#   altitude  = elevation + home_height
#   elevation = altitude  - home_height
df['altitude'] = df['altitude'].fillna(df['elevation'] + df['home_height'])
df['elevation'] = df['elevation'].fillna(df['altitude'] - df['home_height'])

# Keep only rows where both values are known. The explicit .copy() makes the
# result an independent frame, so the assignments below do not trigger
# SettingWithCopyWarning.
has_both = df['elevation'].notna() & df['altitude'].notna()
df = df[has_both].copy()

# Cast to integers (astype(int) truncates any fractional part toward zero)
df['elevation'] = df['elevation'].astype(int)
df['altitude'] = df['altitude'].astype(int)

# Remove the columns that are not written to the processed output
df = df.drop(columns=['gps', 'rssi'])

In [2]:
# Persist the cleaned frame. The target directory is created first because
# to_parquet does not create missing parent folders and would otherwise fail
# after all the loading and cleaning work has already been done.
output_path = "data/processed/drone_data_23_24_25.parquet"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_parquet(output_path)