In [1]:
# Purpose: explore the separated 10m, 30m, and 50m datasets and their per-flight aggregate statistics
# (average pitch/roll, variance in turbulence, battery loss, and so on).

In [1]:
# import tings
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Fix individual logs.
# Running the aggregation function(s) below showed that Jan07.3-35_10m and Jan06.4-47_30m had mistakenly
# collected data on the way DOWN, too, which confounds the data. Each file is rewritten in place with only
# the rows up to the last wanted 'index' value, so the dropped rows are no longer in UsableLogs afterwards.

def trim_log(csv_path, last_index):
    """Overwrite ``csv_path`` with only the rows whose 'index' column is <= ``last_index``.

    Parameters
    ----------
    csv_path : str or Path
        Log CSV to trim; it is read and then written back to the same location.
    last_index : int
        Largest value of the 'index' column to keep.

    Returns
    -------
    pandas.DataFrame
        The trimmed rows that were written back.
    """
    log = pd.read_csv(csv_path)
    trimmed = log[log['index'] <= last_index]
    # Written without the DataFrame index so the file keeps its original columns only.
    trimmed.to_csv(csv_path, index=False)
    return trimmed


# Re-running this cell is harmless: an already-trimmed file passes the same filter unchanged.
badfile1 = trim_log(Path('UsableLogs/Jan07.3-35_10m.csv'), last_index=381)
badfile2 = trim_log(Path('UsableLogs/Jan06.4-47_30m.csv'), last_index=603)

In [23]:
# Build one row of aggregate statistics per flight log.
#
# - altitude is parsed from the file name (a name containing '_10m' -> 10, and so on)
# - the file name is kept so weird outliers can be traced back to their log
# - averages / standard deviations are taken of heading, speeds, voltage, current and tilt,
#   plus the battery percent lost per second of flight
# - every per-file row is collected in a list and turned into a DataFrame once at the end

directory = Path('UsableLogs')
# Materialised as a list: the raw glob result is a generator and can only be iterated once.
preaggregate_logs = list(directory.glob('*.csv'))
output_dir = Path('AggregateLogs')

# Altitude tags looked for in a log's file name, checked in this order.
ALTITUDE_TAGS = {'_10m': 10, '_30m': 30, '_50m': 50}

# Column order of the aggregate table; also gives an empty result its schema.
AGGREGATE_COLUMNS = [
    'file_name',
    'flight_time(seconds)',
    'altitude(m)',
    'compass_heading_avg',
    'compass_heading_std',
    'x_speed_avg',
    'y_speed_avg',
    'z_speed_avg',
    'speed_avg',
    'speed_sd',
    'voltage_avg',
    'voltage_sd',
    'current_avg',
    'current_sd',
    'tilt_avg',
    'tilt_sd',
    'battery_loss_per_second',
]


def _altitude_from_name(file_name):
    """Return the altitude encoded in ``file_name`` (10, 30 or 50), or None if no known tag is present."""
    for tag, altitude in ALTITUDE_TAGS.items():
        if tag in file_name:
            return altitude
    return None


def _summarise_log(file_path):
    """Read one log CSV and return its aggregate statistics as a dict keyed by AGGREGATE_COLUMNS."""
    df = pd.read_csv(file_path)

    # Flight time is the span between the first and last logged timestamps.
    start_time = pd.to_datetime(df['timestamp'].iloc[0])
    end_time = pd.to_datetime(df['timestamp'].iloc[-1])
    flight_time = (end_time - start_time).total_seconds()

    # Combined pitch/roll magnitude, kept as its own Series so the loaded frame is not modified.
    tilt_magnitude = np.sqrt(df['pitch(degrees)']**2 + df['roll(degrees)']**2)

    # Battery percent from the first row minus the last row.
    battery_drop = df['battery_percent'].iloc[0] - df['battery_percent'].iloc[-1]
    # A zero-length flight has no meaningful rate; NaN is skipped by later means and
    # correlations, whereas dividing by 0.0 would inject inf.
    battery_loss_per_second = battery_drop / flight_time if flight_time > 0 else np.nan

    return {
        'file_name': file_path.stem,
        'flight_time(seconds)': flight_time,
        'altitude(m)': _altitude_from_name(file_path.name),
        # NOTE(review): plain arithmetic mean/std of degrees; this would mislead if a
        # heading wraps across 0/360 during a flight - confirm that does not happen here.
        'compass_heading_avg': df['compass_heading(degrees)'].mean(),
        'compass_heading_std': df['compass_heading(degrees)'].std(),
        'x_speed_avg': df['xSpeed(mph)'].mean(),
        'y_speed_avg': df['ySpeed(mph)'].mean(),
        'z_speed_avg': df['zSpeed(mph)'].mean(),
        'speed_avg': df['speed(mph)'].mean(),
        'speed_sd': df['speed(mph)'].std(),
        'voltage_avg': df['voltage(v)'].mean(),
        'voltage_sd': df['voltage(v)'].std(),
        'current_avg': df['current(A)'].mean(),
        'current_sd': df['current(A)'].std(),
        'tilt_avg': tilt_magnitude.mean(),
        'tilt_sd': tilt_magnitude.std(),
        'battery_loss_per_second': battery_loss_per_second,
    }


def process_usable_logs(log_paths=None):
    """Aggregate every flight log into one row of summary statistics.

    Parameters
    ----------
    log_paths : iterable of str or Path, optional
        Log CSVs to summarise. When omitted, ``directory`` is globbed for '*.csv'
        on every call, so repeated calls see the same files.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed log with the columns in
        AGGREGATE_COLUMNS, sorted by 'file_name'. Empty (but with those
        columns) when no log could be processed.

    Notes
    -----
    A log that fails to load or summarise is reported with a printed message
    and skipped, so one bad file does not abort the whole run.
    """
    if log_paths is None:
        log_paths = list(directory.glob('*.csv'))

    aggregated_data = []
    for file_path in log_paths:
        file_path = Path(file_path)
        try:
            aggregated_data.append(_summarise_log(file_path))
        except Exception as e:
            print(f"Error processing {file_path.name}: {e}")

    # Passing the column list keeps 'file_name' present even when nothing was aggregated,
    # so the sort below cannot fail on an empty result.
    return pd.DataFrame(aggregated_data, columns=AGGREGATE_COLUMNS).sort_values('file_name')


aggregate_df = process_usable_logs()

In [24]:
# Display the per-flight aggregate table returned by process_usable_logs().
aggregate_df

Unnamed: 0,file_name,flight_time(seconds),altitude(m),compass_heading_avg,compass_heading_std,x_speed_avg,y_speed_avg,z_speed_avg,speed_avg,speed_sd,voltage_avg,voltage_sd,current_avg,current_sd,tilt_avg,tilt_sd,battery_loss_per_second
19,Jan06.4-37_10m,12.0,10,17.810484,0.074155,0.000000,0.000000e+00,0.000000,0.000000,0.000000,7.493718,0.014779,4.726492,0.171985,2.284007,0.788179,0.083333
29,Jan06.4-37_30m,19.0,30,17.653039,0.169490,-0.001236,-1.112291e-02,0.019774,0.022987,0.072540,7.482028,0.022114,4.535116,0.315259,6.432705,1.355536,0.052632
16,Jan06.4-37_50m,19.0,50,17.767027,0.165649,0.012092,0.000000e+00,-0.008464,0.012092,0.050720,7.417032,0.012993,4.973200,0.121762,5.471956,0.839141,0.052632
116,Jan06.4-41_10m,16.0,10,21.902976,0.114476,0.000000,-2.663024e-03,0.000000,0.002663,0.024334,7.348792,0.022183,5.007988,0.285532,4.717383,1.199246,0.062500
117,Jan06.4-41_30m,18.0,30,21.793785,0.115384,0.002528,0.000000e+00,0.000000,0.002528,0.023711,7.326424,0.018667,4.748463,0.316471,6.183099,0.766242,0.055556
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55,Jan13.12-24_30m,23.0,30,15.363559,0.133793,-0.011374,0.000000e+00,0.001896,0.013270,0.052955,7.618220,0.011123,4.538564,0.220859,6.893871,1.160434,0.043478
42,Jan13.12-24_50m,25.0,50,13.830833,0.155133,-0.009321,-9.320583e-04,0.000932,0.014913,0.055916,7.553808,0.029723,5.025162,0.514583,12.940563,1.455183,0.080000
61,Jan13.12-28_10m,25.0,10,8.410700,0.249232,0.006444,2.284410e-19,-0.009206,0.058731,0.108534,7.501407,0.020691,5.231523,0.273290,6.213589,2.322029,0.080000
78,Jan13.12-28_30m,23.0,30,7.929646,0.228730,-0.002969,5.938779e-03,0.001980,0.043749,0.095763,7.427527,0.027382,5.494327,0.777363,12.663325,2.297586,0.043478


In [15]:
# Push to csv: write the aggregate table to aggregate_data_TEST.csv (relative path, so it lands in the
# current working directory), leaving out the DataFrame index.
# NOTE(review): output_dir (Path('AggregateLogs')) is defined earlier but is not used here - confirm the
# intended destination.
aggregate_df.to_csv('aggregate_data_TEST.csv', index=False) 

In [28]:
# Mean of the per-flight average tilt, grouped by altitude.
aggregate_df[['altitude(m)', 'tilt_avg']].groupby('altitude(m)').mean()

Unnamed: 0_level_0,tilt_avg
altitude(m),Unnamed: 1_level_1
10,5.304867
30,8.573677
50,9.341368


In [29]:
# Which aggregate features track altitude most closely?
# Initially these correlations look a little disappointing, but I have a theory that the wind shear is
# not linear. We'll use a random forest to get the real pattern out of these variables in another notebook.

(
    aggregate_df
    .drop(columns=['file_name', 'flight_time(seconds)'])  # identifiers / duration, not flight features
    .corr()
    .loc[:, 'altitude(m)']
    .sort_values(ascending=False)
)

altitude(m)                1.000000
tilt_avg                   0.524320
current_sd                 0.208752
z_speed_avg                0.142879
voltage_sd                 0.098318
battery_loss_per_second    0.082652
y_speed_avg                0.018275
current_avg                0.010352
x_speed_avg                0.003482
speed_avg                 -0.037920
speed_sd                  -0.040284
compass_heading_avg       -0.061019
compass_heading_std       -0.067177
tilt_sd                   -0.084703
voltage_avg               -0.152381
Name: altitude(m), dtype: float64