In [1]:
# The purpose of this notebook is to explore the separated 10m, 30m, and 50m datasets and their aggregate data (average
# pitch/roll, variance in turbulence, battery loss, and so on).

In [60]:
# imports
import pandas as pd
import numpy as np
from pathlib import Path

In [66]:
# fix individual logs
# Running the function(s) below showed that Jan07.3-35_10m had mistakenly collected data on the way DOWN, too, which
# confounds the data. Here we overwrite that file in place, keeping only the rows whose 'index' value is at most 381
# so the distracting, irrelevant data is gone.

log_path = Path('UsableLogs/Jan07.3-35_10m.csv')
full_log = pd.read_csv(log_path)
keep_rows = full_log['index'] <= 381  # boolean mask: True for the rows to keep
badfile = full_log.loc[keep_rows]
badfile.to_csv('UsableLogs/Jan07.3-35_10m.csv', index=False)

# Actually, looking at sort_values of the aggregate_df, it looks like we're going to have to do this to quite a few files.

In [62]:
# function that turns each file fed into it into a single row of aggregate data

# if the file name contains _10m, the entry in the 'altitude' column is 10; likewise _30m -> 30 and _50m -> 50
# include the file name so weird outliers can be traced back to their log
# find the average of all speeds, voltage, current, pitch, and roll, plus the battery_percent lost over time
# each time, append the row to a list, which is turned into the aggregate dataframe at the end

# Folder that is scanned for the per-flight CSV logs.
directory = Path('UsableLogs')
# Path.glob() returns a one-shot generator: once it has been iterated it yields nothing more.
# Materialise it as a sorted list so the paths can be iterated repeatedly and in a deterministic order.
preaggregate_logs = sorted(directory.glob('*.csv'))
# NOTE(review): not referenced by any other cell visible here — presumably the intended output folder; confirm.
output_dir = Path('AggregateLogs')

def process_usable_logs(log_dir=None):
    """Summarise each flight log in a folder as one row of aggregate statistics.

    Parameters
    ----------
    log_dir : str or pathlib.Path, optional
        Folder to scan for ``*.csv`` logs. When omitted, the notebook-level
        ``directory`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed log, sorted by ``file_name``, with
        the file stem, flight time in seconds, altitude (10, 30, 50 or None,
        taken from the file name), mean/std statistics of the logged columns
        and the battery percentage lost per second. When no log could be
        processed the frame is empty but still carries every column.

    Notes
    -----
    A file that cannot be read, has no rows, or lacks an expected column is
    reported with ``print`` and skipped, so one bad log does not abort the run.
    """
    if log_dir is None:
        log_dir = directory
    # Glob on every call. A generator created once at module level is exhausted
    # after its first pass, which made a second call silently find no files.
    # Sorting makes the row order (and index labels) independent of the OS.
    log_paths = sorted(Path(log_dir).glob('*.csv'))

    # (output column, source column, pandas Series method)
    column_stats = [
        ('x_speed_avg', 'xSpeed(mph)', 'mean'),
        ('y_speed_avg', 'ySpeed(mph)', 'mean'),
        ('z_speed_avg', 'zSpeed(mph)', 'mean'),
        ('speed_avg', 'speed(mph)', 'mean'),
        ('speed_sd', 'speed(mph)', 'std'),
        ('voltage_avg', 'voltage(v)', 'mean'),
        ('voltage_sd', 'voltage(v)', 'std'),
        ('current_avg', 'current(A)', 'mean'),
        ('current_sd', 'current(A)', 'std'),
        ('pitch_avg', 'pitch(degrees)', 'mean'),
        ('pitch_sd', 'pitch(degrees)', 'std'),
        ('roll_avg', 'roll(degrees)', 'mean'),
        ('roll_sd', 'roll(degrees)', 'std'),
    ]
    output_columns = (
        ['file_name', 'flight_time(seconds)', 'altitude']
        + [out_name for out_name, _, _ in column_stats]
        + ['battery_loss_per_second']
    )

    aggregated_data = []

    for file_path in log_paths:
        # Parse altitude from filename (first matching tag wins; None if no tag)
        altitude = None
        for metres in (10, 30, 50):
            if f'_{metres}m' in file_path.name:
                altitude = metres
                break

        try:
            # Read the CSV
            df = pd.read_csv(file_path)
            start_time = pd.to_datetime(df['timestamp'].iloc[0])
            end_time = pd.to_datetime(df['timestamp'].iloc[-1])
            flight_time = (end_time - start_time).total_seconds()

            # calculate the aggregate stats
            stats = {
                'file_name': file_path.stem,
                'flight_time(seconds)': flight_time,
                'altitude': altitude,
            }
            for out_name, source_col, method in column_stats:
                stats[out_name] = getattr(df[source_col], method)()

            # We just take battery percent from first row minus last row
            battery_used = df['battery_percent'].iloc[0] - df['battery_percent'].iloc[-1]
            # A log whose first and last timestamps coincide has no defined rate;
            # record NaN instead of dividing by zero (inf plus a RuntimeWarning).
            if flight_time != 0:
                stats['battery_loss_per_second'] = battery_used / flight_time
            else:
                stats['battery_loss_per_second'] = float('nan')

            aggregated_data.append(stats)

        except Exception as e:
            # Deliberate best-effort: report the bad file and keep going.
            print(f"Error processing {file_path.name}: {e}")

    # Passing the column list keeps 'file_name' present even with zero rows,
    # so the sort below cannot raise KeyError on an empty result.
    return pd.DataFrame(aggregated_data, columns=output_columns).sort_values('file_name')
# Build the aggregate table: one row of summary statistics per successfully processed log.
aggregate_df = process_usable_logs()

In [65]:
# Show the longest flights first; unusually long flight times are a quick way to spot logs that may need trimming.
aggregate_df.sort_values('flight_time(seconds)', ascending=False)

Unnamed: 0,file_name,flight_time(seconds),altitude,x_speed_avg,y_speed_avg,z_speed_avg,speed_avg,speed_sd,voltage_avg,voltage_sd,current_avg,current_sd,pitch_avg,pitch_sd,roll_avg,roll_sd,battery_loss_per_second
43,Jan06.4-47_30m,66.0,30,-0.006157,-0.004104,0.090298,0.019086,0.066631,7.149679,0.018702,5.248908,0.335076,1.756422,1.438407,-7.822936,2.221663,0.060606
13,Jan08.12-39_10m,36.0,10,0.000621,-0.004350,-0.015534,0.044863,0.093111,7.360944,0.026315,4.715778,0.680818,7.419444,2.363018,4.370833,1.651355,0.083333
24,Jan08.12-30_50m,34.0,50,0.005819,0.000000,-0.005172,0.005819,0.035657,7.693182,0.039167,4.585072,0.652233,10.218208,1.703190,4.843931,1.226651,0.058824
59,Jan08.12-27_30m,31.0,30,0.319259,-0.438185,0.043181,1.287869,2.151068,7.788456,0.038332,4.612807,0.604842,6.388608,4.632089,3.200316,7.374674,0.064516
98,Jan11.3-34_50m,30.0,50,-0.004534,-0.006046,0.036275,0.025090,0.085796,7.516953,0.022780,4.141240,0.430515,9.089865,2.457137,-5.007770,1.101081,0.066667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,Jan06.4-41_30m,18.0,30,0.002528,0.000000,0.000000,0.002528,0.023711,7.326424,0.018667,4.748463,0.316471,4.555932,0.608072,-4.139548,0.746747,0.055556
82,Jan07.12-28_10m,17.0,10,0.000000,0.034100,0.000000,0.032736,0.079306,7.810274,0.019439,4.550695,0.349538,8.078049,0.897785,1.522561,0.756674,0.058824
104,Jan06.4-41_10m,16.0,10,0.000000,-0.002663,0.000000,0.002663,0.024334,7.348792,0.022183,5.007988,0.285532,4.276190,1.347530,-1.771429,0.676212,0.062500
73,Jan07.12-28_30m,12.0,30,-0.039688,0.012628,-0.003608,0.149731,0.204963,7.761056,0.041138,4.975573,0.483224,7.509677,2.047105,0.441129,1.854549,0.083333


In [47]:
# push to csv
# Writes the aggregate table to aggregate_data_TEST.csv, resolved against the current working directory;
# index=False leaves the DataFrame index out of the file.
# NOTE(review): output_dir ('AggregateLogs') is defined earlier but not used here — confirm the intended destination.
aggregate_df.to_csv('aggregate_data_TEST.csv', index=False) 