# NCSU Drone Delivery for Retail

### Drone operational data analytics


In [None]:
!pip3 install pandas matplotlib seaborn statsmodels

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
import seaborn as sns

### Loading the data

In [None]:
"""
This file contains all experiments with varying speed, altitude, route etc.
"""
parameters_df = pd.read_csv('../data/parameters.csv')
parameters_df.sample(5)

In [None]:
# Load the flight log, reading the altitude column as strings.
flight_df = pd.read_csv('../data/flights.csv', dtype={"altitude": str})
# Discard the rows whose altitude is "25-50-100-25" (treated as bad data).
flight_df = flight_df.loc[flight_df["altitude"].ne("25-50-100-25")]
flight_df.sample(10)

### Creating useful feature columns based on raw data

In [None]:
"""
Creating additional data columns based on raw data.
"""
flight_df["actual_velocity"] = np.sqrt(
    flight_df["velocity_x"] ** 2 + 
    flight_df["velocity_y"] ** 2 + 
    flight_df["velocity_z"] ** 2
)
flight_df["actual_acceleration"] = np.sqrt(
    flight_df["linear_acceleration_x"] ** 2 + 
    flight_df["linear_acceleration_y"] ** 2 + 
    flight_df["linear_acceleration_z"] ** 2
)
flight_df["power_usage"] = flight_df["battery_current"] * flight_df["battery_voltage"]

### Heat Map - Correlation

In [None]:
"""
Using important columns to see correlation between features
"""
heatmap_columns = ['flight', 'time', 'wind_speed', 'wind_angle', 'speed', 'payload', 'route', 'power_usage', "actual_velocity", "actual_acceleration"]

In [None]:
# 7 x 6 canvas for the correlation heat map.
figure = plt.figure(figsize=(7, 6))
# Correlation matrix of the selected columns (numeric columns only),
# rendered with the YlGnBu colour map.
dataplot = sns.heatmap(
    data=flight_df.loc[:, heatmap_columns].corr(numeric_only=True),
    cmap="YlGnBu",
)

### Sample Data of Speed=4 and altitude=25 dataframe

In [None]:
"""
This slices the all_df dataframe and extracts the rows that
had the same flight number as flight_id_speed_4 and some more columns
"""
flight_id_speed_4 = parameters_df[(parameters_df["speed"] == 4.0) & (parameters_df["altitude"] == "25")]["flight"]
speed_4_df = flight_df[flight_df["flight"].isin(flight_id_speed_4)][["flight", "time", "payload", "actual_velocity", "actual_acceleration", "wind_speed", "power_usage"]]
speed_4_df

### Statsmodels Model 1

In [None]:
"""
Selected columns with high correlation
The p-values are good, except for wind_speed's p-value so I have to create another StatsModel.
"""
x = sm.add_constant(speed_4_df[["payload", "actual_velocity", "actual_acceleration", "wind_speed"]], prepend=False)
y = speed_4_df["power_usage"]
model = sm.OLS(y, x).fit()
model_summary = model.summary()
print(model_summary)

### Statsmodels Model 2

In [None]:
"""
This is the updated StatsModel with perfect p-values
"""
x = sm.add_constant(speed_4_df[["payload", "actual_velocity", "actual_acceleration"]], prepend=False)
y = speed_4_df["power_usage"]
model = sm.OLS(y, x).fit()
model_summary = model.summary()
print(model_summary)

### Create another dataset grouped by flight with the mean of every column

In [None]:
"""
Before 35 seconds the drone is ascending and after 175 secs the drone is 
descending so those values are outliers because power consumption and altitudes 
are varying drastically, so I am excluding them
"""
speed_4_df = speed_4_df[(speed_4_df["time"] > 35) & (speed_4_df["time"] < 175)]

In [None]:
"""
This dataframe contains the flight values rows that have a constant 
speed of 4 and altitude of 25. 
To create a graph average the data for each flight
"""
speed_4_groupby_flight_df = speed_4_df.groupby(["flight"]).mean()
speed_4_groupby_flight_df

### Payload vs Power Usage

In [None]:
# Second-order polynomial regression of per-flight mean power_usage on payload.
sns.regplot(
    x="payload",
    y="power_usage",
    data=speed_4_groupby_flight_df,
    order=2,
)
plt.title(label="Power Consumption with Increasing Load", fontsize=14)
plt.show()

### Velocity vs Power Usage 

In [None]:
# First-order (linear) regression of per-flight mean power_usage on actual_velocity.
sns.regplot(
    x="actual_velocity",
    y="power_usage",
    data=speed_4_groupby_flight_df,
    order=1,
)
plt.title(label="Power Consumption with Increasing Speed", fontsize=14)
plt.show()

### Wind Speed vs Power Usage

In [None]:
# First-order (linear) regression of per-flight mean power_usage on wind_speed.
sns.regplot(
    x="wind_speed",
    y="power_usage",
    data=speed_4_groupby_flight_df,
    order=1,
)
plt.title(label="Power Consumption with Increasing Wind Speed", fontsize=14)
plt.show()

### Sample Data of Speed=10 and altitude=50

In [None]:
"""
Gets the flight values that satisfy the values of speed=10 and altitude=50
"""
flight_id_speed_10 = parameters_df[(parameters_df["speed"] == 10.0) & (parameters_df["altitude"] == "50")]["flight"]

In [None]:
"""
Makes a new dataframe with the flight values in flight_id_speed_10 and has the following columns (flight, payload, battery_current, wind_speed, etc)
"""
speed_10_df = flight_df[flight_df["flight"].isin(flight_id_speed_10)][["flight", "time", "payload", "wind_speed", "actual_velocity", "actual_acceleration", "battery_voltage", "battery_current"]]
speed_10_df["power_usage"] = speed_10_df["battery_current"] * speed_10_df["battery_voltage"]
speed_10_df

### Statsmodels Model 1

In [None]:
"""
Picked the columns that had a good correlation value in the heat map.
The p-values were good, so I do not have to create another StatsModel.
"""
x = sm.add_constant(speed_10_df[["payload", "actual_velocity", "actual_acceleration", "wind_speed"]], prepend=False)
y = speed_10_df["power_usage"]
model = sm.OLS(y, x).fit()
model_summary = model.summary()
print(model_summary)

### Create another dataset grouped by flight with the mean of every column

In [None]:
"""
Before 25 seconds the drone is ascending and after 110 secs the drone is descending so those values are outliers, so I am excluding them
"""
speed_10_df = speed_10_df[(speed_10_df["time"] > 25) & (speed_10_df["time"] < 110)]

In [None]:
"""
This dataframe contains the flight values rows that have a constant 
speed of 10 and altitude of 50 and averages the value of every column
"""
speed_10_groupby_flight_df = speed_10_df.groupby(["flight"]).mean()
speed_10_groupby_flight_df

### Payload vs Power Usage

In [None]:
# Second-order polynomial regression of per-flight mean power_usage on payload
# for the speed-10 subset.
sns.regplot(
    x="payload",
    y="power_usage",
    data=speed_10_groupby_flight_df,
    order=2,
)
plt.title(label="Payload V/S Power Usage", fontsize=14)