In [31]:
import pandas as pd
import open3d as o3d
import numpy as np
import glob
import pandas as pd
import matplotlib.pyplot as plt

# # Get a list of all CSV file paths in the directory
# csv_files = glob.glob("drone_flight_dataset_dedrone/*.csv")

# # Read each CSV file and concatenate the dataframes
# dfs = []
# for file in csv_files:
#     df = pd.read_csv(file)
#     dfs.append(df)

# # Concatenate all dataframes into a single dataframe
# df = pd.concat(dfs, ignore_index=True)

# read in drone_flight_dataset_dedrone/2023-05-01_00-00-00_clean.csv
df = pd.read_csv('drone_flight_dataset_dedrone/2023-05-01_00-00-00_clean.csv')

df['SensorTime'] = pd.to_datetime(df['SensorTime'], format='mixed')

# Get unique drone IDs
drone_ids = df['DroneId'].unique()

print(len(drone_ids))

# Loop through each drone ID and assign a flight number to each unique flight
for drone_id in drone_ids:
    # Filter the dataframe for the current drone ID
    filtered_df = df[df['DroneId'] == drone_id]

    # sort filtered_df by SensorTime
    filtered_df = filtered_df.sort_values('SensorTime')

    # Convert the SensorTime column to numpy array
    timestamps = filtered_df['SensorTime'].values

    # Calculate the time differences between consecutive timestamps
    time_diff = np.diff(timestamps)

    # Find the indices where the time difference is greater than 30 minutes
    split_indices = np.where(time_diff > np.timedelta64(20, 'm'))[0] + 1

    # Assign flight IDs based on the split indices
    flight_ids = np.zeros(len(timestamps), dtype=int)
    current_flight_id = 1
    for idx in split_indices:
        flight_ids[idx:] = current_flight_id
        current_flight_id += 1

    # Add the flight_ids column to the DataFrame
    filtered_df['FlightNum'] = flight_ids
    df.loc[filtered_df.index, 'FlightNum'] = filtered_df['FlightNum']

# Create a new column called UniqueFlightId by concatenating DroneId and FlightNum
df['UniqueFlightId'] = df['DroneId'] + '_' + df['FlightNum'].astype(int).astype(str)


1456


We now have unique flight paths. Filter out those with fewer than 100 points

In [None]:
# Loop through each flight ID
flight_ids = df['UniqueFlightId'].unique()

for flight_id in flight_ids:
    # Filter the dataframe for the current flight ID
    filtered_df_flight = df[df['UniqueFlightId'] == flight_id]

    if len(filtered_df_flight) < 100:
        df = df.drop(filtered_df_flight.index)
        continue

    # Describe the SensorTime column
    print(filtered_df_flight['SensorTime'].describe())

    # Plot the time series data with color based on FlightId
    plt.scatter(filtered_df_flight['DetectionLatitude'], filtered_df_flight['DetectionLongitude'], c=filtered_df_flight['Altitude'], cmap='viridis', s=10)
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.title(f'Altitude vs. Time (Flight ID: {flight_id})')
    plt.show()

print(len(df['UniqueFlightId'].unique()))

# Generate synthetic flight data

In [45]:
import json
import pandas as pd
import numpy as np
import geopandas as gpd
# Load GeoJSON data
truedata_df = gpd.read_file('drug.geojson')

# Step 1: Generate a sequence of numbers for latitude and longitude
latitudes = truedata_df['geometry'].y  # Replace 30, 40 with the range of latitudes you want
longitudes = truedata_df['geometry'].x  # Replace -100, -90 with the range of longitudes you want

# Step 2: Generate random altitudes
altitudes = np.random.uniform(100, 500, 51)  # Replace 100, 500 with the range of altitudes you want

# Step 3: Generate a sequence of timestamps
timestamps = pd.date_range('2024-05-04', periods=51, freq='S')  # Replace '2022-01-01' with the start date you want

# Step 4: Generate random drone types and drone IDs
drone_types = np.random.choice(['Mavic Mini'], 51)  # Replace 'Type1', 'Type2', 'Type3' with the drone types you have
drone_ids = np.random.choice(['eiue83jecf1d243944915ed78b4cehen8'], 51)  # Replace 1, 11 with the range of drone IDs you have

# Step 5: Generate flight numbers and unique flight IDs
unique_flight_ids = np.ones(51)  # 1000 ones

# Step 6: Combine all these arrays into a DataFrame
df = pd.DataFrame({
    'DetectionLatitude': latitudes,
    'DetectionLongitude': longitudes,
    'Altitude': altitudes,
    'SensorTime': timestamps,
    'DroneType': drone_types,
    'DroneId': drone_ids,
    'UniqueFlightId': unique_flight_ids
})
good_drone_df = df.copy()


# create the noisy DataFrames
noisy_dfs = []  # List to hold the noisy DataFrames
for i in range(1, 6):
    # Create a noisy version of good_drone_df for a different drone_id and flight_id
    noisy_drone_df = good_drone_df.copy()
    noisy_drone_df['DetectionLatitude'] = noisy_drone_df['DetectionLatitude'] + np.random.normal(0, 0.001, len(noisy_drone_df))
    noisy_drone_df['DetectionLongitude'] = noisy_drone_df['DetectionLongitude'] + np.random.normal(0, 0.001, len(noisy_drone_df))
    noisy_drone_df['Altitude'] = noisy_drone_df['Altitude'] + np.random.normal(0, 0.1, len(noisy_drone_df))
    noisy_drone_df['SensorTime'] = noisy_drone_df['SensorTime'] + pd.to_timedelta(np.random.normal(0, 1, len(noisy_drone_df)), unit='s')
    noisy_drone_df['DroneId'] = 'eiue83jecf1d243944915ed78b4cehen' + str(i + 8)  # Change the drone ID
    noisy_drone_df['UniqueFlightId'] = i + 1  # Change the flight ID

    # Append the noisy DataFrame to the list
    noisy_dfs.append(noisy_drone_df)

# Combine good_drone_df and all the noisy DataFrames
df = pd.concat([good_drone_df] + noisy_dfs)


  timestamps = pd.date_range('2024-05-04', periods=51, freq='S')  # Replace '2022-01-01' with the start date you want


In [11]:
from scipy.special import comb
import numpy as np
import pandas as pd
import random

# Assume start_end_points is a list of tuples, where each tuple is (start_lat, start_long, end_lat, end_long)
start_end_points = [((37.790123, -122.411469), (37.805595, -122.430852)),
                    ((37.789532, -122.418833), (37.856981, -122.482052)),
                    ((37.799797, -122.479973), (37.857664, -122.423856)),
                    ((37.766944, -122.419872), (37.837696, -122.313873))
                   ]

def bernstein_poly(i, n, t):
    """
    The Bernstein polynomial of n, i as a function of t
    """
    return comb(n, i) * ( t**(n-i) ) * (1 - t)**i

def bezier_curve(points, nTimes=1000):
    """
    Given a set of control points, return the bezier curve defined by the control points.
    points should be a list of lists, or list of tuples such as [ [1,1], 
    [2,3], [4,5], ..[Xn, Yn] ]
    nTimes is the number of time steps, defaults to 1000
    See http://processingjs.nihongoresources.com/bezierinfo/
    """
    nPoints = len(points)
    xPoints = np.array([p[0] for p in points])
    yPoints = np.array([p[1] for p in points])

    t = np.linspace(0.0, 1.0, nTimes)

    polynomial_array = np.array([ bernstein_poly(i, nPoints-1, t) for i in range(0, nPoints)   ])

    xvals = np.dot(xPoints, polynomial_array)
    yvals = np.dot(yPoints, polynomial_array)

    return xvals, yvals

# Define start, control and end points
start_end_points = [
    ((37.775473, -122.418969), (37.846549, -122.316653), (37.826931, -122.423065)),
    ((37.776707, -122.419275), (37.846549, -122.313653), (37.825976, -122.422246)),
    ((37.776066, -122.418811), (37.846549, -122.314653), (37.827101, -122.423674)),
    ((37.790123, -122.411469), (37.795000, -122.440000), (37.805595, -122.430852)),
    ((37.789532, -122.418833), (37.836981, -122.492052), (37.856981, -122.482052)),
    ((37.799797, -122.479973), (37.847664, -122.333856), (37.857664, -122.423856)),
    ((37.766944, -122.419872), (37.637696, -122.233873), (37.837696, -122.313873)),
    ((37.793456, -122.421824), (37.805595, -122.430852), (37.790123, -122.411469)),
    ((37.789532, -122.418833), (37.856981, -122.482052), (37.789532, -122.418833)),
    ((37.799797, -122.479973), (37.857664, -122.423856), (37.799797, -122.479973)),
    ((37.766944, -122.419872), (37.837696, -122.313873), (37.766944, -122.419872)) 
                   ]

# create 50 more random drone flights within the same area as above
# Define the bounds of the existing points
min_lat = 37.70
max_lat = 37.85
min_long = -122.48
max_long = -122.30
min_ctrl_lat = 37.60
max_ctrl_lat = 37.90
min_ctrl_long = -122.60
max_ctrl_long = -122.20

# Generate 50 random tuples within the bounds
random_points = []
for _ in range(50):
    random_lat_start = random.uniform(min_lat, max_lat)
    random_long_start = random.uniform(min_long, max_long)
    random_lat_end = random.uniform(min_lat, max_lat)
    random_long_end = random.uniform(min_long, max_long)
    random_lat_ctrl = random.uniform(min_ctrl_lat, max_ctrl_lat)
    random_long_ctrl = random.uniform(min_ctrl_long, max_ctrl_long)
    random_points.append(((random_lat_start, random_long_start), (random_lat_ctrl, random_long_ctrl), (random_lat_end, random_long_end)))

# Concatenate the random points with start_end_points
start_end_points += random_points


df = pd.DataFrame()

for i, ((start_lat, start_long), (control_lat, control_long), (end_lat, end_long)) in enumerate(start_end_points):
    # Generate points between start and end coordinates using Bezier curve
    latitudes, longitudes = bezier_curve([(start_lat, start_long), (control_lat, control_long), (end_lat, end_long)], 200)

    run_df = pd.DataFrame()
    run_df['DetectionLatitude'] = latitudes
    run_df['DetectionLongitude'] = longitudes
    # run_df['Altitude'] = np.linspace(100, 500, 200)
    run_df['UniqueFlightId'] = i + 8  # Change the flight ID for each run_df
    run_df['SensorTime'] = pd.date_range('2024-05-04', periods=200, freq='s')
    run_df['DroneType'] = 'Mavic Mini'
    run_df['DroneId'] = 'eiue83jecf1d243944915ed78b4cehen' + str(i + 8)  # Change the drone ID for each run_df

    run_df = run_df[['DetectionLatitude', 'DetectionLongitude', 'UniqueFlightId', 'SensorTime', 'DroneType', 'DroneId']]

    df = pd.concat([df, run_df])

df.to_csv('synthetic_data_full.csv', index=False)

In [2]:
import geopandas as gpd
# save the df to a new csv file
# df.to_csv('synthetic_data.csv', index=False)

# flight_id_dict = run_frechet(df, flight_id=8)



# Create a GeoDataFrame from the latitude and longitude columns
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['DetectionLongitude'], df['DetectionLatitude']))

# Convert the GeoDataFrame to GeoJSON format
geojson = gdf.to_file('synthetic_data_full.geojson', driver='GeoJSON')


In [5]:
import keplergl

# Create a Kepler.gl map
map_1 = keplergl.KeplerGl(height=800)
map_1.add_data(data=df, name='synthetic_data')

# Save the map to an HTML file
map_1.save_to_html(file_name='synthetic_data_map.html')

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to synthetic_data_map.html!
