In [None]:
# Generate a synthetic dataset of simulated flight safety scores derived from several independent flight variables

import pandas as pd
import numpy as np

# Set a seed for reproducibility
np.random.seed(0)

# Define the number of samples
num_samples = 100

# Generate the synthetic data
synthetic_data = {
    "Flight ID": np.arange(1, num_samples + 1),
    "Pilot ID": np.random.choice(range(1001, 1051), num_samples, replace=True),
    "Flight Duration (min)": np.random.uniform(5, 30, num_samples),
    "Battery Percentage at Landing (%)": np.random.uniform(15, 100, num_samples),
    "Distance From Pilot (m)": np.random.uniform(0, 500, num_samples),
    "Maximum Altitude (ft)": np.random.uniform(0, 400, num_samples),
    "Use of Return to Home (RTH)": np.random.choice(['Yes', 'No'], num_samples),
    "Number of Flights Submitted": np.random.randint(5, 50, num_samples),
}

# Convert the dictionary to a DataFrame
df = pd.DataFrame(synthetic_data)

# Randomize RTH Bonus between 0 and 0.1
df['RTHBonus'] = np.where(df['Use of Return to Home (RTH)'] == 'Yes', np.random.uniform(0, 0.1, num_samples), 0)

# Normalize the safety score calculation to ensure it falls between 0 and 1
df['Safety Score'] = (0.2 * df['Battery Percentage at Landing (%)'] / 100 -
                      0.1 * df['Flight Duration (min)'] / 30 -
                      0.0001 * df['Distance From Pilot (m)'] / 500 -
                      0.0001 * df['Maximum Altitude (ft)'] / 400 +
                      0.05 * df['Number of Flights Submitted'] / 50 +
                      df['RTHBonus'])

# Normalize the Safety Score to be between 0 and 1
df['Safety Score'] = (df['Safety Score'] - df['Safety Score'].min()) / (df['Safety Score'].max() - df['Safety Score'].min())

# Calculate the discount rate, applying a penalty for scores under 0.3
df['Discount Rate (%)'] = df['Safety Score'].apply(lambda x: x * 30 if x >= 0.3 else x * -30)

# Display the head of the DataFrame to verify the results
print(df.head())

# Save the DataFrame to an Excel file
df.to_excel('updated_synthetic_safety_score_dataset.xlsx', index=False)

print("Updated synthetic dataset generated and saved.")


   Flight ID  Pilot ID  Flight Duration (min)  \
0          1      1045              24.458637   
1          2      1048              28.739276   
2          3      1001              21.563172   
3          4      1004               5.339291   
4          5      1004              20.571152   

   Battery Percentage at Landing (%)  Distance From Pilot (m)  \
0                          46.067169                 4.620037   
1                          16.413378               416.519049   
2                          34.613099               492.164717   
3                          80.017494               351.747392   
4                          95.250499                90.815600   

   Maximum Altitude (ft) Use of Return to Home (RTH)  \
0             364.217889                         Yes   
1              33.188699                         Yes   
2             320.151385                          No   
3             350.852386                         Yes   
4              37.383783          