### Importing Dataset

In [8]:
import numpy as np
import pandas as pd

data = pd.read_csv('flights.csv')

print(data.shape)

print(data.head())

(257896, 28)
   flight  time  wind_speed  wind_angle  battery_voltage  battery_current  \
0       1   0.0         0.1        12.0        24.222174         0.087470   
1       1   0.2         0.1         3.0        24.227180         0.095421   
2       1   0.3         0.1       352.0        24.225929         0.095421   
3       1   0.5         0.1       354.0        24.224678         0.095421   
4       1   0.6         0.1       359.0        24.210905         0.079518   

   position_x  position_y  position_z  orientation_x  ...  angular_z  \
0  -79.782396   40.458047  269.332402       0.001772  ...   0.006815   
1  -79.782396   40.458047  269.332056       0.001768  ...   0.002034   
2  -79.782396   40.458047  269.333081       0.001768  ...  -0.000874   
3  -79.782396   40.458047  269.334648       0.001775  ...   0.002443   
4  -79.782396   40.458047  269.336178       0.001775  ...  -0.006425   

   linear_acceleration_x  linear_acceleration_y  linear_acceleration_z  speed  \
0         

  data = pd.read_csv('flights.csv')


In [9]:
print(data.columns)
print(data.shape)

Index(['flight', 'time', 'wind_speed', 'wind_angle', 'battery_voltage',
       'battery_current', 'position_x', 'position_y', 'position_z',
       'orientation_x', 'orientation_y', 'orientation_z', 'orientation_w',
       'velocity_x', 'velocity_y', 'velocity_z', 'angular_x', 'angular_y',
       'angular_z', 'linear_acceleration_x', 'linear_acceleration_y',
       'linear_acceleration_z', 'speed', 'payload', 'altitude', 'date',
       'time_day', 'route'],
      dtype='object')
(257896, 28)


In [10]:
print(data.columns.isna())

[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False]


### Pre Processing

In [11]:
data.drop(columns=['flight','battery_voltage','battery_current','wind_angle',
        'orientation_x', 'orientation_y', 'orientation_z', 'orientation_w', 'altitude',
        'linear_acceleration_x', 'linear_acceleration_y',
        'linear_acceleration_z','date','time_day','route'],inplace=True)
print(data.columns)

Index(['time', 'wind_speed', 'position_x', 'position_y', 'position_z',
       'velocity_x', 'velocity_y', 'velocity_z', 'angular_x', 'angular_y',
       'angular_z', 'speed', 'payload'],
      dtype='object')


In [12]:
# data['velocity'] = np.sqrt(
#     data['velocity_x']**2 + data['velocity_y']**2 + data['velocity_z']**2
# )

data.drop(columns=['velocity_x', 'velocity_y', 'velocity_z'], inplace=True)

print(data.columns)
print(data.shape)

Index(['time', 'wind_speed', 'position_x', 'position_y', 'position_z',
       'angular_x', 'angular_y', 'angular_z', 'speed', 'payload'],
      dtype='object')
(257896, 10)


In [13]:
data['angular'] = np.sqrt(
    data['angular_x']**2 + data['angular_y']**2 + data['angular_z']**2
)

data.drop(columns=['angular_x', 'angular_y', 'angular_z'], inplace=True)

print(data.columns)
print(data.shape)

Index(['time', 'wind_speed', 'position_x', 'position_y', 'position_z', 'speed',
       'payload', 'angular'],
      dtype='object')
(257896, 8)


In [14]:
data['distance'] = data['time'] * data['speed']

print(data.columns)
print(data.shape)

Index(['time', 'wind_speed', 'position_x', 'position_y', 'position_z', 'speed',
       'payload', 'angular', 'distance'],
      dtype='object')
(257896, 9)


### Training Model

- LinearRegression
- RandomForestRegressor

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import joblib

X = data[['distance','wind_speed', 'position_x', 'position_y', 'position_z','speed', 'payload', 'angular']]
y = data['time']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

Training set shape: (206316, 8)
Test set shape: (51580, 8)


In [16]:
print("\n=== Linear Regression ===")
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

lr_pred = lr_model.predict(X_test)

lr_mse = mean_squared_error(y_test, lr_pred)
lr_rmse = np.sqrt(lr_mse)
lr_mae = mean_absolute_error(y_test, lr_pred)
lr_r2 = r2_score(y_test, lr_pred)

print(f"Linear Regression Results:")
print(f"MSE: {lr_mse:.4f}")
print(f"RMSE: {lr_rmse:.4f}")
print(f"MAE: {lr_mae:.4f}")
print(f"R² Score: {lr_r2:.4f}")


=== Linear Regression ===
Linear Regression Results:
MSE: 652.6033
RMSE: 25.5461
MAE: 18.6404
R² Score: 0.8354


In [17]:
print("\n=== Random Forest Regression ===")
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

rf_pred = rf_model.predict(X_test)

rf_mse = mean_squared_error(y_test, rf_pred)
rf_rmse = np.sqrt(rf_mse)
rf_mae = mean_absolute_error(y_test, rf_pred)
rf_r2 = r2_score(y_test, rf_pred)

print(f"Random Forest Results:")
print(f"MSE: {rf_mse:.4f}")
print(f"RMSE: {rf_rmse:.4f}")
print(f"MAE: {rf_mae:.4f}")
print(f"R² Score: {rf_r2:.4f}")

feature_importance = pd.DataFrame({
  'feature': X.columns,
  'importance': rf_model.feature_importances_
}).sort_values('importance', ascending=False)

print("\nFeature Importance (Random Forest):")
print(feature_importance)


=== Random Forest Regression ===
Random Forest Results:
MSE: 46.9257
RMSE: 6.8502
MAE: 0.9261
R² Score: 0.9882

Feature Importance (Random Forest):
      feature  importance
0    distance    0.593652
4  position_z    0.146361
5       speed    0.142829
2  position_x    0.092539
7     angular    0.017731
3  position_y    0.005294
1  wind_speed    0.001505
6     payload    0.000088


In [18]:
print("\n=== XGBoost Regression ===")
import xgboost as xgb

xgb_model = xgb.XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
xgb_model.fit(X_train, y_train)

xgb_pred = xgb_model.predict(X_test)

xgb_mse = mean_squared_error(y_test, xgb_pred)
xgb_rmse = np.sqrt(xgb_mse)
xgb_mae = mean_absolute_error(y_test, xgb_pred)
xgb_r2 = r2_score(y_test, xgb_pred)

print(f"XGBoost Results:")
print(f"MSE: {xgb_mse:.4f}")
print(f"RMSE: {xgb_rmse:.4f}")
print(f"MAE: {xgb_mae:.4f}")
print(f"R² Score: {xgb_r2:.4f}")

xgb_feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': xgb_model.feature_importances_
}).sort_values('importance', ascending=False)

print("\nFeature Importance (XGBoost):")
print(xgb_feature_importance)


=== XGBoost Regression ===
XGBoost Results:
MSE: 37.7481
RMSE: 6.1439
MAE: 1.7537
R² Score: 0.9905

Feature Importance (XGBoost):
      feature  importance
0    distance    0.395982
5       speed    0.262152
4  position_z    0.162088
2  position_x    0.144223
3  position_y    0.015761
7     angular    0.012668
6     payload    0.004297
1  wind_speed    0.002829


### Exporting the better model

In [19]:
model_scores = {
    "Random Forest": rf_r2,
    "Linear Regression": lr_r2,
    "XGBoost": xgb_r2
}

best_model_name = max(model_scores, key=model_scores.get)

if best_model_name == "Random Forest":
    best_model = rf_model
elif best_model_name == "Linear Regression":
    best_model = lr_model
else:
    best_model = xgb_model

print(f"\nSaving {best_model_name} model...")
joblib.dump(best_model, 'time_prediction_model.pkl')
print(f"Best model ({best_model_name}) saved as 'time_prediction_model.pkl'")

feature_names = list(X.columns)
joblib.dump(feature_names, 'feature_names.pkl')
print(f"Feature names saved: {feature_names}")


Saving XGBoost model...
Best model (XGBoost) saved as 'time_prediction_model.pkl'
Feature names saved: ['distance', 'wind_speed', 'position_x', 'position_y', 'position_z', 'speed', 'payload', 'angular']
