In [13]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

pd.set_option("mode.copy_on_write", True)

## Load Data Sets
# Folder holding the parquet inputs. It defaults to the folder this notebook was
# originally run from; set the AVIATION_DATA_DIR environment variable to run the
# notebook on another machine without editing the cell.
# NOTE(review): the folder name is spelled "enivornmental" — presumably that is
# the real on-disk name, so it is left untouched.
DATA_DIR = Path(
    os.environ.get(
        "AVIATION_DATA_DIR",
        r"C:\Users\DELL\Downloads\ML Project\enivornmental_impact_of_aviation",
    )
)

# One frame per data-set version; each is modelled separately further down.
compressed_df = pd.read_parquet(DATA_DIR / "compressed_aviation_traffic_data.parquet")
v4 = pd.read_parquet(DATA_DIR / "cleaned_aviation_data_with_outliers_v4.parquet")
v3 = pd.read_parquet(DATA_DIR / "cleaned_aviation_data_v3.parquet")
v2 = pd.read_parquet(DATA_DIR / "cleaned_aviation_data_v2.parquet")
v1 = pd.read_parquet(DATA_DIR / "cleaned_aviation_data_v1.parquet")

In [14]:
# Build the modelling table for the compressed data set: fuel burn per seat is
# the target; `distance_km` and its square and cube are the predictors.
seats_positive = compressed_df["seats"] > 0  # Avoid divide-by-zero
compressed_df = compressed_df.loc[seats_positive].assign(
    # creating a new variable (fuel per seat)
    fuel_per_seat=lambda frame: frame["fuel_burn"] / frame["seats"],
    d=lambda frame: frame["distance_km"],
    d2=lambda frame: frame["d"] ** 2,
    d3=lambda frame: frame["d"] ** 3,
)

# Only rows with every modelling column present are used for fitting.
target, predictors = "fuel_per_seat", ["d", "d2", "d3"]
df_model = compressed_df[[target, *predictors]].dropna()
X = df_model[predictors]
y = df_model[target]

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

# Ordinary least-squares fit of fuel_per_seat on the cubic distance terms.
# The metrics below are in-sample: the model is scored on the rows it was fitted to.
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)

r2, mse = r2_score(y, y_pred), mean_squared_error(y, y_pred)

# RMSE is not reported. The `squared=False` form that was sketched here is
# deprecated in newer scikit-learn releases; take the square root of `mse` if
# the value is needed.
print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

R²: 0.0148
MSE: 544227.7876


In [16]:
# Fuel-per-seat prediction from a fixed cubic in distance (the "manual"
# coefficients; their source is not stated in this notebook). The column is
# stored on the full frame so it stays available to later cells.
compressed_df["manual_pred"] = (
    9.07
    + 1.65e-2 * compressed_df["d"]
    + 9.43e-7 * compressed_df["d2"]
    - 4.76e-12 * compressed_df["d3"]
)

# `y` only holds the rows that survived dropna() on the modelling columns, so
# score the manual prediction on exactly those rows. Passing the full column
# would make the metric calls fail (length mismatch / NaN) as soon as a single
# incomplete row exists.
complete_rows = compressed_df[["fuel_per_seat", "d", "d2", "d3"]].notna().all(axis=1)
manual_pred = compressed_df.loc[complete_rows, "manual_pred"]

r2_manual = r2_score(y, manual_pred)
mse_manual = mean_squared_error(y, manual_pred)

print(f"[Manual Coefficients] R²: {r2_manual:.4f}, MSE: {mse_manual:.4f}")

[Manual Coefficients] R²: 0.0086, MSE: 547673.9897


### Cubic Regression for v4 (unweighted least squares)

In [17]:
# Cubic fuel-per-seat regression on the v4 data set, followed by the same
# metrics for the fixed "manual" coefficients.
v4 = v4[v4["seats"] > 0]  # seats is the divisor below; avoid divide-by-zero
# creating a new variable (fuel per seat)
v4["fuel_per_seat"] = v4["fuel_burn"] / v4["seats"]
v4["d"] = v4["distance_km"]
v4["d2"] = v4["d"] ** 2
v4["d3"] = v4["d"] ** 3
v4["manual_pred"] = 9.07 + 1.65e-2 * v4["d"] + 9.43e-7 * v4["d2"] - 4.76e-12 * v4["d3"]

# One row mask selects both the fitting table and the manual predictions, so
# `y` and the manual predictions always cover the same rows even when some rows
# have missing values (previously the full column was scored against the
# dropna()-ed target).
model_cols = ["fuel_per_seat", "d", "d2", "d3"]
complete_rows = v4[model_cols].notna().all(axis=1)
df_model = v4.loc[complete_rows, model_cols]
X = df_model[["d", "d2", "d3"]]
y = df_model["fuel_per_seat"]

# In-sample fit: the model is scored on the rows it was trained on.
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
mse = mean_squared_error(y, y_pred)
print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

manual_pred = v4.loc[complete_rows, "manual_pred"]
r2_manual = r2_score(y, manual_pred)
mse_manual = mean_squared_error(y, manual_pred)
print(f"[Manual Coefficients] R²: {r2_manual:.4f}, MSE: {mse_manual:.4f}")

R²: 0.0147
MSE: 551947.3377
[Manual Coefficients] R²: 0.0085, MSE: 555441.6309


### Cubic Regression for v3 (unweighted least squares)

In [18]:
# Cubic fuel-per-seat regression on the v3 data set, followed by the same
# metrics for the fixed "manual" coefficients.
# NOTE(review): the output recorded for this cell is digit-for-digit identical
# to the v4 cell's — confirm the v3 parquet file really differs from v4.
v3 = v3[v3["seats"] > 0]  # seats is the divisor below; avoid divide-by-zero
# creating a new variable (fuel per seat)
v3["fuel_per_seat"] = v3["fuel_burn"] / v3["seats"]
v3["d"] = v3["distance_km"]
v3["d2"] = v3["d"] ** 2
v3["d3"] = v3["d"] ** 3
v3["manual_pred"] = 9.07 + 1.65e-2 * v3["d"] + 9.43e-7 * v3["d2"] - 4.76e-12 * v3["d3"]

# One row mask selects both the fitting table and the manual predictions, so
# `y` and the manual predictions always cover the same rows even when some rows
# have missing values (previously the full column was scored against the
# dropna()-ed target).
model_cols = ["fuel_per_seat", "d", "d2", "d3"]
complete_rows = v3[model_cols].notna().all(axis=1)
df_model = v3.loc[complete_rows, model_cols]
X = df_model[["d", "d2", "d3"]]
y = df_model["fuel_per_seat"]

# In-sample fit: the model is scored on the rows it was trained on.
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
mse = mean_squared_error(y, y_pred)
print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

manual_pred = v3.loc[complete_rows, "manual_pred"]
r2_manual = r2_score(y, manual_pred)
mse_manual = mean_squared_error(y, manual_pred)
print(f"[Manual Coefficients] R²: {r2_manual:.4f}, MSE: {mse_manual:.4f}")

R²: 0.0147
MSE: 551947.3377
[Manual Coefficients] R²: 0.0085, MSE: 555441.6309


### Cubic Regression for v2 (unweighted least squares)

In [19]:
# Cubic fuel-per-seat regression on the v2 data set, followed by the same
# metrics for the fixed "manual" coefficients.
# NOTE(review): the output recorded for this cell is digit-for-digit identical
# to the v4 and v3 cells' — confirm the v2 parquet file really differs.
v2 = v2[v2["seats"] > 0]  # seats is the divisor below; avoid divide-by-zero
# creating a new variable (fuel per seat)
v2["fuel_per_seat"] = v2["fuel_burn"] / v2["seats"]
v2["d"] = v2["distance_km"]
v2["d2"] = v2["d"] ** 2
v2["d3"] = v2["d"] ** 3
v2["manual_pred"] = 9.07 + 1.65e-2 * v2["d"] + 9.43e-7 * v2["d2"] - 4.76e-12 * v2["d3"]

# One row mask selects both the fitting table and the manual predictions, so
# `y` and the manual predictions always cover the same rows even when some rows
# have missing values (previously the full column was scored against the
# dropna()-ed target).
model_cols = ["fuel_per_seat", "d", "d2", "d3"]
complete_rows = v2[model_cols].notna().all(axis=1)
df_model = v2.loc[complete_rows, model_cols]
X = df_model[["d", "d2", "d3"]]
y = df_model["fuel_per_seat"]

# In-sample fit: the model is scored on the rows it was trained on.
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
mse = mean_squared_error(y, y_pred)
print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

manual_pred = v2.loc[complete_rows, "manual_pred"]
r2_manual = r2_score(y, manual_pred)
mse_manual = mean_squared_error(y, manual_pred)
print(f"[Manual Coefficients] R²: {r2_manual:.4f}, MSE: {mse_manual:.4f}")

R²: 0.0147
MSE: 551947.3377
[Manual Coefficients] R²: 0.0085, MSE: 555441.6309


### Cubic Regression for v1 (unweighted least squares)

In [20]:
# Cubic fuel-per-seat regression on the v1 data set, followed by the same
# metrics for the fixed "manual" coefficients.
# NOTE(review): the output recorded for this cell is digit-for-digit identical
# to the v4, v3 and v2 cells' — confirm the v1 parquet file really differs.
v1 = v1[v1["seats"] > 0]  # seats is the divisor below; avoid divide-by-zero
# creating a new variable (fuel per seat)
v1["fuel_per_seat"] = v1["fuel_burn"] / v1["seats"]
v1["d"] = v1["distance_km"]
v1["d2"] = v1["d"] ** 2
v1["d3"] = v1["d"] ** 3
v1["manual_pred"] = 9.07 + 1.65e-2 * v1["d"] + 9.43e-7 * v1["d2"] - 4.76e-12 * v1["d3"]

# One row mask selects both the fitting table and the manual predictions, so
# `y` and the manual predictions always cover the same rows even when some rows
# have missing values (previously the full column was scored against the
# dropna()-ed target).
model_cols = ["fuel_per_seat", "d", "d2", "d3"]
complete_rows = v1[model_cols].notna().all(axis=1)
df_model = v1.loc[complete_rows, model_cols]
X = df_model[["d", "d2", "d3"]]
y = df_model["fuel_per_seat"]

# In-sample fit: the model is scored on the rows it was trained on.
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
mse = mean_squared_error(y, y_pred)
print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")

manual_pred = v1.loc[complete_rows, "manual_pred"]
r2_manual = r2_score(y, manual_pred)
mse_manual = mean_squared_error(y, manual_pred)
print(f"[Manual Coefficients] R²: {r2_manual:.4f}, MSE: {mse_manual:.4f}")

R²: 0.0147
MSE: 551947.3377
[Manual Coefficients] R²: 0.0085, MSE: 555441.6309
