In [22]:
from joblib import load
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import numpy as np
import plotly.graph_objs as go
import plotly.io as pio
import pandas as pd
import json

In [2]:
# Directory holding the serialized v1 models. Kept in a single constant so the
# notebook can be pointed at another checkout by editing one line.
MODELS_DIR = '/Users/timurchiks/Desktop/flight_price_predictor/models/joblib'

# NOTE: joblib.load unpickles its input, which can execute arbitrary code;
# only load model files that come from a trusted source.

# Models that are later evaluated on the "tree" feature set.
dt = load(f'{MODELS_DIR}/dt_v1.joblib')
gb = load(f'{MODELS_DIR}/gb_v1.joblib')
rf = load(f'{MODELS_DIR}/rf_v1.joblib')

# Models that are later evaluated on the "linear" feature set.
lm = load(f'{MODELS_DIR}/lm_v1.joblib')
lasso = load(f'{MODELS_DIR}/lasso_v1.joblib')
ridge = load(f'{MODELS_DIR}/ridge_v1.joblib')

In [3]:
# Directory with the preprocessed datasets. Kept in a single constant so the
# location can be changed in one place.
DATA_DIR = '/Users/timurchiks/Desktop/flight_price_predictor/data/processed'

# One prepared CSV per model family; both contain a 'Price' column that is
# split off as the target in the next cell.
tree_data = pd.read_csv(f'{DATA_DIR}/prepared_tree.csv')
linear_data = pd.read_csv(f'{DATA_DIR}/prepared_linear.csv')

In [4]:
# Separate the regression target from the feature columns of each dataset.
TARGET_COLUMN = 'Price'

x_tree, y_tree = tree_data.drop(columns=[TARGET_COLUMN]), tree_data[TARGET_COLUMN]

x_linear, y_linear = linear_data.drop(columns=[TARGET_COLUMN]), linear_data[TARGET_COLUMN]

In [11]:
# Hold out 20% of each dataset for evaluation.
# NOTE(review): the two splits use different seeds (42 for tree, 52 for
# linear). Confirm each one matches the seed used when the corresponding
# models were trained; otherwise the "test" rows may overlap training data.
tree_split = train_test_split(x_tree, y_tree, test_size=0.2, random_state=42)
x_tree_train, x_tree_test, y_tree_train, y_tree_test = tree_split

linear_split = train_test_split(x_linear, y_linear, test_size=0.2, random_state=52)
x_linear_train, x_linear_test, y_linear_train, y_linear_test = linear_split

In [13]:
# Evaluate the decision tree on the held-out tree split.
dt_pred = dt.predict(x_tree_test)

# Compute every score exactly once so the stored metrics and the printed
# report are guaranteed to come from the same values.
dt_mse = mean_squared_error(y_tree_test, dt_pred)
dt_mae = mean_absolute_error(y_tree_test, dt_pred)
dt_r2 = r2_score(y_tree_test, dt_pred)

# Note: the dict stores RMSE (sqrt of MSE) while the report below prints MSE.
dt_metrics = {
    'MAE': dt_mae,
    'RMSE': np.sqrt(dt_mse),
    'R2': dt_r2,
}

print("Mean Squared Error (MSE):", dt_mse)
print("Mean Absolute Error (MAE):", dt_mae)
print("test R² Score:", dt_r2)

Mean Squared Error (MSE): 9276531.746857462
Mean Absolute Error (MAE): 2195.478023773413
test R² Score: 0.8753917462277203




In [14]:
# Evaluate gradient boosting on the held-out tree split.
gb_pred = gb.predict(x_tree_test)

# Compute every score exactly once so the stored metrics and the printed
# report are guaranteed to come from the same values.
gb_mse = mean_squared_error(y_tree_test, gb_pred)
gb_mae = mean_absolute_error(y_tree_test, gb_pred)
gb_r2 = r2_score(y_tree_test, gb_pred)

# Note: the dict stores RMSE (sqrt of MSE) while the report below prints MSE.
gb_metrics = {
    'MAE': gb_mae,
    'RMSE': np.sqrt(gb_mse),
    'R2': gb_r2,
}

print("Mean Squared Error (MSE):", gb_mse)
print("Mean Absolute Error (MAE):", gb_mae)
print("test R² Score:", gb_r2)

Mean Squared Error (MSE): 1913352.2529558144
Mean Absolute Error (MAE): 1046.7480761250044
test R² Score: 0.9742986398798399




In [15]:
# Evaluate the random forest on the held-out tree split.
rf_pred = rf.predict(x_tree_test)

# Compute every score exactly once so the stored metrics and the printed
# report are guaranteed to come from the same values.
rf_mse = mean_squared_error(y_tree_test, rf_pred)
rf_mae = mean_absolute_error(y_tree_test, rf_pred)
rf_r2 = r2_score(y_tree_test, rf_pred)

# Note: the dict stores RMSE (sqrt of MSE) while the report below prints MSE.
rf_metrics = {
    'MAE': rf_mae,
    'RMSE': np.sqrt(rf_mse),
    'R2': rf_r2,
}

print("Mean Squared Error (MSE):", rf_mse)
print("Mean Absolute Error (MAE):", rf_mae)
print("test R² Score:", rf_r2)

Mean Squared Error (MSE): 2029412.2514580276
Mean Absolute Error (MAE): 977.0111852707178
test R² Score: 0.9727396484225991




In [16]:
# Evaluate linear regression on the held-out linear split.
lm_pred = lm.predict(x_linear_test)

# Compute every score exactly once so the stored metrics and the printed
# report are guaranteed to come from the same values.
lm_mse = mean_squared_error(y_linear_test, lm_pred)
lm_mae = mean_absolute_error(y_linear_test, lm_pred)
lm_r2 = r2_score(y_linear_test, lm_pred)

# Note: the dict stores RMSE (sqrt of MSE) while the report below prints MSE.
lm_metrics = {
    'MAE': lm_mae,
    'RMSE': np.sqrt(lm_mse),
    'R2': lm_r2,
}

print("Mean Squared Error (MSE):", lm_mse)
print("Mean Absolute Error (MAE):", lm_mae)
print("test R² Score:", lm_r2)

Mean Squared Error (MSE): 21258027.86288416
Mean Absolute Error (MAE): 3349.5555555555557
test R² Score: 0.657383509237528




In [18]:
# Evaluate the Lasso model on the held-out linear split.
lasso_pred = lasso.predict(x_linear_test)

# Compute every score exactly once so the stored metrics and the printed
# report are guaranteed to come from the same values.
lasso_mse = mean_squared_error(y_linear_test, lasso_pred)
lasso_mae = mean_absolute_error(y_linear_test, lasso_pred)
lasso_r2 = r2_score(y_linear_test, lasso_pred)

# Note: the dict stores RMSE (sqrt of MSE) while the report below prints MSE.
lasso_metrics = {
    'MAE': lasso_mae,
    'RMSE': np.sqrt(lasso_mse),
    'R2': lasso_r2,
}

print("Mean Squared Error (MSE):", lasso_mse)
print("Mean Absolute Error (MAE):", lasso_mae)
print("test R² Score:", lasso_r2)

Mean Squared Error (MSE): 21273921.86875854
Mean Absolute Error (MAE): 3355.197897127942
test R² Score: 0.6571273449050714




In [21]:
# Evaluate the Ridge model on the held-out linear split.
ridge_pred = ridge.predict(x_linear_test)

# Compute every score exactly once so the stored metrics and the printed
# report are guaranteed to come from the same values.
ridge_mse = mean_squared_error(y_linear_test, ridge_pred)
ridge_mae = mean_absolute_error(y_linear_test, ridge_pred)
ridge_r2 = r2_score(y_linear_test, ridge_pred)

# Note: the dict stores RMSE (sqrt of MSE) while the report below prints MSE.
ridge_metrics = {
    'MAE': ridge_mae,
    'RMSE': np.sqrt(ridge_mse),
    'R2': ridge_r2,
}

print("Mean Squared Error (MSE):", ridge_mse)
print("Mean Absolute Error (MAE):", ridge_mae)
print("test R² Score:", ridge_r2)

Mean Squared Error (MSE): 21279747.89171568
Mean Absolute Error (MAE): 3355.7613596350557
test R² Score: 0.6570334466585571




In [23]:
def plot_predictions(models_preds, y_true):
    """Scatter each model's predictions against the true target values.

    One marker trace is added per entry of ``models_preds`` (true value on
    the x axis, prediction on the y axis), followed by a dashed red diagonal
    marking a perfect prediction. The figure is displayed with ``fig.show()``
    and nothing is returned.

    Parameters
    ----------
    models_preds : dict
        Maps a legend label to that model's predictions. Each value must
        expose ``.min()`` and ``.max()``. May be empty, in which case only
        the diagonal is drawn.
    y_true : array-like
        True target values; must expose ``.min()`` and ``.max()``.

    NOTE(review): a later cell defines another function with this same name
    (a residual histogram), which replaces this one once it runs. Consider
    giving the two helpers distinct names.
    """
    fig = go.Figure()

    for model_name, y_pred in models_preds.items():
        fig.add_trace(go.Scatter(
            x=y_true,
            y=y_pred,
            mode='markers',
            name=model_name,
            opacity=0.6
        ))

    # The diagonal must span both the true values and every prediction.
    # Gather them in one list first: min()/max() called with a single scalar
    # argument try to iterate it and raise TypeError, which is what happened
    # when models_preds was empty.
    all_series = [y_true, *models_preds.values()]
    min_val = min(series.min() for series in all_series)
    max_val = max(series.max() for series in all_series)

    fig.add_trace(go.Scatter(
        x=[min_val, max_val],
        y=[min_val, max_val],
        mode='lines',
        line=dict(color='red', dash='dash'),
        name='Идеальное предсказание'
    ))

    fig.update_layout(
        title='Сравнение предсказаний моделей',
        xaxis_title='Истинные значения',
        yaxis_title='Предсказания',
        width=800,
        height=600
    )

    fig.show()


In [24]:
# Legend label -> test-set predictions for the models evaluated on the tree split.
tree_models_preds = {
    'Random Forest': rf_pred,
    'Gradient Boosting': gb_pred,
    'Decision Tree': dt_pred  # label spelling fixed (was 'Descision Tree')
}

plot_predictions(tree_models_preds, y_tree_test)

In [25]:
# Legend label -> test-set predictions for the models evaluated on the linear split.
linear_models_preds = dict([
    ('Linear Regression', lm_pred),
    ('Lasso', lasso_pred),
    ('Ridge', ridge_pred),
])

plot_predictions(models_preds=linear_models_preds, y_true=y_linear_test)

In [28]:
def plot_predictions(models_preds, y_true, bins=50):
    """Overlay a histogram of residuals (``y_true - y_pred``) for each model.

    Note: this shares its name with the scatter-plot helper defined earlier
    in the notebook, so once this cell has run the name refers to this
    histogram version.

    Parameters
    ----------
    models_preds : dict
        Maps a legend label to that model's predictions.
    y_true : array-like
        True target values; must support ``y_true - y_pred``.
    bins : int, default 50
        Passed to plotly as ``nbinsx``.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    layout = {
        'barmode': 'overlay',  # draw histograms on top of each other
        'title': 'residuals histogram',
        'xaxis_title': 'Остатки (y_true - y_pred)',
        'yaxis_title': 'Частота',
        'width': 800,
        'height': 600,
    }

    fig = go.Figure()
    for label, predicted in models_preds.items():
        residuals = y_true - predicted
        histogram = go.Histogram(x=residuals, nbinsx=bins, name=label, opacity=0.6)
        fig.add_trace(histogram)

    fig.update_layout(**layout)
    fig.show()

In [29]:
# Residual histograms for the tree-split models (default number of bins).
plot_predictions(models_preds=tree_models_preds, y_true=y_tree_test)

In [30]:
# Residual histograms for the linear-split models (default number of bins).
plot_predictions(models_preds=linear_models_preds, y_true=y_linear_test)

In [31]:
# Directory that receives one JSON file per model. Kept in a single constant
# so the output location can be changed in one place.
METRICS_DIR = "/Users/timurchiks/Desktop/flight_price_predictor/models/metrics"

# File-name prefix -> metrics dict built in the evaluation cells.
metrics_by_model = {
    "dt": dt_metrics,
    "gb": gb_metrics,
    "rf": rf_metrics,
    "lm": lm_metrics,
    "lasso": lasso_metrics,
    "ridge": ridge_metrics,
}

# Write <prefix>_metrics.json for every model, replacing six copy-pasted
# blocks; file names, write order and file contents are unchanged.
for model_key, model_metrics in metrics_by_model.items():
    with open(f"{METRICS_DIR}/{model_key}_metrics.json", "w") as f:
        json.dump(model_metrics, f)