In [1]:
import pandas as pd
from joblib import load
import numpy as np


# Load the fitted regressor and the fitted one-hot encoder from the local
# "model" directory. Forward slashes are used because they are accepted on
# Windows as well as POSIX systems; a literal backslash is only a separator on
# Windows and would be treated as part of the file name elsewhere.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load artifacts that come from a trusted source.
lr_model = load("model/linear_regression.joblib")
ohe = load("model/onehot_encoder.pkl")

In [2]:
# Single sample request to score, laid out column-by-column (one value per column).
new_data = pd.DataFrame({
    'carrier': ['VJ'],
    'departure_date': ['2024-12-01'],
    'origin': ['HAN'],
    'destination': ['SGN'],
})

In [3]:
# Columns handed to the one-hot encoder, in the order they are passed to it.
categorical_cols = ['season','carrier', 'origin', 'destination', 'year','quarter','month', 'day','day_of_week','trend_by_month']


def _season_of_month(month):
    """Return the season label for a calendar month.

    12, 1, 2 -> "winter"; 3-5 -> "spring"; 6-8 -> "summer"; 9-11 -> "autumn".
    """
    return ["winter", "spring", "summer", "autumn"][(month % 12) // 3]


def _trend_of_month(month):
    """Return the trend label for a calendar month.

    5-9 -> "on_dinh", 10-12 -> "tang", 1-2 -> "ngang", every other month
    (3-4) -> "giam". The labels look like Vietnamese for roughly
    stable / rising / flat / falling -- confirm against the training notebook.
    """
    if 4 < month <= 9:
        return "on_dinh"
    if month in (10, 11, 12):
        return "tang"
    if month in (1, 2):
        return "ngang"
    return "giam"


def preprocess_input(df, encoder=None):
    """Turn raw request rows into the feature matrix passed to the model.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'carrier', 'origin', 'destination' and 'departure_date'
        (anything ``pd.to_datetime`` can parse). The frame is NOT modified;
        all derived columns are added to an internal copy.
    encoder : object, optional
        Fitted encoder exposing ``transform`` and ``get_feature_names_out``.
        Defaults to the notebook-level ``ohe``.

    Returns
    -------
    pandas.DataFrame
        Encoded categorical columns followed by 'sin_month', 'cos_month',
        'sin_dayofweek', 'cos_dayofweek', indexed like ``df``.

    Raises
    ------
    ValueError
        If any 'departure_date' value is missing or cannot be parsed.
    """
    if encoder is None:
        encoder = ohe

    # Work on a copy so the caller's frame does not silently gain columns.
    features = df.copy()

    departure = pd.to_datetime(features['departure_date'], errors='coerce')
    unparsed = departure.isna()
    if unparsed.any():
        # Fail here with a clear message: a NaT would otherwise surface later
        # as an unrelated TypeError while computing the season.
        raise ValueError(
            "departure_date is missing or could not be parsed for row(s): "
            f"{unparsed[unparsed].index.tolist()}"
        )
    features['departure_date'] = departure

    # Calendar parts of the departure date.
    features['year'] = departure.dt.year
    features['month'] = departure.dt.month
    features['day'] = departure.dt.day
    features['day_of_week'] = departure.dt.dayofweek
    features['quarter'] = departure.dt.quarter

    # Cyclical encodings: month on a 12-step circle, weekday on a 7-step circle.
    features['sin_month'] = np.sin(2 * np.pi * features['month'] / 12)
    features['cos_month'] = np.cos(2 * np.pi * features['month'] / 12)
    features['sin_dayofweek'] = np.sin(2 * np.pi * features['day_of_week'] / 7)
    features['cos_dayofweek'] = np.cos(2 * np.pi * features['day_of_week'] / 7)

    features['season'] = features['month'].apply(_season_of_month)
    features['trend_by_month'] = features['month'].apply(_trend_of_month)

    X_num = features[['sin_month', 'cos_month', 'sin_dayofweek', 'cos_dayofweek']]
    X_cat = encoder.transform(features[categorical_cols])
    if hasattr(X_cat, "toarray"):
        # An encoder configured for sparse output returns a sparse matrix;
        # densify it before wrapping it in a DataFrame.
        X_cat = X_cat.toarray()
    X_cat_df = pd.DataFrame(
        X_cat,
        columns=encoder.get_feature_names_out(categorical_cols),
        index=features.index,
    )

    return pd.concat([X_cat_df, X_num], axis=1)

In [4]:
# Build the model-ready feature matrix for the sample request.
X_encoded = new_data.pipe(preprocess_input)

In [5]:
# Score the encoded features with the loaded model; only the first
# prediction is reported, formatted to four decimal places.
y_pred = lr_model.predict(X_encoded)
print(
    f"✅ Dự đoán economy: {y_pred[0]:.4f} gram CO2"
)

✅ Dự đoán economy: 108175.5358 gram CO2
