In [1]:
# Notebook-wide setup: imports, display options and the project import path.
import sys
import os
import warnings
import sklearn
import pandas as pd
# Do not truncate wide DataFrames when they are displayed.
pd.set_option('display.max_columns',None)
# Ask scikit-learn transformers to return pandas objects from transform().
sklearn.set_config(transform_output = "pandas")
# NOTE(review): this silences every warning for the rest of the session,
# including any that might be raised while loading the saved artifacts.
warnings.filterwarnings("ignore")


# Add the parent directory (project root) to the Python path
# so the project-local `utils` package imported below can be resolved.
# This depends on the notebook's working directory being one level below the root.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# NOTE(review): none of these helpers are called directly in the cells visible
# here; presumably they must be importable so the saved column transformer can
# be unpickled — confirm before removing any of them.
from utils.feature_utils import (
    is_north, find_part_of_month, part_of_day,
    make_month_object, remove_duration, have_info,
    duration_category
)
from utils.rbf import RBFPercentileSimilarity
from sklearn.pipeline import Pipeline
from feature_engine.datetime import DatetimeFeatures


In [15]:
# Single-step pipeline for the date-of-journey column: it extracts only the
# 'weekend' feature from the datetime variable(s) it is given.
dtoj_transformer = Pipeline(
    steps=[
        (
            "dt",
            DatetimeFeatures(
                features_to_extract=["weekend"],
                yearfirst=True,
                format="mixed",
            ),
        ),
    ]
)

In [16]:
import os
import joblib

# All saved artifacts live under ../artifacts, relative to the notebook's
# working directory; the two models sit in its "models" sub-folder.
artifacts_dir = os.path.join("..", "artifacts")
column_transformer_path = os.path.join(artifacts_dir, "column_transformer.joblib")
models_path = os.path.join(artifacts_dir, "models")

xgboostmodel_path = os.path.join(models_path, "xgb_flight_price_model.joblib")
randomforestmodel_path = os.path.join(
    models_path, "randomforest_flight_price_model.joblib"
)

# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
# Preprocessing step first ...
column_transformer = joblib.load(column_transformer_path)
print("✅ Column_transformer loaded successfully.")

# ... then both regressors, in the same order as before.
xgb_model = joblib.load(xgboostmodel_path)
randomforest_model = joblib.load(randomforestmodel_path)
print("✅ Models loaded successfully.")


✅ Column_transformer loaded successfully.
✅ Models loaded successfully.


In [17]:
import pandas as pd
from datetime import datetime


def _ask_int(prompt, low, high=None):
    """Prompt for an integer and reject values outside ``[low, high]``.

    Parameters
    ----------
    prompt : str
        Text shown to the user.
    low : int
        Smallest accepted value (inclusive).
    high : int or None
        Largest accepted value (inclusive); ``None`` means no upper bound.

    Returns
    -------
    int
        The validated value.

    Raises
    ------
    ValueError
        If the text is not an integer (raised by ``int``) or is out of range.
    """
    value = int(input(prompt))
    if value < low or (high is not None and value > high):
        upper = "" if high is None else f" and <= {high}"
        raise ValueError(f"Expected a value >= {low}{upper}, got {value}")
    return value


# 🚀 Take inputs
# Text answers are stripped so stray leading/trailing whitespace cannot turn a
# known category into an unknown one.
# NOTE(review): in the recorded run of this notebook the source "bangalore" was
# encoded as tf3__source_Other although a tf3__source_banglore column exists —
# the categorical values presumably have to be spelled as in the training data.
airline = input("Enter airline: ").strip()
source = input("Enter source: ").strip()
destination = input("Enter destination: ").strip()
duration = _ask_int("Enter duration in minutes (e.g. 120): ", low=1)
total_stops = _ask_int("Enter total stops (e.g. 0,1,2..): ", low=0)
additional_info = input("Enter additional info: ").strip()
# The ranges below are the ones the prompts already advertise.
dtoj_day = _ask_int("Enter day of journey (1-31): ", low=1, high=31)
dtoj_month = _ask_int("Enter month of journey (1-12): ", low=1, high=12)
dept_time_hour = _ask_int("Enter departure hour (0-23): ", low=0, high=23)

# ✅ Set dtoj_year as 2019
dtoj_year = 2019

# Fail early, with a clear ValueError, on impossible dates such as 31 February.
datetime(dtoj_year, dtoj_month, dtoj_day)

# 🔗 Create single-row dataframe including dtoj_year
input_raw = pd.DataFrame({
    'airline': [airline],
    'source': [source],
    'destination': [destination],
    'duration': [duration],
    'total_stops': [total_stops],
    'additional_info': [additional_info],
    'dep_time_hour': [dept_time_hour],
    'dtoj_day': [dtoj_day],
    'dtoj_month': [dtoj_month],
    'dtoj_year': [dtoj_year],  # only needed to assemble the date below
})

# pd.to_datetime assembles a datetime column from year/month/day columns.
journey_date = pd.to_datetime(
    input_raw.rename(columns={'dtoj_year': 'year',
                              'dtoj_month': 'month',
                              'dtoj_day': 'day'})[['year', 'month', 'day']]
)
input_dated = input_raw.assign(date=journey_date)

# fit_transform is kept because the pipeline is created unfitted in an earlier cell.
# NOTE(review): presumably DatetimeFeatures.fit learns nothing from the data
# beyond which variables to process — confirm.
weekend = dtoj_transformer.fit_transform(input_dated[['date']])

# Build the final frame without in-place mutation so re-running the cell always
# starts from the freshly collected inputs.
input_df = input_dated.assign(is_weekend=weekend).drop(columns=['dtoj_year'])

print("\n✅ Input data ready for prediction:")
input_df



✅ Input data ready for prediction:


Unnamed: 0,airline,source,destination,duration,total_stops,additional_info,dep_time_hour,dtoj_day,dtoj_month,date,is_weekend
0,Indigo,bangalore,new delhi,100,1,no info,12,15,5,2019-05-15,0


In [11]:
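# Example answers for the input prompts above.
# Note the spelling 'banglore': the encoded feature column is named
# tf3__source_banglore, so other spellings end up in the 'Other' bucket.
# airline = Indigo
# source = banglore
# destination = new delhi
# additional_info = no info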

In [18]:
# Run the single-row input through the column transformer loaded from disk;
# only transform() is called here, so the transformer is used as saved.
input_df_transformed = column_transformer.transform(input_df)
# Last expression of the cell: display the encoded feature row.
input_df_transformed

Unnamed: 0,tf1__airline,tf2__date_weekend,tf3__source_Other,tf3__source_banglore,tf3__source_delhi,tf3__source_kolkata,tf3__destination_Other,tf3__destination_banglore,tf3__destination_cochin,tf3__destination_delhi,tf3__source_is_north,tf3__destination_is_north,tf4__part_of_month,tf4__dtoj_day,tf5__part_of_day,tf5__dep_time_hour,tf6__pca0,tf6__dtoj_month,tf7__pca0,tf7__total_stops,tf7__is_direct_flight,tf8__duration_rbf_25,tf8__duration_rbf_50,tf8__duration_rbf_75,tf8__duration_cat,tf8__duration,tf9__additional_info_Other,tf9__additional_info_in-flight meal not included,tf9__additional_info_no info
0,-1.2349,0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,1,-0.669521,0.538462,12,0.521739,-0.300968,-0.031323,-0.523635,1,0,-0.361153,-0.100935,-0.082143,0.0,-1.058348,0.0,0.0,1.0


In [20]:
# Score the encoded row with the loaded XGBoost model. The input has a single
# row, so the first element of the result is printed as the predicted price.
# The random-forest model loaded earlier is not used in this cell.
prediction = xgb_model.predict(input_df_transformed)
print("💰 Predicted Flight Price:", prediction[0])

💰 Predicted Flight Price: 4487.915
