In [2]:
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Read the raw flight records from the CSV in the working directory
df = pd.read_csv("Dataset.csv")

# Discard the two columns that are not used as model inputs
# NOTE(review): "Unnamed: 0" looks like a saved row index — confirm against the CSV
df_cleaned = df.drop(["Unnamed: 0", "flight"], axis=1)

# The target is the price column; every remaining column is a feature
y = df_cleaned["price"]
X = df_cleaned.drop("price", axis=1)

In [4]:
# Partition the feature columns by dtype: object columns are treated as
# categorical, int64/float64 columns as numerical.
object_features = X.select_dtypes(include=["object"])
numeric_features = X.select_dtypes(include=["int64", "float64"])

categorical_cols = list(object_features.columns)
numerical_cols = list(numeric_features.columns)

# Show both lists so the split can be checked by eye
print("Categorical cols:", categorical_cols)
print("Numerical cols:", numerical_cols)

Categorical cols: ['airline', 'source_city', 'departure_time', 'stops', 'arrival_time', 'destination_city', 'class']
Numerical cols: ['duration', 'days_left']


In [5]:
# One-hot encode the categorical columns; handle_unknown="ignore" keeps
# transform() from raising on categories that were not present at fit time.
cat_encoder = OneHotEncoder(handle_unknown="ignore")

# Columns not listed in a transformer (here, the numerical ones) are
# forwarded unchanged because of remainder="passthrough".
preprocessor = ColumnTransformer(
    [("cat", cat_encoder, categorical_cols)],
    remainder="passthrough",
)

In [6]:
# Split the raw features first so the preprocessor is never fitted on the
# held-out rows; fitting on the full dataset would leak test-set information
# into the encoding.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the encoding from the training rows only, then apply that same
# fitted encoding to the test rows (transform, not fit_transform).
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

In [8]:
# Train a GradientBoostingRegressor on the training split.
# (GradientBoostingRegressor is imported in the notebook's import cell.)
model = GradientBoostingRegressor(
    n_estimators=50,
    max_depth=4,
    learning_rate=0.1,
    random_state=42,  # fixed seed so repeated runs build the same ensemble
)
model.fit(X_train, y_train)

# Evaluate on the held-out split: mean squared error (in squared price
# units) and the coefficient of determination.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Model Evaluation:\n- MSE: {mse:.2f}\n- R² Score: {r2:.4f}")

Model Evaluation:
- MSE: 23938238.14
- R² Score: 0.9536


In [9]:
# Write the fitted model and the fitted preprocessor to disk with joblib,
# one file per object, in the working directory.
for artifact, filename in (
    (model, "flight_price_model.pkl"),
    (preprocessor, "preprocessor.pkl"),
):
    joblib.dump(artifact, filename)

print("Model and preprocessor saved successfully!")

Model and preprocessor saved successfully!
