In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset
df = pd.read_csv('Expanded_Travel_Price_Prediction_Dataset.csv')

# Separate the regression target ("Total Estimated Cost") from the feature columns
X = df.drop("Total Estimated Cost", axis=1)
y = df["Total Estimated Cost"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Categorical columns that are one-hot encoded before reaching the model
categorical_features = ["Destination", "Accommodation Type", "Activity Preference", "Dining Preference"]

# Preprocessing: one-hot encode the categorical columns and forward every other
# column untouched.
# NOTE(review): passthrough assumes all remaining columns are already numeric -- confirm.
preprocessor = ColumnTransformer(
    transformers=[
        (
            "cat",
            # handle_unknown="ignore": a category value that was not present in the
            # training rows is encoded as all zeros (with a warning) instead of
            # raising at predict time. Without it, a rare value that the random
            # split places only in the test set -- or any new value seen by the
            # saved model -- aborts prediction with a ValueError.
            # Combining this with drop="first" requires scikit-learn >= 1.0.
            OneHotEncoder(drop="first", handle_unknown="ignore"),
            categorical_features,
        )
    ],
    remainder="passthrough"  # Keep other columns as they are
)

# Bundle preprocessing and the regressor so that fit/predict (and later
# serialization) always apply the identical encoding
model_pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=100, random_state=42))
])

# Fit the encoder and the random forest on the training split only
model_pipeline.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model_pipeline.predict(X_test)

# Score the predictions against the held-out targets
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print evaluation metrics
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R²): {r2}")


Mean Absolute Error (MAE): 294.60324
Mean Squared Error (MSE): 168143.48162176012
R-squared (R²): 0.8981526408560485


In [7]:
import pickle

# Serialize the whole pipeline object (preprocessor + regressor) to disk.
# The file is opened in binary write mode, so an existing file of the same
# name is overwritten.
model_path = 'budget_prediction_model.pkl'
with open(model_path, 'wb') as model_file:
    pickle.dump(model_pipeline, model_file)

print("Model saved as budget_prediction_model.pkl")


Model saved as budget_prediction_model.pkl
