<a href="https://colab.research.google.com/github/swastik2475/Travel-mitra/blob/main/trip_cost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_squared_error,mean_absolute_error

In [None]:
!pip install pandas-profiling

In [None]:
pip install ydata-profiling

In [None]:
# Read the trip-cost dataset into the notebook-wide frame `df`.
# NOTE(review): this is an absolute path; the CSV has to exist at exactly this
# location before the cell is run.
df = pd.read_csv(filepath_or_buffer='/content/trip_cost.csv')

In [None]:
from ydata_profiling import ProfileReport

# Build an automated profiling report for `df` and save it as an HTML file
# named "output.html" (relative path, so it lands in the working directory).
prof = ProfileReport(df=df, title="Travel Cost Report")
prof.to_file(output_file="output.html")

In [None]:
# Columns the model is trained to predict; every other column of `df` is
# treated as an input feature.
target_cols = [
    'distance_km',
    'food_cost_per_day',
    'accommodation_cost_per_night',
    'activities_cost',
    'transport_cost_per_km',
    'total_cost',
]

# 'total_cost' is already part of target_cols, so it does not need to be
# appended a second time to the list of dropped columns.
X = df.drop(columns=target_cols)
y = df[target_cols]

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import ColumnTransformer

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

# Preprocessing recipe: integer-code `season`, one-hot encode `city` and
# `transport_mode`, and pass every remaining column through unchanged.
# Unknown categories become -1 (ordinal) or an all-zero row (one-hot).
# NOTE(review): the pipeline built later in this notebook uses
# `column_transformer`, not this object — confirm whether it is still needed.
transformer = ColumnTransformer(
    transformers=[
        (
            'ordinal',
            OrdinalEncoder(unknown_value=-1, handle_unknown='use_encoded_value'),
            ['season'],
        ),
        (
            'onehot',
            OneHotEncoder(handle_unknown='ignore'),
            ['city', 'transport_mode'],
        ),
    ],
    force_int_remainder_cols=False,
    remainder='passthrough',
)


In [None]:
# Identify the categorical (object-dtype) columns of the raw frame.
categorical_columns = df.select_dtypes(include=['object']).columns

# Fill missing categorical values with each column's most frequent value.
# NOTE(review): X and y are derived from `df` in an earlier cell, so on a
# top-to-bottom run this imputation does not reach the training features —
# confirm the intended cell order.
for col in categorical_columns:
    if df[col].isnull().any():
        modes = df[col].mode()
        # A column that is entirely null has no mode; leave it untouched
        # instead of failing when the first mode is looked up.
        if not modes.empty:
            # Assign the result back to the frame. Calling
            # fillna(inplace=True) on `df[col]` operates on an intermediate
            # object, which pandas warns about and which leaves `df`
            # unchanged when Copy-on-Write is active.
            df[col] = df[col].fillna(modes.iloc[0])

# One-hot encode every categorical column, dropping the first level of each.
data_encoded = pd.get_dummies(df, columns=categorical_columns, drop_first=True)
data_encoded.head()

In [None]:
# Preprocessing used by the model pipeline:
#   * `season` is mapped to integers following the explicit category order below;
#   * the location / transport / accommodation columns are one-hot encoded with
#     the first level of each dropped;
#   * all remaining columns are passed through unchanged.
# NOTE(review): the season list contains both ' monsoon' (leading space) and
# 'monsoon' — confirm whether the spaced variant really occurs in the data and
# whether this ordering is the intended ranking.
column_transformer = ColumnTransformer(transformers=[
    ('ordinal', OrdinalEncoder(categories=[['off-peak',' monsoon','peak','winter', 'monsoon', 'summer']]), ['season']),
    # handle_unknown='ignore' keeps transform() from raising on a category that
    # was not present when the encoder was fitted (e.g. a location that only
    # occurs in the test split or is typed in by the user). Such a value is
    # encoded as all zeros, i.e. the same as the dropped first level, and
    # scikit-learn emits a warning when that happens.
    ('onehot', OneHotEncoder(drop='first', handle_unknown='ignore'), ['start_location','end_location','transport_mode','accommodation_type'])
], remainder='passthrough')

In [None]:
# Draft transformer: integer-code `season` and one-hot encode `city`.
# NOTE(review): the last entry is a literal Ellipsis placeholder rather than a
# (name, transformer, columns) tuple, and `ct` is not used by any other cell
# visible in this notebook — complete or remove this draft.
ct = ColumnTransformer(
    transformers=[
        (
            'ordinal',
            OrdinalEncoder(unknown_value=-1, handle_unknown='use_encoded_value'),
            ['season'],
        ),
        (
            'onehot',
            OneHotEncoder(handle_unknown='ignore'),
            ['city'],
        ),
        ...,
    ],
)


In [None]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

# End-to-end model: run the column preprocessing, then a linear regression
# wrapped in MultiOutputRegressor so that all target columns are predicted.
model = Pipeline([
    ('preprocessing', column_transformer),
    ('regression', MultiOutputRegressor(estimator=LinearRegression())),
])

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Display the held-out target values as the cell output for visual inspection.
Y_test

In [None]:
# Fit the preprocessing + regression pipeline on the training split.
model.fit(X=X_train, y=Y_train)

In [None]:
# Predict every target column for the held-out feature rows.
Y_pred = model.predict(X=X_test)

In [None]:
# Show the raw prediction array as a table labelled with the target names.
pd.DataFrame(data=Y_pred, columns=target_cols)

In [None]:
# Report the three evaluation metrics on the held-out split. Each metric is
# called with its default settings, exactly as the individual calls were.
for label, score_fn in (
    ("R² Score:", r2_score),
    ("MSE:", mean_squared_error),
    ("MAE:", mean_absolute_error),
):
    print(label, score_fn(Y_test, Y_pred))


In [None]:
def _ask_text(prompt):
    """Prompt for free text and return it without surrounding whitespace."""
    return input(prompt).strip()


def _ask_int(prompt):
    """Prompt repeatedly until the answer parses as an integer, then return it."""
    while True:
        raw = input(prompt)
        try:
            return int(raw)
        except ValueError:
            print(f"'{raw}' is not a whole number, please try again.")


# Step 1: Collect the trip description from the user. Text answers are
# stripped so that stray spaces do not turn a known category into an unknown
# one; numeric answers are re-prompted instead of crashing on a typo.
user_data = {
     'start_location': _ask_text("Enter start location: "),
     'end_location':  _ask_text("Enter end location: "),
     'transport_mode': _ask_text("Enter transport mode (bus/train/flight): "),
     'accommodation_type': _ask_text("Enter accommodation type (hotel/homestay/etc.): "),
     'trip_days': _ask_int("Enter number of trip days: "),
     'trip_nights': _ask_int("Enter number of trip nights: "),
     'num_travelers':  _ask_int("Enter number of travelers: "),
     'season': _ask_text("Enter season (off-peak/monsoon/peak): "),
}

# Step 2: Wrap the answers in a one-row frame, the input shape the pipeline expects.
# No mode-imputation is done here: every value was just typed by the user, so
# there is nothing missing to fill, and looping over the training frame's
# categorical columns could fail on columns this frame does not contain.
user_df = pd.DataFrame([user_data])

# Step 3: Predict using your trained model
predicted_output = model.predict(user_df)

# Step 4: Display the output
# Pair each predicted value with its target name without reassigning the
# notebook-wide `target_cols`, so cells that rely on the full list keep working
# when they are re-run. As before, 'total_cost' is not shown.
predicted_by_target = dict(zip(target_cols, predicted_output[0]))
for col, value in predicted_by_target.items():
    if col != 'total_cost':
        print(f"{col}: {round(value, 2)}")


