In [98]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


In [99]:
# Read the two input tables from the working directory: the ratings table
# and the package catalogue (in that order).
ratings_data, package_data = map(pd.read_csv, ("ratings.csv", "package.csv"))


In [100]:
# Convert 'Price' to an integer column, stripping thousands separators ("15,000" -> 15000).
# Casting to str first keeps this cell idempotent: once the column is numeric (after a
# first run, or because read_csv already parsed it as numbers) the .str accessor would
# otherwise raise AttributeError. regex=False forces a literal comma match regardless of
# the installed pandas version's default.
package_data['Price'] = (
    package_data['Price']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(int)
)

In [101]:
# ratings_data refers to packages by a 'Package' label of the form 's<S.N>' (e.g. 's9'),
# so derive the same label from each package's serial number to serve as the join key.
package_data['Package'] = 's' + package_data['S.N'].astype(str)

# Inner join: keep only ratings whose 'Package' label exists in the package table.
combined_data = ratings_data.merge(package_data, on='Package', how='inner')


In [102]:
# Train a random-forest regressor that predicts 'Rating' from 'Duration' and 'Price' only.
X = combined_data.loc[:, ['Duration', 'Price']]
y = combined_data.loc[:, 'Rating']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 2.3178719763255997


In [103]:
# Serialize the fitted regressor to disk with pickle.
# NOTE: only unpickle this file from a trusted source; pickle.load can execute arbitrary code.
model_filename = 'travel.pkl'
with open(model_filename, mode='wb') as pkl_out:
    pickle.dump(model, pkl_out)

print(f"Model saved to {model_filename}")

Model saved to travel.pkl


In [104]:
def recommend(duration, price, duration_tolerance=0, price_tolerance=5000, top_n=5):
    """Return the best-rated packages close to a requested duration and price.

    Every row of the notebook-level ``package_data`` frame is scored with the
    notebook-level fitted ``model`` (features: ``Duration`` and ``Price``). The
    rows are ranked by predicted rating, and only those within the given
    tolerances of the request are kept.

    Parameters
    ----------
    duration : number
        Requested duration, in the same unit as ``package_data['Duration']``.
    price : number
        Requested price, in the same unit as ``package_data['Price']``.
    duration_tolerance : number, default 0
        Largest allowed absolute difference between a package's duration and
        ``duration`` (inclusive). The default of 0 requires an exact match.
    price_tolerance : number, default 5000
        Largest allowed absolute difference between a package's price and
        ``price`` (inclusive).
    top_n : int, default 5
        Maximum number of packages to return.

    Returns
    -------
    pandas.DataFrame or str
        Up to ``top_n`` matching rows of ``package_data`` with an added
        ``PredictedRating`` column, sorted by that column in descending order.
        If nothing matches, the string ``"No matching packages found."`` is
        returned instead of an empty frame, so callers must check the type.
    """
    # Score a copy so the shared package_data frame never gains a PredictedRating column.
    scored = package_data.copy()
    scored['PredictedRating'] = model.predict(scored[['Duration', 'Price']])

    # Rank the whole catalogue first, then filter, so the ranking (including the
    # relative order of tied ratings) does not depend on the tolerances.
    ranked = scored.sort_values(by='PredictedRating', ascending=False)

    duration_ok = (ranked['Duration'] - duration).abs() <= duration_tolerance
    price_ok = (ranked['Price'] - price).abs() <= price_tolerance

    matches = ranked.loc[duration_ok & price_ok].head(top_n)

    if matches.empty:
        return "No matching packages found."
    return matches

In [105]:
# Example: top-rated packages with Duration 3 and Price near 20000,
# using recommend()'s default tolerances.
top_packages = recommend(duration=3, price=20000)
print(top_packages)

    S.N            Travel Company Destination  Price  Duration Package  \
8     9      Mountain Top Travels   Muktinath  15000         3      s9   
7     8      Mountain Top Travels      Jomsom  15000         3      s8   
88   89  Gaurab Tours and Travels   Muktinath  15000         3     s89   
87   88  Gaurab Tours and Travels      Jomsom  15000         3     s88   

    PredictedRating  
8          3.581142  
7          3.581142  
88         3.581142  
87         3.581142  
