<a href="https://colab.research.google.com/github/mathu3004/Pearl_Path/blob/E_Personalized_Itinerary_Generator/model_training_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install googlemaps



In [57]:
import pandas as pd
import numpy as np
import googlemaps
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load Datasets
user_input = pd.read_excel('/content/preprocessed_user_inputs.xlsx')
hotels = pd.read_excel('/content/preprocessed_hotel_data.xlsx')
restaurants = pd.read_excel('/content/preprocessed_colombo_restaurant.xlsx')
vacation_rentals = pd.read_excel('/content/preprocessed_vacation_rentals.xlsx')
attractions = pd.read_excel('/content/processed_attractions_colombo.xlsx')

# Preprocess User Data
def preprocess_user_data(df):
    df['budget_per_day'] = pd.to_numeric(df['budget_per_day'], errors='coerce')
    df['days'] = pd.to_numeric(df['days'], errors='coerce')

    # One-hot encode categorical features
    categorical_features = ['accommodation_type', 'food_preference', 'cuisine',
                            'activities', 'activity_time_preference', 'transport_mode', 'children_pets']
    ohe = OneHotEncoder(handle_unknown='ignore')
    encoded_features = ohe.fit_transform(df[categorical_features]).toarray()
    encoded_df = pd.DataFrame(encoded_features, columns=ohe.get_feature_names_out(categorical_features))

    df = pd.concat([df, encoded_df], axis=1)
    df = df.drop(columns=categorical_features + ['name'])
    return df

# Preprocess Hotel Dataset
def preprocess_hotel_data(df):
    df = df[['name', 'category', 'pricerange', 'rawranking']].copy()

    # Fill missing values
    df = df.fillna({'pricerange': 'unknown', 'rawranking': 0})

    # One-hot encode categorical features
    ohe = OneHotEncoder(handle_unknown='ignore')
    encoded_features = ohe.fit_transform(df[['category', 'pricerange']]).toarray()
    encoded_df = pd.DataFrame(encoded_features, columns=ohe.get_feature_names_out(['category', 'pricerange']))

    df = pd.concat([df, encoded_df], axis=1)
    df = df.drop(columns=['category', 'pricerange'])

    # Ensure 'suitability' column exists
    df['suitability'] = np.where(df['rawranking'] > df['rawranking'].median(), 1, 0)

    return df


# Preprocess Restaurant Dataset
def preprocess_restaurant_data(df):
    df = df[['name', 'category', 'rating', 'rawranking', 'cuisines']].copy()

    # Fill missing values
    df = df.fillna({'rating': 0, 'rawranking': 0})

    # One-hot encode categorical features
    ohe = OneHotEncoder(handle_unknown='ignore')
    encoded_features = ohe.fit_transform(df[['category', 'cuisines']]).toarray()
    encoded_df = pd.DataFrame(encoded_features, columns=ohe.get_feature_names_out(['category', 'cuisines']))

    df = pd.concat([df, encoded_df], axis=1)
    df = df.drop(columns=['category', 'cuisines'])

    # Ensure 'suitability' exists
    if 'suitability' not in df.columns:
        df['suitability'] = np.where(df['rating'] > df['rating'].median(), 1, 0)

    return df

# Preprocess Vacation Rentals Dataset
def preprocess_vacation_rental_data(df):
    df = df[['name', 'category']].copy()

    # One-hot encode categorical features
    ohe = OneHotEncoder(handle_unknown='ignore')
    encoded_features = ohe.fit_transform(df[['category']]).toarray()
    encoded_df = pd.DataFrame(encoded_features, columns=ohe.get_feature_names_out(['category']))

    df = pd.concat([df, encoded_df], axis=1)
    df = df.drop(columns=['category'])

    # Ensure 'suitability' exists
    if 'suitability' not in df.columns:
        df['suitability'] = 1  # Assume all are suitable by default

    return df

# Preprocess Attractions Dataset
def preprocess_attraction_data(df):
    df = df[['name', 'address']].copy()
    df = df.dropna()
    return df


In [58]:
# Preprocess Data
user_data = preprocess_user_data(user_input)
hotels_processed = preprocess_hotel_data(hotels)
restaurants_processed = preprocess_restaurant_data(restaurants)
vacation_rentals_processed = preprocess_vacation_rental_data(vacation_rentals)

# Check and Ensure 'suitability' Column Exists
datasets = {
    "hotel": hotels_processed,
    "restaurant": restaurants_processed,
    "vacation_rentals": vacation_rentals_processed
}

for dataset_name, dataset in datasets.items():
    if 'suitability' not in dataset.columns:
        raise ValueError(f"Missing 'suitability' column in {dataset_name} dataset")

def train_model(df, target_column):
    # Drop non-numeric columns
    df = df.select_dtypes(include=[np.number])

    # Ensure the target column is included
    if target_column not in df.columns:
        raise ValueError(f"Target column '{target_column}' is missing from dataset")

    X = df.drop(columns=[target_column])  # Features
    y = df[target_column]  # Target

    # Check if dataset is valid
    if X.empty:
        raise ValueError("No valid numeric features for training.")

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Evaluate model accuracy
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy for {target_column}: {accuracy:.2f}")

    return model


# Train models
hotel_model = train_model(hotels_processed, 'suitability')
restaurant_model = train_model(restaurants_processed, 'suitability')
rental_model = train_model(vacation_rentals_processed, 'suitability')

Model Accuracy for suitability: 1.00
Model Accuracy for suitability: 1.00
Model Accuracy for suitability: 1.00


In [59]:
def generate_itinerary_with_ml(user_data, hotels, restaurants, rentals, attractions,
                               hotel_model, restaurant_model, rental_model,
                               hotel_change_interval=3, rental_change_interval=3):

    itinerary = []
    selected_hotels = []
    selected_rentals = []
    selected_restaurants = set()
    selected_attractions = set()

    current_hotel = None
    current_rental = None
    accommodation_type = user_data['accommodation_type'].iloc[0]

    # Ensure the accommodation_type is either 'hotel' or 'rental', default to 'rental' if neither
    if accommodation_type not in ['hotel', 'rental']:
        accommodation_type = 'rental'

    # Get expected feature names from models
    hotel_features = hotel_model.feature_names_in_
    restaurant_features = restaurant_model.feature_names_in_
    rental_features = rental_model.feature_names_in_

    for day in range(int(user_data['days'].iloc[0])):
        selected_hotel = None
        selected_rental = None

        if accommodation_type == 'hotel':
            if day % hotel_change_interval == 0:
                hotel = hotels.copy()
                missing_cols = [col for col in hotel_features if col not in hotel.columns]
                hotel = hotel.reindex(columns=hotel.columns.tolist() + missing_cols, fill_value=0)

                hotel['predicted_suitability'] = hotel_model.predict(hotel[hotel_features])
                hotel = hotel[~hotel.index.isin(selected_hotels)]

                if not hotel.empty:
                    selected_hotel = hotel.sort_values(by='predicted_suitability', ascending=False).iloc[0]
                    selected_hotels.append(selected_hotel.name)
                    current_hotel = selected_hotel
                else:
                    selected_hotel = current_hotel
            else:
                selected_hotel = current_hotel

        elif accommodation_type == 'rental':
            if day % rental_change_interval == 0:
                rental = rentals.copy()
                missing_cols = [col for col in rental_features if col not in rental.columns]
                rental = rental.reindex(columns=rental.columns.tolist() + missing_cols, fill_value=0)

                rental['predicted_suitability'] = rental_model.predict(rental[rental_features])
                rental = rental[~rental.index.isin(selected_rentals)]

                if not rental.empty:
                    selected_rental = rental.sort_values(by='predicted_suitability', ascending=False).iloc[0]
                    selected_rentals.append(selected_rental.name)
                    current_rental = selected_rental
                else:
                    selected_rental = current_rental
            else:
                selected_rental = current_rental

        # Restaurant selection logic
        selected_restaurants_list = []
        restaurant = restaurants.copy()
        missing_cols = [col for col in restaurant_features if col not in restaurant.columns]
        restaurant = restaurant.reindex(columns=restaurant.columns.tolist() + missing_cols, fill_value=0)

        restaurant['predicted_suitability'] = restaurant_model.predict(restaurant[restaurant_features])
        restaurant = restaurant[~restaurant.index.isin(selected_restaurants)]

        if not restaurant.empty:
            selected_restaurants_list = restaurant.sort_values(by='predicted_suitability', ascending=False).head(3).to_dict('records')
            for res in selected_restaurants_list:
                selected_restaurants.add(res['name'])

        # Select Attractions (Two per day)
        selected_attractions_list = []
        available_attractions = attractions[~attractions['name'].isin(selected_attractions)]

        if not available_attractions.empty:
            selected_attractions_list = available_attractions.sample(n=min(2, len(available_attractions))).to_dict('records')
            for att in selected_attractions_list:
                selected_attractions.add(att['name'])

        # Create the daily plan
        daily_plan = {
            'accommodation_type': accommodation_type,
            'hotel': selected_hotel.to_dict() if selected_hotel is not None else {},
            'rental': selected_rental.to_dict() if selected_rental is not None else {},
            'breakfast': selected_restaurants_list[0] if len(selected_restaurants_list) > 0 else {},
            'attraction_1': selected_attractions_list[0] if len(selected_attractions_list) > 0 else {},
            'lunch': selected_restaurants_list[1] if len(selected_restaurants_list) > 1 else {},
            'attraction_2': selected_attractions_list[1] if len(selected_attractions_list) > 1 else {},
            'dinner': selected_restaurants_list[2] if len(selected_restaurants_list) > 2 else {}
        }
        itinerary.append(daily_plan)

    return itinerary


# Generate Itineraries
for index, user in user_input.iterrows():
    destination = user['destination']

    if destination.lower() == 'colombo':
        user_data = user_input.iloc[[index]]  # Select specific user's data

        hotel_interval = user.get('hotel_change_interval', 3)
        rental_interval = user.get('rental_change_interval', 3)

        itinerary = generate_itinerary_with_ml(
            user_data, hotels, restaurants, vacation_rentals, attractions,
            hotel_model, restaurant_model, rental_model,
            hotel_change_interval=hotel_interval, rental_change_interval=rental_interval
        )

        print(f"Itinerary for User {index + 1} (Destination: Colombo):")
        for day, plan in enumerate(itinerary, start=1):
            print(f"  Day {day}:")
            print(f"    Accommodation Type: {plan.get('accommodation_type', 'N/A')}")
            print(f"    Hotel: {plan.get('hotel', {}).get('name', 'N/A')}")
            print(f"    Rental: {plan.get('rental', {}).get('name', 'N/A')}")
            print(f"    Restaurant: {plan.get('breakfast', {}).get('name', 'N/A')}, "
                  f"{plan.get('lunch', {}).get('name', 'N/A')}, {plan.get('dinner', {}).get('name', 'N/A')}")
            print(f"    Attraction 1: {plan.get('attraction_1', {}).get('name', 'N/A')}")
            print(f"    Attraction 2: {plan.get('attraction_2', {}).get('name', 'N/A')}")
            print("\n")
    else:
        print(f"Itinerary for User {index + 1} (Destination: {destination}): Info not available\n")


Itinerary for User 1 (Destination: Ella): Info not available

Itinerary for User 2 (Destination: Nuwara Eliya): Info not available

Itinerary for User 3 (Destination: Ella): Info not available

Itinerary for User 4 (Destination: Ella): Info not available

Itinerary for User 5 (Destination: Dambulla): Info not available

Itinerary for User 6 (Destination: Ella): Info not available

Itinerary for User 7 (Destination: Nuwara Eliya): Info not available

Itinerary for User 8 (Destination: Colombo):
  Day 1:
    Accommodation Type: rental
    Hotel: N/A
    Rental: Hotel, beach and Water Sports
    Restaurant: Yi Jing, Pizza Buona, Cafe Ivy
    Attraction 1: Escape The Room Sri Lanka
    Attraction 2: Good Market


  Day 2:
    Accommodation Type: rental
    Hotel: N/A
    Rental: Hotel, beach and Water Sports
    Restaurant: Yi Jing, Pizza Buona, Cafe Ivy
    Attraction 1: Tuk Tuk CeylonRide
    Attraction 2: Vision of Travel


  Day 3:
    Accommodation Type: rental
    Hotel: N/A
    Rent

In [51]:
import googlemaps

# Initialize Google Maps API
gmaps = googlemaps.Client(key="AIzaSyBmfj-U3U-CXmdqT6bpn_WdIQUub-JO0gk")

def get_travel_route(origin, destination, mode="driving"):
    """Fetch travel route from Google Maps Directions API."""
    if not origin or not destination:
        return "No route available"

    try:
        directions = gmaps.directions(origin, destination, mode=mode)
        if directions:
            route = directions[0]['legs'][0]
            return {
                "distance": route['distance']['text'],
                "duration": route['duration']['text'],
                "steps": [step['html_instructions'] for step in route['steps']]
            }
    except Exception as e:
        return f"Error fetching route: {e}"

    return "No route found"

def generate_itinerary_with_ml(user_data, hotels, restaurants, rentals, attractions,
                               hotel_model, restaurant_model, rental_model,
                               hotel_change_interval=3, rental_change_interval=3):

    itinerary = []
    selected_hotels = []
    selected_restaurants = set()
    selected_rentals = []
    selected_attractions = set()

    current_hotel = None
    current_rental = None

    # Get expected feature names from models
    hotel_features = hotel_model.feature_names_in_
    restaurant_features = restaurant_model.feature_names_in_
    rental_features = rental_model.feature_names_in_

    for day in range(int(user_data['days'].iloc[0])):
        # Hotel selection logic
        if day % hotel_change_interval == 0:
            hotel = hotels.copy()

            missing_cols = [col for col in hotel_features if col not in hotel.columns]
            hotel = hotel.reindex(columns=hotel.columns.tolist() + missing_cols, fill_value=0)

            hotel['predicted_suitability'] = hotel_model.predict(hotel[hotel_features])
            hotel = hotel[~hotel.index.isin(selected_hotels)]

            if not hotel.empty:
                selected_hotel = hotel.sort_values(by='predicted_suitability', ascending=False).iloc[0]
                selected_hotels.append(selected_hotel.name)
                current_hotel = selected_hotel
            else:
                selected_hotel = current_hotel
        else:
            selected_hotel = current_hotel

        # Restaurant selection logic (Breakfast, Lunch, Dinner)
        selected_restaurants_list = []
        restaurant = restaurants.copy()

        missing_cols = [col for col in restaurant_features if col not in restaurant.columns]
        restaurant = restaurant.reindex(columns=restaurant.columns.tolist() + missing_cols, fill_value=0)

        restaurant['predicted_suitability'] = restaurant_model.predict(restaurant[restaurant_features])
        restaurant = restaurant[~restaurant.index.isin(selected_restaurants)]

        if not restaurant.empty:
            selected_restaurants_list = restaurant.sort_values(by='predicted_suitability', ascending=False).head(3).to_dict('records')
            for res in selected_restaurants_list:
                selected_restaurants.add(res['name'])

        # Vacation rental selection logic
        if day % rental_change_interval == 0:
            rental = rentals.copy()

            missing_cols = [col for col in rental_features if col not in rental.columns]
            rental = rental.reindex(columns=rental.columns.tolist() + missing_cols, fill_value=0)

            rental['predicted_suitability'] = rental_model.predict(rental[rental_features])
            rental = rental[~rental.index.isin(selected_rentals)]

            if not rental.empty:
                selected_rental = rental.sort_values(by='predicted_suitability', ascending=False).iloc[0]
                selected_rentals.append(selected_rental.name)
                current_rental = selected_rental
            else:
                selected_rental = current_rental
        else:
            selected_rental = current_rental

        # Select Attractions (Two per day)
        selected_attractions_list = []
        available_attractions = attractions[~attractions['name'].isin(selected_attractions)]

        if not available_attractions.empty:
            selected_attractions_list = available_attractions.sample(n=min(2, len(available_attractions))).to_dict('records')
            for att in selected_attractions_list:
                selected_attractions.add(att['name'])

        # Get travel routes
        travel_routes = {
            "hotel_to_breakfast": get_travel_route(selected_hotel.get('name'), selected_restaurants_list[0].get('name')),
            "breakfast_to_attraction1": get_travel_route(selected_restaurants_list[0].get('name'), selected_attractions_list[0].get('name')),
            "attraction1_to_lunch": get_travel_route(selected_attractions_list[0].get('name'), selected_restaurants_list[1].get('name')),
            "lunch_to_attraction2": get_travel_route(selected_restaurants_list[1].get('name'), selected_attractions_list[1].get('name')),
            "attraction2_to_dinner": get_travel_route(selected_attractions_list[1].get('name'), selected_restaurants_list[2].get('name')),
            "dinner_to_hotel": get_travel_route(selected_restaurants_list[2].get('name'), selected_hotel.get('name'))
        }

        # Create the daily plan
        daily_plan = {
            'hotel': selected_hotel.to_dict() if selected_hotel is not None else {},
            'breakfast': selected_restaurants_list[0] if len(selected_restaurants_list) > 0 else {},
            'attraction_1': selected_attractions_list[0] if len(selected_attractions_list) > 0 else {},
            'lunch': selected_restaurants_list[1] if len(selected_restaurants_list) > 1 else {},
            'attraction_2': selected_attractions_list[1] if len(selected_attractions_list) > 1 else {},
            'dinner': selected_restaurants_list[2] if len(selected_restaurants_list) > 2 else {},
            'rental': selected_rental.to_dict() if selected_rental is not None else {},
            'travel_routes': travel_routes
        }
        itinerary.append(daily_plan)

    return itinerary

# Generate Itineraries
for index, user in user_input.iterrows():
    destination = user['destination']

    if destination.lower() == 'colombo':
        user_data = user_input.iloc[[index]]  # Select specific user's data

        hotel_interval = user.get('hotel_change_interval', 3)
        rental_interval = user.get('rental_change_interval', 3)

        itinerary = generate_itinerary_with_ml(
            user_data, hotels, restaurants, vacation_rentals, attractions,
            hotel_model, restaurant_model, rental_model,
            hotel_change_interval=hotel_interval, rental_change_interval=rental_interval
        )

        print(f"Itinerary for User {index + 1} (Destination: Colombo):")
        for day, plan in enumerate(itinerary, start=1):
            print(f"  Day {day}:")
            print(f"    Hotel: {plan['hotel'].get('name', 'N/A')}")
            print(f"    Travel Routes: {plan['travel_routes']}")
            print("\n")
    else:
        print(f"Itinerary for User {index + 1} (Destination: {destination}): Info not available\n")


Itinerary for User 1 (Destination: Ella): Info not available

Itinerary for User 2 (Destination: Nuwara Eliya): Info not available

Itinerary for User 3 (Destination: Ella): Info not available

Itinerary for User 4 (Destination: Ella): Info not available

Itinerary for User 5 (Destination: Dambulla): Info not available

Itinerary for User 6 (Destination: Ella): Info not available

Itinerary for User 7 (Destination: Nuwara Eliya): Info not available

Itinerary for User 8 (Destination: Colombo):
  Day 1:
    Hotel: Rockwell Colombo
    Travel Routes: {'hotel_to_breakfast': 'Error fetching route: NOT_FOUND', 'breakfast_to_attraction1': 'Error fetching route: NOT_FOUND', 'attraction1_to_lunch': 'Error fetching route: NOT_FOUND', 'lunch_to_attraction2': 'Error fetching route: NOT_FOUND', 'attraction2_to_dinner': {'distance': '7.5 km', 'duration': '19 mins', 'steps': ['Head <b>west</b> on <b>Salalihini Mawatha</b> toward <b>Siri Sumana Mawatha</b>', 'Turn <b>right</b> at Bus Stop onto <b>Sir