In [1]:
# Import Statements
import re
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [2]:
# Load and preprocess the dataset
df_hotels = pd.read_csv("hotels_data.csv").drop_duplicates(subset=['Hotel_Name']).reset_index(drop=True)

In [3]:
# Fill missing values
df_hotels['Hotel_Name'] = df_hotels['Hotel_Name'].fillna("")
df_hotels['combined_amenities'] = df_hotels['combined_amenities'].fillna("No amenities listed")
df_hotels['City'] = df_hotels['City'].fillna("")
df_hotels['Category'] = df_hotels['Category'].fillna("General")
df_hotels['rating'] = df_hotels['rating'].fillna(0).astype(float)
df_hotels['ranking'] = df_hotels['ranking'].fillna("Unknown ranking")

In [4]:
# Process the ranking column
def process_ranking(ranking):
    """Extract numeric rank from strings like '27 of 90 hotels in Lahore'."""
    match = re.search(r"(\d+) of (\d+)", ranking)
    if match:
        rank, total = map(int, match.groups())
        return rank / total  # Normalize rank to a scale of 0 to 1 (lower is better)
    return 1.0  # Default for missing or invalid rankings

df_hotels['normalized_ranking'] = df_hotels['ranking'].apply(process_ranking)

In [5]:
# Initialize TF-IDF vectorizers
tfidf_title = TfidfVectorizer(stop_words="english")
tfidf_amenities = TfidfVectorizer(stop_words="english")
tfidf_city = TfidfVectorizer(stop_words="english")
tfidf_category = TfidfVectorizer(stop_words="english")

In [6]:
# Fit TF-IDF on dataset columns
tfidf_matrix_title = tfidf_title.fit_transform(df_hotels['Hotel_Name'])
tfidf_matrix_amenities = tfidf_amenities.fit_transform(df_hotels['combined_amenities'])
tfidf_matrix_city = tfidf_city.fit_transform(df_hotels['City'])
tfidf_matrix_category = tfidf_category.fit_transform(df_hotels['Category'])

In [34]:
# Recommendation function
def get_recommendations(query_title, query_city, query_amenities, query_category, df_hotels=df_hotels):
    # Transform user inputs
    query_vector_title = tfidf_title.transform([query_title])
    query_vector_amenities = tfidf_amenities.transform([query_amenities])
    query_vector_city = tfidf_city.transform([query_city])
    query_vector_category = tfidf_category.transform([query_category])
    
    # Compute cosine similarities
    title_sim_scores = cosine_similarity(query_vector_title, tfidf_matrix_title).flatten()
    amenities_sim_scores = cosine_similarity(query_vector_amenities, tfidf_matrix_amenities).flatten()
    city_sim_scores = cosine_similarity(query_vector_city, tfidf_matrix_city).flatten()
    category_sim_scores = cosine_similarity(query_vector_category, tfidf_matrix_category).flatten()
    
    # Normalize scores
    scaler = MinMaxScaler()
    title_sim_scores = scaler.fit_transform(title_sim_scores.reshape(-1, 1)).flatten()
    amenities_sim_scores = scaler.fit_transform(amenities_sim_scores.reshape(-1, 1)).flatten()
    city_sim_scores = scaler.fit_transform(city_sim_scores.reshape(-1, 1)).flatten()
    category_sim_scores = scaler.fit_transform(category_sim_scores.reshape(-1, 1)).flatten()
    
    # Weighted combination of scores
    df_hotels['similarity_score'] = (
        0.3 * title_sim_scores +
        0.3 * city_sim_scores +
        0.2 * amenities_sim_scores +
        0.2 * category_sim_scores
    )
    
    # Final score combining similarity, rating, and ranking
    df_hotels['final_score'] = (
        0.6 * df_hotels['similarity_score'] +
        0.3 * (df_hotels['rating'] / 5) -  # Normalize ratings to a scale of 0-1
        0.1 * df_hotels['normalized_ranking']  # Penalize higher rankings
    )
    
    # Get top recommendations
    top_results = df_hotels.nlargest(10, 'final_score')

    # Initialize the ChatGroq model
    llm = ChatGroq(model_name="mixtral-8x7b-32768", temperature=0.7, groq_api_key="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")
    
    # Define the system and human templates
    system = "You are a professional and user-friendly assistant that provides detailed yet concise hotel descriptions in paragraph form. Your responses should be engaging, precise, and relatable, highlighting key details provided in the variables and emphasizing user preferences in a natural, conversational style."
    human = (
        "{hotel_name} is situated in {city}, boasting a commendable rating of {rating}/5.0 and ranked {ranking}. It offers amenities such as {amenities}, aligning with user preferences for a tailored experience."
        "Your task is to craft a single, well-structured paragraph that emphasizes the details provided in the variables, presenting a catchy and personalized description that feels relatable to the user."
    )
    
    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])
    
    # Function to generate description using ChatGroq
    def generate_description(llm, prompt, hotel_data):
        # try:
            # Prepare the input data for the prompt
            input_data = {
                "hotel_name": hotel_data["Hotel_Name"],
                "city": hotel_data["City"],
                "rating": hotel_data["rating"],
                "ranking": hotel_data["ranking"],
                "amenities": ", ".join(hotel_data["combined_amenities"].split(", ")),
            }
            
            # Create a chain from the prompt and model, and invoke it
            chain = prompt | llm
            response = chain.invoke(input_data)
            
            # Return the generated text
            return (response.content)
            # return response.get("text", "No text returned by the model.")
        # except Exception as e:
            # return f"Error generating description: {str(e)}"

    # Test the function with ChatGroq
    if not top_results.empty:
        # Generate a descriptive text for the first hotel
        first_hotel = top_results.iloc[0]
        description = generate_description(llm, prompt, first_hotel)
    
        # Return the first recommendation as text and the rest as a table
        remaining_hotels = top_results.iloc[1:][['Hotel_Name', 'ID','City', 'Category', 'rating', 'ranking', 'combined_amenities']]
        return description, remaining_hotels
    
    return "No results found for the given query."

In [35]:
# Test the function
description, remaining_hotels = get_recommendations(
    query_title="Best Hotel",
    query_city="Karachi",
    query_amenities="Laundary",
    query_category="Hotel"
)

In [36]:
# Display results
print(description)
remaining_hotels

Welcome to the Hotel Pearl Inn, a 5.0/5.0 rated B&B nestled in the heart of Karachi, ranked #3 of 171 Inns, offering a truly personalized experience tailored to your preferences. This non-smoking hotel boasts elegant suites with air conditioning, blackout curtains, and a flatscreen TV for your utmost comfort. Enjoy a complimentary welcome drink, bottled water, and toiletries, along with a turndown service for that extra touch. Stay connected with free internet, wifi, and a convenient in-room telephone. The hotel offers a range of room options, including extra-long beds, soundproof rooms, and allergy-free rooms, ensuring a restful stay. Enjoy the convenience of 24-hour check-in, express check-in/check-out, and a 24-hour front desk. Savor a complimentary breakfast, served as a buffet, with a variety of options to suit your taste. For business travelers, the hotel provides meeting rooms, conference facilities, and dry cleaning services. For leisure, indulge in the rooftop bar and terrace,

Unnamed: 0,Hotel_Name,ID,City,Category,rating,ranking,combined_amenities
1411,Hotel White Palace,17562226.0,Karachi,HOTEL,5.0,#9 of 171 B&Bs / Inns in Karachi,"['Internet', 'Kids Activities', 'Room service'..."
1197,Karachi Guest House,4501299.0,Karachi,HOTEL,5.0,#10 of 171 B&Bs / Inns in Karachi,[]
457,Royal Residency Guest House,9863778.0,Karachi,HOTEL,5.0,#9 of 118 Specialty lodging in Karachi,"['Internet', 'Room service', 'Free Internet', ..."
1414,Taj Exotica Beach Resort,10810403.0,Karachi,HOTEL,5.0,#11 of 118 Specialty lodging in Karachi,"['Kids Activities', 'Suites', 'Room service', ..."
445,Legacy Homes,1231393.0,Karachi,HOTEL,5.0,#29 of 200 hotels in Karachi,[]
456,New Mashwani Guest House,16869436.0,Karachi,HOTEL,5.0,#31 of 200 hotels in Karachi,"['Internet', 'Shuttle Bus Service', 'Suites', ..."
1234,Prime Dha Guest House,15843978.0,Karachi,HOTEL,5.0,#2 of 12 guest houses in Karachi,"['Internet', 'Kids Activities', 'Suites', 'Roo..."
1400,Movenpick Hotel Karachi,5953967.0,Karachi,HOTEL,4.5,#1 of 200 hotels in Karachi,"['Free parking', 'Fitness center', 'Pool', 'In..."
1403,Hotel Excelsior,20881175.0,Karachi,HOTEL,4.5,#8 of 200 hotels in Karachi,"['Free Wifi', 'Air conditioning', 'Safe', 'Des..."


In [37]:
related_hotels_to = int(input("Enter selected hotel id: "))

Enter selected hotel id:  20881175


In [38]:
selected_hotel_details = df_hotels.loc[df_hotels['ID'] == related_hotels_to]

In [39]:
selected_hotel_details.iloc[0]["City"]

'Karachi'

In [40]:
description, remaining_hotels = get_recommendations(
    query_title="",
    query_city=selected_hotel_details.iloc[0]["City"],
    query_amenities=", ".join(selected_hotel_details.iloc[0]["combined_amenities"]),
    query_category=selected_hotel_details.iloc[0]["Category"]
)

In [41]:
# Display results for related hotels
print(description)
remaining_hotels

Welcome to the Hotel Pearl Inn, a luxurious B&B nestled in the heart of Karachi that's sure to exceed your expectations. Ranked an impressive #3 out of 171 B&Bs/Inns and boasting a perfect 5.0/5.0 rating, you're in for a real treat. This non-smoking hotel offers a range of amenities tailored to your needs, including family and smoking rooms, a rooftop bar, and even an allergy-free room. With 24-hour check-in and security, express check-in/check-out, and a concierge, you'll experience seamless service from the moment you arrive. Enjoy the convenience of room service, dry cleaning, laundry, and ironing services, as well as a gift shop, convenience store, and secured parking. Stay connected with free internet, wifi, and a flatscreen TV, or unwind in your room with a microwave, refrigerator, and complimentary toiletries. Delight in a complimentary welcome drink, breakfast included, and a range of dining options at the restaurant, coffee shop, or rooftop terrace. The hotel also offers airpo

Unnamed: 0,Hotel_Name,ID,City,Category,rating,ranking,combined_amenities
1411,Hotel White Palace,17562226.0,Karachi,HOTEL,5.0,#9 of 171 B&Bs / Inns in Karachi,"['Internet', 'Kids Activities', 'Room service'..."
1197,Karachi Guest House,4501299.0,Karachi,HOTEL,5.0,#10 of 171 B&Bs / Inns in Karachi,[]
457,Royal Residency Guest House,9863778.0,Karachi,HOTEL,5.0,#9 of 118 Specialty lodging in Karachi,"['Internet', 'Room service', 'Free Internet', ..."
1414,Taj Exotica Beach Resort,10810403.0,Karachi,HOTEL,5.0,#11 of 118 Specialty lodging in Karachi,"['Kids Activities', 'Suites', 'Room service', ..."
445,Legacy Homes,1231393.0,Karachi,HOTEL,5.0,#29 of 200 hotels in Karachi,[]
456,New Mashwani Guest House,16869436.0,Karachi,HOTEL,5.0,#31 of 200 hotels in Karachi,"['Internet', 'Shuttle Bus Service', 'Suites', ..."
1234,Prime Dha Guest House,15843978.0,Karachi,HOTEL,5.0,#2 of 12 guest houses in Karachi,"['Internet', 'Kids Activities', 'Suites', 'Roo..."
1196,Mashwani Guest House,14257444.0,Karachi,HOTEL,5.0,#10 of 49 hotels in Karachi,"['Internet', 'Suites', 'Room service', 'Free I..."
1330,Sea Shell Inn,10065975.0,Karachi,HOTEL,5.0,#4 of 16 small hotels in Karachi,"['Kids Activities', 'Air conditioning', 'Refri..."
