In [80]:
import pandas as pd 
import numpy as np 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [81]:
# Read the CSV file into a Pandas DataFrame
df = pd.read_csv('/Users/rewanabdelqader/Collage/Semster 8/GraduationProject/Code/GraduationProject/DataSets/places_data.csv')

# Split the data into two separate columns
df[['place_name', 'city']] = df['place_name'].str.split(' - ', expand=True)
# Print the updated DataFrame
df.head(5)

Unnamed: 0,place_id,place_name,place_type,popularity,rating_count,average_rating,budget,keywords,city
0,130,Aqua Blu Sharm El Sheikh,['outdoor'],3690,500,1.87,2430,"['Movies and TV', 'Outdoor activities', 'Pet l...",
1,128,El Tanboura Hall,"['outdoor', 'indoor']",9221,100,1.52,207,"['Food and drink', 'Outdoor activities', 'Art ...",Cairo
2,931,Qasr Al Sarab Desert Resort,['outdoor'],8629,4000,3.34,425,"['Pet lovers', 'Science and nature', 'Technolo...",
3,838,Exit Egypt,['outdoor'],9253,3000,2.33,2874,"['Fashion', 'Travel', 'Food and drink', 'Art a...",Cairo
4,754,"Terrace Cafe & Bar, Cairo","['indoor', 'outdoor']",5284,100,3.54,1281,"['Dancing', 'Outdoor activities', 'Music', 'Te...",


In [82]:


# add the budget and keywords features to the dataset
df['budget'] = df['budget'].astype(float)
df['keywords'] = df['keywords'].apply(lambda x: ' '.join(x))
df['features'] = df['keywords'] + ' ' + df['budget'].astype(str)

# create a tf-idf matrix for the features column
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['features'])

In [83]:
# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
# Construct a reverse map of indices and place names
indices = pd.Series(df.index, index=df['place_name']).drop_duplicates()

In [84]:
# define the recommendation function
def get_recommendations(place_name, city=None, budget=None, cosine_sim=cosine_sim):
    # filter the dataset based on the city and budget
    df_filtered = df.copy()

    if city:
        df_filtered = df_filtered[df_filtered['city'] == city]

    if budget:
        df_filtered = df_filtered[df_filtered['budget'] <= budget]

    # add the budget and keywords features to the filtered dataset
    df_filtered['budget'] = df_filtered['budget'].astype(float)
    df_filtered['keywords'] = df_filtered['keywords'].apply(lambda x: ' '.join(x))
    df_filtered['features'] = df_filtered['keywords'] + ' ' + df_filtered['budget'].astype(str)

    # create a tf-idf matrix for the features column in the filtered dataset
    tfidf_matrix_filtered = tfidf.transform(df_filtered['features'])

    # get the indices of the specified place in the filtered dataset
    indices = pd.Series(df_filtered.index, index=df_filtered['place_name']).to_dict()

    # compute the pairwise similarity scores for the features
    sim_scores = list(enumerate(cosine_sim[indices[place_name]]))

    # sort the places based on the similarity scores and average rating
    sim_scores = sorted(sim_scores, key=lambda x: (x[1], df_filtered.loc[x[0], 'average_rating']), reverse=True)
    
    # get the indices of the top 10 most similar places
    sim_indices = [i for i, _ in sim_scores[1:11]]

    # return the details of the top 10 most similar places
    return df_filtered.loc[sim_indices, ['place_name', 'city', 'budget', 'average_rating', 'keywords']]

# example usage
get_recommendations('The Sunken City of Heracleion')

Unnamed: 0,place_name,city,budget,average_rating,keywords
677,Escape the Crate Egypt,Cairo,175.0,3.14,"[ ' C a r s ' , ' S c ..."
615,The Sunken City of Heracleion,,175.0,1.66,"[ ' F a s h i o n ' , ..."
937,Hilton Cairo Zamalek Residences,Cairo,2730.0,5.0,"[ ' R e a d i n g ' , ..."
202,"6IX Degrees, Cairo",,560.0,4.99,[ ' P a r e n t i n g ' ...
843,Rawabet Theater,Cairo,1350.0,4.99,[ ' M u s i c ' ]
745,The Chamber of Secrets Egypt,Cairo,299.0,4.97,[ ' O u t d o o r a c ...
781,Downtown Mall,,579.0,4.96,"[ ' T r a v e l ' , ' ..."
849,Marina Land,,2778.0,4.96,[ ' H i s t o r y a n ...
994,Siwa Mountain,,127.0,4.96,[ ' H e a l t h a n d ...
406,Al Marassi Beach,,774.0,4.95,[ ' H i s t o r y a n ...


In [85]:

def recommend_places(place_type, budget):
    # Filter the data based on the place type and budget
    filtered_data = df[(df['place_type'].apply(lambda x: place_type in x)) & (df['budget'] <= budget)]
    
    # Sort the filtered data by popularity and average rating
    sorted_data = filtered_data.sort_values(['popularity', 'average_rating'], ascending=[False, False])
    
    # Select the top 3 places
    top_places = sorted_data.head(3)
    
    # Return the recommended places
    return top_places['place_name'].tolist()


# Call the function to get the recommended places
recommendations = recommend_places('outdoor', 3000)

print("Here are your recommended places:")
for place in recommendations:
    print(place)

Here are your recommended places:
Almaza City Center
Al-Obour City Park
Al Ahly Sports Club


In [86]:

def recommend_places(place_type, budget):
    # Filter the data based on the place type and budget
    filtered_data = df[(df['place_type'].apply(lambda x: place_type in x)) & (df['budget'] <= budget)]
    
    # Sort the filtered data by popularity and average rating
    sorted_data = filtered_data.sort_values(['popularity', 'average_rating'], ascending=[False, False])
    
    # Select the top 3 places
    top_places = sorted_data.head(3)
    
    # Return the recommended places in the desired format
    return top_places[['place_name', 'city', 'budget', 'place_type', 'popularity', 'average_rating', 'keywords']].sort_values('average_rating', ascending=False)

# Call the function to get the recommended places
recommendations = recommend_places('outdoor', 3000)

print("Here are your recommended places:")
print(recommendations.to_string(index=False))

Here are your recommended places:
         place_name  city  budget            place_type  popularity  average_rating                                                                                                                                                                                                                                                                                                                                                                keywords
 Almaza City Center  None  2726.0           ['outdoor']        9965            2.68                                                                                                                                                                                                                                                                                                                 [ ' T r a v e l ' ,   ' D I Y   a n d   c r a f t s ' ]
Al Ahly Sports Club Cairo  1739.0           ['outdoor']        9911           

In [87]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset

# Create a tf-idf vectorizer
tfidf = TfidfVectorizer(stop_words='english')

# Compute the cosine similarity matrix
tfidf_matrix = tfidf.fit_transform(df['keywords'] + ' ' + df['budget'].astype(str))
cosine_sim = cosine_similarity(tfidf_matrix)

def get_recommendations(place_name, city=None, budget=None, place_type=None, popularity=None, keywords=None, cosine_sim=None):
    # filter the dataset based on the specified criteria
    df_filtered = df.copy()

    if city:
        df_filtered = df_filtered[df_filtered['city'] == city]

    if budget:
        df_filtered = df_filtered[df_filtered['budget'] <= budget]

    if place_type:
        df_filtered = df_filtered[df_filtered['place_type'].apply(lambda x: place_type in x)]

    if popularity:
        df_filtered = df_filtered[df_filtered['popularity'] >= popularity]

    if keywords:
        df_filtered = df_filtered[df_filtered['keywords'].apply(lambda x: all(keyword in x for keyword in keywords))]

    # compute the cosine similarity between the specified place and the filtered dataset
    place_idx = df[df['place_name'] == place_name].index[0]
    if cosine_sim is None:
        # Compute the cosine similarity matrix if not provided
        tfidf_matrix_filtered = tfidf.transform(df_filtered['keywords'] + ' ' + df_filtered['budget'].astype(str))
        cosine_sim = cosine_similarity(tfidf_matrix_filtered)
    
    sim_scores = list(enumerate(cosine_sim[place_idx]))
    sim_indices = [i[0] for i in sorted(sim_scores, key=lambda x: x[1], reverse=True) if i[0] != place_idx]

    # return the recommended places
    return df_filtered.loc[sim_indices, ['place_name', 'city', 'budget', 'place_type', 'popularity', 'average_rating', 'keywords']].sort_values('average_rating', ascending=False)

# Call the function to get the recommended places
recommendations = get_recommendations('The Sunken City of Heracleion', cosine_sim=cosine_sim)

print("Here are your recommended places:")
print(recommendations.to_string(index=False))

Here are your recommended places:
                                        place_name                            city  budget            place_type  popularity  average_rating                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  keywords
                   Hilton Cairo Zamalek Residences                           Cairo  2730.0            ['indoor']        7448            5.00                                                   