# **Data Loading**

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
df = pd.read_csv('Indian_Dataset.csv')
df

Unnamed: 0.1,Unnamed: 0,Zone,State,City,Name,Type,Establishment Year,time needed to visit in hrs,Google review rating,Entrance Fee in INR,Airport with 50km Radius,Weekly Off,Significance,DSLR Allowed,Number of google review in lakhs,Best Time to visit
0,0,Northern,Delhi,Delhi,India Gate,War Memorial,1921,0.5,4.6,0,Yes,,Historical,Yes,2.60,Evening
1,1,Northern,Delhi,Delhi,Humayun's Tomb,Tomb,1572,2.0,4.5,30,Yes,,Historical,Yes,0.40,Afternoon
2,2,Northern,Delhi,Delhi,Akshardham Temple,Temple,2005,5.0,4.6,60,Yes,,Religious,No,0.40,Afternoon
3,3,Northern,Delhi,Delhi,Waste to Wonder Park,Theme Park,2019,2.0,4.1,50,Yes,Monday,Environmental,Yes,0.27,Evening
4,4,Northern,Delhi,Delhi,Jantar Mantar,Observatory,1724,2.0,4.2,15,Yes,,Scientific,Yes,0.31,Morning
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
320,320,Western,Gujarat,Gandhinagar,Akshardham,Temple,1992,3.0,4.6,0,Yes,Monday,Religious,No,0.18,All
321,321,Central,Uttar Pradesh,Agra,Agra Fort,Fort,1565,2.0,4.5,40,Yes,,Historical,Yes,1.30,Afternoon
322,322,Central,Madhya Pradesh,Bhopal,Madhya Pradesh Tribal Museum,Museum,2013,2.0,4.7,10,Yes,Monday,Cultural,Yes,0.15,All
323,323,Northern,Rajasthan,Jaipur,City Palace,Palace,1727,2.0,4.4,200,Yes,,Historical,Yes,0.51,Morning


In [3]:
df['Weekly Off'].fillna("No", inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Weekly Off'].fillna("No", inplace=True)


In [4]:
df

Unnamed: 0.1,Unnamed: 0,Zone,State,City,Name,Type,Establishment Year,time needed to visit in hrs,Google review rating,Entrance Fee in INR,Airport with 50km Radius,Weekly Off,Significance,DSLR Allowed,Number of google review in lakhs,Best Time to visit
0,0,Northern,Delhi,Delhi,India Gate,War Memorial,1921,0.5,4.6,0,Yes,No,Historical,Yes,2.60,Evening
1,1,Northern,Delhi,Delhi,Humayun's Tomb,Tomb,1572,2.0,4.5,30,Yes,No,Historical,Yes,0.40,Afternoon
2,2,Northern,Delhi,Delhi,Akshardham Temple,Temple,2005,5.0,4.6,60,Yes,No,Religious,No,0.40,Afternoon
3,3,Northern,Delhi,Delhi,Waste to Wonder Park,Theme Park,2019,2.0,4.1,50,Yes,Monday,Environmental,Yes,0.27,Evening
4,4,Northern,Delhi,Delhi,Jantar Mantar,Observatory,1724,2.0,4.2,15,Yes,No,Scientific,Yes,0.31,Morning
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
320,320,Western,Gujarat,Gandhinagar,Akshardham,Temple,1992,3.0,4.6,0,Yes,Monday,Religious,No,0.18,All
321,321,Central,Uttar Pradesh,Agra,Agra Fort,Fort,1565,2.0,4.5,40,Yes,No,Historical,Yes,1.30,Afternoon
322,322,Central,Madhya Pradesh,Bhopal,Madhya Pradesh Tribal Museum,Museum,2013,2.0,4.7,10,Yes,Monday,Cultural,Yes,0.15,All
323,323,Northern,Rajasthan,Jaipur,City Palace,Palace,1727,2.0,4.4,200,Yes,No,Historical,Yes,0.51,Morning


# **Content Based Filtering**

In [5]:
df['features'] =df['Name'] + ' ' + df['Zone'] + ' ' + df['State'] + ' ' + df['City'] + " " + df['Type'] + " " + df['Establishment Year'] + " " + df['Weekly Off'] + " " + df['Significance'] + " " + df['DSLR Allowed'] + " " + df['Best Time to visit']

vectorizer = TfidfVectorizer(stop_words='english')

destination_features = vectorizer.fit_transform(df['features'])

cosine_sim = cosine_similarity(destination_features, destination_features)

In [6]:
print(df.columns)

Index(['Unnamed: 0', 'Zone', 'State', 'City', 'Name', 'Type',
       'Establishment Year', 'time needed to visit in hrs',
       'Google review rating', 'Entrance Fee in INR',
       'Airport with 50km Radius', 'Weekly Off', 'Significance',
       'DSLR Allowed', 'Number of google review in lakhs',
       'Best Time to visit', 'features'],
      dtype='object')


In [7]:
from IPython.display import display  # For displaying tables in Google Colab

def recommend_similar_places(place_name, df, cosine_sim, top_n=10):
    try:
        idx = df[df['Name'].str.lower() == place_name.lower()].index[0]
        sim_scores = list(enumerate(cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        top_destinations_idx = [i[0] for i in sim_scores[1:top_n+1]]

        recommendations = df.iloc[top_destinations_idx][[
            'Name', 'State', 'City', 'Type', 'Establishment Year', 'time needed to visit in hrs', 'Google review rating', 'Entrance Fee in INR', 'Airport with 50km Radius', 'Weekly Off', 'Significance', 'DSLR Allowed', 'Best Time to visit', 'features'
        ]]

        recommendations.reset_index(drop=True, inplace=True)  # Remove index
        display(recommendations)  # Display in table format

    except IndexError:
        print("Place not found! Please enter a valid place name.")

# Example: Get 10 recommendations similar to "India Gate"
recommend_similar_places('India Gate', df, cosine_sim, top_n=10)


Unnamed: 0,Name,State,City,Type,Establishment Year,time needed to visit in hrs,Google review rating,Entrance Fee in INR,Airport with 50km Radius,Weekly Off,Significance,DSLR Allowed,Best Time to visit,features
0,War Memorial,Andhra Pradesh,Visakhapatnam,War Memorial,Unknown,1.0,4.6,0,Yes,No,Historical,Yes,All,War Memorial Southern Andhra Pradesh Visakhapa...
1,Dras War Memorial,Ladakh,Dras,War Memorial,Unknown,1.0,4.8,0,No,No,Historical,Yes,All,Dras War Memorial Northern Ladakh Dras War Mem...
2,Lotus Temple,Delhi,Delhi,Temple,1986,1.0,4.5,0,Yes,Monday,Religious,Yes,Evening,Lotus Temple Northern Delhi Delhi Temple 1986 ...
3,Kargil War Memorial,Ladakh,Kargil,War Memorial,24,1.0,4.8,0,No,No,Historical,Yes,All,Kargil War Memorial Northern Ladakh Kargil War...
4,Red Fort,Delhi,Delhi,Fort,1648,2.0,4.5,35,Yes,No,Historical,Yes,Afternoon,Red Fort Northern Delhi Delhi Fort 1648 No His...
5,Akshardham Temple,Delhi,Delhi,Temple,2005,5.0,4.6,60,Yes,No,Religious,No,Afternoon,Akshardham Temple Northern Delhi Delhi Temple ...
6,Qutub Minar,Delhi,Delhi,Monument,1192,1.0,4.5,35,Yes,No,Historical,Yes,Afternoon,Qutub Minar Northern Delhi Delhi Monument 1192...
7,Lodhi Garden,Delhi,Delhi,Park,1500,1.0,4.5,0,Yes,No,Botanical,Yes,All,Lodhi Garden Northern Delhi Delhi Park 1500 No...
8,Jama Masjid,Delhi,New Delhi,Mosque,1656,1.0,4.5,0,Yes,No,Historical,Yes,All,Jama Masjid Northern Delhi New Delhi Mosque 16...
9,Garden of Five Senses,Delhi,Delhi,Park,2003,2.0,4.1,35,Yes,No,Botanical,Yes,Morning,Garden of Five Senses Northern Delhi Delhi Par...


# **Rule Based Filtering**

In [8]:
from IPython.display import display

def filter_places(user_input, df):

    filtered_df = df.copy()

    # Apply filters based on user input
    if 'Zone' in user_input:
        filtered_df = filtered_df[filtered_df['Zone'] == user_input['Zone']]

    if 'State' in user_input:
        filtered_df = filtered_df[filtered_df['State'] == user_input['State']]

    if 'City' in user_input:
        filtered_df = filtered_df[filtered_df['City'] == user_input['City']]

    if 'Type' in user_input:
        filtered_df = filtered_df[filtered_df['Type'] == user_input['Type']]

    if 'time needed to visit in hrs' in user_input:
        min_time = float(user_input['time needed to visit in hrs'])
        filtered_df = filtered_df[filtered_df['time needed to visit in hrs'] >= min_time]

    if 'Google review rating' in user_input:
        min_rating = float(user_input['Google review rating'])
        filtered_df = filtered_df[filtered_df['Google review rating'] >= min_rating]

    if 'Entrance Fee in INR' in user_input:
        max_fee = int(user_input['Entrance Fee in INR'])
        filtered_df = filtered_df[filtered_df['Entrance Fee in INR'] <= max_fee]

    if 'DSLR Allowed' in user_input:
        filtered_df = filtered_df[filtered_df['DSLR Allowed'].str.lower() == user_input['DSLR Allowed'].lower()]

    if 'Best Time to visit' in user_input:
        filtered_df = filtered_df[filtered_df['Best Time to visit'].str.lower() == user_input['Best Time to visit'].lower()]

    # If no filters provided, sort by rating * number of reviews
    if not user_input:
        filtered_df['popularity_score'] = filtered_df['Google review rating'] * filtered_df['Number of google review in lakhs']
        filtered_df = filtered_df.sort_values(by='popularity_score', ascending=False)

    # Return final filtered results
    return filtered_df[['Name', 'Zone', 'State', 'City', 'Type', 'Google review rating', 'Entrance Fee in INR', 'DSLR Allowed', 'Best Time to visit']]

# Example User Input
user_input = {
    'State': 'Delhi',
    'Google review rating': '4',
    'Best Time to visit': 'morning'
}

# Get recommendations
recommended_places = filter_places(user_input, df)
recommended_places.reset_index(drop=True, inplace=True)
display(recommended_places)

Unnamed: 0,Name,Zone,State,City,Type,Google review rating,Entrance Fee in INR,DSLR Allowed,Best Time to visit
0,Jantar Mantar,Northern,Delhi,Delhi,Observatory,4.2,15,Yes,Morning
1,Garden of Five Senses,Northern,Delhi,Delhi,Park,4.1,35,Yes,Morning
2,Rail Museum,Northern,Delhi,New Delhi,Museum,4.4,50,Yes,Morning
