In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
real_dataset=pd.read_csv("/kaggle/input/place-to-travel/place.csv")
real_dataset.head() # print 5 rows

# Drop unused columns
# Drop irrelevant columns
columns_to_drop = [
    'Establishment Year', 'DSLR Allowed', 'Airport with 50km Radius',
    'Weekly Off', 'Significance', 'Best Time to visit'
]
real_dataset = real_dataset.drop(columns=columns_to_drop)


# Display basic summary
print("Summary", real_dataset.dtypes)

Summary Unnamed: 0                            int64
Zone                                 object
State                                object
City                                 object
Name                                 object
Type                                 object
time needed to visit in hrs         float64
Google review rating                float64
Entrance Fee in INR                   int64
Number of google review in lakhs    float64
dtype: object


In [3]:
real_dataset.head(5)

Unnamed: 0.1,Unnamed: 0,Zone,State,City,Name,Type,time needed to visit in hrs,Google review rating,Entrance Fee in INR,Number of google review in lakhs
0,0,Northern,Delhi,Delhi,India Gate,War Memorial,0.5,4.6,0,2.6
1,1,Northern,Delhi,Delhi,Humayun's Tomb,Tomb,2.0,4.5,30,0.4
2,2,Northern,Delhi,Delhi,Akshardham Temple,Temple,5.0,4.6,60,0.4
3,3,Northern,Delhi,Delhi,Waste to Wonder Park,Theme Park,2.0,4.1,50,0.27
4,4,Northern,Delhi,Delhi,Jantar Mantar,Observatory,2.0,4.2,15,0.31


In [4]:
# Convert data types to become lighter dataset
real_dataset['time needed to visit in hrs'] = real_dataset['time needed to visit in hrs'].astype('float32')
real_dataset['Google review rating'] = real_dataset['Google review rating'].astype('float32')
real_dataset['Number of google review in lakhs'] = real_dataset['Number of google review in lakhs'].astype('float32')
real_dataset['Entrance Fee in INR'] = real_dataset['Entrance Fee in INR'].astype('int32')

# Display basic summary
print("Summary for converted dataset", real_dataset.dtypes)

Summary for converted dataset Unnamed: 0                            int64
Zone                                 object
State                                object
City                                 object
Name                                 object
Type                                 object
time needed to visit in hrs         float32
Google review rating                float32
Entrance Fee in INR                   int32
Number of google review in lakhs    float32
dtype: object


In [5]:
real_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 325 entries, 0 to 324
Data columns (total 10 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Unnamed: 0                        325 non-null    int64  
 1   Zone                              325 non-null    object 
 2   State                             325 non-null    object 
 3   City                              325 non-null    object 
 4   Name                              325 non-null    object 
 5   Type                              325 non-null    object 
 6   time needed to visit in hrs       325 non-null    float32
 7   Google review rating              325 non-null    float32
 8   Entrance Fee in INR               325 non-null    int32  
 9   Number of google review in lakhs  325 non-null    float32
dtypes: float32(3), int32(1), int64(1), object(5)
memory usage: 20.4+ KB


In [6]:
real_dataset = real_dataset.sample(n=500, random_state=1, replace=True)
number_of_rows = real_dataset.shape[0]
print("Number of rows in the DataFrame:", number_of_rows)

Number of rows in the DataFrame: 500


In [7]:
# Apply one-hot encoding to the categorical columns
df_encoded = pd.get_dummies(real_dataset, columns=['Zone', 'State', 'City', 'Type'])

# Display the first few rows of the encoded DataFrame
df_encoded.head()

Unnamed: 0.1,Unnamed: 0,Name,time needed to visit in hrs,Google review rating,Entrance Fee in INR,Number of google review in lakhs,Zone_Central,Zone_Eastern,Zone_North Eastern,Zone_Northern,...,Type_Trekking,Type_Urban Development Project,Type_Valley,Type_Viewpoint,Type_Village,Type_Vineyard,Type_War Memorial,Type_Waterfall,Type_Wildlife Sanctuary,Type_Zoo
37,37,Qutb Shahi Tombs,1.0,4.4,25,0.2,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
235,235,Sanaghagara Waterfall,1.0,4.4,0,0.055,False,True,False,False,...,False,False,False,False,False,False,False,True,False,False
72,72,Rann Utsav,3.0,4.9,7500,0.1,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
255,255,Uppalapadu Bird Sanctuary,1.0,4.4,10,0.7,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
203,203,Kargil War Memorial,1.0,4.8,0,0.011,False,False,False,True,...,False,False,False,False,False,False,True,False,False,False


In [8]:
X = df_encoded.drop(columns=['Name'])
y = df_encoded['Name']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
model = NearestNeighbors(n_neighbors=10)
model.fit(X_train)

In [10]:
def recommend_places(place_name):
    # Check if the place exists in the dataset
    if place_name in df_encoded['Name'].values:
        # Find the index of the input place
        place_index = df_encoded[df_encoded['Name'] == place_name].index[0]
        
        # Get the feature vector for the input place
        place_vector = X.iloc[place_index].values.reshape(1, -1)
        
        # Find nearest neighbors
        distances, indices = model.kneighbors(place_vector)
        
        # Create a DataFrame to hold distances and recommended place names
        recommendations = pd.DataFrame({
            'Place': df_encoded.iloc[indices[0]]['Name'].values,
            'Distance': distances[0]
        })
        
        # Add original details from the real dataset
        recommendations['Zone'] = real_dataset.iloc[indices[0]]['Zone'].values
        recommendations['State'] = real_dataset.iloc[indices[0]]['State'].values
        recommendations['City'] = real_dataset.iloc[indices[0]]['City'].values
        recommendations['Type'] = real_dataset.iloc[indices[0]]['Type'].values
        recommendations['Time Needed (hrs)'] = real_dataset.iloc[indices[0]]['time needed to visit in hrs'].values
        recommendations['Google Review Rating'] = real_dataset.iloc[indices[0]]['Google review rating'].values
        recommendations['Entrance Fee (INR)'] = real_dataset.iloc[indices[0]]['Entrance Fee in INR'].values
        recommendations['Number of Reviews (in lakhs)'] = real_dataset.iloc[indices[0]]['Number of google review in lakhs'].values
        
        # Sort the recommendations by distance in ascending order (most similar first)
        recommendations = recommendations.sort_values(by='Distance', ascending=True)

        # Get details of the input place
        input_place_details = real_dataset[real_dataset['Name'] == place_name]
        
        # Create a DataFrame for the input place details
        input_place_info = pd.DataFrame({
            'Place': input_place_details['Name'].values,
            'Zone': input_place_details['Zone'].values,
            'State': input_place_details['State'].values,
            'City': input_place_details['City'].values,
            'Type': input_place_details['Type'].values,
            'Time Needed (hrs)': input_place_details['time needed to visit in hrs'].values,
            'Google Review Rating': input_place_details['Google review rating'].values,
            'Entrance Fee (INR)': input_place_details['Entrance Fee in INR'].values,
            'Number of Reviews (in lakhs)': input_place_details['Number of google review in lakhs'].values
        })

        # Print details of the input place
        print("Details of the input place:")
        print(input_place_info.head()) 

        # Filter recommendations to only include places of the same type
        same_zone_recommendations = recommendations[recommendations['Zone'] == input_place_info['Zone'].values[0]]
        
        return same_zone_recommendations[['Place', 'Distance', 'Zone', 'State', 'City', 'Type', 'Time Needed (hrs)', 'Google Review Rating', 'Entrance Fee (INR)', 'Number of Reviews (in lakhs)']].reset_index(drop=True)
    else:
        return "Place not found."


In [11]:
recommend_places("Marble Palace")

Details of the input place:
           Place     Zone        State     City    Type  Time Needed (hrs)  \
0  Marble Palace  Eastern  West Bengal  Kolkata  Palace                1.0   
1  Marble Palace  Eastern  West Bengal  Kolkata  Palace                1.0   

   Google Review Rating  Entrance Fee (INR)  Number of Reviews (in lakhs)  
0                   4.4                   0                           0.1  
1                   4.4                   0                           0.1  


Unnamed: 0,Place,Distance,Zone,State,City,Type,Time Needed (hrs),Google Review Rating,Entrance Fee (INR),Number of Reviews (in lakhs)
0,Alipore Zoological Gardens,0.0,Eastern,West Bengal,Kolkata,Zoo,2.0,4.3,25,0.66
1,Tsomgo Lake,0.0,Eastern,Sikkim,Gangtok,Lake,2.0,4.5,0,0.15
2,Buddha Park,19.094722,Eastern,Sikkim,Ravangla,Park,1.0,4.8,50,0.1
