In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [None]:
# Read the TripAdvisor restaurant listing from the working directory and
# preview its first five rows as plain text.
data = pd.read_csv(
    "TripAdvisor_RestauarantRecommendation.csv",
)
print(data.head(n=5))

                            Name       Street Address  \
0  Betty Lou's Seafood and Grill     318 Columbus Ave   
1              Coach House Diner        55 State Rt 4   
2               Table Talk Diner  2521 South Rd Ste C   
3                    Sixty Vines     3701 Dallas Pkwy   
4                   The Clam Bar    3914 Brewerton Rd   

                       Location                                          Type  \
0  San Francisco, CA 94133-3908   Seafood, Vegetarian Friendly, Vegan Options   
1     Hackensack, NJ 07601-6337          Diner, American, Vegetarian Friendly   
2   Poughkeepsie, NY 12601-5476          American, Diner, Vegetarian Friendly   
3          Plano, TX 75093-7777       American, Wine Bar, Vegetarian Friendly   
4            Syracuse, NY 13212                        American, Bar, Seafood   

            Reviews No of Reviews  \
0  4.5 of 5 bubbles   243 reviews   
1    4 of 5 bubbles    84 reviews   
2    4 of 5 bubbles   256 reviews   
3  4.5 of 5 bubbles   

In [None]:
# Summarise column dtypes and non-null counts before trimming the frame.
data.info()
# DataFrame.drop returns a new frame rather than modifying `data`, so the
# result has to be assigned back for the columns to actually be removed.
# errors='ignore' keeps this cell re-runnable once the columns are gone.
data = data.drop(columns=['Contact Number', 'Price_Range'], errors='ignore')
print(data.columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3062 entries, 0 to 3061
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Name              3062 non-null   object
 1   Street Address    3062 non-null   object
 2   Location          3062 non-null   object
 3   Type              3049 non-null   object
 4   Reviews           3062 non-null   object
 5   No of Reviews     3062 non-null   object
 6   Comments          2447 non-null   object
 7   Contact Number    3062 non-null   object
 8   Trip_advisor Url  3062 non-null   object
 9   Menu              3062 non-null   object
 10  Price_Range       3062 non-null   object
dtypes: object(11)
memory usage: 263.3+ KB
Index(['Name', 'Street Address', 'Location', 'Type', 'Reviews',
       'No of Reviews', 'Comments', 'Contact Number', 'Trip_advisor Url',
       'Menu', 'Price_Range'],
      dtype='object')


In [None]:
# Keep only the three columns used from here on: the restaurant name, its
# cuisine/type tags, and its rating text.
data = data.loc[:, ["Name", "Type", "Reviews"]]
print(data.head(n=5))

                            Name  \
0  Betty Lou's Seafood and Grill   
1              Coach House Diner   
2               Table Talk Diner   
3                    Sixty Vines   
4                   The Clam Bar   

                                           Type           Reviews  
0   Seafood, Vegetarian Friendly, Vegan Options  4.5 of 5 bubbles  
1          Diner, American, Vegetarian Friendly    4 of 5 bubbles  
2          American, Diner, Vegetarian Friendly    4 of 5 bubbles  
3       American, Wine Bar, Vegetarian Friendly  4.5 of 5 bubbles  
4                        American, Bar, Seafood    4 of 5 bubbles  


In [None]:
# Count missing values per column (isna is pandas' alias of isnull).
print(data.isna().sum())

Name        0
Type       13
Reviews     0
dtype: int64


In [None]:
# Discard every row that has at least one missing value, then confirm that
# no nulls remain in any column.
data = data.dropna(axis=0, how="any")
print(data.isna().sum())

Name       0
Type       0
Reviews    0
dtype: int64


In [None]:
# Distinct rating strings present in the Reviews column, in order of first
# appearance (displayed as the cell's output).
pd.unique(data["Reviews"])

array(['4.5 of 5 bubbles', '4 of 5 bubbles', '5 of 5 bubbles',
       '3.5 of 5 bubbles', '3 of 5 bubbles'], dtype=object)

In [None]:
# TF-IDF vectorizer configured to drop scikit-learn's built-in English
# stop-word list.
tfidf_vectorizer = TfidfVectorizer(
    stop_words="english",
)

In [None]:
# Learn the vocabulary from the Type column and encode every row as a TF-IDF
# vector; row i of the matrix corresponds to the i-th row of `data`.
tfidf_matrix = tfidf_vectorizer.fit_transform(raw_documents=data["Type"])
print(tfidf_matrix.shape)


(3049, 112)


In [None]:
# Pairwise cosine similarity between all rows of the TF-IDF matrix; entry
# [i, j] compares the i-th and j-th rows of `data`.
similarity = cosine_similarity(X=tfidf_matrix)

In [None]:
# Map each restaurant name to its row *position* in `data`, which is also its
# row number in `tfidf_matrix` and `similarity`. Index labels cannot be used
# for this: dropna() left gaps in `data.index`, so labels no longer line up
# with positions and the largest labels fall outside the similarity matrix.
indices = pd.Series(np.arange(len(data)), index=data['Name'])
# Series.drop_duplicates() compares the *values* (row numbers, always unique),
# so it never removed repeated names. De-duplicate on the name index instead,
# keeping the first occurrence, so that indices[name] is a single integer.
indices = indices[~indices.index.duplicated(keep='first')]

In [None]:
def restaurant_recommendation(name, similarity = similarity, top_n = 10):
    """Return the names of the restaurants most similar to ``name``.

    Parameters
    ----------
    name : str
        Exact restaurant name as it appears in ``data['Name']``. If several
        rows share the name, the first one is used as the query.
    similarity : array-like, optional
        Square pairwise similarity matrix whose i-th row/column corresponds
        to the i-th row of ``data``. Defaults to the module-level matrix.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Up to ``top_n`` restaurant names, most similar first, excluding the
        queried row itself. Ties keep their original row order.

    Raises
    ------
    KeyError
        If ``name`` does not occur in ``data['Name']``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Look up the row *position* directly from `data`: the similarity matrix
    # is positional, while data.index labels have gaps after dropna().
    matches = np.flatnonzero(data['Name'].to_numpy() == name)
    if matches.size == 0:
        raise KeyError(f"Restaurant not found: {name!r}")
    position = int(matches[0])

    scores = np.asarray(similarity[position]).ravel()
    # Stable descending sort so equally similar rows stay in row order.
    order = np.argsort(-scores, kind="stable")
    # Never recommend the restaurant that was asked about.
    order = order[order != position][:top_n]
    return data['Name'].iloc[order]

# Prompt for a restaurant name (this is the query, despite the variable's
# name) and print the recommendations computed for it.
Recommended_restaurant = input("Enter a restaurant name: ")
print(restaurant_recommendation(name=Recommended_restaurant))

Enter a restaurant name: Market Grill
23                   The Lion's Share
154                        Houlihan's
518            Midgley's Public House
568                 Aspen Creek Grill
770              Pete's Sunset Grille
1190     Paul Martin's American Grill
1581                   Aviation Grill
1872                   Aviation Grill
2193                Crest Bar & Grill
2612    Tahoe Joe's Famous Steakhouse
Name: Name, dtype: object
