##### Data source: https://www.kaggle.com/datasets/siddharthmandgi/tripadvisor-restaurant-recommendation-data-usa?resource=download

### Import necessary Python libraries and the dataset

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity

# Load the TripAdvisor restaurant dataset; the CSV is expected in the working directory.
data = pd.read_csv("TripAdvisor_RestauarantRecommendation.csv")
# Show the first rows as a quick sanity check of the available columns.
print(data.head())

                            Name       Street Address  \
0  Betty Lou's Seafood and Grill     318 Columbus Ave   
1              Coach House Diner        55 State Rt 4   
2               Table Talk Diner  2521 South Rd Ste C   
3                    Sixty Vines     3701 Dallas Pkwy   
4                   The Clam Bar    3914 Brewerton Rd   

                       Location                                          Type  \
0  San Francisco, CA 94133-3908   Seafood, Vegetarian Friendly, Vegan Options   
1     Hackensack, NJ 07601-6337          Diner, American, Vegetarian Friendly   
2   Poughkeepsie, NY 12601-5476          American, Diner, Vegetarian Friendly   
3          Plano, TX 75093-7777       American, Wine Bar, Vegetarian Friendly   
4            Syracuse, NY 13212                        American, Bar, Seafood   

            Reviews No of Reviews  \
0  4.5 of 5 bubbles   243 reviews   
1    4 of 5 bubbles    84 reviews   
2    4 of 5 bubbles   256 reviews   
3  4.5 of 5 bubbles   

### Select two columns from the dataset for the rest of the task (Name, Type)

In [2]:
# Keep only the restaurant name and its "Type" tags; these are the only
# columns the rest of the notebook reads.
data = data[["Name", "Type"]]
print(data.head())

                            Name                                          Type
0  Betty Lou's Seafood and Grill   Seafood, Vegetarian Friendly, Vegan Options
1              Coach House Diner          Diner, American, Vegetarian Friendly
2               Table Talk Diner          American, Diner, Vegetarian Friendly
3                    Sixty Vines       American, Wine Bar, Vegetarian Friendly
4                   The Clam Bar                        American, Bar, Seafood


### Check for null values

In [3]:
# Count the missing values in each column.
print(data.isnull().sum())

Name     0
Type    13
dtype: int64


### Delete rows with null values

In [4]:
# Drop rows with a missing value, then renumber the rows 0..n-1.
# Later cells use the row labels as positions into the similarity matrix and
# with `.iloc`; without the reset, the gaps left by the dropped rows would make
# labels and positions disagree and the wrong restaurants would be looked up.
data = data.dropna().reset_index(drop=True)

### Use the Type column as the feature to recommend similar restaurants to the customer

In [5]:
# Each restaurant's "Type" string is treated as one text document.
feature = data["Type"].tolist()
# The documents are handed to fit_transform(); the vectorizer's `input`
# parameter only selects how they are read ("content", "file" or "filename"),
# so it is left at its default instead of being given the document list.
tfidf = text.TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(feature)
# Pairwise cosine similarity between all restaurants; row/column i corresponds
# to the i-th row (by position) of `data`.
similarity = cosine_similarity(tfidf_matrix)

### Build a lookup from restaurant name to row index

In [6]:
# Map each restaurant name to the row label of its entry in `data`.
indices = pd.Series(data.index, index=data["Name"])
# Keep only the first entry for a repeated name so that `indices[name]` always
# yields a single value. (Series.drop_duplicates() would compare the values,
# i.e. the row labels, which are already unique, and so would remove nothing.)
indices = indices[~indices.index.duplicated(keep="first")]

### Write a function to recommend similar restaurants

In [12]:
def restaurant_recommendation(name, similarity=similarity, top_n=10):
    """Return the names of the restaurants most similar to ``name``.

    Parameters
    ----------
    name : str
        Restaurant name to look up in the module-level ``indices`` Series.
    similarity : array-like, optional
        Square matrix of pairwise similarity scores whose rows follow the row
        order of the module-level ``data`` frame. Defaults to the matrix
        computed earlier in the notebook.
    top_n : int, optional
        Number of names to return (default 10).

    Returns
    -------
    pandas.Series
        Up to ``top_n`` values of ``data["Name"]``, highest score first; ties
        keep their original row order. The queried restaurant is not filtered
        out, so it normally appears in the result.

    Raises
    ------
    KeyError
        If ``name`` is not present in ``indices``.
    """
    try:
        label = indices[name]
    except KeyError:
        raise KeyError(f"Restaurant {name!r} not found in the dataset") from None

    # Several restaurants may share a name, in which case the lookup returns a
    # Series of labels; use the first match.
    if isinstance(label, pd.Series):
        label = label.iloc[0]

    # `indices` stores row labels of `data`, whereas `similarity` and `.iloc`
    # are positional. Translate explicitly so the lookup stays correct even
    # when the labels are not 0..n-1 (e.g. after rows were dropped).
    position = data.index.get_loc(label)

    scores = np.asarray(similarity[position])
    # A stable sort on the negated scores gives descending order while keeping
    # equal scores in ascending row order.
    top_positions = np.argsort(-scores, kind="stable")[:top_n]
    return data["Name"].iloc[top_positions]

# Example query: print the restaurants whose "Type" is closest to "The Clam Bar".
print(restaurant_recommendation("The Clam Bar"))

4                            The Clam Bar
57                        Buzzard Billy's
62                     Cedars at Pier One
66     Maxie's Supper Club and Oyster Bar
122               Duke's Huntington Beach
189              River Station Restaurant
247                       Fish City Grill
304                    Tony's On the Pier
374                         Pacific Grill
393              Anthony's Woodfire Grill
Name: Name, dtype: object
