# **RECOMMENDER SYSTEM**

**THIS NOTEBOOK PROVIDES A RECOMMENDER SYSTEM WHICH DISPLAYS RECOMMENDATIONS OF SIMILAR RESTAURANTS WHEN GIVEN A RESTAURANT NAME.**

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the Zomato Bangalore restaurants CSV into `df` and preview the first rows.
df=pd.read_csv("../input/zomato-bangalore-restaurants/zomato.csv")
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the raw data.
df.info()

In [None]:
# Drop columns that are not referenced in the rest of this notebook.
df=df.drop(['url','dish_liked','phone'],axis=1) 

In [None]:
# Count fully duplicated rows (the count is not displayed because it is not the
# cell's last expression), then remove the duplicates in place.
df.duplicated().sum()
df.drop_duplicates(inplace=True)

In [None]:
# Per-column null counts (computed but not displayed: not the last expression).
df.isnull().sum()
# Remove every row that has at least one missing value, then re-inspect the frame.
df.dropna(how='any',inplace=True)
df.info()

In [None]:
# Show the column names that remain after the drops above.
df.columns

In [None]:
# Shorten the verbose dataset headers to simple identifiers used below.
column_aliases = {
    'approx_cost(for two people)': 'cost',
    'listed_in(type)': 'type',
    'listed_in(city)': 'city',
}
df = df.rename(columns=column_aliases)
df.columns

In [None]:
# Convert the cost column from text (e.g. "1,200") to float.
# The comma is a thousands separator, so it is removed; replacing it with '.'
# would parse "1,200" as 1.2 and corrupt every cost of 1000 or more.
df['cost'] = (
    df['cost']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)
df.info()

In [None]:
# List the distinct raw values of the rating column before cleaning it.
df['rate'].unique()

In [None]:
# Discard rows whose rating is a placeholder ('NEW' or '-') rather than a number.
df = df.loc[~df['rate'].isin(['NEW', '-'])].reset_index(drop=True)

# Strip the "/5" suffix from string ratings; non-strings pass through untouched.
# isinstance(x, str) replaces `type(x) == np.str`: the `np.str` alias was
# removed in NumPy 1.24 and raises AttributeError there.
remove_slash = lambda x: x.replace('/5', '') if isinstance(x, str) else x

# Trim leftover whitespace (e.g. "3.8 ") and convert the column to float.
df['rate'] = df['rate'].apply(remove_slash).str.strip().astype('float')
df['rate'].head()

In [None]:
# Title-case restaurant names so that lookups by name use one consistent casing.
df['name'] = df['name'].map(str.title)

# Turn the Yes/No flags into booleans. The result is assigned back to the
# column: calling replace(..., inplace=True) on an attribute-accessed Series
# is not guaranteed to modify the DataFrame (it does not under pandas
# Copy-on-Write).
yes_no_to_bool = {'Yes': True, 'No': False}
df['online_order'] = df['online_order'].replace(yes_no_to_bool)
df['book_table'] = df['book_table'].replace(yes_no_to_bool)

df.cost.unique()

In [None]:
# Display per-column null counts after the cleaning steps above.
df.isnull().sum()

In [None]:
# Unique restaurant names (kept as a module-level list, as before).
restaurants = list(df['name'].unique())

# Give every row the average rating of all rows that share its restaurant name.
# groupby/transform does this in one vectorised pass. The previous per-name
# loop used chained assignment (df[col][mask] = ...), which pandas does not
# guarantee to write back to `df`, and scanned the frame once per restaurant.
df['Mean Rating'] = df.groupby('name')['rate'].transform('mean')

In [None]:
# Preview the frame, now including the 'Mean Rating' column.
df.head()

In [None]:
# Lower-case the review text; the later character filter only keeps [a-z0-9].
df["reviews_list"] = df["reviews_list"].str.lower()
# Show five random rows of reviews next to their cuisines (differs per run).
df[['reviews_list', 'cuisines']].sample(5)

In [None]:
import re
# Replace every character that is not a lowercase letter or digit with a space,
# row by row. `review` is a Series aligned with df's index, so assigning it to
# df['reviews_list'] keeps each restaurant's own text. The previous loop
# rebound `review` on every iteration, leaving only the last row's cleaned
# string, which was then written to every row.
rest = df['reviews_list']
review = rest.str.replace('[^a-z0-9]', ' ', regex=True)
   

In [None]:
# Overwrite the review column with `review` from the previous cell, then show
# five random rows. NOTE(review): if `review` is a single string it is
# broadcast to every row — confirm it holds one value per row.
df['reviews_list']=review
df['reviews_list'].sample(5)

In [None]:
from nltk.corpus import stopwords
# English stop words from NLTK, held in a set; remove_stopwords tests tokens against it.
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    """Return *text* without the whitespace-separated tokens found in STOPWORDS.

    The input is coerced with ``str()``; surviving tokens are re-joined with
    single spaces, so runs of whitespace collapse.
    """
    kept_tokens = []
    for token in str(text).split():
        if token in STOPWORDS:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)

# Strip stop words from every review.
df["reviews_list"] = df["reviews_list"].apply(remove_stopwords)

In [None]:
def remove_urls(text):
    """Return *text* with ``http(s)://...`` links and ``www.``-prefixed tokens deleted."""
    return re.sub(r'https?://\S+|www\.\S+', '', text)

# Delete URLs from every review.
# NOTE(review): an earlier cell replaces every character outside [a-z0-9] with
# a space, so "://" and "www." can no longer occur by the time this runs —
# this step probably belongs before that cleanup.
df["reviews_list"] = df["reviews_list"].apply(remove_urls)

In [None]:
# Collect the distinct restaurant names and display them.
restaurant_names = list(df['name'].unique())
restaurant_names

In [None]:
# Remove columns that the recommender below does not read.
unused_columns = ['address', 'rest_type', 'type', 'menu_item', 'votes']
df = df.drop(columns=unused_columns)
df.head()

In [None]:
# Work on a random 60% of the rows. No random_state is set, so the sample (and
# therefore the recommendations) differs between runs.
# NOTE(review): presumably done to keep the pairwise similarity matrix small — confirm.
df_percent = df.sample(frac=0.6)

In [None]:
# Show the (rows, columns) size of the sampled frame.
df_percent.shape

In [None]:
# Index the sample by restaurant name, and keep the names in a positionally
# indexed Series so a name can be mapped to its row number.
df_percent = df_percent.set_index('name')
indices = pd.Series(df_percent.index)

In [None]:
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Build TF-IDF features (unigrams and bigrams, English stop words removed) from
# the review text. min_df=1 keeps every term, exactly as min_df=0 did, but
# recent scikit-learn releases reject an integer min_df below 1.
tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_percent['reviews_list'])

# TfidfVectorizer L2-normalises rows by default, so the linear kernel (plain
# dot product) of the matrix with itself is the cosine similarity.
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [None]:


def recommend(name, cosine_similarities = cosine_similarities):
    """Return up to ten restaurants whose reviews are most similar to *name*.

    Parameters
    ----------
    name : str
        Restaurant name exactly as it appears in ``df_percent``'s index.
    cosine_similarities : 2-D array-like
        Pairwise similarity matrix whose row/column order matches the row
        order of ``df_percent``. Defaults to the module-level matrix.

    Returns
    -------
    pandas.DataFrame
        ``cuisines``, ``Mean Rating`` and ``cost`` of the recommended
        restaurants, indexed by name and sorted by ``Mean Rating`` descending.

    Raises
    ------
    IndexError
        If *name* is not present in the sampled data.
    """
    # Row position of the first entry carrying this name.
    matches = indices[indices == name]
    if matches.empty:
        raise IndexError(f"restaurant {name!r} is not in the sampled data")
    idx = matches.index[0]

    # Positions of the 31 highest-scoring rows.
    # NOTE(review): the query row scores highest against itself, so it is
    # normally among these candidates — confirm that is wanted.
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)
    top30_indexes = list(score_series.iloc[0:31].index)

    # Map positions back to names; the index is fetched once, not per iteration.
    names = df_percent.index
    recommend_restaurant = [names[each] for each in top30_indexes]

    # A name can match several rows; take one at random for each candidate.
    columns = ['cuisines', 'Mean Rating', 'cost']
    picks = [
        df_percent.loc[df_percent.index == each, columns].sample()
        for each in recommend_restaurant
    ]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df_new = pd.concat(picks)

    # NOTE(review): keep=False drops *every* row that has a duplicate, not just
    # the extra copies — confirm this is intended rather than keep='first'.
    df_new = df_new.drop_duplicates(subset=columns, keep=False)
    df_new = df_new.sort_values(by='Mean Rating', ascending=False).head(10)

    print(f'TOP {len(df_new)} RESTAURANTS LIKE {name} WITH SIMILAR REVIEWS: ')

    return df_new

In [None]:
# Show the sampled rows for 'Jalsa' (empty if the random sample left it out).
df_percent[df_percent.index == 'Jalsa'].head()

In [None]:
# Display the recommendations for 'Jalsa'.
recommend('Jalsa')