In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity as cs

# Load the raw dataset; '1.csv' is resolved relative to the notebook's working directory.
df = pd.read_csv('1.csv')

In [2]:
# List the columns present in the loaded file before picking the ones to keep.
df.columns

Index(['Unnamed: 0', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'dish_liked', 'cuisines', 'approx_cost',
       'reviews_list', 'menu_item', 'type', 'neighbourhood'],
      dtype='object')

In [3]:
# Keep only the columns used in the rest of the notebook. The explicit .copy() makes the
# result an independent frame, so the in-place edits and column assignments that follow
# do not operate on a selection of the loaded frame (a source of SettingWithCopyWarning).
df = df[['name','rate','location','dish_liked','cuisines','approx_cost','neighbourhood']].copy()

In [4]:
# Keep the first row for each restaurant name, then discard rows without a cuisines
# value (that column is passed to remove_punctuation and the TF-IDF vectorizer later).
# Rebinding the chained result replaces the inplace=True calls, which mutate a frame
# obtained by column selection and can trigger SettingWithCopyWarning.
df = df.drop_duplicates(subset='name').dropna(subset=['cuisines'])

# Remaining missing values per column.
df.isnull().sum()

name                0
rate             2454
location            0
dish_liked       5870
cuisines            0
approx_cost        36
neighbourhood       0
dtype: int64

In [5]:
import string
# Characters stripped by remove_punctuation (the ASCII punctuation set).
PUNCT_TO_REMOVE = string.punctuation
def remove_punctuation(text):
    """Return ``text`` with every character listed in PUNCT_TO_REMOVE deleted."""
    # str.translate drops any character whose code point maps to None.
    deletion_table = dict.fromkeys(map(ord, PUNCT_TO_REMOVE))
    return text.translate(deletion_table)

# Strip punctuation characters from every cuisines entry.
df["cuisines"] = df["cuisines"].apply(remove_punctuation)

In [6]:
# One address string per row: "<neighbourhood> <location>".
neighbourhood_then_location = df['neighbourhood'] + ' ' + df['location']
df['addr'] = neighbourhood_then_location
df.head()

Unnamed: 0,name,rate,location,dish_liked,cuisines,approx_cost,neighbourhood,addr
0,Jalsa,4.1,Banashankari,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...",North Indian Mughlai Chinese,800.0,Banashankari,Banashankari Banashankari
1,Spice Elephant,4.1,Banashankari,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...",Chinese North Indian Thai,800.0,Banashankari,Banashankari Banashankari
2,San Churro Cafe,3.8,Banashankari,"Churros, Cannelloni, Minestrone Soup, Hot Choc...",Cafe Mexican Italian,800.0,Banashankari,Banashankari Banashankari
3,Addhuri Udupi Bhojana,3.7,Banashankari,Masala Dosa,South Indian North Indian,300.0,Banashankari,Banashankari Banashankari
4,Grand Village,3.8,Basavanagudi,"Panipuri, Gol Gappe",North Indian Rajasthani,600.0,Banashankari,Banashankari Basavanagudi


In [7]:
# Index the frame by restaurant name.
df = df.set_index('name')
# Series of names on a 0..n-1 index: looking a name up here yields its row position,
# which is also its row/column in the similarity matrices built from this frame.
indices = pd.Series(df.index)

In [8]:
# min_df=1 is the smallest valid integer cutoff: a vocabulary term always occurs in at
# least one document, so it selects the same terms as the former min_df=0 while being
# accepted by scikit-learn versions that validate the parameter range.
TFIDF_PARAMS = dict(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')

# One vectorizer per text field: refitting a shared instance overwrites the cuisine
# vocabulary as soon as the addresses are fitted.
cuisine_tfidf = TfidfVectorizer(**TFIDF_PARAMS)
tfidf_cuisine_matrix = cuisine_tfidf.fit_transform(df['cuisines'])

# `tfidf` stays bound to the address-fitted vectorizer, which is the state it was left
# in when a single shared instance was fitted twice.
tfidf = TfidfVectorizer(**TFIDF_PARAMS)
tfidf_location_matrix = tfidf.fit_transform(df['addr'])

In [9]:
# Pairwise cosine similarity between all rows of each TF-IDF matrix. When the second
# argument is omitted, the matrix is compared against itself.
cuisine_similarity = cs(tfidf_cuisine_matrix)
location_similarity = cs(tfidf_location_matrix)

In [10]:
# Spot-check the approx_cost stored for one restaurant.
print(df.loc[df.index == '7 Hills Garden Restaurant', 'approx_cost'].values)

[400.]


In [21]:
def recommend(name, n_cuisine=2000, n_location=1000, max_cost_diff=201, top_n=10,
              include_self=True):
    """Recommend restaurants similar to ``name`` by cuisine, address and price.

    A candidate must rank among the ``n_cuisine`` rows most cuisine-similar to the
    query *and* among the ``n_location`` rows most address-similar to it. Candidates
    whose ``approx_cost`` differs from the query's by more than ``max_cost_diff`` are
    removed, and the remainder is ordered by ``rate``, highest first.

    Reads the notebook-level ``df``, ``indices``, ``cuisine_similarity`` and
    ``location_similarity``. On every call the complete filtered table (including
    the ``cost_diff`` column) is also written to ``temp.csv`` in the working
    directory.

    Parameters
    ----------
    name : str
        Restaurant name to look up in ``indices``.
    n_cuisine : int, default 2000
        How many of the most cuisine-similar rows are eligible.
    n_location : int, default 1000
        How many of the most address-similar rows are eligible.
    max_cost_diff : float, default 201
        Largest allowed absolute difference in ``approx_cost``.
    top_n : int, default 10
        Maximum number of rows returned.
    include_self : bool, default True
        When False, the queried restaurant is left out of the result.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with columns ``cuisines``, ``rate``, ``approx_cost``
        and ``addr``, indexed by restaurant name.

    Raises
    ------
    IndexError
        If ``name`` does not occur in ``indices``.
    """
    matches = indices[indices == name]
    if matches.empty:
        # Same exception type an empty lookup produced before, with a usable message.
        raise IndexError(f"restaurant {name!r} not found in `indices`")
    idx = matches.index[0]

    cuisine_ranked = pd.Series(cuisine_similarity[idx]).sort_values(ascending=False)
    top_cuisine = list(cuisine_ranked.iloc[:n_cuisine].index)

    location_ranked = pd.Series(location_similarity[idx]).sort_values(ascending=False)
    top_location = set(location_ranked.iloc[:n_location].index)

    # Intersect the two candidate sets, keeping cuisine-similarity order. Set
    # membership replaces a nested loop over both lists.
    positions = [pos for pos in top_cuisine if pos in top_location]
    if not include_self:
        positions = [pos for pos in positions if pos != idx]

    # Select all candidate rows in one positional lookup instead of appending them
    # one at a time (DataFrame.append is gone in pandas 2.x and was quadratic).
    # rename_axis(None) keeps the result's index unnamed, as the row-by-row build
    # produced, so temp.csv and the displayed table keep the same header.
    columns = ['cuisines', 'rate', 'approx_cost', 'addr']
    df_new = df.iloc[positions][columns].rename_axis(None)

    # NOTE(review): this compares column values only, so two different restaurants
    # with identical cuisines/rate/approx_cost/addr collapse into one row. Kept so
    # results match earlier runs; confirm whether that is intended.
    df_new = df_new.drop_duplicates()

    base_cost = df['approx_cost'].iloc[idx]
    cost_diff = df_new['approx_cost'] - base_cost
    # Written as "not too far" rather than "abs(diff) <= limit" so rows with a
    # missing cost (NaN compares False) are kept, exactly as before.
    too_far = (cost_diff > max_cost_diff) | (cost_diff < -max_cost_diff)

    df_new = (
        df_new.assign(cost_diff=cost_diff)
        .loc[~too_far]
        .sort_values(by='rate', ascending=False)
    )

    # Existing side effect: dump the full filtered table, cost_diff included.
    df_new.to_csv('temp.csv')
    return df_new.drop(columns='cost_diff').head(top_n)

In [23]:
# Example query: recommendations for the restaurant named 'Jalsa'.
recommend('Jalsa')

Unnamed: 0,cuisines,rate,approx_cost,addr
Empire Restaurant,North Indian Mughlai South Indian Chinese,4.4,750.0,Banashankari Jayanagar
Meghana Foods,Biryani North Indian Chinese,4.4,600.0,Banashankari Jayanagar
The Royal Corner - Pai Viceroy,North Indian Chinese,4.2,900.0,Banashankari Jayanagar
Kapoor's Cafe,North Indian,4.2,800.0,Banashankari Jayanagar
Jalsa,North Indian Mughlai Chinese,4.1,800.0,Banashankari Banashankari
Hara Fine Dine,North Indian Chinese,4.0,800.0,Banashankari Banashankari
The Kebab Room - Restaurant & Brewery,Mughlai North Indian,4.0,1000.0,Indiranagar Indiranagar
Mint and Mustard,North Indian Chinese,4.0,750.0,Banashankari Basavanagudi
1947,North Indian Chinese,4.0,850.0,Banashankari Banashankari
Silbatti,North Indian,4.0,750.0,HSR HSR
