# Importing libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Loading Dataset

In [None]:
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# notebook only runs unchanged on the original author's machine. Point it at
# your local copy of zomato.csv before executing.
zomato_df = pd.read_csv("C:/Users/NAMIRA/Downloads/archive (7)/zomato.csv")

# Data Preprocessing

In [None]:
zomato_df.head()

In [9]:
zomato_df.shape

(51717, 17)

# Dropping Unnecessary Columns

In [None]:
# Remove the columns that the rest of the analysis never reads.
zomato_df = zomato_df.drop(
    ['url', 'address', 'phone', 'menu_item', 'reviews_list', 'dish_liked'],
    axis='columns',
)

In [None]:
zomato_df.columns

# Dropping Duplicate Values

In [None]:
# drop_duplicates() returns a new DataFrame and leaves the original untouched,
# so the result must be assigned back for the de-duplication to take effect.
zomato_df = zomato_df.drop_duplicates()


In [None]:
zomato_df.shape

In [None]:
zomato_df.isna().sum()

# Removing '/'  from Rate Column

In [None]:
def handleRate(val):
    """Extract the numeric part of a raw rating such as ``'4.1/5'``.

    The placeholders ``'NEW'`` and ``'-'`` map to ``np.nan``. Any other
    value is converted to ``str`` and everything before the first ``'/'``
    is returned, still as a string (no stripping or float conversion is
    done here).
    """
    if val in ('NEW', '-'):
        return np.nan
    numerator, _, _ = str(val).partition('/')
    return numerator
zomato_df['rate']= zomato_df['rate'].apply(handleRate)

In [None]:
zomato_df['rate'].unique()

In [None]:
zomato_df['rate']= zomato_df['rate'].astype('float')

In [None]:
zomato_df['rate'] = pd.to_numeric(zomato_df['rate'])

# Filling null values in Rate Column with mean

In [None]:
# Assign the filled column back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form emits a FutureWarning on
# pandas 2.x and does not update zomato_df at all under Copy-on-Write.
zomato_df['rate'] = zomato_df['rate'].fillna(zomato_df['rate'].mean())

In [None]:
zomato_df.dtypes

In [None]:
zomato_df['rate'].isna().sum()

In [None]:
zomato_df['rate'].unique()

In [None]:
zomato_df.dropna(inplace=True)

In [None]:
zomato_df.shape

# Renaming Columns

In [None]:
zomato_df.rename(columns={'approx_cost(for two people)':'cost2plates','listed_in(type)':'type'},inplace=True)

In [None]:
zomato_df.drop(columns='listed_in(city)',inplace=True)

In [None]:
zomato_df.head()

In [None]:
zomato_df['cost2plates'].value_counts()

# Removing ',' from cost2plates column

In [None]:
def handleCost(value):
    """Convert a cost entry to ``float``, removing thousands separators.

    When the string form of ``value`` contains a comma (e.g. ``'1,200'``),
    the commas are removed before conversion; otherwise ``value`` is passed
    straight to ``float``.
    """
    if ',' not in str(value):
        return float(value)
    without_commas = value.replace(',', '')
    return float(without_commas)
zomato_df['cost2plates']= zomato_df['cost2plates'].apply(handleCost)
zomato_df.head()

# Grouping rest_type values with fewer than 1000 occurrences as "others"

In [None]:
zomato_df['rest_type'].value_counts()

In [None]:
rest_types=zomato_df['rest_type'].value_counts()

In [None]:
rest_types_less_than_1000 = rest_types[rest_types<1000]

In [None]:
type(rest_types_less_than_1000)

In [None]:
def handleRestType(val):
    """Map a rare restaurant type to ``"others"``; return others unchanged.

    A type counts as rare when it is one of the index labels of the
    module-level ``rest_types_less_than_1000`` Series (membership on a
    Series tests its index, not its values).
    """
    is_rare = val in rest_types_less_than_1000.index
    return "others" if is_rare else val
    
zomato_df['rest_type'] = zomato_df['rest_type'].apply(handleRestType)

In [None]:
zomato_df['rest_type'].value_counts()

# Grouping cuisines with fewer than 500 occurrences as "others"

In [None]:
zomato_df['cuisines'].value_counts()

In [None]:
cuisine_type = zomato_df['cuisines'].value_counts()

In [None]:
cuisine_type

In [None]:
# Keep only the cuisines that occur fewer than 500 times. The counts Series is
# named `cuisine_type`; the original indexed an undefined `cuisine_types`,
# which raised NameError.
cuisines_less_than_500 = cuisine_type[cuisine_type < 500]

In [None]:
cuisines_less_than_500

In [None]:
def HandleCuisines(val):
    """Map a rare cuisine label to ``"others"``; return others unchanged.

    A label counts as rare when it is one of the index labels of the
    module-level ``cuisines_less_than_500`` Series.
    """
    if val not in cuisines_less_than_500.index:
        return val
    return "others"
zomato_df['cuisines'] = zomato_df['cuisines'].apply(HandleCuisines)

In [None]:
zomato_df['cuisines'].value_counts()

In [None]:
zomato_df.head()

# Cleaning location column

In [None]:
location_types = zomato_df['location'].value_counts()

In [None]:
location_types

In [None]:
location_types_less_than_500 = location_types[location_types<500]

In [None]:
def handleLocation(val):
    """Map a rare location to ``"others"``; return others unchanged.

    A location counts as rare when it is one of the index labels of the
    module-level ``location_types_less_than_500`` Series.
    """
    rare_locations = location_types_less_than_500.index
    return "others" if val in rare_locations else val
zomato_df['location'] = zomato_df['location'].apply(handleLocation)

In [None]:
zomato_df['location'].value_counts()

# Visualizing the dataset


# Restaurants per Region


In [None]:
plt.figure(figsize=(15,10))
# Name the column via x=/data= rather than passing the Series positionally:
# seaborn >= 0.12 treats the sole positional argument as `data`, so the
# original call no longer produced a per-location count plot.
ax = sns.countplot(x='location', data=zomato_df)
# Location names are long; rotate the tick labels so they stay readable.
plt.xticks(rotation=90)

Observation: Locations such as Shanti Nagar and Lavelle Rd have the fewest restaurants, so opening a restaurant in these regions could be profitable.

# Visualizing how many restaurants have booking and online order facility

In [None]:
plt.rcParams["figure.figsize"] = [12,8]
# Side-by-side counts of restaurants with/without each facility. The columns
# are passed by keyword (x=/data=) because seaborn >= 0.12 no longer accepts
# the plotted variable as a positional argument.
plt.subplot(1,2,1)
sns.countplot(x='online_order', data=zomato_df)
plt.subplot(1,2,2)
sns.countplot(x='book_table', data=zomato_df)
plt.show()

Observation: Online ordering facility is available in almost every restaurant and table booking is a rare facility

# Impacts of online order and table booking on Ratings

In [None]:
plt.rcParams["figure.figsize"] = [12,8]
plt.subplot(1,2,1)
sns.boxplot(x='online_order',y='rate', data=zomato_df)
plt.subplot(1,2,2)
sns.boxplot(x='book_table',y='rate', data=zomato_df)

Observation: The online ordering facility has little impact on ratings; the table booking facility, however, is associated with a substantial increase in ratings.

# Impact of Online order facility on location


In [None]:
# Count restaurants per (location, online_order) pair.
df1 = zomato_df.groupby(['location','online_order'])['name'].count()
# The CSV is still written as an exported artifact, but it is no longer read
# back: reset_index() yields the same flat location/online_order/name table
# without a disk round-trip.
df1.to_csv('location_online.csv')
df1 = df1.reset_index()
# aggfunc is given as the string 'sum'; passing np.sum is deprecated in
# pandas 2.x and emits a FutureWarning.
df1 = pd.pivot_table(df1,values=None,index=['location'],columns=['online_order'],fill_value=0,aggfunc='sum')

In [None]:
df1

In [None]:
df1.plot(kind='bar',figsize=(15,8))

Observation: If a restaurant is opened in locations such as BTM or HSR, an online ordering facility is a must.

# Impact of Book Table facility on location

In [None]:
# Count restaurants per (location, book_table) pair.
df = zomato_df.groupby(['location','book_table'])['name'].count()
# The CSV is still written as an exported artifact, but it is no longer read
# back: reset_index() yields the same flat location/book_table/name table
# without a disk round-trip.
df.to_csv('loc_table.csv')
df = df.reset_index()
# aggfunc is given as the string 'sum'; passing np.sum is deprecated in
# pandas 2.x and emits a FutureWarning.
df = pd.pivot_table(df,values=None,index=['location'],columns=['book_table'],fill_value=0,aggfunc='sum')

In [None]:
df

In [None]:
df.plot(kind='bar',figsize=(15,8))

Observation: The Book Table facility is not available in many restaurants.

# Restaurant's Type affecting their Rates

In [None]:
plt.figure(figsize=(14,8))
sns.boxplot(x='type',y='rate',data=zomato_df)

Observation: Restaurant types such as Buffet, Drinks & nightlife, and Pubs and bars tend to have higher ratings than the others.

#  Types of Restaurants per location 

In [None]:
# Count restaurants per (location, type) pair.
df3 = zomato_df.groupby(['location','type'])['name'].count()
# The CSV is still written as an exported artifact, but it is no longer read
# back: reset_index() yields the same flat location/type/name table without a
# disk round-trip.
df3.to_csv('loc_type.csv')
df3 = df3.reset_index()
# aggfunc is given as the string 'sum'; passing np.sum is deprecated in
# pandas 2.x and emits a FutureWarning.
df3 = pd.pivot_table(df3,values=None,index=['location'],columns=['type'],fill_value=0,aggfunc='sum')

In [None]:
df3

In [None]:
df3.plot(kind='bar',figsize=(36,8))

Observation: Delivery is the most common type of restaurant at most of the locations.

# Votes per Location

In [None]:
# Total votes per location as a one-column frame, largest totals first.
df4 = zomato_df[['location', 'votes']]
df5 = (
    df4.groupby(['location'])['votes']
    .sum()
    .to_frame()
    .sort_values('votes', ascending=False)
)
df5.head()

In [None]:
plt.figure(figsize=(15,8))
# x and y must be keyword arguments: seaborn >= 0.12 accepts only `data`
# positionally, so the original two-positional call raises TypeError there.
sns.barplot(x=df5.index, y=df5['votes'])
plt.xticks(rotation=95)

Observation: Koramangala Block is seen to have the highest number of votes

# Votes per Cuisines 

In [None]:
# Total votes per cuisine as a one-column frame, largest totals first.
df6 = zomato_df[['cuisines', 'votes']]
df7 = (
    df6.groupby(['cuisines'])['votes']
    .sum()
    .to_frame()
    .sort_values('votes', ascending=False)
)
df7.head()

In [None]:
plt.figure(figsize=(15,8))
# Skip the first row, i.e. the highest-vote entry after the descending sort
# (presumably the catch-all "others" bucket -- TODO confirm).
# NOTE: this rebinds df7, so re-running the cell drops one more row each time.
df7=df7.iloc[1:,:]
# x and y must be keyword arguments: seaborn >= 0.12 accepts only `data`
# positionally, so the original two-positional call raises TypeError there.
sns.barplot(x=df7.index, y=df7['votes'])
plt.xticks(rotation=95)

Observation: North Indian is the most in-demand cuisine, with the highest number of votes.