In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Download the dataset using [this](https://www.kaggle.com/datasets/himanshupoddar/zomato-bangalore-restaurants?select=zomato.csv) link

In [None]:
# Raw string: the Windows backslashes stay literal instead of being parsed as
# (invalid) escape sequences such as "\C" or "\Z".
# NOTE: this is a machine-specific absolute path - point it at your local copy
# of zomato.csv before running.
df = pd.read_csv(r"A:\Code\GitHub\Zomato\zomato.csv")

In [None]:
# Show the first two rows of the loaded frame.
df.head(2)

In [None]:
# (rows, columns) of the frame as loaded.
df.shape

In [None]:
# Remove the columns this analysis does not use; all other columns are kept.
df = df.drop(
    columns=['url', 'address', 'phone', 'menu_item', 'dish_liked', 'reviews_list']
)
df.head()

In [None]:
# Keep only the first occurrence of each fully identical row, then report
# the resulting (rows, columns).
df = df.drop_duplicates()
df.shape

In [None]:
# Cleaning the rate column: list its distinct raw values before parsing them.

df['rate'].unique()


In [None]:
# Removing "NEW" , "-" and "/5" from Rate Column


def rate(value):
    """Convert a raw rating such as ``'4.1/5'`` to a float.

    Parameters
    ----------
    value : object
        Raw cell value: a string like ``'4.1/5'`` or ``'3.4 /5'``, one of the
        placeholders ``'NEW'`` / ``'-'``, a missing value (``None`` or NaN),
        or a plain number.

    Returns
    -------
    float
        The number before the ``'/'``, or ``np.nan`` when the value is
        missing or a placeholder.

    Raises
    ------
    ValueError
        If the text before ``'/'`` cannot be parsed as a number.
    """
    if value is None:
        return np.nan
    # Strip first so that padded placeholders (' NEW', '- ') are recognised.
    text = str(value).strip()
    if text in ('NEW', '-'):
        return np.nan
    # float() ignores surrounding whitespace and parses 'nan' as NaN, so a
    # NaN input comes back as NaN.
    return float(text.split('/')[0])
    
# Run rate() on every entry and store the result back in the 'rate' column,
# then preview the first converted values.
df['rate'] = df['rate'].apply(rate)
df['rate'].head()

In [None]:
# Fill the missing ratings with the mean of the 'rate' column.
# The result is assigned back rather than using fillna(inplace=True) on
# df['rate']: that form is chained assignment, which warns on pandas 2.x and
# leaves df unchanged when Copy-on-Write is enabled.
df['rate'] = df['rate'].fillna(df['rate'].mean())
# Number of ratings still missing after the fill.
df['rate'].isnull().sum()

In [None]:
# Print the column names, dtypes and non-null counts.
df.info()

In [None]:
# Discard every row that has a missing value in any column.
df = df.dropna()

In [None]:
# Preview the first five rows.
df.head()

In [None]:
# Give the two parenthesised column names shorter labels.
df = df.rename(columns={
    'approx_cost(for two people)': 'Cost2plates',
    'listed_in(type)': 'Type',
})
df.head()

In [None]:
# List the distinct values of the 'location' column.
df['location'].unique()


In [None]:
# Both 'listed_in(city)' and 'location' are present; let's keep only one of them.

In [None]:
# 'location' is kept; the 'listed_in(city)' column is removed.
df = df.drop(columns=['listed_in(city)'])

In [None]:
# List the distinct raw values of 'Cost2plates' before converting them.
df['Cost2plates'].unique()

In [None]:
# Removing , from Cost2Plates Column

In [None]:
def comma(value):
    """Return ``value`` as a float, ignoring any commas it contains.

    ``value`` is converted with ``str()`` first, so plain numbers as well as
    strings such as ``'1,200'`` are accepted.
    """
    digits = str(value).replace(',', '')
    return float(digits)
    
# Run comma() on every entry so 'Cost2plates' holds floats, then list the
# distinct converted values.
df['Cost2plates'] = df['Cost2plates'].apply(comma)
df['Cost2plates'].unique()

In [None]:
# Preview the first five rows.
df.head()

In [None]:
# Cleaning the rest_type column: count the rows per restaurant type.
# value_counts() already sorts from most to least frequent by default.

rest_types = df['rest_type'].value_counts()
rest_types

In [None]:
# Restaurant types that occur fewer than 1000 times.
rest_types_lessthan1000 = rest_types.loc[rest_types < 1000]
rest_types_lessthan1000

In [None]:
# Relabel restaurant types that occur fewer than 1000 times as 'others'

In [None]:
def handle_rest_type(value):
    """Return ``'others'`` for rare restaurant types, else ``value`` unchanged.

    A value is rare when it is one of the index labels of the notebook-level
    Series ``rest_types_lessthan1000``.
    """
    # ``in`` on a Series tests its index labels; the index is named explicitly
    # here to make that visible.
    is_rare = value in rest_types_lessthan1000.index
    return 'others' if is_rare else value
        
# Run handle_rest_type() on every entry of 'rest_type', then show the
# frequencies after relabelling.
df['rest_type'] = df['rest_type'].apply(handle_rest_type)
df['rest_type'].value_counts()

In [None]:
#Cleaning Location Column

In [None]:
# Rows per location (most frequent first, the value_counts default), and the
# subset of locations that occur fewer than 300 times.
location = df['location'].value_counts()
location_lessthan300 = location.loc[location < 300]

In [None]:
def handle_location(value):
    """Return ``'others'`` for rare locations, else ``value`` unchanged.

    A value is rare when it is one of the index labels of the notebook-level
    Series ``location_lessthan300``.
    """
    # ``in`` on a Series tests its index labels; the index is named explicitly
    # here to make that visible.
    is_rare = value in location_lessthan300.index
    return 'others' if is_rare else value
        
# Run handle_location() on every entry of 'location', then show the
# frequencies after relabelling.
df['location'] = df['location'].apply(handle_location)
df['location'].value_counts()

In [None]:
#Cleaning Cuisines Column

In [None]:
# Rows per 'cuisines' value, most frequent first (the value_counts default).
cusines = df['cuisines'].value_counts()
# NOTE(review): the cutoff applied here is 300 although the variable name says
# "lessthan100"; the name is kept because handle_cusines() looks it up.
cusines_lessthan100 = cusines.loc[cusines < 300]

In [None]:
def handle_cusines(value):
    """Return ``'others'`` for rare cuisines values, else ``value`` unchanged.

    A value is rare when it is one of the index labels of the notebook-level
    Series ``cusines_lessthan100``.
    """
    # ``in`` on a Series tests its index labels; the index is named explicitly
    # here to make that visible.
    is_rare = value in cusines_lessthan100.index
    return 'others' if is_rare else value
        
# Run handle_cusines() on every entry of 'cuisines', then show the
# frequencies after relabelling.
df['cuisines'] = df['cuisines'].apply(handle_cusines)
df['cuisines'].value_counts()

In [None]:
# Preview the first five rows.
df.head()

In [None]:
# Visualization

In [None]:
#Count Plot of Various Locations
plt.figure(figsize = (16,10))
# Name the column through x= (as the boxplot cells do): current seaborn
# accepts only `data` positionally, so a bare Series is not read as x.
ax = sns.countplot(x = 'location', data = df)
# Location names are long; rotate the tick labels so they do not overlap.
plt.xticks(rotation=90)

In [None]:
#Visualizing Online Order
plt.figure(figsize = (8,5))
# Name the column through x= (as the boxplot cells do): current seaborn
# accepts only `data` positionally, so a bare Series is not read as x.
ax = sns.countplot(x = 'online_order', data = df)

In [None]:
#Visualizing Book Table
plt.figure(figsize = (8,5))
# Name the column through x= (as the boxplot cells do): current seaborn
# accepts only `data` positionally, so a bare Series is not read as x.
ax = sns.countplot(x = 'book_table', data = df)

In [None]:
# Visualizing Online Order vs Rate:
# one box of 'rate' values per 'online_order' category.
plt.figure(figsize = (6,6))
sns.boxplot(x = 'online_order', y = 'rate', data = df)

In [None]:
# Visualizing Book Table vs Rate:
# one box of 'rate' values per 'book_table' category.
plt.figure(figsize = (6,6))
sns.boxplot(x = 'book_table', y = 'rate', data = df)

In [None]:
#Visualizing Online Order Facility, Location Wise
# Number of rows for each (location, online_order) pair.
df1 = df.groupby(['location','online_order'])['name'].count()
# The counts are still exported to CSV, as before.
df1.to_csv('location_online.csv')
# Turn the group keys back into columns in memory; re-reading the file just
# written is unnecessary and lets the CSV parser reinterpret the labels.
df1 = df1.reset_index()
# aggfunc='sum' is the string form pandas 2.x asks for in place of np.sum.
df1 = pd.pivot_table(df1, values=None, index=['location'], columns=['online_order'], fill_value=0, aggfunc='sum')
df1

In [None]:
# Bar chart of the pivoted counts in df1: one group of bars per index row.
df1.plot(kind = 'bar', figsize = (15,8))


In [None]:
# Visualizing Book Table Facility, Location Wise

In [None]:
# Number of rows for each (location, book_table) pair.
df2 = df.groupby(['location','book_table'])['name'].count()
# The counts are still exported to CSV, as before.
df2.to_csv('location_booktable.csv')
# Turn the group keys back into columns in memory; re-reading the file just
# written is unnecessary and lets the CSV parser reinterpret the labels.
df2 = df2.reset_index()
# aggfunc='sum' is the string form pandas 2.x asks for in place of np.sum.
df2 = pd.pivot_table(df2, values=None, index=['location'], columns=['book_table'], fill_value=0, aggfunc='sum')
df2

In [None]:
# Visualizing Types of Restaurants vs Rate:
# one box of 'rate' values per 'Type' category.
# NOTE(review): newer seaborn releases reportedly warn when `palette` is given
# without `hue` - confirm against the installed version.
plt.figure(figsize = (14, 8))
sns.boxplot(x = 'Type', y = 'rate', data = df, palette = 'inferno')

In [None]:
#Grouping Types of Restaurants, location wise
# Number of rows for each (location, Type) pair.
df3 = df.groupby(['location','Type'])['name'].count()
# The counts are still exported to CSV, as before.
df3.to_csv('location_Type.csv')
# Turn the group keys back into columns in memory; re-reading the file just
# written is unnecessary and lets the CSV parser reinterpret the labels.
df3 = df3.reset_index()
# aggfunc='sum' is the string form pandas 2.x asks for in place of np.sum.
df3 = pd.pivot_table(df3, values=None, index=['location'], columns=['Type'], fill_value=0, aggfunc='sum')
df3

In [None]:
# Bar chart of the pivoted counts in df3: one group of bars per index row.
df3.plot(kind = 'bar', figsize = (36,8))

In [None]:
#No. of Votes, Location Wise
# The original called df4.drop_duplicates() without keeping the result, so no
# rows were ever removed. The de-duplication is now applied for real.
# 'name' is part of the key so that different restaurants that happen to have
# the same vote count in one location are not collapsed into one row
# (assumes repeated name/location/votes rows are the same restaurant listed
# more than once - TODO confirm). df4 keeps its two original columns.
df4 = df[['name', 'location', 'votes']].drop_duplicates()[['location', 'votes']]
# Total votes per location, largest first.
df5 = df4.groupby(['location'])['votes'].sum()
df5 = df5.to_frame()
df5 = df5.sort_values('votes', ascending=False)
df5.head()

In [None]:
# Bar chart of total votes per location (df5 is indexed by location).
plt.figure(figsize = (15,8))
# x and y must be passed by keyword: current seaborn accepts only `data`
# positionally, so barplot(x_values, y_values) raises a TypeError.
sns.barplot(x = df5.index, y = df5['votes'])
plt.xticks(rotation = 90 )

# INFERENCE
BTM has more restaurants than any other location.

Anyone who wants to open a restaurant should target Old Airport Road and St. Marks Road, because those areas have the fewest restaurants.

Most of the restaurants offer an online order facility.

Most of the restaurants do not offer a book table feature.

Restaurants that offer the online order and book table facilities have higher ratings than those that do not; restaurants that want better ratings should provide both facilities.

People tend to give higher ratings to Drink & Nightlife and Pubs and Bars type restaurants.

Pubs and Bars type restaurants will make more money in Shivajinagar.

People love to vote for the restaurants located in Koramangala.

