#  **Zomato Restaurants Bangalore**
#  **Exploratory Data Analysis**

This is my first Kaggle Notebook EDA

Contents

**1. Data Collection :**
   * Loading the data from Kaggle Datasets

**2. Data Wrangling :**
   * Removing Columns.
   * Removing Duplicates.
   * Renaming the columns.
   * Remove the Null values from the dataset
   * Altering individual columns
   * Some Transformations

**3. Data Visualization**
   * Popular Restaurants in Bangalore
   * Restaurants delivering Online or not
   * Restaurants allowing table booking or not
   * Table booking vs Rating
   * Best Location
   * Relation between Location and Rating
   * Restaurant Type
   * Types of Services

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np


In [None]:
# Load the raw Zomato Bangalore dataset from the Kaggle input directory.
raw_csv_path = '../input/zomato-bangalore-restaurants/zomato.csv'
df = pd.read_csv(raw_csv_path)

# Preview the first three rows.
df.head(3)

In [None]:
# List the column labels of the raw frame.
df.columns

In [None]:
# Print each column's dtype and non-null count for the raw frame.
df.info()

In [None]:
# Count the missing (NaN) values in each column of the raw frame.
df.isnull().sum()

In [None]:
# (rows, columns) of the raw frame.
df.shape

### **Removing Columns**

In [None]:
# Build the working frame without the columns this analysis does not use.
unused_columns = ['url', 'phone', 'dish_liked']
zomato_n = df.drop(columns=unused_columns)
zomato_n.head()

In [None]:
# Count the missing values per column in the working frame.
zomato_n.isnull().sum()

### **Removing Duplicates**

In [None]:
# Report how many fully duplicated rows exist, then drop them.
# The count is printed explicitly: a bare expression in the middle of a cell
# is evaluated and discarded, so it would otherwise never be shown.
print('Duplicate rows:', zomato_n.duplicated().sum())
zomato_n = zomato_n.drop_duplicates()
zomato_n.head()

In [None]:
# (rows, columns) of the working frame at this point.
zomato_n.shape

### **Renaming Columns**

In [None]:
# Replace the verbose column labels with shorter names.
column_aliases = {
    'rate': 'rating',
    'listed_in(type)': 'type',
    'listed_in(city)': 'city',
    'rest_type': 'restaur_type',
    'approx_cost(for two people)': 'cost',
}
zomato_n = zomato_n.rename(columns=column_aliases)
zomato_n.columns

In [None]:
# Preview the first two rows of the working frame.
zomato_n.head(2)

### **Removing ',' from Cost Column**

In [None]:
# Strip the ',' separators from the cost text and store the column as float.
cost_as_text = zomato_n['cost'].astype(str)
cost_no_commas = cost_as_text.str.replace(',', '', regex=False)
zomato_n['cost'] = cost_no_commas.astype(float)

In [None]:
# Print each column's dtype and non-null count for the working frame.
zomato_n.info()

### **Removing 'Null Values' from the dataset**

In [None]:
# Show the per-column missing-value counts, then drop every row that has at
# least one missing value.
# The counts are printed explicitly: a bare expression in the middle of a cell
# is evaluated and discarded, so it would otherwise never be shown.
print(zomato_n.isnull().sum())
zomato_n = zomato_n.dropna(how='any')
zomato_n.info()

### **Altering Rating Column**

In [None]:
# Remove the '/5' suffix and turn 'NEW' into the '-' placeholder.
rating_text = zomato_n['rating'].replace('/5', '', regex=True)
rating_text = rating_text.replace('NEW', '-', regex=True)
zomato_n['rating'] = rating_text

# Keep only rows whose rating is not the '-' placeholder and renumber the index.
has_rating = zomato_n['rating'] != '-'
zomato_n = zomato_n[has_rating].reset_index(drop=True)

# The remaining rating values are converted to float.
zomato_n['rating'] = zomato_n['rating'].astype(float)

In [None]:
# List the distinct values in the rating column.
zomato_n['rating'].unique()

## **Some more transformations**

In [None]:
# Encode the Yes/No service columns as 1/0 integers.
yes_no_to_flag = {"Yes": 1, "No": 0}
for flag_column in ('online_order', 'book_table'):
    zomato_n[flag_column] = zomato_n[flag_column].map(yes_no_to_flag).astype(int)
zomato_n.head()

# Dataset Details 

In [None]:
# Print each column's dtype and non-null count for the working frame.
zomato_n.info()

In [None]:
# Summary statistics with describe()'s default column selection.
zomato_n.describe()

In [None]:
# Summary statistics restricted to the object-dtype columns.
zomato_n.describe(include = object)

## **Correlation**

In [None]:
# Correlation heatmap of the numeric columns.
plt.figure(figsize=(12, 6))

# Select the numeric columns explicitly: DataFrame.corr() raises on object
# columns in pandas >= 2.0, where numeric_only no longer defaults to True.
numeric_corr = zomato_n.select_dtypes(include='number').corr()

# `linewidths` is heatmap's documented keyword for the cell separator width.
sns.heatmap(numeric_corr, linewidths=0.3, annot=True, cmap='RdGy_r')
plt.yticks(rotation=0)
plt.show()

# **Data Visualization**

## **Popular Restaurants in Bangalore**

In [None]:
# Horizontal bar chart of the ten restaurant names with the most rows (outlets).
plt.figure(figsize=(12, 6))
sns.set_style('darkgrid')
top_names = zomato_n['name'].value_counts().head(10)

sns.barplot(x=top_names, y=top_names.index, palette='mako')
plt.title('Top 10 restaurants in Bangalore')
plt.xlabel('Number of outlets')
plt.ylabel('Name of Restaurants')
# Render explicitly, like the other plot cells, so the cell output is the
# figure rather than the Text object returned by plt.ylabel().
plt.show()

**As per our observation, the top three restaurants in Bangalore are as follows:**
* Cafe Coffee Day
* Onesta
* Empire Restaurant




## **Restaurants delivering online or not**

In [None]:
# Count outlets by the online_order flag (mapped earlier to 1 = Yes, 0 = No).
plt.figure(figsize=(10, 6))
# Pass the column as `x=`: seaborn >= 0.12 makes every argument after `data`
# keyword-only, so a positional Series is no longer interpreted as x.
sns.countplot(x=zomato_n['online_order'], palette='dark:salmon_r')

plt.title('Restraunts allowing online orders in Bangalore')
plt.ylabel('Number of outlets')
plt.show()

More than 25k outlets accept online orders, which suggests that people in Bangalore often prefer ordering online rather than visiting a restaurant.


## **Restaurants allowing advanced table booking or not**

In [None]:
# Count outlets by the book_table flag (mapped earlier to 1 = Yes, 0 = No).
plt.figure(figsize=(10, 6))
# Pass the column as `x=`: seaborn >= 0.12 makes every argument after `data`
# keyword-only, so a positional Series is no longer interpreted as x.
sns.countplot(x=zomato_n['book_table'], palette='YlOrBr')

# The title describes table booking; the previous text was copied from the
# online-order plot.
plt.title('Restaurants allowing table booking in Bangalore')
plt.ylabel('Number of outlets')
plt.xlabel('')
plt.show()

A large number of restaurants do not allow table booking.


##  **Table booking allowed vs rating**

In [None]:
sns.set_style('white')

# Cross-tabulate rating against the table-booking flag and draw the counts
# as stacked bars, one bar per rating value.
booking_by_rating = pd.crosstab(zomato_n['rating'], zomato_n['book_table'])
booking_by_rating.plot.bar(stacked=True)

plt.title('Plot of Ratings vs permission of table booking')
plt.ylabel('')
plt.xlabel('Ratings')
plt.show()


##  **Best Locations**

In [None]:
# Horizontal bar chart of the ten locations with the most rows (outlets).
plt.figure(figsize=(12, 6))
sns.set_style('darkgrid')
top_locations = zomato_n['location'].value_counts().head(10)

sns.barplot(x=top_locations, y=top_locations.index, palette='viridis')
plt.title('Top 10 restaurant locations in Bangalore')
plt.xlabel('Number of outlets')
plt.ylabel('Location')
# Call show(); the bare attribute `plt.show` only references the function.
plt.show()

BTM has the most restaurant outlets, making it the most favoured location in Bangalore, followed by Koramangala 5th Block, HSR and so on.

## **Location and Rating**

In [None]:
# Bar chart of rating per location (seaborn's barplot aggregates the y values
# for each x category, using the mean by default).
plt.figure(figsize=(20, 10))
sns.set_style('whitegrid')

sns.barplot(x='location', y='rating', data=zomato_n, palette='magma')

plt.title('Plot of Location vs Rating')
# Axis labels follow the plotted variables: locations on x, ratings on y.
plt.xlabel('Locations')
plt.xticks(rotation=90)  # location names are long; rotate so they do not overlap
plt.ylabel('Ratings')
plt.show()

## **Restaurant Type**

In [None]:
# Horizontal bar chart of the fifteen most frequent restaurant types.
plt.figure(figsize=(12, 6))

top_rest_types = zomato_n['restaur_type'].value_counts().head(15)
sns.barplot(x=top_rest_types, y=top_rest_types.index, palette='icefire')
plt.xlabel('')
plt.show()

## **Services "Type"**

In [None]:
# Vertical bar chart of how often each service type occurs (at most 15 shown).
plt.figure(figsize=(12, 6))

service_counts = zomato_n['type'].value_counts().head(15)
sns.barplot(x=service_counts.index, y=service_counts, palette="magma")

plt.title('Bar Plot of Services Type')
plt.ylabel(' ')
plt.xlabel(' ')
plt.show()

## **Overview: Online Order, Table Booking, Rating and Cost**

In [None]:
# 2x2 overview: counts of the two service flags on the top row, distributions
# of rating and cost on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))

# Columns are passed as `x=`: seaborn >= 0.12 makes every argument after
# `data` keyword-only, so a positional Series is no longer interpreted as x.
sns.countplot(x=zomato_n['online_order'], ax=axes[0, 0])
sns.countplot(x=zomato_n['book_table'], ax=axes[0, 1])

# histplot(kde=True, stat='density') is seaborn's replacement for the
# deprecated distplot (density-scaled histogram with a KDE curve on top).
sns.histplot(x=zomato_n['rating'], kde=True, stat='density', ax=axes[1, 0])
sns.histplot(x=zomato_n['cost'], kde=True, stat='density', ax=axes[1, 1])

plt.tight_layout()
plt.show()